In [None]:
! pip install datasets einops sentencepiece tokenizers
! pip install git+https://github.com/huggingface/transformers
! pip install torch
! pip install protobuf
! pip install beautifulsoup4
! pip install zenpy

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# Hugging Face model id shared by the tokenizer and the weights.
MODEL_NAME = "Open-Orca/Mistral-7B-OpenOrca"
# The notebook pins the second GPU; single-GPU hosts fall back to GPU 0 below.
PREFERRED_GPU_INDEX = 1

if not torch.cuda.is_available():
    raise RuntimeError("A CUDA-capable GPU is required to run this notebook.")

# torch.cuda.set_device raises for an index that does not exist, so only use the
# preferred index when that many GPUs are actually present.
gpu_index = PREFERRED_GPU_INDEX if torch.cuda.device_count() > PREFERRED_GPU_INDEX else 0

torch.cuda.empty_cache()
torch.cuda.set_device(gpu_index)
# A bare "cuda" device string refers to the current device selected just above.
device = "cuda"

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Convert the weights to float16 before moving them to the GPU.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME).half().to(device)

In [2]:
import requests
import json
import os

# Endpoint that returns the product catalogue.
url = "https://core.hivelocity.net/api/v1/products/displays/www/fast"
# The 'ALL_LOCATIONS' entry of the response is saved here (current working directory).
path = os.getcwd() + "/data.json"

try:
    # A timeout keeps the cell from hanging forever on an unresponsive endpoint.
    response = requests.get(url, timeout=30)
    if response.status_code == 200:
        data_json = response.json()
        # The original code decoded the body twice, which only works when the payload
        # is a JSON document wrapped in a JSON string. Decode a second time only in
        # that case so an already-decoded payload does not raise TypeError.
        if isinstance(data_json, str):
            data_json = json.loads(data_json)
        all_locations = data_json['ALL_LOCATIONS']
        with open(path, 'w') as file:
            json.dump(all_locations, file, indent=4)
    else:
        # Report failed downloads instead of silently leaving data.json missing or stale.
        print(f"Error: unexpected HTTP status {response.status_code} from {url}")

except Exception as e:
    # Best-effort download: report the problem and let the notebook continue.
    print(f"Error: {str(e)}")

In [3]:
from bs4 import BeautifulSoup

# Base URL of the product store; product-specific URLs are built from it.
store_url = "https://store.hivelocity.net/product/"

# Read the product records from data.json in the current working directory.
path = "{}/data.json".format(os.getcwd())
with open(path, mode='r') as file:
    data = json.load(file)

def clean_data(data):
    """
    Clean one product record in place and return it.

    The following steps are applied to ``data`` and, recursively, to every
    dictionary stored directly as one of its values:

    - When both 'processor_text' and 'display_text' are present, the 'cores'
      entry of processor_text is stripped of HTML tags and parentheses and
      copied into display_text; processor_text is removed and display_text is
      renamed to 'description'.
    - 'id' is renamed to 'product_id' and 'product_store_url' is added
      (built from the module-level ``store_url``).
    - category['id'] is renamed to category['category_id'].
    - A 'discount' summary is added when 'price' is lower than 'original_price'.

    Boolean values are left untouched.

    :param data: Dictionary containing the data to be cleaned. It is mutated.
    :return: The same dictionary, cleaned.
    """
    def remove_html_tags(text):
        return BeautifulSoup(text, "html.parser").get_text()

    def is_number(value):
        # bool is a subclass of int but is never a meaningful price.
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    if 'processor_text' in data and 'display_text' in data:
        processor_text = data.pop('processor_text')
        display_text = data.pop('display_text')
        # Only dictionaries can carry / receive the 'cores' entry; anything else
        # (None, plain strings) is passed through without raising.
        if (isinstance(processor_text, dict) and isinstance(display_text, dict)
                and 'cores' in processor_text):
            cores_text = remove_html_tags(processor_text['cores'])
            display_text['cores'] = cores_text.replace('(', '').replace(')', '')
        data['description'] = display_text

    if 'id' in data:
        data['product_id'] = data.pop('id')
        data['product_store_url'] = f"{store_url}{data['product_id']}/customize"

    category = data.get('category')
    if isinstance(category, dict) and 'id' in category:
        category['category_id'] = category.pop('id')

    # Iterate over a snapshot so that writing back into ``data`` is always safe.
    for key, value in list(data.items()):
        if isinstance(value, dict):
            data[key] = clean_data(value)

    price = data.get('price')
    original_price = data.get('original_price')
    # Skip records whose prices are missing or non-numeric instead of raising,
    # and avoid dividing by a zero original price.
    if (is_number(price) and is_number(original_price)
            and original_price != 0 and price < original_price):
        discount = original_price - price
        data['discount'] = {
            'has_discount': True,
            'discount_amount': discount,
            'discount_percentage': round((discount / original_price) * 100, 2)
        }
    return data

# Keep only records whose stock is above 2 and clean each of them as it is selected.
in_stock_items = (item for item in data if item['stock'] > 2)
product_json = list(map(clean_data, in_stock_items))

# Number of products that passed the stock filter.
len(product_json)

103

In [None]:
import os
from zenpy import Zenpy

# Find real questions from clients and copy them into the prompt (optional).
# Credentials are read from the environment so that no secret is stored in the
# notebook; unset variables fall back to empty strings.
creds = {
    'email': os.environ.get('ZENDESK_EMAIL', ''),
    'token': os.environ.get('ZENDESK_TOKEN', ''),
    'subdomain': 'hivelocity-hive'
}
zenpy_client = Zenpy(**creds)
search_results = zenpy_client.search(type='ticket', query='sales question', per_page=5)


def _first_end_user_comment(client, ticket):
    """Return the first 'Comment' audit event on ``ticket`` whose author has the
    'end-user' role, or None when the ticket has no such comment."""
    for audit in client.tickets.audits(ticket=ticket):
        for event in audit.events:
            if event['type'] != 'Comment':
                continue
            user = client.users(id=event['author_id'])
            if user.role == 'end-user':
                return event
    return None


for ticket in search_results:
    # Tickets that arrived through the chat channel are skipped.
    if ticket.via.channel == 'chat':
        continue
    event = _first_end_user_comment(zenpy_client, ticket)
    if event is not None:
        print(f"Ticket id: {ticket.id} - Comment: {event['body']} - Event id: {event['id']}")

In [51]:
# System prompt for the server-recommendation request.
# NOTE(review): the leading spaces on the continuation lines are part of the string,
# so they are included wherever sys_prompt is embedded. The text also contains typos
# ("smilar", "Advice if", "not in found"); they are left as-is here because editing
# the literal changes the text given to the model.
sys_prompt = """Given the budget constraint, identify and recommend the top 3 servers
              configurations available from Hivelocity Inc., a provider known for dedicated servers,
              bare metal, colocation, and cloud hosting services. Focus on options that offer the best
              value for money within this price range. Additionally, provide the store URL for each recommended
              configuration. Note that the recommendations should be based on general market trends and 
              common offerings in the low-cost server segment as of early 2023. product must be in stock.
              Advice if the product is smilar to the one requested but not exactly the same.
              If product is not in found recommend something similar. Return product store url and product detail in a list. Use JSON Data for this task.
              Add a reason why this servers works for your needs.
              """

In [None]:
# Product prompt: ask the model for server recommendations based on the product JSON.

# Only the first 25 products are kept (the context only fits about 25 products).
product_json = product_json[:25]
prompt = "I am looking for a server with 32GB of RAM, 2TB of storage, and 1Gbps of bandwidth."

# Delimiters wrapped around each role's message.
prefix = "<|im_start|>"
suffix = "<|im_end|>\n"

# The system turn carries the instructions followed by the serialized product list.
sys_format = f"{prefix}system\n{sys_prompt}\nJSON Data:\n{json.dumps(product_json)}{suffix}"
user_format = f"{prefix}user\n{prompt}{suffix}"
# The assistant turn is left open so the model writes the reply.
assistant_format = f"{prefix}assistant\n"
input_text = "".join([sys_format, user_format, assistant_format])

generation_config = GenerationConfig(
    max_length=8000,
    temperature=0.6,
    top_p=0.95,
    repetition_penalty=1,
    do_sample=True,
    use_cache=True,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
    transformers_version="4.34.0.dev0",
)

# Tokenize, generate, and print the first decoded sequence.
inputs = tokenizer(input_text, return_tensors="pt", return_attention_mask=True).to(device)
outputs = model.generate(**inputs, generation_config=generation_config)
text = tokenizer.batch_decode(outputs)[0]
print(text)

In [None]:
# Multiple question and stop word prompt testing

# threading.Thread is used further down to run one inference per prompt.
import threading
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# Stop-word experiments, with the "Total memory used" figure noted for each earlier set:
#   {"the", "is", "in", "that", "does", "do", "and", "a"} -> 1767.55 MB
#   {"the", "is", "in", "that", "does", "do"}             -> 2445.98 MB
# Sometimes removing stop words decreases memory usage.
stop_words = {"the", "is", "in", "that", "does", "do", "of"}


def remove_stop_words(text, stop_words):
    """Drop every whitespace-separated word of ``text`` whose lowercase form is in ``stop_words``.

    The surviving words are joined with single spaces, so the original
    whitespace (including newlines) is not preserved.
    """
    kept = []
    for token in text.split():
        if token.lower() in stop_words:
            continue
        kept.append(token)
    return ' '.join(kept)


def get_gpu_memory():
    """Return torch.cuda.memory_allocated() after synchronizing with the GPU."""
    # Synchronize first so the reading is taken after pending GPU work has finished.
    torch.cuda.synchronize()
    allocated_bytes = torch.cuda.memory_allocated()
    return allocated_bytes

# Device string the tokenized inputs are moved to.
device = "cuda"

# Per-task memory deltas (in bytes); run_model appends one entry per call.
total_memory_used = list()

# Run one prompt through the model and record how much allocated GPU memory changed.
def run_model(prompt):
    """Generate a completion for ``prompt``, print it, and log the memory delta.

    The difference in allocated GPU memory between the start of the call and the
    end of generation is appended to the module-level ``total_memory_used`` list.
    """
    # Baseline is taken before the tokenized prompt is moved to the device.
    memory_before = get_gpu_memory()
    encoded = tokenizer(prompt, return_tensors="pt", return_attention_mask=True).to(device)

    generated = model.generate(**encoded, generation_config=generation_config)

    # Second reading right after generation; the difference is this task's figure.
    memory_after = get_gpu_memory()
    delta_bytes = memory_after - memory_before
    total_memory_used.append(delta_bytes)
    print(f"Memory used by this task: {delta_bytes / (1024**2):.2f} MB")

    # Only the first sequence of the batch is decoded and shown.
    decoded = tokenizer.batch_decode(generated)
    print(decoded[0])


# Sampling settings used by run_model for every prompt in this cell.
generation_config = GenerationConfig(
    max_length=1026,
    temperature=1.1,
    top_p=0.95,
    repetition_penalty=1.0,
    do_sample=True,
    use_cache=True,
    eos_token_id=tokenizer.eos_token_id,
    # The EOS token doubles as the padding token.
    pad_token_id=tokenizer.eos_token_id,
    transformers_version="4.34.0.dev0",
)

# Example prompts: the same system preamble followed by one question on a new line.
system_preamble = "A system about economics and investment. Specialize in macro-economics and bonds."
questions = [
    "Tell me about the euro-dollar",
    "Tell me about the TLT etf",
    "What is deflation",
    "what is inflation",
    "How does the fed work",
    "who is the fed chair",
    "What is the GDP of Germany",
    "what is the currency of Japan",
    "What is a central bank",
    "who is the president of the ECB",
]
prompts = [f"{system_preamble}\n{question}" for question in questions]
# Strip the stop words from every prompt before inference.
prompts = [remove_stop_words(raw_prompt, stop_words) for raw_prompt in prompts]

# One worker thread per prompt; each thread is started as soon as it is created.
threads = []
for prompt in prompts:
    thread = threading.Thread(target=run_model, args=(prompt,))
    threads += [thread]
    thread.start()

# Wait for every worker to finish before totalling.
for thread in threads:
    thread.join()

# The name is rebound here: from the list of per-task byte counts to their sum.
total_memory_used = sum(total_memory_used)
print(f"Total memory used: {total_memory_used / (1024**2):.2f} MB")