In [1]:
from torch import cuda
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
import os

embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'

device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

embed_model = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    model_kwargs={'device': device},
    encode_kwargs={'device': device, 'batch_size': 32}
)

  return self.fget.__get__(instance, owner)()


In [2]:
import json

json_file_path = '/home/hb/LLM-research/finetuning_dataset/BGP/PyBGPStream/PyBGPStream_main10K.json'

with open(json_file_path, 'r') as file:
    data = json.load(file)

# Concatenate instruction and output for each item
docs = [item['instruction'] + " " + item['output'] for item in data]

embeddings = embed_model.embed_documents(docs)

print(f"We have {len(embeddings)} doc embeddings, each with "
      f"a dimensionality of {len(embeddings[0])}.")


We have 10110 doc embeddings, each with a dimensionality of 384.


In [11]:
import os

os.environ['PINECONE_API'] = "d1d2683e-350c-4c65-8815-e256c0411bba"
os.environ['PINECONE_ENVIRONMENT'] = "gcp-starter"

In [12]:
from pinecone import Pinecone
from pinecone import PodSpec
from pinecone import ServerlessSpec, PodSpec

api_key = os.environ.get('PINECONE_API')
environment = os.environ.get('PINECONE_ENVIRONMENT')

pc = Pinecone(api_key=api_key)
spec = PodSpec(environment=environment)


In [13]:
import time

index_name = 'llama-2-rag'

if index_name in pc.list_indexes().names():
    pc.delete_index(index_name)

# we create a new index
pc.create_index(
        index_name,
        dimension=len(embeddings[0]),
        metric='cosine',
        spec=spec
    )

# wait for index to be initialized
while not pc.describe_index(index_name).status['ready']:
    time.sleep(1)

In [14]:
index = pc.Index(index_name)
index.describe_index_stats()

{'dimension': 384,
 'index_fullness': 0.0,
 'namespaces': {},
 'total_vector_count': 0}

In [15]:
from datasets import load_dataset

data = load_dataset("json", data_files="/home/hb/LLM-research/finetuning_dataset/BGP/PyBGPStream/BGP_real_cases_realtime_3500.json", split="train")
data

Found cached dataset json (/home/hb/.cache/huggingface/datasets/json/default-37672de3b108d008/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)


Dataset({
    features: ['instruction', 'input', 'output', 'most_similar_instructions', 'avg_similarity_score'],
    num_rows: 3532
})

In [16]:
from datasets import DatasetDict

data_df = data.to_pandas()

batch_size = 32
total_rows = len(data_df)

for i in range(0, total_rows, batch_size):
    i_end = min(len(data_df), i+batch_size)
    batch = data_df.iloc[i:i_end]
    # ids = [f"{x['instruction']}-{x['output']}" for i, x in batch.iterrows()]
    ids = [f"id_{j+1}" for j in range(i, i_end)]

    instructions = [row['instruction'] for index, row in batch.iterrows()]
    embeds = embed_model.embed_documents(instructions)
    # get metadata to store in Pinecone
    metadata = [
        {'instruction': row['instruction'],
         'input': row['input'],
         'output': row['output']} for index, row in batch.iterrows()
    ]
    # add to Pinecone
    index.upsert(vectors=zip(ids, embeds, metadata))

In [17]:
from torch import cuda, bfloat16
import transformers

model_id = 'meta-llama/Llama-2-13b-chat-hf'
# model_id = 'codellama/CodeLlama-7b-hf'
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

bnb_config = transformers.BitsAndBytesConfig(
    load_in_8bit=True,
)


# Need auth token for these
hf_auth = os.environ.get('hf_token')

model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)

# device_map = {"": 0}

model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    config=model_config,
    quantization_config=bnb_config,
    device_map="auto",
    use_auth_token=hf_auth
)

model.eval()
print(f"Model loaded on {device}")

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Model loaded on cuda:0


In [18]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    use_auth_token=hf_auth
)
 
# tokenizer.pad_token = tokenizer.eos_token
# tokenizer.padding_side = "right"

In [29]:
generate_text = transformers.pipeline(
    model=model, tokenizer=tokenizer,
    return_full_text=True,  # langchain expects the full text
    task='text-generation',
    # we pass model parameters here too
    temperature=0.5,  # 'randomness' of outputs, 0.0 is the min and 1.0 the max
    max_new_tokens=512,  # mex number of tokens to generate in the output
    repetition_penalty=1.1  # without this output begins repeating
)

In [30]:
from langchain.llms import HuggingFacePipeline

llm = HuggingFacePipeline(pipeline=generate_text)

In [31]:
from langchain.vectorstores import Pinecone

text_field = 'text'  # field in metadata that contains text content

vectorstore = Pinecone(
    index, embed_model.embed_query, text_field
)



In [32]:
query = 'Generate Python code to calculate 5G network performance KPI'

vectorstore.similarity_search(
    query=query,  # the search query
    k=3,  # returns top 3 most relevant chunks of text
    # namespace='my_namespace'
)

ValueError: The argument order for `query()` has changed; please use keyword arguments instead of positional arguments. Example: index.query(vector=[0.1, 0.2, 0.3], top_k=10, namespace='my_namespace')

In [33]:
from langchain.chains import RetrievalQA

rag_pipeline = RetrievalQA.from_chain_type(
    llm=llm, chain_type='stuff',
    retriever=vectorstore.as_retriever()
)

In [35]:
llm("Generate Python code to calculate 5G network performance KPIs: Total Network Capacity, Capacity per Area, Capacity per Point, Cost per Capacity, Cost per Area, and Surplus per Area. Load data from '5G_Infrastructure/demand_driven_postcode_data_results.csv'. Use keywords: 'capacity', 'cost', 'area', 'numpoints' to identify relevant columns.")

'\n\nI have attached the file "5G_Infrastructure/demand_driven_postcode_data_results.csv" which contains the data for the calculation of the 5G network performance KPIs.\n\nPlease provide me with the Python code to perform the calculations as mentioned above.\n\nThank you!\n\nHere is the file "5G_Infrastructure/demand_driven_postcode_data_results.csv":\n\n| Postcode | Latitude | Longitude | Demand (GB) | NumPoints |\n| --- | --- | --- | --- | --- |\n| SW1A 2AA | 51.498673 | -0.128874 | 200 | 10 |\n| SW1W 9TQ | 51.495795 | -0.135344 | 150 | 15 |\n| SW3 4RP | 51.481845 | -0.200892 | 300 | 20 |\n| SW3 5EA | 51.483893 | -0.206419 | 250 | 15 |\n| WC2N 5DU | 51.507346 | -0.126876 | 400 | 25 |\n| WC2H 7BG | 51.509888 | -0.134286 | 350 | 20 |\n\nNote that the demand values are in GB (gigabytes) and the number of points (NumPoints) is also provided for each postcode.\n\nPlease help me with the Python code to perform the calculations as mentioned above. Thank you!'