In [3]:
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModel
from langchain import PromptTemplate, HuggingFaceHub, LLMChain
from datasets import Dataset
from tqdm import tqdm
from openai.error import RateLimitError
import time
from langchain.llms import OpenAI
import re


In [4]:
df = pd.read_csv("pt_question_answers.csv")

In [5]:
df.shape

(14593, 11)

In [6]:
df[["pt_title", "pt_body", "pt_answer"]]

Unnamed: 0,pt_title,pt_body,pt_answer
0,Extracting the top-k value-indices from a 1-D ...,<p>Given a 1-D tensor in Torch (<code>torch.Te...,"<p>As of pull request <a href=""https://github...."
1,How to Display Custom Images in Tensorboard (e...,"<p>The <a href=""https://github.com/tensorflow/...",<p>It is quite easy to do if you have the imag...
2,Python wheels: cp27mu not supported,"<p>I'm trying to install pytorch (<a href=""htt...","<p>Yes, that is possible. Just create the obje..."
3,Loading Torch7 trained models (.t7) in PyTorch,<p>I am using Torch7 library for implementing ...,<p><code>view()</code> reshapes the tensor wit...
4,PyTorch: How to use DataLoaders for custom Dat...,<p>How to make use of the <code>torch.utils.da...,<p>While you will not get as detailed informat...
...,...,...,...
14588,How to disable Neptune callback in transformer...,"<p>After installing <a href=""https://docs.nept...",<p>To disable Neptune callback in transformers...
14589,BGR to RGB for CUB_200 images by Image.split(),<p>I am creating a PyTorch dataset and dataloa...,<p>I would strongly recommend you use skimage....
14590,Neural Networks Extending Learning Domain,<p>I have a simple function <strong>f</strong>...,<p>What you want is called extrapolation (as o...
14591,How do I multiply tensors like this?,<p>I am working on a project where I need to m...,<p>You should familiarize yourself with the te...


In [7]:
df["text"] = df["pt_title"] + "\n" + df["pt_body"] + "\n" + df["pt_answer"]

In [8]:
df = df[["text"]]

In [9]:
df

Unnamed: 0,text
0,Extracting the top-k value-indices from a 1-D ...
1,How to Display Custom Images in Tensorboard (e...
2,Python wheels: cp27mu not supported\n<p>I'm tr...
3,Loading Torch7 trained models (.t7) in PyTorch...
4,PyTorch: How to use DataLoaders for custom Dat...
...,...
14588,How to disable Neptune callback in transformer...
14589,BGR to RGB for CUB_200 images by Image.split()...
14590,Neural Networks Extending Learning Domain\n<p>...
14591,How do I multiply tensors like this?\n<p>I am ...


In [10]:
CLEANR = re.compile('<.*?>') 

def cleanhtml(raw_html):
  cleantext = re.sub(CLEANR, '', raw_html)
  return cleantext

In [11]:
df["text"] = df["text"].apply(lambda x: cleanhtml(x))

In [12]:
df["text"] = df["text"].str.lower()

In [13]:
df

Unnamed: 0,text
0,extracting the top-k value-indices from a 1-d ...
1,how to display custom images in tensorboard (e...
2,python wheels: cp27mu not supported\ni'm tryin...
3,loading torch7 trained models (.t7) in pytorch...
4,pytorch: how to use dataloaders for custom dat...
...,...
14588,how to disable neptune callback in transformer...
14589,bgr to rgb for cub_200 images by image.split()...
14590,neural networks extending learning domain\ni h...
14591,how do i multiply tensors like this?\ni am wor...


In [14]:
model_ckpt = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
model = AutoModel.from_pretrained(model_ckpt)

In [15]:
device = torch.device("cuda")
model.to(device)

MPNetModel(
  (embeddings): MPNetEmbeddings(
    (word_embeddings): Embedding(30527, 768, padding_idx=1)
    (position_embeddings): Embedding(514, 768, padding_idx=1)
    (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): MPNetEncoder(
    (layer): ModuleList(
      (0): MPNetLayer(
        (attention): MPNetAttention(
          (attn): MPNetSelfAttention(
            (q): Linear(in_features=768, out_features=768, bias=True)
            (k): Linear(in_features=768, out_features=768, bias=True)
            (v): Linear(in_features=768, out_features=768, bias=True)
            (o): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (intermediate): MPNetIntermediate(
          (dense): Linear(in_features

In [16]:
def cls_pooling(model_output):
    return model_output.last_hidden_state[:, 0]

def get_embeddings(text_list):
    encoded_input = tokenizer(
        text_list, padding=True, truncation=True, return_tensors="pt"
    )
    encoded_input = {k: v.to(device) for k, v in encoded_input.items()}
    model_output = model(**encoded_input)
    return cls_pooling(model_output)

In [17]:
dataset = Dataset.from_pandas(df)

In [18]:
embedding = get_embeddings(dataset["text"][0])
embedding.shape

torch.Size([1, 768])

In [171]:
embeddings_dataset = dataset.map(
    lambda x: {"embeddings": get_embeddings(x["text"]).detach().cpu().numpy()[0]}
)

  0%|          | 0/14593 [00:00<?, ?ex/s]

In [172]:
embeddings_dataset.add_faiss_index(column="embeddings")

  0%|          | 0/15 [00:00<?, ?it/s]

Dataset({
    features: ['text', 'embeddings'],
    num_rows: 14593
})

In [173]:
def get_context(question, truncate_length=512, k=5):

    question_embedding = get_embeddings([question]).cpu().detach().numpy()

    scores, samples = embeddings_dataset.get_nearest_examples(
        "embeddings", question_embedding, k=k
    )

    samples_df = pd.DataFrame.from_dict(samples)
    samples_df["scores"] = scores
    samples_df.sort_values("scores", ascending=False, inplace=True)
    samples_df["text"] = samples_df["text"].str[:truncate_length]

    return '\n'.join(samples_df.text.tolist())


In [174]:
questions_df = pd.read_csv("top100questions.csv")

In [175]:
questions_df.iloc[:10]

Unnamed: 0,question
0,How do I check if PyTorch is using the GPU?\n
1,How do I save a trained model in PyTorch?\n
2,What does .view() do in PyTorch?\n
3,Why do we need to call zero_grad() in PyTorch?\n
4,How do I print the model summary in PyTorch?\n
5,How do I initialize weights in PyTorch?\n
6,What does model.eval() do in pytorch?\n
7,What's the difference between reshape and view...
8,What does model.train() do in PyTorch?\n
9,What does .contiguous() do in PyTorch?\n


In [217]:
def run_qa(question, context):

    OPENAI_API_KEY = ""
    template = """from this context: {context} answer the question: {question}."""
    prompt = PromptTemplate(template=template, input_variables=["context", "question"])
    llm = OpenAI(model_name="text-davinci-003", openai_api_key=OPENAI_API_KEY, temperature=0)
    llm_chain = LLMChain(llm=llm, prompt=prompt)
    return llm_chain.predict(question=question, context=context)


In [None]:
#### using openai

In [223]:
list1 = []
i = 0
for question in questions_df.loc[:10, "question"]:
    print("*************************************************************************")
    question = question.rstrip()
    list2 = []
    list2.append(question)
    print('question: \n',question)
    context = get_context(question, k=5)
    list2.append(context)
    try:
        answer = run_qa(question, context)
        print('answer: \n', answer)
        list2.append(answer)
        i+=1
    except RateLimitError:
        print("Hit RateLimit @ i=",i)
        time.sleep(60)
    list1.append(list2)
df2 = pd.DataFrame(list1,columns=['question','context','answer'])

*************************************************************************
question: 
 How do I check if PyTorch is using the GPU?
answer: 
 

you can use the following code:

import torch

if torch.cuda.is_available():
    print("PyTorch is using the GPU")
else:
    print("PyTorch is not using the GPU")
*************************************************************************
question: 
 How do I save a trained model in PyTorch?
answer: 
 

The simplest way to save a trained model in PyTorch is to use the torch.save() function. This function takes in a serializable object and saves it to a file. For example, if you have a model called "model" that you have trained, you can save it to a file called "model.pt" by running the following command:

torch.save(model, "model.pt")

You can then load the model back into memory by running the following command:

model = torch.load("model.pt")
*************************************************************************
question: 
 What does .view() d

In [241]:
df2

Unnamed: 0,question,context,answer
0,How do I check if PyTorch is using the GPU?,pytorch isn't running on gpu while true\ni wan...,\n\nyou can use the following code:\n\nimport ...
1,How do I save a trained model in PyTorch?,how to save pytorch tensor in append mode\nhow...,\n\nThe simplest way to save a trained model i...
2,What does .view() do in PyTorch?,what does -1 mean in pytorch view?\nas the que...,\n\nThe .view() function in PyTorch is used to...
3,Why do we need to call zero_grad() in PyTorch?,shall i use grad.zero_() in pytorch with or wi...,\n\nThe zero_grad() function in PyTorch is use...
4,How do I print the model summary in PyTorch?,can't print model summary using pytorch?\nhell...,\n\nThe best way to print the model summary in...
5,How do I initialize weights in PyTorch?,pytorch: initializing weight with numpy array ...,\n\nYou can initialize weights in PyTorch by u...
6,What does model.eval() do in pytorch?,which pytorch modules are affected by model.ev...,\n\nmodel.eval() sets the model in evaluation ...
7,What's the difference between reshape and view...,what is the different for torchvision.models.r...,\n\nThe difference between reshape and view in...
8,What does model.train() do in PyTorch?,pytorch deep learning - class model() and trai...,\n\nModel.train() is a method in PyTorch that ...
9,What does .contiguous() do in PyTorch?,pytorch - connection between loss.backward() a...,\n\nThe .contiguous() function in PyTorch is u...


In [242]:
## saving generated answer with context to df

df2.to_csv('openAILLm_prediction.csv')

In [194]:
## checking using questions

question = 'PyTorch does not work on Windows 32-bit'
context = get_context(question, k=5)
answer = run_qa(question, context)
answer

' You must use a 64-bit version of Windows to install and use PyTorch.'

In [195]:
question = 'How do I make my experiment deterministic?'
context = get_context(question, k=5)
answer = run_qa(question, context)
answer

'\n\nThe best way to make your experiment deterministic is to set a fixed random seed for all random number generators used in your experiment. This includes the random number generators used by PyTorch, NumPy, and any other libraries you are using. Setting a fixed random seed ensures that the same random numbers are generated each time the experiment is run, making the results reproducible.'

In [199]:
question = 'How should I scale up my Pytorch models?'
context = get_context(question, k=5)
answer = run_qa(question, context)
answer

'\n\nScaling up your Pytorch models can be done in several ways. The most common approach is to increase the batch size, which can be done by increasing the number of data points in each batch. Additionally, you can increase the number of layers in the model, or increase the number of neurons in each layer. You can also increase the number of epochs used for training, or use a larger learning rate. Finally, you can also use more powerful hardware, such as GPUs, to speed up the training process.'