In [18]:
import pandas as pd
import re
import concurrent.futures
import requests
import openai
from tqdm import tqdm
tqdm.pandas()

In [2]:
def cleanhtml(raw_html):
    CLEANR = re.compile('<.*?>')
    cleantext = re.sub(CLEANR, '', raw_html)
    return cleantext
    
def clean_data(df):
    df["pt_answer"] = df["pt_answer"].apply(lambda x: cleanhtml(x))

    df["question"] = df["pt_title"].str.lower() + "\n" + df["pt_body"]
    df["question"] = df["question"].apply(lambda x: cleanhtml(x))
    df["answer"] = df["pt_answer"].str.lower()

    df = df[['pt_post_id','question', 'answer']]
    return df
    

## get qa and link to post
def get_url(df):
    url = []
    for index, row in df.iterrows():
        url.append(f"https://stackoverflow.com/questions/{row['pt_post_id']}/")
    
    df['source'] = url

    return df

### Download the file pt_question_answers.csv from https://github.com/chauhang/pt-experiments/blob/pytorch-qa/pytorch-qa/pt_question_answers.csv

In [3]:
df = pd.read_csv('pt_question_answers.csv')
df.shape

(10763, 12)

In [4]:
df = clean_data(df)
df = get_url(df)
df = df[['question','answer','source']]

In [5]:
df = df[:5000]

df.shape

(5000, 3)

### Set your openai key here

In [6]:
OPENAI_API_KEY=""

In [7]:
def get_qa_openai(prompt, index):
    try:
        completion = openai.ChatCompletion.create(
                model="gpt-3.5-turbo", api_key = OPENAI_API_KEY,
                messages=prompt
                )

        resp = completion.choices[0].message.content

    except requests.exceptions.RequestException as e:
        print(f'Request failed with error: {str(e)}.')
        print(f'Waiting for 3 minutes before trying again...')
        time.sleep(180)
    
    return (resp, index)

### Sample test

In [8]:
question = df.loc[5, "question"]
answer = df.loc[5, "answer"]


In [9]:
question

'what does .view() do in pytorch?\nWhat does .view() do to a tensor x? What do negative values mean?\nx = x.view(-1, 16 * 5 * 5)\n\n'

In [10]:
answer

"view() reshapes the tensor without copying memory, similar to numpy's reshape().\ngiven a tensor a with 16 elements:\nimport torch\na = torch.range(1, 16)\n\nto reshape this tensor to make it a 4 x 4 tensor, use:\na = a.view(4, 4)\n\nnow a will be a 4 x 4 tensor. note that after the reshape the total number of elements need to remain the same. reshaping the tensor a to a 3 x 5 tensor would not be appropriate.\nwhat is the meaning of parameter -1?\nif there is any situation that you don't know how many rows you want but are sure of the number of columns, then you can specify this with a -1. (note that you can extend this to tensors with more dimensions. only one of the axis value can be -1). this is a way of telling the library: &quot;give me a tensor that has these many columns and you compute the appropriate number of rows that is necessary to make this happen&quot;.\nthis can be seen in this model definition code. after the line x = self.pool(f.relu(self.conv2(x))) in the forward fu

In [11]:
prompt = [{"role":"system","content":"you are an intelligent system who has the skills of an pytorch engineer. take the question and answer as input and generate an answer for the question, include code snippets if necessary."},
          {"role":"assistant","content":"sure, give me the input question and answer"},
          {"role":"user","content":f"question: {question}, answer:{answer}"}]

In [12]:
prompt

[{'role': 'system',
  'content': 'you are an intelligent system who has the skills of an pytorch engineer. take the question and answer as input and generate an answer for the question, include code snippets if necessary.'},
 {'role': 'assistant',
  'content': 'sure, give me the input question and answer'},
 {'role': 'user',
  'content': "question: what does .view() do in pytorch?\nWhat does .view() do to a tensor x? What do negative values mean?\nx = x.view(-1, 16 * 5 * 5)\n\n, answer:view() reshapes the tensor without copying memory, similar to numpy's reshape().\ngiven a tensor a with 16 elements:\nimport torch\na = torch.range(1, 16)\n\nto reshape this tensor to make it a 4 x 4 tensor, use:\na = a.view(4, 4)\n\nnow a will be a 4 x 4 tensor. note that after the reshape the total number of elements need to remain the same. reshaping the tensor a to a 3 x 5 tensor would not be appropriate.\nwhat is the meaning of parameter -1?\nif there is any situation that you don't know how many ro

In [13]:
resp, index = get_qa_openai(prompt=prompt, index=5)

In [14]:
resp

'The .view() method in PyTorch reshapes a tensor without copying its memory, similar to the reshape() method in numpy. It takes the new shape as an argument. If the total number of elements in the tensor is the same as in the new shape, then the tensor is successfully reshaped. Otherwise, an error will be raised.\n\nNegative values can be passed as an argument to the .view() method and it indicates that PyTorch should infer the missing dimension based on the size of the input tensor and the other provided dimension. If there is any situation that you don\'t know the number of rows you want but are sure of the number of columns, then you can specify this with a -1. This is a way of telling the library: "give me a tensor that has these many columns and you compute the appropriate number of rows that is necessary to make this happen".\n\nHere\'s an example where .view() is used to reshape a tensor:\n\n```\nimport torch\n\n# create a tensor with 16 elements\na = torch.range(1, 16)\n\n# res

### Running it for 50 samples

In [15]:
df = df[:50]

In [16]:
df.shape

(50, 3)

In [19]:
questions_ans = []

with concurrent.futures.ThreadPoolExecutor() as executor:
    
    futures = []
    for index,i in df.iterrows():

        question = i["question"]
        answer = i["answer"]
    
        prompt = [{"role":"system","content":"you are an intelligent system who has the skills of an pytorch engineer. take the question and answer as input and generate an answer for the question, include code snippets if necessary."},
                  {"role":"assistant","content":"sure, give me the input question and answer"},
                  {"role":"user","content":f"question: {question}, answer:{answer}"}]

        futures.append(executor.submit(get_qa_openai, prompt, index))

    for future, (_, row) in tqdm(zip(concurrent.futures.as_completed(futures), df.iterrows()), total=len(df)):
        try:
            qa, ind = future.result()
            questions_ans.append((ind,qa))
        except Exception as exc:
            print(f'generated an exception: {exc}')

 98%|█████████▊| 49/50 [01:03<00:02,  2.88s/it]

generated an exception: The server is overloaded or not ready yet.


100%|██████████| 50/50 [05:37<00:00,  6.75s/it]

generated an exception: Bad gateway. {"error":{"code":502,"message":"Bad gateway.","param":null,"type":"cf_bad_gateway"}} 502 {'error': {'code': 502, 'message': 'Bad gateway.', 'param': None, 'type': 'cf_bad_gateway'}} {'Date': 'Mon, 26 Jun 2023 09:27:00 GMT', 'Content-Type': 'application/json', 'Content-Length': '84', 'Connection': 'keep-alive', 'X-Frame-Options': 'SAMEORIGIN', 'Referrer-Policy': 'same-origin', 'Cache-Control': 'private, max-age=0, no-store, no-cache, must-revalidate, post-check=0, pre-check=0', 'Expires': 'Thu, 01 Jan 1970 00:00:01 GMT', 'Server': 'cloudflare', 'CF-RAY': '7dd461a98f352e05-BOM', 'alt-svc': 'h3=":443"; ma=86400'}





In [22]:
len(questions_ans)

48

In [25]:
questions_ans[0]

(9,
 'To install torchtext, you can follow the steps below:\n\n1. Clone the GitHub repository of torchtext.\n```\ngit clone https://github.com/pytorch/text.git\n```\n\n2. Go into the torchtext directory.\n```\ncd text\n```\n\n3. Install the package using setuptools.\n```\npython setup.py install\n```\n\nAlternatively, you can install torchtext using pip by running the following command:\n```\npip install torchtext\n```\n\nHowever, if you are still facing the same error after installing torchtext, you may need to check if your PyTorch installation is compatible with your torchtext version. In some cases, torchtext may not be compatible with the latest version of PyTorch. You can try downgrading your PyTorch version and reinstalling torchtext.')

In [26]:
for index, qa in questions_ans:
    df.at[index, 'short_answer'] = qa

In [49]:
df.short_answer.str.contains("engineer").sum()

0

In [50]:
df.to_csv("Sample-Summarization.csv", index=False)