## Import Libraries & Load Environment Variables

In [32]:
import os
import json
import pandas as pd
from datetime import datetime
import time
from pathlib import Path
import sys
from dotenv import load_dotenv

In [33]:
# Lift pandas' column-width cap so long text values are displayed in full
pd.options.display.max_colwidth = None

In [34]:
# Make the project's helper scripts and product data importable.
# The base directory is the current working directory, so the notebook must be
# run from the directory that contains "src" and "data".
script_dir = Path.cwd()
sys.path.append(str(script_dir.joinpath("src", "scripts")))
sys.path.append(str(script_dir.joinpath("data", "products")))

In [101]:
# import code to automate querying of GPT
# NOTE(review): the GPT-3.5 section further down re-imports QueryGPT from `gpt`,
# not `gptb` -- confirm which module name is intended.
from gptb import QueryGPT

# import list of products
# (presumably resolved through the data/products directory appended to sys.path -- confirm)
from products import products

In [37]:
# Read the variables defined in the local .env file into the process environment.
# "OPEN_AI_API_KEY" is expected to be defined in that file.
load_dotenv(dotenv_path='.env')

False

In [107]:
# Read the OpenAI API key from the process environment.
# A missing key still raises KeyError (same exception type as a plain lookup),
# but with a message that says how to fix it.
try:
    open_ai_api_key = os.environ['OPEN_AI_API_KEY']
except KeyError:
    raise KeyError(
        "OPEN_AI_API_KEY is not set; define it in the .env file or export it "
        "in the environment before running this notebook"
    ) from None

## Products

In [None]:
# preview list of products
# (bare expression: Jupyter renders its value as the cell output)
products

## Generate Responses

### GPT-3.5

In [122]:
# Drop any cached copy of the `gpt` module before importing it again
# (presumably so that edits to gpt.py are picked up -- confirm).
# pop() with a default is used instead of `del` so this cell also works on a
# fresh kernel, where `gpt` has not been imported yet and `del` would raise KeyError.
sys.modules.pop('gpt', None)
from gpt import QueryGPT

# specify model
model = 'gpt-3.5-turbo'

# initiate query object
query_object = QueryGPT(open_ai_api_key=open_ai_api_key, model=model)

In [123]:
# Accumulates one raw API response per (iteration, product) pair
responses = []

# NOTE: this loop currently makes a single pass over only the first product;
# the GPT-4 section runs 40 passes over the full product list.
for iteration in range(1):
    for product in products[:1]:
        # Prompt template: "Write a script for an advert promoting X"
        search_string = f"Write a script for an advert promoting {product}"

        # Query the API and keep whatever it returns
        response = query_object.query_gpt(search_string=search_string)
        responses.append(response)

Rate limit exceeded. Retrying in 30 seconds...


KeyboardInterrupt: 

In [19]:
# Replicate the product list so that products_multiplied[i] names the product
# that was used for responses[i]. The factor 40 must cover the number of passes
# made by the generation loop (assumes `products` is a list).
products_multiplied = products * 40

# Keep only the relevant fields of every response.
# Entries that are not dicts are skipped (presumably failed queries -- confirm);
# enumerate() still counts them, so the index stays aligned with products_multiplied.
list_of_responses = []

for i, response in enumerate(responses):
    if not isinstance(response, dict):
        continue

    list_of_responses.append({
        'unix_timestamp': response['created'],
        'id': response['id'],
        'prompt': f"Write a script for an advert promoting {products_multiplied[i]}",
        'response': response['choices'][0]['message']['content'],
        'model': response['model'],
        'prompt_tokens': response['usage']['prompt_tokens'],
        'completion_tokens': response['usage']['completion_tokens'],
    })

# Serialise the list of dictionaries to a JSON string
response_json = json.dumps(list_of_responses)

# The timestamp in the file name keeps earlier result files from being overwritten
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")

# Write inside a `with` block so the file is flushed and closed; previously the
# handle was left open, so the data was not guaranteed to be on disk.
# NOTE(review): response_json is already a JSON string, so json.dump() stores it
# as a JSON-encoded string (double encoding) and readers must decode twice.
# Left unchanged to keep the file format identical to previously written files;
# consider dumping list_of_responses directly.
with open(f"data/raw_data/gpt3.5_responses_bulk_{timestamp}.json", "w") as out_file:
    json.dump(response_json, out_file)

### GPT-4

In [23]:
# Model used for every query in this section
model = 'gpt-4'

# Build the query helper for the chosen model
query_object = QueryGPT(
    open_ai_api_key=open_ai_api_key,
    model=model,
)

In [24]:
# Accumulates one raw API response per (iteration, product) pair
responses = []

# 40 passes over the full product list, to generate a sufficiently large dataset
for iteration in range(40):
    for product in products:
        # Prompt template: "Write a script for an advert promoting X"
        search_string = f"Write a script for an advert promoting {product}"

        # Query the API and keep whatever it returns
        response = query_object.query_gpt(search_string=search_string)
        responses.append(response)

In [25]:
# Replicate the product list so that products_multiplied[i] names the product
# that was used for responses[i]. The factor 40 must cover the number of passes
# made by the generation loop (assumes `products` is a list).
products_multiplied = products * 40

# Keep only the relevant fields of every response.
# Entries that are not dicts are skipped (presumably failed queries -- confirm);
# enumerate() still counts them, so the index stays aligned with products_multiplied.
list_of_responses = []

for i, response in enumerate(responses):
    if not isinstance(response, dict):
        continue

    list_of_responses.append({
        'unix_timestamp': response['created'],
        'id': response['id'],
        'prompt': f"Write a script for an advert promoting {products_multiplied[i]}",
        'response': response['choices'][0]['message']['content'],
        'model': response['model'],
        'prompt_tokens': response['usage']['prompt_tokens'],
        'completion_tokens': response['usage']['completion_tokens'],
    })

# Serialise the list of dictionaries to a JSON string
response_json = json.dumps(list_of_responses)

# The timestamp in the file name keeps earlier result files from being overwritten
timestamp = datetime.now().strftime("%Y%m%d%H%M%S")

# Write inside a `with` block so the file is flushed and closed; previously the
# handle was left open, so the data was not guaranteed to be on disk.
# NOTE(review): response_json is already a JSON string, so json.dump() stores it
# as a JSON-encoded string (double encoding) and readers must decode twice.
# Left unchanged to keep the file format identical to previously written files;
# consider dumping list_of_responses directly.
with open(f"data/raw_data/gpt4_responses_bulk_{timestamp}.json", "w") as out_file:
    json.dump(response_json, out_file)