In [1]:
import openai, configparser, json, re, psycopg2
from transformers import GPT2TokenizerFast
from matplotlib import pyplot as plt
import seaborn as sns

import pandas as pd
from tqdm.notebook import tqdm
from io import StringIO

#Get the API-key
config_obj = configparser.ConfigParser()
config_obj.read("config.ini")

keys = config_obj["OPENAI"]
openai.api_key = keys["API_KEY"]

pd.set_option('display.max_colwidth', None)

None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [2]:
database_logins = config_obj["POSTGRELOGINS"]

conn = psycopg2.connect(
    database="beers",
    user=database_logins['USERNAME'],
    password=database_logins['PASSWORD'],
    host=database_logins['HOST'],
    port=database_logins['PORT']
)

conn.autocommit = True

# GPT-3 Model
GPT-3 models can understand and generate natural language. 
There are four main models with different levels of power suitable for different tasks. Davinci is the most capable model, and Ada is the fastest. 

### Let's first create a completions function we can use to query the model:

In [3]:
def completions(prompt):
    '''
    This function will create an OpenAI completion engine, given an imput promt.
    '''

    # create a completion
    completion = openai.Completion.create(engine="text-davinci-002", #The model we wish to use
                                          prompt=prompt, # Our input (task) for the model
                                          temperature=0, # When temperature is above 0, results will be diffrent each time
                                          max_tokens=256, # Maximum 2048
                                          top_p=1, # nucleus sampling, 0.1 means only the tokens comprising the top 10% probability mass are considered
                                          frequency_penalty=0,
                                          presence_penalty=0
                                         )

    # print the completion
    return completion.choices[0].text

## Example 1: Generate a spreadsheet with beer names
Let's ask GPT-3 to create us a spreadsheet with 10 beers, their alcohol percentage, brewer and color.

In [4]:
response = completions("Create a 4 column spreadsheet with cars, their brand, horsepower, and price in euros. "\
                    "With 10 rows.")

In [6]:
# Let's load these beers into a pandas DataFrame
csvStringIO = StringIO(response)
cars = pd.read_csv(csvStringIO, sep="|")

# Remove spaces from column names
cars.columns = [i.strip() for i in cars.columns.to_list()]

# Remove the numbers from the beer names
cars.iloc[:,0] = cars.iloc[:,0].apply(lambda x: re.sub(r'\d.', '', x))

cars

Unnamed: 0,Cars,Brand,Horsepower,Price in Euros
0,Ford Mustang,Ford,300,"€25,000"
1,Chevrolet Camaro,Chevrolet,400,"€30,000"
2,Dodge Challenger,Dodge,375,"€28,000"
3,Audi R,Audi,430,"€120,000"
4,Porsche,Porsche,385,"€90,000"
5,Mercedes-Benz AMG GT,Mercedes-Benz,456,"€140,000"
6,Lamborghini Huracan,Lamborghini,610,"€180,000"
7,McLaren,McLaren,710,"€280,000"
8,Ferrari GTB,Ferrari,670,"€250,000"
9,. Bugatti Chiron,Bugatti,1500,"€2,700,000"


## Example 2: Create product descriptions

### Beer Dataset
- First we will generate a static description based on the attributes from the beer.
- We will then use the description to generate a unique tagline by the AI.

In [None]:
def car_type(p):
    if p < 50000:
        return 'family'
    if p >= 50000 and p < 100000:
        return 'premium'
    if p >= 1000000 :
        return 'luxery'
    
cars['Segment'] = cars['Price in Euros'].str.replace(',','').apply(lambda x: beer_percentages(int(x.strip())))

In [10]:
descriptions = []
for index, row in cars.iterrows():
    description = f"The car name is {row['Cars']}. The brand is {row['Brand']}. The car has {row['Horsepower']} horsepower." \
    f"It costs {row['Price in Euros']}."
    descriptions.append(description)
    
cars['description'] = descriptions

In [None]:
cars

In [None]:
beers[['description']].head(5)

In [None]:
responses = []

for description in beers['description']:
    input_text = f'Write a product description for following the drink: {description}'
    #print(input_text)
    result = completions(input_text)
    #print(result)
    try:
        responses.append(result)
    except Exception as e:
        print(e)
        print(result)
        
beers['gpt_description'] = responses

In [None]:
beers[['gpt_description']].head(5)

## Example 3: Question Answering
Answers (/answers) is a dedicated question-answering endpoint useful for applications that require high accuracy text generations based on sources of truth like company documentation and knowledge bases. The additional context can be provided either as a list of up to 200 documents or as a pre-uploaded file to go beyond that limit.

In [None]:
beer_stock_dataset = pd.read_csv('datasets/beer_dataset.csv')

In [None]:
beer_stock_dataset

In [None]:
with open('datasets/beer_stock.jsonl', 'w+') as f:
    for index, row in beer_stock_dataset.iterrows():
        description = f"The beer name is {row['Name']}. The brewery name is {row['Brewery']}. The color is {row['Type']}. There are {row['Stock']} items in stock. It's a {row['Strong']} beer."
        f.write(json.dumps({"text": description}) + '\n')

In [None]:
openai.File.create(file=open("datasets/beer_stock.jsonl"), purpose='answers')

In [None]:
result = openai.Answer.create(
    search_model="ada", 
    model="curie", 
    question="What's the color of the Aardbeien Lambic St. Louis from Van Honsebrouck ?", 
    file="file-jVGzSbVxvaiCeO2J2JH8bSoK", 
    examples_context="The beer name is Abdijbier Sint-Idesbald trippel. The brewery name is Damberd. "\
    "The color is brown. There are 91 items in stock. It's a strong beer.", 
    examples=[["Give me the color from Abdijbier Sint-Idesbald trippel from Damberd", \
               "The color of hte beer is brown."]], 
    max_rerank=1,
    max_tokens=10,
    stop=["\n", "<|endoftext|>"]
)

result.answers[0]

# Codex Model 
The Codex models are descendants of the GPT-3 models that can understand and generate code. Their training data contains both natural language and billions of lines of public code from GitHub.

They’re most capable in Python and proficient in over a dozen languages including JavaScript, Go, Perl, PHP, Ruby, Swift, TypeScript, SQL, and even Shell.
## Example 1: Translate natural language to SQL Queries


In [None]:
def codex_model(question):
    '''
    Here we use the Python API to call de Codex model. 
    The code-davinci-002 is the most capable engine for generating code
    '''
    response = openai.Completion.create(
        engine="code-davinci-002",
        prompt=question,
        temperature=0,
        max_tokens=150,
        top_p=1.0,
        frequency_penalty=0.0,
        presence_penalty=0.0,
        stop=["#", ";"])
    
    return response.choices[0].text.strip()
    

In [None]:
# Ask you question
question = 'Show me how many beers from the type brown are sold per brewery. With more then 5 beers'

In [None]:
cursor = conn.cursor()
# We define the table structure so the codex model knows how to write the query
prompt_input = f'''
### PostgreSQL tables, with their properties:
#
# inventory(Name, Brewery, Type, Alcohol, Stock, Strong, Country)
#
### {question}
SELECT
'''

query = 'SELECT ' + codex_model(prompt_input)
print(query, '\n')
cursor.execute(query)
df = pd.DataFrame(cursor.fetchall())
df.columns = [desc[0] for desc in cursor.description]
print([desc[0] for desc in cursor.description])
df

In [None]:
cursor.close()

## Example 2: Generate Python code based on natural language
The result from the above query is stored in a dataframe named df, let's ask our model to create code to sort the dataframe alphabetically.

In [None]:
question = 'Create a function that generates a seaborn bar plot for a given dataframe.' \
'The first column should be on the y axis and the second column on the x axis.#'

In [None]:
generated_code = codex_model(question)
print(generated_code)
exec(generated_code)

## Example 3: Write a Python docstring for a function
We specify the Python version, paste in the code, and then ask within a comment for a docstring, and give a characteristic beginning of a docstring (""").

In [None]:
question = '# Create a high-quality docstring for the above function#'
print(codex_model(question))