In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from string import Template

import os
import openai

from data_apps_aws.password_manager import get_api_token
from data_apps_aws.utils import make_outside_legend

In [None]:
# Notebook-wide plotting defaults: 14x6 inch figures, then seaborn's default theme.
plt.rcParams['figure.figsize'] = (14, 6)
sns.set()

In [None]:
# Configure the OpenAI client with a token fetched through the project's
# password-manager helper, so no secret is hard-coded in the notebook.
openai.api_key = get_api_token('openai')

In [None]:
# One-time setup if the OpenAI client library is missing (uncomment to run):
# !pip install openai

In [None]:
def gpt_request(prompt_text, stop_seq=None, temperature=0.7, top_p=1, frequency_penalty=0, presence_penalty=0, engine='davinci', response_length=64):
    """Send one completion request to OpenAI and return the generated text.

    Args:
        prompt_text: Prompt forwarded as ``prompt``.
        stop_seq: Optional stop sequence(s). When ``None`` the ``stop``
            argument is left out of the request entirely.
        temperature: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.
        frequency_penalty: Forwarded as ``frequency_penalty``.
        presence_penalty: Forwarded as ``presence_penalty``.
        engine: Engine name forwarded as ``engine``.
        response_length: Forwarded as ``max_tokens``.

    Returns:
        The ``'text'`` field of the first entry in the response's ``choices``.
    """
    request_kwargs = {
        'prompt': prompt_text,
        'engine': engine,
        'max_tokens': response_length,
        'temperature': temperature,
        'top_p': top_p,
        'frequency_penalty': frequency_penalty,
        'presence_penalty': presence_penalty,
    }

    # Only include `stop` when the caller supplied one, so the request is
    # identical to a call that never mentions the argument.
    if stop_seq is not None:
        request_kwargs['stop'] = stop_seq

    response_obj = openai.Completion.create(**request_kwargs)

    return response_obj.choices[0]['text']

In [None]:
# Few-shot prompt: four example "Company info for <name>: <description>" lines
# followed by an open line for $company_name that the model is asked to complete.
company_info_prompt_template = Template("""Company info for Amazon: It focuses on e-commerce, cloud computing, digital streaming, and artificial intelligence.
Company info for Apple: It makes software, mobile apps, personal computers, and home appliances.
Company info for Microsoft: It makes computer software, consumer electronics, personal computers, and online services.
Company info for Tesla: It makes electric cars, solar panels, and battery products.
Company info for $company_name:""")

In [None]:
# Few-shot Q&A prompt: an instruction paragraph, seven example Q/A pairs
# (two of them answered "Unknown"), then an open "Q: $question_text / A:" slot
# for the model to complete.
q_and_a_prompt_template = Template("""I am a highly intelligent question answering bot. If you ask me a question that is rooted in truth, I will give you the answer. If you ask me a question that is nonsense, trickery, or has no clear answer, I will respond with "Unknown".

Q: What is human life expectancy in the United States?
A: Human life expectancy in the United States is 78 years.

Q: Who was president of the United States in 1955?
A: Dwight D. Eisenhower was president of the United States in 1955.

Q: Which party did he belong to?
A: He belonged to the Republican Party.

Q: What is the square root of banana?
A: Unknown

Q: How does a telescope work?
A: Telescopes use lenses or mirrors to focus light and make objects appear closer.

Q: Where were the 1992 Olympics held?
A: The 1992 Olympics were held in Barcelona, Spain.

Q: How many squigs are in a bonk?
A: Unknown

Q: $question_text
A:""")

In [None]:
def get_company_info(this_company):
    """Return the model's one-line company description for ``this_company``.

    Fills the company-info few-shot template with the company name and
    requests a completion that stops as soon as the model starts another
    'Company info for' line.
    """
    filled_prompt = company_info_prompt_template.substitute(company_name=this_company)
    return gpt_request(
        filled_prompt,
        temperature=0,
        top_p=0.19,
        stop_seq=['Company info for'],
    )

In [None]:
def get_question_answer(question_text):
    """Return the model's answer to ``question_text`` using the Q&A prompt.

    The question is inserted into the few-shot Q&A template and the
    completion is cut at the first newline, with up to 100 tokens requested.
    """
    filled_prompt = q_and_a_prompt_template.substitute(question_text=question_text)
    return gpt_request(
        filled_prompt,
        temperature=0,
        top_p=1,
        response_length=100,
        stop_seq=['\n'],
    )

In [None]:
# Simple factual lookup question.
get_question_answer('What is the capital of Uganda?')

In [None]:
# Question whose answer depends on how "successful" is interpreted.
get_question_answer('What is the most successful movie ever?')

In [None]:
# Simple factual lookup question (preposition corrected to "in the world").
get_question_answer('What is the highest mountain in the world?')

In [None]:
# Definition-style question.
get_question_answer('What is a volcano?')

In [None]:
# Historical date question.
get_question_answer('When was Rome founded?')

In [None]:
# Definition-style question about a technical topic.
get_question_answer('What is Bitcoin?')

In [None]:
# Same topic as the previous cell, phrased as a request for an explanation.
get_question_answer('Explain how Bitcoin works?')

In [None]:
# Definition-style question.
get_question_answer('What is a hangover?')

In [None]:
# Dictionary-style definition request.
get_question_answer('What is the definition of the word beautiful?')

In [None]:
# Numeric factual question ("did" corrected to "died" so the question is well-formed).
get_question_answer('How many people died during World War II?')

In [None]:
# Opinion question using the word "football"; the next cell repeats it with "soccer".
get_question_answer('Who is the best football player ever?')

In [None]:
# Same opinion question as the previous cell, worded with "soccer" instead of "football".
get_question_answer('Who is the best soccer player ever?')

In [None]:
# Recipe request phrased as "can you find me ..."; the next cell asks for the recipe directly.
get_question_answer('Can you find me a recipe for New York Cheesecake?')

In [None]:
# Direct wording of the recipe request from the previous cell.
# Answers are cut at the first newline, so at most one line comes back.
get_question_answer('What is the recipe of New York Cheesecake?')

In [None]:
# Simple factual lookup question.
get_question_answer('On which side do cars drive in Thailand?')

In [None]:
# Company names to run through the few-shot company-info prompt.
company_list = [
    'SAP',
    'Scalable Capital',
    'Kaufhof',
    'Altana',
    'Schering',
    'Vitesco',
    'Deutsche Bahn',
    'HelloFresh',
    'Porsche',
]

In [None]:
# Only the first entry of company_list is queried (slice [:1]);
# widen the slice to run the prompt for more companies.
for this_company in company_list[:1]:
    answer = get_company_info(this_company)
    print(f'{this_company}: {answer}')

### FAQ Classifications

In [None]:
# Load the FAQ export; the path is relative to the notebook's working directory.
# Later cells read the columns 'Category', 'Topic', 'FAQ' and 'Answer' from it.
faqs_raw = pd.read_csv('assets/scalable_capital_FAQs_ENG.csv')

In [None]:
# Display the loaded frame for a quick visual check.
faqs_raw

In [None]:
# Derive the two fields written to the search file:
#   metadata = "<Category> <Topic>", text = "<FAQ> <Answer>" (joined by a single space).
# This adds columns to faqs_raw in place.
# NOTE(review): a row with a missing value in any source column would presumably
# yield NaN here and end up as null in the JSONL - confirm the CSV has no gaps.
faqs_raw['metadata'] = faqs_raw['Category'] + ' ' + faqs_raw['Topic']
faqs_raw['text'] = faqs_raw['FAQ'] + ' ' + faqs_raw['Answer']

Write the FAQ records to disk as JSON Lines (one `text`/`metadata` object per line)

In [None]:
# Keep only the two fields needed for the search file and serialise them as
# JSON Lines (one JSON object per row).
df_to_json = faqs_raw.loc[:, ['text', 'metadata']]
data = df_to_json.to_json(orient='records', lines=True)
# Drop a trailing newline only if one is present. Unconditionally slicing off
# the last character would cut the closing '}' of the final record whenever
# to_json does not end its output with '\n' (reportedly pandas-version dependent).
data = data.rstrip('\n')

In [None]:
# Persist the JSON Lines payload, terminating the last record with a newline.
with open('assets/scalable_capital_FAQs_ENG.jsonl', 'w') as jsonl_file:
    jsonl_file.write(data + '\n')

In [None]:
import jsonlines

def check_jsonl_file(fname):
    """Read every record of a JSON Lines file and require a 'metadata' key.

    Returns nothing. A record that lacks 'metadata' surfaces as the
    ``KeyError`` raised by the lookup.
    """
    with jsonlines.open(fname) as reader:
        for record in reader:
            # The value is discarded; the lookup exists only to fail on a missing key.
            _ = record['metadata']

In [None]:
def search_response_to_df(response):
    """Convert a search response into a DataFrame ranked by score.

    Args:
        response: Object whose ``data`` attribute is an iterable of
            mapping-like entries, each providing at least the keys
            'score', 'metadata' and 'text'.

    Returns:
        DataFrame with columns ['score', 'metadata', 'text'], sorted by
        'score' in descending order with a fresh 0..n-1 index. If
        ``response.data`` is empty, an empty frame with those columns.

    Raises:
        KeyError: if none of the entries provides one of the three columns.
    """
    columns = ['score', 'metadata', 'text']

    # Build the frame once from plain dict records rather than concatenating
    # one single-row DataFrame per hit.
    records = [dict(entry) for entry in response.data]

    # No hits: return an empty, correctly-shaped frame instead of failing.
    if not records:
        return pd.DataFrame(columns=columns)

    out_df = (
        pd.DataFrame(records)
        .loc[:, columns]
        .sort_values('score', ascending=False)
        .reset_index(drop=True)
    )

    return out_df

In [None]:
# Validate the JSONL file written earlier: raises KeyError if any record lacks 'metadata'.
check_jsonl_file('assets/scalable_capital_FAQs_ENG.jsonl')

In [None]:
# Upload the FAQ documents for search and keep the returned file id for queries.
# The context manager closes the file handle once the upload call returns.
# NOTE(review): this uploads '..._ENG_long.jsonl', whereas the cells above write
# and check '..._ENG.jsonl' - confirm the "_long" file is the intended input.
with open("assets/scalable_capital_FAQs_ENG_long.jsonl") as faq_file:
    file_upload_response = openai.File.create(file=faq_file, purpose="search")
file_id = file_upload_response.id

In [None]:
# Example queries tried against the FAQ search. Only `this_query` is sent.
example_queries = [
    'How can I set up 2FA?',
    'Can I transfer assets to Scalable?',
    'I did not yet receive my premium?',
    'Do I need to use multi-factor authentication',
    'What is a ETF',
    'how much does it cost',
    'how can I see the status of my tax allowance',
    'I want to open an account for my children?',
]

# Pick the query explicitly instead of relying on the last of several
# overwriting assignments; change the index to try another example.
this_query = example_queries[-1]

In [None]:
# Let pandas show up to 4000 characters per cell so long FAQ texts are not truncated.
pd.options.display.max_colwidth = 4000

In [None]:
%%time
# Run the selected query against the uploaded FAQ file using the "ada" engine,
# asking for metadata to be returned with each hit. The cell is timed by %%time.
response = openai.Engine("ada").search(
    search_model="ada", 
    query=this_query, 
    max_rerank=10,
    file=file_id,
    return_metadata=True
)

# Tabulate the hits (score, metadata, text), best score first, and display them.
out_df = search_response_to_df(response)
out_df