# OpenAI, direct query

In [1]:
import openai
import os

In [2]:
from dotenv import load_dotenv, find_dotenv
# Locate the nearest .env file and load its variables into the environment.
_ = load_dotenv(find_dotenv())

# Hand the key to the OpenAI client; this is None when the variable is not set.
openai.api_key = os.environ.get('OPENAI_API_KEY')

In [None]:
def get_completion(prompt, model="gpt-3.5-turbo", temperature=0):
    """Send one user message to the OpenAI chat API and return the reply text.

    The chat endpoint is wrapped so it can be used in a simple
    prompt -> completion style (noted by the original author as the
    paradigm preferred for this class).

    Args:
        prompt: Text sent as the content of a single "user" message.
        model: Name of the chat model passed to the API.
        temperature: Degree of randomness of the model's output. Defaults
            to 0, the value that was previously hard-coded.

    Returns:
        The "content" field of the first choice's message.
    """
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )
    # Only the first choice is read; no `n` is passed in the request above.
    return response.choices[0].message["content"]

In [23]:
# Raw, comma-separated job titles to classify (spacing kept exactly as given).
text = "photographe   infographiste, chief scientist    , safety manager"

# Classification prompt: the titles are embedded between triple backticks and
# followed by the numbered list of job functions the model may choose from.
prompt = f"""
Categorize each of the job title ```{text}``` deliminated by ',' into one of the following job functions. 
Make sure to use only the functions from the list, and use the exact name of the job functions from the following list, and reference the number. 
Put them in csv format, with column names as :   job title, functions, number
 2 finance              
 3 engineering          
 4 customer success     
 5 retail               
 6 operations           
 7 design               
 8 sales                
 9 marketing            
10 administration       
11 support              
12 product              
13 general management   
14 HR                   
15 commercial management
16 analytics            
17 recruiting           
18 legal 
"""

In [24]:
# Send the classification prompt through the direct OpenAI helper (default model)
# and print the reply text.
response = get_completion(prompt)
print(response)

job title, functions, number
photographe, design, 7
infographiste, design, 7
chief scientist, analytics, 16
safety manager, operations, 6


# LangChain

In [3]:
from langchain.llms import OpenAI

In [4]:
# LangChain's OpenAI LLM wrapper; temperature=0 asks for the least random output.
llm = OpenAI(temperature=0)

In [25]:
# Send the same prompt through the LangChain wrapper and print the returned text.
print(llm(prompt))


photographe, design, 7
infographiste, design, 7
chief scientist, analytics, 16
safety manager, administration, 10


## Use Chain

https://python.langchain.com/en/latest/getting_started/getting_started.html

In [11]:
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI

# Fresh LangChain OpenAI wrapper with temperature=0 (same settings as the earlier `llm`).
llm = OpenAI(temperature=0)
# NOTE: this rebinds `prompt` -- previously a plain string -- to a PromptTemplate.
# Cells that expect the string version must run before this one.
# The template takes a single variable, `titles`, and additionally tells the
# model to answer 'unknown' when it is not confident.
prompt = PromptTemplate(
    input_variables=["titles"],
    template= """Categorize each of the job title {titles} deliminated by ',' into one of the following job functions. 
    Make sure to use only the functions from the list, and use the exact name of the job functions from the following list, and reference the number. 
    If you are not confident in the answer, please categorize as 'unknown'. For example, if the job title is 'manager', the answer should be: 
    'unknown'.
    Put them in csv format, with column names as :   job title, functions, number
    2 finance              
 3 engineering          
 4 customer success     
 5 retail               
 6 operations           
 7 design               
 8 sales                
 9 marketing            
10 administration       
11 support              
12 product              
13 general management   
14 HR                   
15 commercial management
16 analytics            
17 recruiting           
18 legal """,
)

In [12]:
from langchain.chains import LLMChain
# Combine the LLM and the prompt template into one chain object.
chain = LLMChain(llm=llm, prompt=prompt)

In [13]:
# Run the chain on a comma-separated batch of titles; presumably the string
# fills the template's only input variable, `titles`.
chain.run("photographe   infographiste, chief scientist , safety manager, director")

Retrying langchain.llms.openai.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600).


'\n\nphotographe,design,7\ninfographiste,design,7\nchief scientist,unknown,unknown\nsafety manager,operations,6\ndirector,general management,13'

## Indexes
https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/csv.html

In [8]:
# load CSV
from langchain.document_loaders.csv_loader import CSVLoader
# Read job_titles.csv as comma-delimited, double-quote-quoted text.
loader = CSVLoader(
    file_path='job_titles.csv',
    csv_args=dict(delimiter=',', quotechar='"'),
)

# Load the file contents into `data`.
data = loader.load()

In [10]:
# Display the loaded documents (the recorded output shows one Document per CSV row).
data

[Document(page_content='job_title: non executive director', metadata={'source': 'job_titles.csv', 'row': 0}),
 Document(page_content='job_title: chief scientist', metadata={'source': 'job_titles.csv', 'row': 1}),
 Document(page_content='job_title: test team lead', metadata={'source': 'job_titles.csv', 'row': 2}),
 Document(page_content='job_title: director of system test', metadata={'source': 'job_titles.csv', 'row': 3}),
 Document(page_content='job_title: safety manager', metadata={'source': 'job_titles.csv', 'row': 4}),
 Document(page_content='job_title: photographe   infographiste', metadata={'source': 'job_titles.csv', 'row': 5}),
 Document(page_content='job_title: program manager', metadata={'source': 'job_titles.csv', 'row': 6}),
 Document(page_content='job_title: manаger', metadata={'source': 'job_titles.csv', 'row': 7}),
 Document(page_content='job_title: supervisor operacional', metadata={'source': 'job_titles.csv', 'row': 8}),
 Document(page_content='job_title: senior deliver