# OpenAI, direct query

In [None]:
import openai
import os

In [None]:
from dotenv import load_dotenv, find_dotenv
# Read the local .env file located by find_dotenv().
_ = load_dotenv(find_dotenv())

# Hand the key to the openai module; the lookup yields None when OPENAI_API_KEY is unset.
openai.api_key = os.environ.get('OPENAI_API_KEY')

In [None]:
def get_completion(prompt, model="gpt-3.5-turbo", temperature=0):
    """Send one user message to the chat completion endpoint and return the reply text.

    Args:
        prompt: Text sent as the content of the single "user" message.
        model: Name of the chat model to query.
        temperature: Degree of randomness of the model's output. Defaults to
            0, the value this helper used to hard-code.

    Returns:
        The "content" entry of the first choice's message.
    """
    # Andrew mentioned that the prompt/completion paradigm is preferable for this class.
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=temperature,
    )
    return response.choices[0].message["content"]

In [23]:
# Raw job titles to classify. Note that the first two titles are separated
# only by spaces, not by a comma.
text = f"""photographe   infographiste, chief scientist    , safety manager"""
# Prompt sent to the model: the titles are interpolated between triple backticks,
# followed by the instructions and the numbered list of allowed job functions (2-18).
prompt = f"""
Categorize each of the job title ```{text}``` deliminated by ',' into one of the following job functions. 
Make sure to use only the functions from the list, and use the exact name of the job functions from the following list, and reference the number. 
Put them in csv format, with column names as :   job title, functions, number
 2 finance              
 3 engineering          
 4 customer success     
 5 retail               
 6 operations           
 7 design               
 8 sales                
 9 marketing            
10 administration       
11 support              
12 product              
13 general management   
14 HR                   
15 commercial management
16 analytics            
17 recruiting           
18 legal 
"""

In [24]:
# Send the categorization prompt through get_completion with its default model and print the reply.
response = get_completion(prompt)
print(response)

job title, functions, number
photographe, design, 7
infographiste, design, 7
chief scientist, analytics, 16
safety manager, operations, 6


# LangChain

In [13]:
from langchain.llms import OpenAI

In [14]:
# Categorization should be repeatable, so sample with temperature 0
# (the value get_completion uses) instead of 0.9.
llm = OpenAI(temperature=0)

In [25]:
# Pass the prompt string to the LangChain LLM wrapper and print the returned completion.
print(llm(prompt))


photographe, design, 7
infographiste, design, 7
chief scientist, analytics, 16
safety manager, administration, 10


## Use Chain

https://python.langchain.com/en/latest/getting_started/getting_started.html

In [6]:
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI

# Categorization should be repeatable, so sample with temperature 0 instead of 0.9.
llm = OpenAI(temperature=0)
# Template form of the categorization prompt; the {titles} placeholder is the
# single declared input variable.
# NOTE(review): this rebinds `prompt`, which earlier cells use as a plain string.
prompt = PromptTemplate(
    input_variables=["titles"],
    template= """Categorize each of the job title {titles} deliminated by ',' into one of the following job functions. 
    Make sure to use only the functions from the list, and use the exact name of the job functions from the following list, and reference the number. 
    Put them in csv format, with column names as :   job title, functions, number
    2 finance              
 3 engineering          
 4 customer success     
 5 retail               
 6 operations           
 7 design               
 8 sales                
 9 marketing            
10 administration       
11 support              
12 product              
13 general management   
14 HR                   
15 commercial management
16 analytics            
17 recruiting           
18 legal """,
)

In [8]:
from langchain.chains import LLMChain
# Build a chain from the LLM and the prompt template defined above.
chain = LLMChain(llm=llm, prompt=prompt)

In [9]:
# Run the chain on the raw titles; the single positional argument is presumably
# bound to the template's only input variable, `titles` (confirm against LangChain docs).
chain.run("photographe   infographiste, chief scientist    , safety manager")

'\n\nphotographe, design, 7 \ninfographiste, design, 7 \nchief scientist, engineering, 3 \nsafety manager, operations, 6'

## Indexes
https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/csv.html
https://js.langchain.com/docs/modules/indexes/document_loaders/examples/file_loaders/csv

In [None]:
# load CSV
from langchain.document_loaders.csv_loader import CSVLoader
# Configure the loader: delimiter, quote character and explicit column names
# are passed through as CSV parsing options.
# NOTE(review): with `fieldnames` supplied, the file's first row is presumably
# read as data rather than as a header; confirm against the loader docs.
loader = CSVLoader(
    file_path='./example_data/mlb_teams_2012.csv',
    csv_args=dict(
        delimiter=',',
        quotechar='"',
        fieldnames=['MLB Team', 'Payroll in millions', 'Wins'],
    ),
)

# Read the file's contents through the loader.
data = loader.load()