In [2]:
import openai
import pandas as pd
import numpy as np
import json
import os
import re
import time
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.prompts.prompt import PromptTemplate #where we change the AI "personality"
from dotenv import load_dotenv, find_dotenv

In [3]:
# Pull variables from a local .env file into the process environment, then
# read the OpenAI key from there (None when the variable is not set).
load_dotenv()
API_KEY = os.getenv("OPENAI_API_KEY")

In [4]:
# Locate the nearest .env file and load it; the boolean result is discarded.
_ = load_dotenv(find_dotenv())

# Model identifiers this notebook can choose from.
COMPLETIONS_MODEL = "text-davinci-002"
BETTER_COMPLETIONS_MODEL = "text-davinci-003"  # preferred of the two completion models
LONG_MODEL = "gpt-3.5-turbo-16k"
REGULAR_MODEL = "gpt-3.5-turbo"
GPT_4 = "gpt-4"

# Chat client used for every categorization request. Swap `model_name` for
# one of the constants above depending on how big the task is.
chat = ChatOpenAI(
    model_name=REGULAR_MODEL,
    openai_api_key=API_KEY,
    temperature=0.0,
    verbose=True,
)

# Conversation template that gives the AI its "personality".
template = """The following is a conversation between a human data scientist and an AI who specializes in data categorization. The AI is direct and provides concise responses. If the AI does not know the answer to a question, it truthfully says it does not know.

Current conversation:
{history}
Data Scientist: {input}
AI:"""

In [5]:
# Make the project folder the working directory, then echo it as a sanity check.
# NOTE(review): this is a machine-specific absolute path; the relative
# 'data/...' path used when saving results presumably resolves against it — confirm.
os.chdir('/Users/chrissoria/Documents/Research/openai_categorizations')
current_directory = os.getcwd()
print(current_directory)

/Users/chrissoria/Documents/Research/openai_categorizations


In [6]:
survey_participant_input = "a19i" #enter column name here

# Read only the configured column from the JOINT_DATA sheet.
raw_responses = pd.read_excel("/Users/chrissoria/Documents/Research/UCNets_Classification/data/Raw_Cond_for_Coding_all_waves.xlsx", engine='openpyxl',sheet_name="JOINT_DATA",usecols=[survey_participant_input])
raw_responses = raw_responses[survey_participant_input].dropna().unique()  # Drop NaN values and get unique elements

# All distinct raw answers joined into one string (what we will feed to the model).
survey_participant_responses = '; '.join(str(item) for item in raw_responses)

# Normalise the text: everything as lower-case strings without surrounding whitespace.
UCNets = pd.DataFrame(raw_responses, columns=[survey_participant_input])
UCNets[survey_participant_input] = UCNets[survey_participant_input].astype(str).str.lower().str.strip()
UCNets = UCNets[UCNets[survey_participant_input] != ''] #trimming all empty rows

# De-duplicate AFTER normalising: answers that differed only by case or
# surrounding whitespace are identical at this point and would otherwise be
# classified (and paid for) more than once.
UCNets = UCNets.drop_duplicates(subset=[survey_participant_input]).reset_index(drop=True)

# Keep only the first 200 responses for this run.
UCNets = UCNets.iloc[:200]

UCNets.head()

Unnamed: 0,a19i
0,relocated back to east coast - closer to my sons
1,move in together with my partner
2,"out of living with my friends, and into living..."
3,to take a new job in new york city (both becau...
4,wanted to live in my own place outside my pare...


In [7]:
# Use the second response as a worked example while designing the prompt.
# The column comes from `survey_participant_input` so that changing the
# configured column does not break this cell.
survey_input = UCNets[survey_participant_input].iloc[1]

# Numbered category definitions handed to the model. The trailing backslashes
# join the source lines, so this is one single-line string at runtime.
category = """1. Associated with starting to live with or continuing to live with a partner or spouse \
for the sake of maintaining a relationship are encompassed in this category. \
It includes situations where individuals choose to move in with their partner or spouse or \
decide to continue cohabitation with them. \
2. Related to the end of a romantic relationship, including divorce, \
breakup, or other similar circumstances that result in the termination of the relationship. \
3. Associated with the person's job, school, or career. It covers situations such as job transfers, \
retirement, starting a new job, or wanting to be closer to the workplace for convenience or commuting purposes. \
4. Associated with the partner's job, school, or career. It covers situations such as partner job transfers, \
retirement, starting a new job, or wanting to be closer to the workplace for convenience or commuting purposes. \
5. Related to financial factors influencing housing decisions. It includes situations such as experiencing \
an increase in rent, being unable to afford the current housing expenses, finding a more affordable option, \
and receiving a pay raise that allows for a change in housing arrangements. \
6. Reasons related to specific housing features and preferences. It includes motivations such \
as the desire to purchase a house, downsizing to a smaller place, acquiring a larger residence, \
seeking a better-quality house, and the preference for having a yard."""

# Format example shown to the model. Every value is a quoted string so the
# example is type-consistent (it previously mixed "1" with a bare 1, which
# invites mixed str/int values in the parsed category columns).
example_JSON = """{ \
"1": "0", \
"2": "1", \
"3": "0", \
"4": "1", \
"5": "0", \
"6": "0"
}"""

template_string = """A survey respondent was asked, "Why did you move?" \
They responded with: "{OBJECT}" \
First, please determine how many of the following reasons for moving are in the response: \
{CATEGORY} \
Next, provide your answer as a 1 if yes and a 0 if no in JSON format \
Here's an example of how you should format your response: \
{EXAMPLE}"""

prompt_template = ChatPromptTemplate.from_template(template_string)
prompt_template.messages[0].prompt # the underlying prompt object (only displayed when run as the last line of a cell)

# Fill the template for the sample response and print the resulting message.
GPT_Responses = prompt_template.format_messages(
                    OBJECT=survey_input,
                    CATEGORY=category,
                    EXAMPLE=example_JSON)

print(GPT_Responses[0])

content='A survey respondent was asked, "Why did you move?" They responded with: "move in together with my partner" First, please determine how many of the following reasons for moving are in the response: 1. Associated with starting to live with or continuing to live with a partner or spouse for the sake of maintaining a relationship are encompassed in this category. It includes situations where individuals choose to move in with their partner or spouse or decide to continue cohabitation with them. 2. Related to the end of a romantic relationship, including divorce, breakup, or other similar circumstances that result in the termination of the relationship. 3. Associated with the person\'s job, school, or career. It covers situations such as job transfers, retirement, starting a new job, or wanting to be closer to the workplace for convenience or commuting purposes. 4. Associated with the partner\'s job, school, or career. It covers situations such as partner job transfers, retirement,

In [8]:
# Smoke test: send the single formatted prompt to the chat model and print
# the text of its reply.
TEST = chat(GPT_Responses)
print(TEST.content)

{ "1": "1", "2": "0", "3": "0", "4": "0", "5": "0", "6": "0" }


In [9]:
# Classify every response: build the prompt for the row, send it to the chat
# model and store the raw reply text in the 'JSON' column of the same row.
# Rows are addressed by index label (matching the label-based `.at` write)
# and the text column comes from `survey_participant_input`, so the loop
# keeps working when a different survey column is configured.
for i in UCNets.index:
    survey_input = UCNets.at[i, survey_participant_input]
    messages = prompt_template.format_messages(
                    OBJECT=survey_input,
                    CATEGORY=category,
                    EXAMPLE=example_JSON)

    response = chat(messages)

    # Assuming a successful attempt means a non-empty response
    if response.content:
        print(f"Successful attempt for row number: {i}")

    UCNets.at[i, 'JSON'] = response.content

Successful attempt for row number: 0
Successful attempt for row number: 1
Successful attempt for row number: 2
Successful attempt for row number: 3
Successful attempt for row number: 4
Successful attempt for row number: 5
Successful attempt for row number: 6
Successful attempt for row number: 7
Successful attempt for row number: 8
Successful attempt for row number: 9
Successful attempt for row number: 10
Successful attempt for row number: 11
Successful attempt for row number: 12
Successful attempt for row number: 13
Successful attempt for row number: 14
Successful attempt for row number: 15
Successful attempt for row number: 16
Successful attempt for row number: 17
Successful attempt for row number: 18
Successful attempt for row number: 19
Successful attempt for row number: 20
Successful attempt for row number: 21
Successful attempt for row number: 22
Successful attempt for row number: 23
Successful attempt for row number: 24
Successful attempt for row number: 25
Successful attempt for

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600).


Successful attempt for row number: 56
Successful attempt for row number: 57
Successful attempt for row number: 58
Successful attempt for row number: 59
Successful attempt for row number: 60
Successful attempt for row number: 61
Successful attempt for row number: 62
Successful attempt for row number: 63
Successful attempt for row number: 64
Successful attempt for row number: 65
Successful attempt for row number: 66
Successful attempt for row number: 67
Successful attempt for row number: 68
Successful attempt for row number: 69
Successful attempt for row number: 70
Successful attempt for row number: 71
Successful attempt for row number: 72
Successful attempt for row number: 73
Successful attempt for row number: 74
Successful attempt for row number: 75
Successful attempt for row number: 76
Successful attempt for row number: 77
Successful attempt for row number: 78
Successful attempt for row number: 79
Successful attempt for row number: 80
Successful attempt for row number: 81
Successful a

In [10]:
# Parse each stored reply into a one-row frame of category flags.
# Every parsed row keeps the index label of the response it came from, so a
# later `pd.concat(..., axis=1)` with UCNets lines the flags up with the right
# response even when some replies could not be parsed (those rows simply get
# NaN). Renumbering the successful rows 0..k-1 would shift every row after a
# failure onto the wrong response.
normalized_data_list = []
error_lines = []  # positions (0-based) of replies that could not be parsed

for i, json_str in enumerate(UCNets['JSON']):
    try:
        parsed_obj = json.loads(json_str)
    except (json.JSONDecodeError, TypeError):
        # JSONDecodeError: reply is not valid JSON.
        # TypeError: the cell holds a non-string value (e.g. NaN / None).
        error_lines.append(i)
        continue

    if not isinstance(parsed_obj, dict):
        # Valid JSON but not an object (a bare number, list, ...): unusable.
        error_lines.append(i)
        continue

    row = pd.json_normalize(parsed_obj)
    row.index = [UCNets.index[i]]
    normalized_data_list.append(row)

# Concatenate the normalized data into one DataFrame (empty when nothing
# parsed, because pd.concat raises on an empty list).
if normalized_data_list:
    normalized_data = pd.concat(normalized_data_list)
else:
    normalized_data = pd.DataFrame(index=UCNets.index[:0])

error_lines

[]

In [11]:
# Attach the parsed category columns to the responses, side by side.
# Any category columns left over from a previous run of this cell are dropped
# first, so re-running the cell replaces them instead of appending duplicate
# '1'..'6' columns.
UCNets = pd.concat(
    [UCNets.drop(columns=normalized_data.columns, errors='ignore'), normalized_data],
    axis=1)
UCNets

Unnamed: 0,a19i,JSON,1,2,3,4,5,6
0,relocated back to east coast - closer to my sons,"{ ""1"": ""0"", ""2"": ""0"", ""3"": ""0"", ""4"": ""0"", ""5"":...",0,0,0,0,0,0
1,move in together with my partner,"{ ""1"": ""1"", ""2"": ""0"", ""3"": ""0"", ""4"": ""0"", ""5"":...",1,0,0,0,0,0
2,"out of living with my friends, and into living...","{ ""1"": ""1"", ""2"": ""0"", ""3"": ""0"", ""4"": ""0"", ""5"":...",1,0,0,0,0,0
3,to take a new job in new york city (both becau...,"{ ""1"": ""0"", ""2"": ""0"", ""3"": ""1"", ""4"": ""0"", ""5"":...",0,0,1,0,0,0
4,wanted to live in my own place outside my pare...,"{ ""1"": ""0"", ""2"": ""0"", ""3"": ""0"", ""4"": ""0"", ""5"":...",0,0,0,0,0,0
...,...,...,...,...,...,...,...,...
195,opportunity came up,"{ ""1"": ""0"", ""2"": ""0"", ""3"": ""1"", ""4"": ""0"", ""5"":...",0,0,1,0,0,0
196,we preferred the environment/climate in anothe...,"{ ""1"": ""0"", ""2"": ""0"", ""3"": ""0"", ""4"": ""0"", ""5"":...",0,0,0,0,0,1
197,we wanted to live in a location we'd like more.,"{ ""1"": ""0"", ""2"": ""0"", ""3"": ""0"", ""4"": ""0"", ""5"":...",0,0,0,0,0,1
198,downsizing/retiring,"{ ""1"": ""0"", ""2"": ""0"", ""3"": ""0"", ""4"": ""0"", ""5"":...",0,0,0,0,0,1


In [12]:
# Save the categorized responses. The file name is derived from the configured
# survey column so results for different columns do not overwrite each other,
# and the output folder is created if it does not exist yet.
os.makedirs('data', exist_ok=True)
UCNets.to_csv(f'data/{survey_participant_input}_all_categories_descriptive.csv', index=False)