In [3]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import FewShotChatMessagePromptTemplate
from langchain_openai import ChatOpenAI
import os

from sklearn.metrics import mean_squared_error 

import pandas as pd
import numpy as np
import re
import neptune

# Example

In [105]:
prompt = ChatPromptTemplate.from_template("tell me a short joke about {topic}")
model = ChatOpenAI(model="gpt-3.5-turbo-1106", openai_api_key="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")
output_parser = StrOutputParser()

chain = prompt | model | output_parser

chain.invoke({"topic": "bread"})

'Why did the slice of bread go to the doctor? Because it was feeling crumby!'

# Parameters

In [12]:
post_type = 'post_travel'
narcism_type = 'adm'
model_used = "gpt-3.5-turbo-1106"
iterations = 10
number_of_shots = 5 # somewhere between 3 and 10
model_role = "You are a psychologist and you are assessing a patient's Narcissism. The patient is talking about their recent travel. Return only float number between 1 and 6."
train_path = "../data/split/train_data.csv"
validate_path = "../data/split/validate_data.csv"

# Code

Here we used the most basic implementation, there is also option to use Dynamic few-shot prompting, but to my knowledge is not needed is this context as we have only one type of posts.

In [124]:
# Get split data using pandas
path = "../data/split/train_data.csv"
df = pd.read_csv(path)

# Get the dictionary of the first x posts
example = df[[post_type,narcism_type]].iloc[0:number_of_posts]

example = example.to_dict(orient='records')

# Change the value name
for i in range(len(example)):
    example[i]['post'] = example[i].pop(post_type)
    example[i]['narcissism'] = example[i].pop(narcism_type)

example

[{'post': 'I wish I could travel 24/7 and get paid for it',
  'narcissism': 1.444},
 {'post': "Vacations are pricey these days but so worth it! I had the most amazing weekend at ABC resort. Everything about this place screams relaxation and luxury. I'm definitely going back next year. Would you like to come with me?",
  'narcissism': 3.889},
 {'post': 'I recently visited beautiful Stratford upon Avon as a pit-stop on my way to Minehead, Somerset. I made a point to leave my immediate surroundings and find the birthplace of Shakespeare. I found it interesting but ultimately over-commercialised.',
  'narcissism': 3.444}]

In [10]:
print(os.environ.get('OPENAI_API_KEY'))

None


In [11]:
# TODO - Load the API key from the environment
model = ChatOpenAI(model=model_used, openai_api_key="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")


In [139]:
# This is a prompt template used to format each example.
example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{post}"),
        ("ai", "result: {narcissism}"),
    ]
)
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=example,
)

# print(few_shot_prompt.format())

#### Use train and validate dataset!!!

In [140]:
test = df[[post_type,narcism_type]].iloc[4]
input = test.iloc[0]
input

"I travel a lot for work, and I get to see all sorts of cool places. I didn't think the American south had as many hidden gems as it did, I think it's a beautiful region with some crazy cool people."

In [171]:
final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Return a narcissism number between 1 and 6."),
        few_shot_prompt,
        ("human", "{input}"),
    ]
)

final_prompt

ChatPromptTemplate(input_variables=['input'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template="You are a psychologist and you are assessing a patient's Narcissism. The patient is talking about their recent travel. Return a number between 1 and 6.")), FewShotChatMessagePromptTemplate(examples=[{'post': 'Looking forward to relaxing and new experiences on my travels', 'narcissism': 3.222}, {'post': 'Had the best time in Tenerife!', 'narcissism': 1.778}, {'post': 'Visiting Canada was amazing! So many wonderful landscapes and fabulous things to do. Grateful for the opportunity to share this with my family.', 'narcissism': 2.333}], example_prompt=ChatPromptTemplate(input_variables=['narcissism', 'post'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['post'], template='{post}')), AIMessagePromptTemplate(prompt=PromptTemplate(input_variables=['narcissism'], template='narcissism: {narcissism}'))])), HumanMessagePromptTemplat

In [143]:
chain = final_prompt | model

ai_message = chain.invoke({"input": input})

ai_message


AIMessage(content='result: 2.778', response_metadata={'finish_reason': 'stop', 'logprobs': None})

In [150]:
r = get_float(ai_message.content)
r

2.778

# Analyze the results

In [133]:
response = ai_message.content

response

'3.111'

In [132]:
test.iloc[1]

1.222

In [99]:
y_pred = []
y_true = []
y_pred.append(response)
y_true.append(test.iloc[1])

In [100]:
mse = mean_squared_error(y_true=y_true, y_pred=y_pred)
mse

4.0

# Implementation

In [17]:
# functions

# get random x posts
def get_random_x_posts(path, post_type, narcism_type, number_of_posts):

    df = pd.read_csv(path)
    example = df[[post_type,narcism_type]].sample(number_of_posts)
    example = example.to_dict(orient='records')

    # Change the value name
    for i in range(len(example)):
        example[i]['post'] = example[i].pop(post_type)
        example[i]['narcissism'] = example[i].pop(narcism_type)

    return example

# create a few shot prompt
def create_few_shot_prompt(example):
    example_prompt = ChatPromptTemplate.from_messages(
        [
            ("human", "{post}"),
            ("ai", "narcissism: {narcissism}"),
        ]
    )
    few_shot_prompt = FewShotChatMessagePromptTemplate(
        example_prompt=example_prompt,
        examples=example,
    )
    return few_shot_prompt

# create a final prompt
def create_final_prompt(few_shot_prompt,model_role):
    final_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", model_role),
            few_shot_prompt,
            ("human", "{input}"),
        ]
    )
    return final_prompt

# get float number from a string
def get_float(text):
    # Use regular expression to find numerical value
    match = re.search(r'\d+\.\d+', text)
    if match:
        float_number = float(match.group())
        return float_number
    else:
        return None

# get the response
def get_response(final_prompt, model, input):
    chain = final_prompt | model
    ai_message = chain.invoke({"input": input})
    response = ai_message.content
    print(response)
    # get a float number from a string
    response = get_float(response)
    return response

# get the mean squared error
def get_mse(y_pred, y_true):
    mse = mean_squared_error(y_true=y_true, y_pred=y_pred)
    return mse

Add Neptune experiment observation

In [None]:
# Run the functions
# TODO: Set API token in the environment variable
run = neptune.init_run(project = "NarcisissticTwitter/Twitter",
                       api_token = "eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJmYmEzZjU5ZS1kZDIzLTQwNTEtYjQ4Ni1hYTlhMTFjY2YzMjIifQ==")
# TODO: Add logging of input posts (and responses?) to Neptune
run["algorithm"] = "Few-shot learning"
params = {
    "model": model_used,
    "narc_type": narcism_type,
    "post_type": post_type,
    "prompt": model_role,
    "shots": number_of_shots
}
run["model/parameters"] = params
run.add_tags(["few-shot", "narcissism", narcism_type])
y_pred = []
y_true = []
for i in range(iterations):
    example = get_random_x_posts(train_path, post_type, narcism_type, number_of_shots)
    few_shot_prompt = create_few_shot_prompt(example)
    input_dic = get_random_x_posts(validate_path, post_type, narcism_type, 1)
    input = input_dic.pop(0)
    final_prompt = create_final_prompt(few_shot_prompt,model_role)
    response = get_response(final_prompt, model, input.get('post'))
    y_pred.append(response)
    y_true.append(input.get('narcissism'))

mse = get_mse(y_pred, y_true)
print(mse)
run["mse"] = mse
run.stop()

[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/NarcisissticTwitter/Twitter/e/TWIT-22
narcissism: 3.333
narcissism: 2.111
narcissism: 3.556
narcissism: 2.111
narcissism: 3.222
narcissism: 2.444
narcissism: 2.222
narcissism: 1.111
narcissism: 2.111
narcissism: 1.222
1.5430125
[neptune] [info   ] Shutting down background jobs, please wait a moment...
[neptune] [info   ] Done!
[neptune] [info   ] Waiting for the remaining 1 operations to synchronize with Neptune. Do not kill this process.
[neptune] [info   ] All 1 operations synced, thanks for waiting!
[neptune] [info   ] Explore the metadata in the Neptune app: https://app.neptune.ai/NarcisissticTwitter/Twitter/e/TWIT-22/metadata


[neptune] [error  ] Run TWIT-21 received stop signal. Exiting
[neptune] [info   ] Shutting down background jobs, please wait a moment...
[neptune] [info   ] Done!
[neptune] [info   ] All 0 operations synced, thanks for waiting!
[neptune] [info   ] Explore the metadata in the Neptune app: https://app.neptune.ai/NarcisissticTwitter/Twitter/e/TWIT-21/metadata


: 