In [25]:
"""
Experiments with 
Explainable Verbal Deception Detection using TransformersLoukas Ilias1, Felix Soldner2,3, and Bennett Kleinberg3,4
"""
import openai
import os

# from dotenv import load_dotenv, find_dotenv

# _ = load_dotenv(find_dotenv())

# openai.api_key = os.getenv('OPENAI_API_KEY')
openai.api_key = os.environ["OPENAI_API_KEY"]

new_line = '\n'

In [26]:
import pandas as pd
df = pd.read_csv ('sign_events_data_statements.csv')
# simple EDA
print(df)
print(df.columns)
print(f'shape: {df.shape}')  # should be 1640 x 6


def filter_by_class(df, category):
   return df[df['outcome_class']== category]

truth_df = filter_by_class(df, 't')
# print(truth_df)
print(f'truth df shape: {truth_df.shape}')  # should be 1640 x 6

# replace with a more expressive word, truthful
truth_df['outcome_class'] = df['outcome_class'].replace('t','truthful')
print(truth_df)

deceit_df = filter_by_class(df, 'd')
# print(deceit_df)
print(f'deceit df shape: {deceit_df.shape}')  # should be 1640 x 6

# replace with a more expressive word, deceitful
deceit_df['outcome_class'] = df['outcome_class'].replace('d','deceitful')
print(deceit_df)

# pick random non-repeating rows
def randon_non_repeating(df, quantity):
    import random
    rand_df = pd.DataFrame()
    random_list = random.sample(range(df.shape[0]), quantity)
    print("non-repeating random numbers are:")
    return df.iloc[random_list]

nb_samples_of_each = 5
random_truth_df = randon_non_repeating(truth_df, nb_samples_of_each)
print(f'random truth list:\n, {random_truth_df}')

random_deceit_df = randon_non_repeating(deceit_df, nb_samples_of_each)
print(f'random deceit list:\n, {random_deceit_df}')

random_truth_deceit_df = pd.concat([random_truth_df, random_deceit_df])



                                              signevent   
0                                   My brothers wedding  \
1                    Going to collect 2 new pet rabbits   
2     Getting dinner with my friend Shan and my boyf...   
3     mountain bike ride with my boyfriend and daughter   
4     I will be going to the cat cafe in Glasgow on ...   
...                                                 ...   
1635  Taking part in a 6 mile walk with a friend of ...   
1636               Taking my Nephew to a Zoology museum   
1637  Me and my best friend Cara are going to the zo...   
1638                          The RAF 100 Bicycle Trail   
1639        Going to a drum and bass rave with friends    

                                                     q1   
0     My little brother is getting married next Satu...  \
1     I will be driving for 80-90 minutes across Lon...   
2     We were planning to get dinner somewhere near ...   
3     We are going to cannock chase with the mountai...

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  truth_df['outcome_class'] = df['outcome_class'].replace('t','truthful')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  deceit_df['outcome_class'] = df['outcome_class'].replace('d','deceitful')


In [27]:
def get_completion(prompt, model="gpt-3.5-turbo"):
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0, # this is the degree of randomness of the model's output
    )
    return response.choices[0].message["content"]

In [28]:
intro = f"""
You are a detective presented with three pieces of information \
by a person from which you judge whether the response is truthful or deceitful.  
(1) The title of an activity. \
(2) Response to question #1: “Please describe your activity as specific as possible” \
(3) Response to question #2: “What information can you give us to reassure us that you are telling the truth” \
Here are a few examples:
"""

def construct_scenario(row):
    event_header = 'Event:'
    activity_description_header = 'Description of activity by person:'

    activity_reassurance_header = 'Information to reassure you that the person is telling the truth about their activity:'

    event = event_header + new_line + row['signevent'] + new_line
    q1 = activity_description_header + new_line + row['q1'] + new_line
    q2 = activity_reassurance_header + new_line + row['q2'] + new_line
    return event + q1 + q2

def construct_outcome(row):
    outcome = "Your Judgement:"
    return outcome + new_line + row['outcome_class'] + new_line * 2
    

def construct_few_shot_prompt(df, infer_row):
    messages = []
    messages.append(intro)

    for _, row in df.iterrows():
        messages.append(construct_scenario(row))
        messages.append(construct_outcome(row))
    
    messages.append(construct_scenario(infer_row))
    messages.append(construct_outcome(infer_row)) # has to have a black outcome to be filled by the llm
    return messages

infer_row = df.iloc[1171]  # pick a random row
print(f'Inferring the `class_outcome` for:\n{infer_row}')
ground_truth = 'truthful' if infer_row['outcome_class'] == 't' else 'deceitful'
# mask the `outcome_class` field since you want to predict it
infer_row['outcome_class'] = ''

prompt = construct_few_shot_prompt(random_truth_deceit_df, infer_row=infer_row)
prompt = ''.join(prompt)



Inferring the `class_outcome` for:
signevent        I am going to meet with my father in law who i...
q1                I have just had a great phone call from my hu...
q2               I can give you details of the phone call\nscre...
unid                                                      rj485773
id                                                            1172
outcome_class                                                    d
Name: 1171, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  infer_row['outcome_class'] = ''


In [29]:
print(f'{prompt}')
MODEL = "gpt-3.5-turbo"
response = get_completion(
    prompt=prompt,
    model=MODEL,
)    
        
print(f'ENTIRE RESPONSE:\n {response}')




You are a detective presented with the desciption of an activity and a supporting statement to reassure you that the description is truthful. You have to decide whether the description is truthful or deceitful.
Event:
I will be picking my daughter up from Manchester airport on Thursday at 7.55am, she has been away for 12 months travelli
Description of activity by person:
I will be picking my daughter up from Manchester airport on Thursday at 7.55am she has been travelling for 12 months from South America to Alaska, I can’t wait to see her! 
Information to reassure you that the person is telling the truth about their activity:
She is on flight mt2710 from Vegas to Manchester, she has to had to fly anchorage to Denver, Denver to Vegas then Vegas to Manchester 
Your Judgement:
truthful

Event:
Redecorating the bathroom with a cork finish for more comfort.
Description of activity by person:
The house is old and insulation is poor, the intent is to line out the bathroom internally with cor