In [1]:
"""
Experiments with dataset in paper,
Explainable Verbal Deception Detection using Transformers
Loukas Ilias, Felix Soldner and Bennett Kleinberg
uses LIWC-15
"""
import openai
import os

openai.api_key = os.environ["OPENAI_API_KEY"]  # source the ~/.zshrc file

# https://platform.openai.com/docs/guides/rate-limits/error-mitigation
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)  # for exponential backoff

# constants, until you change them ;-)
new_line = '\n'
nb_test_samples = 20
nb_few_shot_samples_of_each_class =3 # truth and deception
delimiter = '```\n'
# MODEL = "gpt-3.5-turbo"
# MODEL = "text-davinci-003"
MODEL = "gpt-4"


#### Get the dataset

In [2]:
import pandas as pd
df = pd.read_csv ('LIWC-15 Results - sign_events_data_statements - LIWC Analysis')
# simple EDA
# print(df)
# print(df.columns)
print(f'shape: {df.shape}')  # should be 1640 x 6
df.head


shape: (1640, 101)


<bound method NDFrame.head of                                               signevent   
0                                   My brothers wedding  \
1                    Going to collect 2 new pet rabbits   
2     Getting dinner with my friend Shan and my boyf...   
3     mountain bike ride with my boyfriend and daughter   
4     I will be going to the cat cafe in Glasgow on ...   
...                                                 ...   
1635  Taking part in a 6 mile walk with a friend of ...   
1636               Taking my Nephew to a Zoology museum   
1637  Me and my best friend Cara are going to the zo...   
1638                          The RAF 100 Bicycle Trail   
1639         Going to a drum and bass rave with friends   

                                                     q1   
0     My little brother is getting married next Satu...  \
1     I will be driving for 80-90 minutes across Lon...   
2     We were planning to get dinner somewhere near ...   
3     We are going to can

#### Get the LIWC markers

In [3]:

liwc_15 = pd.read_csv ('LIWC-15 Results - sign_events_data_statements - LIWC Analysis')
# simple EDA

print(f'shape: {liwc_15.shape}')  # should be 1640, 
print(liwc_15.columns)
cols = sorted(liwc_15.columns)
nb_attrib_per_line = 10
print_buf = []
for i, attrib in enumerate(cols):
    print_buf.append(attrib)
    if i % nb_attrib_per_line == 0:
        print(print_buf)
        print_buf = []

truth_markers = df[['ingest', 'bio', 'Analytic', 'number', 'leisure', 'focusfuture']]
deception_markers = df[['Apostro', 'focuspast', 'reward', 'WC', 'pronoun', 'ppron', 'Exclam', 'Tone']]
liwc_markers = df[['ingest', 'bio', 'Analytic', 'number', 'leisure', 'Apostro', 'focuspast', 'reward', 'WC', 'pronoun']]
# print(truth_markers)
# print(deception_markers)
print(liwc_markers)

shape: (1640, 101)
Index(['signevent', 'q1', 'q2', 'unid', 'id', 'outcome_class', 'Segment', 'WC',
       'Analytic', 'Clout',
       ...
       'Colon', 'SemiC', 'QMark', 'Exclam', 'Dash', 'Quote', 'Apostro',
       'Parenth', 'OtherP', 'Emoji'],
      dtype='object', length=101)
['AllPunc']
['Analytic', 'Apostro', 'Authentic', 'Clout', 'Colon', 'Comma', 'Dash', 'Dic', 'Emoji', 'Exclam']
['OtherP', 'Parenth', 'Period', 'QMark', 'Quote', 'Segment', 'SemiC', 'Sixltr', 'Tone', 'WC']
['WPS', 'achieve', 'adj', 'adverb', 'affect', 'affiliation', 'anger', 'anx', 'article', 'assent']
['auxverb', 'bio', 'body', 'cause', 'certain', 'cogproc', 'compare', 'conj', 'death', 'differ']
['discrep', 'drives', 'family', 'feel', 'female', 'filler', 'focusfuture', 'focuspast', 'focuspresent', 'friend']
['function', 'health', 'hear', 'home', 'i', 'id', 'informal', 'ingest', 'insight', 'interrog']
['ipron', 'leisure', 'male', 'money', 'motion', 'negate', 'negemo', 'netspeak', 'nonflu', 'number']
['outcome_c

#### Some quick test to see how the truth/deceit markers are bahaving

In [4]:
import textwrap
import json
# TODO see if this can be more friendlier if GPT does not know about LIWC
def construct_liwc_attributes_json(row):
    # print('class:', 'truthful' if df.iloc[row]['outcome_class'] == 't' else 'deceptive')
    # print(liwc_markers.iloc[row])
    # print(f'q1:\n {textwrap.fill(df.iloc[row]["q1"], 100)}')
    # print(f'q2:\n {textwrap.fill(df.iloc[row]["q2"], 100)}')
    
    attributes = ['ingest', 'bio', 'Analytic', 'number', 'leisure', 'Apostro', 'focuspast', 'reward', 'WC', 'pronoun']
    data = {}
    for attribute in attributes:
        data[attribute] = str(row[attribute])
    
    return json.dumps(data)

print(df.iloc[0])
print(construct_liwc_attributes_json(df.iloc[0].copy()))

signevent                                  My brothers wedding
q1           My little brother is getting married next Satu...
q2           My brother and Kate have a daughter who will b...
unid                                                  FU304384
id                                                           1
                                   ...                        
Quote                                                      0.0
Apostro                                                    0.0
Parenth                                                    0.0
OtherP                                                     0.0
Emoji                                                      0.0
Name: 0, Length: 101, dtype: object
{"ingest": "0.0", "bio": "0.88", "Analytic": "27.78", "number": "1.77", "leisure": "1.77", "Apostro": "0.0", "focuspast": "4.42", "reward": "3.54", "WC": "113", "pronoun": "15.04"}


In [5]:
def filter_by_class(df, category):
   return df[df['outcome_class']== category]

truth_df = filter_by_class(df, 't')
# print(truth_df)
print(f'truth df shape: {truth_df.shape}')  # should be 1640 x 6

# replace with a more expressive word, truthful
truth_df['outcome_class'] = df['outcome_class'].replace('t','truthful')
print(truth_df)

deceit_df = filter_by_class(df, 'd')
# print(deceit_df)
print(f'deceit df shape: {deceit_df.shape}')  # should be 1640 x 6

# replace with a more expressive word, deceitful
deceit_df['outcome_class'] = df['outcome_class'].replace('d','deceptive')
print(deceit_df)

# pick random non-repeating rows
def pick_randon_non_repeating(df, quantity):
    import random
    rand_df = pd.DataFrame()
    random_list = random.sample(range(df.shape[0]), quantity)
    print("non-repeating random numbers are:")
    return df.iloc[random_list], random_list

random_truth_df, truth_indices_list = pick_randon_non_repeating(truth_df, nb_few_shot_samples_of_each_class)
print(f'random truth list:\n, {random_truth_df}')
print(f'truth indices:" {truth_indices_list}')

random_deceit_df, deceit_indices_list = pick_randon_non_repeating(deceit_df, nb_few_shot_samples_of_each_class)
print(f'random deceit list:\n, {random_deceit_df}')
deceit_indices_list = [x + truth_df.shape[0] for x in deceit_indices_list] # do this to exclude from poriginal list
print(f'deceit indices: {deceit_indices_list}')

random_truth_deceit_df = pd.concat([random_truth_df, random_deceit_df])
few_shot_list = truth_indices_list + deceit_indices_list
print(f'truth + deceit indices" {few_shot_list}')



truth df shape: (783, 101)
                                             signevent   
0                                  My brothers wedding  \
1                   Going to collect 2 new pet rabbits   
2    Getting dinner with my friend Shan and my boyf...   
3    mountain bike ride with my boyfriend and daughter   
4    I will be going to the cat cafe in Glasgow on ...   
..                                                 ...   
778                          Go to NJ to visit cousins   
779  I am going to visit my mum and gran who I've n...   
780                                           Swimming   
781                    Going for coffee with the girls   
782                    Going to the cinema with Yasmin   

                                                    q1   
0    My little brother is getting married next Satu...  \
1    I will be driving for 80-90 minutes across Lon...   
2    We were planning to get dinner somewhere near ...   
3    We are going to cannock chase with the 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  truth_df['outcome_class'] = df['outcome_class'].replace('t','truthful')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  deceit_df['outcome_class'] = df['outcome_class'].replace('d','deceptive')


#### Setup the OpenAI call

In [6]:
def get_chat_completion(prompt, model="gpt-3.5-turbo"):
    messages = [{"role": "user", "content": prompt}]
    response = openai.ChatCompletion.create(
        model=model,
        messages=messages,
        temperature=0, # this is the degree of randomness of the model's output
    )
    return response.choices[0].message["content"]

@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def get_chat_completion_with_backoff(prompt, model="gpt-3.5-turbe"):
    return get_chat_completion(prompt, model)

def get_completion(prompt, model='text-davinci-003'):
    response = openai.Completion.create(
        prompt=prompt,
        model=model
    )
    return response.choices[0]['text']

In [7]:
intro = f"""
You are tasked to classify the response to questions into two classes: truthful or deceptive.
You'll be presented with the following pieces of information on an activity:
(1) The title of the activity.
(2) An answwer to a question: Please describe your activity. Be as specific as possible.
(3) A answer to a follow-on question: What information can you give us to reassure us that you are telling the truth?
(4) A Linguistic Inquiry Word Count (LIWC) category values as JSON data
Using all four pieces of information, complete the response with either 'truthful' or 'deceptive'.

Here are a few examples delimited by triple backticks:

"""

response_1_heading = """Response #1:\n"""
response_2_heading = """Response #2:\n"""

liwc_header = """Linguistic Inquiry Word Count (LIWC):\n"""

def construct_activity_scenario(row):
    # activity_header = 'Title of the Activity: ' + new_line
    activity_header = 'Activity: ' + new_line
    activity_description_header = 'Question #1: \nPlease describe your activity. Be as specific as possible.'

    activity_reassurance_header = 'Question #2: \nWhat information can you give us to reassure us that you are telling the truth?'

    activity = activity_header + row['signevent'] + new_line
    q1 = activity_description_header + new_line + response_1_heading + row['q1'] + new_line
    q2 = activity_reassurance_header + new_line + response_2_heading + row['q2'] + new_line
    return activity + q1 + q2

def construct_outcome(row):
    outcome = "Is the response truthful or deceptive?\n"
    return outcome  + row['outcome_class'] + new_line

def construct_liwc_json(row):
    pass

def construct_few_shot_prompt(few_shot_df, infer_row):
    # constructed as a list
    prompt = []
    prompt.append(intro)
    
    for _, row in few_shot_df.iterrows():
        prompt.append(delimiter)
        prompt.append(construct_activity_scenario(row))
        prompt.append(liwc_header)
        prompt.append(construct_liwc_attributes_json(row))
        prompt.append(delimiter)
        prompt.append(construct_outcome(row))
        prompt.append(delimiter)
        prompt.append(new_line)    
        prompt.append(new_line)

    prompt.append(delimiter)
    prompt.append(construct_activity_scenario(infer_row))
    prompt.append(liwc_header)
    prompt.append(construct_liwc_attributes_json(row))
    prompt.append(delimiter) 
    prompt.append(construct_outcome(infer_row)) # has to have a blank outcome to be filled by the llm
    prompt.append(delimiter)


    return prompt



In [8]:
def create_test_indices(df, total, exclude_list):
    import random
    rand_list = []
    count = 0
    while count < total:
        rand_row = random.randrange(df.shape[0])
        if rand_row not in exclude_list:
            rand_list.append(rand_row)
            count += 1
    return rand_list

In [9]:
test_indices = create_test_indices(df, nb_test_samples, few_shot_list)  # exclude the ones in the few shot list
# test_indices = [1435]
print(f'test indices: {test_indices}')


test indices: [1175, 1477, 993, 1068, 102, 701, 1525, 1433, 1524, 914]


In [10]:
y_ground_truth = []  # for computing F1-score
y_predicted = []

for index in test_indices:
    infer_row = df.loc[index].copy()
    # print(f'Inferring the `class_outcome` for:\n{infer_row}')
    ground_truth = 'truthful' if infer_row['outcome_class'] == 't' else 'deceptive'
    # mask the `outcome_class` field since you want to predict it
    infer_row['outcome_class'] = ''

    # print(f'Original\n:{df.loc[index]}')
    # print(f'infer row\n: {infer_row}')

    prompt = construct_few_shot_prompt(random_truth_deceit_df, infer_row)
    prompt = ''.join(prompt)
    
    # print(f'Prompt:\n{prompt}')

    response = get_chat_completion_with_backoff(
        prompt=prompt,
        model=MODEL,
    )    
        
    print(f'INDEX: {index} GROUND TRUTH: {ground_truth}, RESPONSE: {response} - {"wrong" if ground_truth != response else "correct"}')
    y_ground_truth.append(ground_truth)
    y_predicted.append(response)


INDEX: 1175 GROUND TRUTH: deceptive, RESPONSE: truthful - wrong
INDEX: 1477 GROUND TRUTH: deceptive, RESPONSE: truthful - wrong
INDEX: 993 GROUND TRUTH: deceptive, RESPONSE: deceptive - correct
INDEX: 1068 GROUND TRUTH: deceptive, RESPONSE: deceptive - correct
INDEX: 102 GROUND TRUTH: truthful, RESPONSE: truthful - correct
INDEX: 701 GROUND TRUTH: truthful, RESPONSE: deceptive - wrong
INDEX: 1525 GROUND TRUTH: deceptive, RESPONSE: truthful - wrong
INDEX: 1433 GROUND TRUTH: deceptive, RESPONSE: deceptive - correct
INDEX: 1524 GROUND TRUTH: deceptive, RESPONSE: truthful - wrong
INDEX: 914 GROUND TRUTH: deceptive, RESPONSE: truthful - wrong


### Compute metrics

In [11]:
from sklearn.metrics import f1_score
print('Weighted F1-score:', f1_score(y_ground_truth, y_predicted, average='weighted'))

Weighted F1-score: 0.45
