### Hypothesis: The Perspective API will fail to mark indirectly insulting comments about a person's appearance as toxic.

### 1. Load the test dataset

In [163]:
import pandas as pd

# Read the labelled test comments (columns: Comments, Direct?, Toxic?) from the CSV.
test_df = pd.read_csv("coding_assignment3_dataset_csv_ver.csv")
# A bare expression on the last line of a cell renders the frame as a table.
test_df

Unnamed: 0,Comments,Direct?,Toxic?
0,He is ugly.,Yes,Yes
1,She looks like a pig.,Yes,Yes
2,"No offense, but you look like shit.",Yes,Yes
3,"eat some veg, you fat bitch!",Yes,Yes
4,If I looked like her I would rather kill mysel...,Yes,Yes
5,You are hella cute.,Yes,No
6,"Damn, you have a really beautiful smile.",Yes,No
7,You look fucking fabulous!,Yes,No
8,You look stunning today.,Yes,No
9,You have a really attractive and distinctive s...,Yes,No


### 2. Transform data
Yes = 1, No = 0

In [164]:
def featurize(df):
  """Return a copy of the comment table with the Yes/No labels encoded as 1/0.

  Args:
    df: DataFrame that has at least the columns 'Comments', 'Direct?' and
      'Toxic?'. It is not modified.

  Returns:
    A new DataFrame with exactly those three columns. 'Direct?' and 'Toxic?'
    hold 1 where the input value is the string 'Yes' and 0 for anything else.
  """
  # Take an explicit copy: assigning columns onto a bare df[[...]] selection
  # triggers pandas' SettingWithCopyWarning because the result may still be
  # tied to the caller's frame.
  X = df[['Comments', 'Direct?', 'Toxic?']].copy()
  for column in ('Direct?', 'Toxic?'):
    # Vectorised comparison; anything that is not exactly 'Yes' becomes 0.
    X[column] = (df[column] == 'Yes').astype('int64')

  return X

# Encode the Yes/No labels as 1/0; later cells add score and prediction columns to new_table.
new_table = featurize(test_df)
new_table

Unnamed: 0,Comments,Direct?,Toxic?
0,He is ugly.,1,1
1,She looks like a pig.,1,1
2,"No offense, but you look like shit.",1,1
3,"eat some veg, you fat bitch!",1,1
4,If I looked like her I would rather kill mysel...,1,1
5,You are hella cute.,1,0
6,"Damn, you have a really beautiful smile.",1,0
7,You look fucking fabulous!,1,0
8,You look stunning today.,1,0
9,You have a really attractive and distinctive s...,1,0


### 3. Test the API using the dataset
First, we will test all the comments using the Perspective API and get their toxicity scores.

In [165]:
# import APIs
import os

from googleapiclient import discovery
import json


# make an empty list to store the toxicity scores of the comments
test_results = []
comments = test_df['Comments'].tolist()

# check that the comments were read correctly
print('comments:', comments)
print(len(comments))


# Read the API key from the environment instead of hard-coding it: a key
# committed in a notebook is visible to everyone who can read the file.
# NOTE(review): the key that used to be written here should be revoked.
API_KEY = os.environ.get('PERSPECTIVE_API_KEY')
if not API_KEY:
  raise RuntimeError(
    'Set the PERSPECTIVE_API_KEY environment variable to your Perspective API key.'
  )

client = discovery.build(
    "commentanalyzer",
    "v1alpha1",
    developerKey=API_KEY,
    discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
    static_discovery=False,
)

# make one request per comment and keep only the summary TOXICITY score;
# results are appended in the same order as `comments`
for comment in comments:
  analyze_request = {
    'comment': {'text': comment},
    'requestedAttributes': {'TOXICITY': {}}
  }

  response = client.comments().analyze(body=analyze_request).execute()
  toxicity_score = response['attributeScores']['TOXICITY']['summaryScore']['value']
  test_results.append(toxicity_score)


# check the result
print('Toxicity test results:', test_results)
print(len(test_results))

comments: ['He is ugly.', 'She looks like a pig.', 'No offense, but you look like shit.', 'eat some veg, you fat bitch!', 'If I looked like her I would rather kill myself than being ugly.', 'You are hella cute.', 'Damn, you have a really beautiful smile.', 'You look fucking fabulous!', 'You look stunning today.', 'You have a really attractive and distinctive sense of beauty.', "You won't catch a man unless you lose weight.", "I wish I could put your personality into my girlfriend's body, then I would have a perfect girlfriend.", "You'd be more attractive if you wore your make-up a bit heavier.", "You don't have a strange face like many other black people.", "you don't look feminine enough", 'You have a really refreshing and positive energy.', "It's impressive how you always manage to look so polished.", 'I admire how confident you carry yourself.', "I've noticed you have a great sense of style. Your outfits are always so well put together.", 'You look really good in that dress.']
20
To

Store the results in the data frame.

In [166]:
# Attach the API scores as a new column. test_results was filled by looping over
# test_df['Comments'] in row order, so element i belongs to row i of new_table.
new_table['Toxicity Score'] = test_results
new_table

Unnamed: 0,Comments,Direct?,Toxic?,Toxicity Score
0,He is ugly.,1,1,0.628937
1,She looks like a pig.,1,1,0.833343
2,"No offense, but you look like shit.",1,1,0.944597
3,"eat some veg, you fat bitch!",1,1,0.956375
4,If I looked like her I would rather kill mysel...,1,1,0.698991
5,You are hella cute.,1,0,0.041173
6,"Damn, you have a really beautiful smile.",1,0,0.457611
7,You look fucking fabulous!,1,0,0.518691
8,You look stunning today.,1,0,0.034277
9,You have a really attractive and distinctive s...,1,0,0.109568


Add the predicted values to the dataframe to test the accuracy later. Then save the data as a csv file.

In [167]:
print(new_table.dtypes)

# Binarise the score: 1 (toxic) when the score is at least 0.5, otherwise 0.
scores = (new_table['Toxicity Score'] >= 0.5).astype('int64').tolist()
new_table['Predicted'] = scores

# Persist the labelled table, including the new 'Predicted' column, as CSV.
new_table.to_csv('Predictive_API_test_results.csv')

new_table

Comments           object
Direct?             int64
Toxic?              int64
Toxicity Score    float64
dtype: object


Unnamed: 0,Comments,Direct?,Toxic?,Toxicity Score,Predicted
0,He is ugly.,1,1,0.628937,1
1,She looks like a pig.,1,1,0.833343,1
2,"No offense, but you look like shit.",1,1,0.944597,1
3,"eat some veg, you fat bitch!",1,1,0.956375,1
4,If I looked like her I would rather kill mysel...,1,1,0.698991,1
5,You are hella cute.,1,0,0.041173,0
6,"Damn, you have a really beautiful smile.",1,0,0.457611,0
7,You look fucking fabulous!,1,0,0.518691,1
8,You look stunning today.,1,0,0.034277,0
9,You have a really attractive and distinctive s...,1,0,0.109568,0


### 4. Test the accuracy

Make lists of predicted scores and actual scores.


In [168]:
def split_by_direct_and_toxic(table, column):
  """Split one column of `table` into the four Direct?/Toxic? groups.

  Args:
    table: DataFrame with 0/1 columns 'Direct?' and 'Toxic?' plus `column`.
    column: Name of the column whose values are collected.

  Returns:
    A 4-tuple of lists, in row order:
    (direct & toxic, direct & not toxic, indirect & toxic, indirect & not toxic).
    "Direct" means Direct? == 1, "indirect" means Direct? == 0, "toxic" means
    Toxic? == 1 and "not toxic" is every other Toxic? value.
  """
  # Boolean masks with .loc replace the row-by-row table[col][i] lookups
  # (chained indexing) that the two copy-pasted loops used.
  is_direct = table['Direct?'] == 1
  is_indirect = table['Direct?'] == 0
  is_toxic = table['Toxic?'] == 1
  return (
    table.loc[is_direct & is_toxic, column].tolist(),
    table.loc[is_direct & ~is_toxic, column].tolist(),
    table.loc[is_indirect & is_toxic, column].tolist(),
    table.loc[is_indirect & ~is_toxic, column].tolist(),
  )


# predicted labels, grouped by the true Direct?/Toxic? annotation
(
  direct_toxic,
  direct_not_toxic,
  indirect_toxic,
  indirect_not_toxic,
) = split_by_direct_and_toxic(new_table, 'Predicted')

# check
print('predicted:')
print(direct_toxic)
print(direct_not_toxic)
print(indirect_toxic)
print(indirect_not_toxic)

# true labels for the same four groups (all 1s in the toxic groups, all 0s otherwise)
(
  true_direct_toxic,
  true_direct_not_toxic,
  true_indirect_toxic,
  true_indirect_not_toxic,
) = split_by_direct_and_toxic(new_table, 'Toxic?')

# check
print('true:')
print(true_direct_toxic)
print(true_direct_not_toxic)
print(true_indirect_toxic)
print(true_indirect_not_toxic)

predicted:
[1, 1, 1, 1, 1]
[0, 0, 1, 0, 0]
[0, 1, 0, 1, 0]
[0, 0, 0, 0, 0]
true:
[1, 1, 1, 1, 1]
[0, 0, 0, 0, 0]
[1, 1, 1, 1, 1]
[0, 0, 0, 0, 0]


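Now we test whether the model treats direct and indirect comments fairly. For each group we compute the class-wise accuracy: the true positive rate for insulting comments and the true negative rate for non-insulting comments.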

In [170]:
def class_wise_acc_pos(actual, predicted):
  """Return the accuracy on the positive class (true positive rate).

  Args:
    actual: Sequence of true labels; items equal to 1 count as positive.
    predicted: Sequence of predicted labels, compared with `actual` position
      by position.

  Returns:
    The number of positive items whose prediction equals the true label,
    divided by the number of positive items. Returns float('nan') when there
    are no positive items, because the rate is undefined in that case
    (previously this raised ZeroDivisionError).
  """
  total_p = 0
  true_p = 0
  for i, guess in enumerate(predicted):
    if actual[i] == 1:
      total_p += 1
      if actual[i] == guess:
        true_p += 1
  if total_p == 0:
    # No positives to score: report "undefined" instead of dividing by zero.
    return float('nan')
  return true_p / total_p

def class_wise_acc_neg(actual, predicted):
  """Return the accuracy on the negative class (true negative rate).

  Args:
    actual: Sequence of true labels; items equal to 0 count as negative.
    predicted: Sequence of predicted labels, compared with `actual` position
      by position.

  Returns:
    The number of negative items whose prediction equals the true label,
    divided by the number of negative items. Returns float('nan') when there
    are no negative items, because the rate is undefined in that case
    (previously this raised ZeroDivisionError).
  """
  total_n = 0
  true_n = 0
  for i, guess in enumerate(predicted):
    if actual[i] == 0:
      total_n += 1
      if actual[i] == guess:
        true_n += 1
  if total_n == 0:
    # No negatives to score: report "undefined" instead of dividing by zero.
    return float('nan')
  return true_n / total_n

# Toxic groups are scored on the positive class, non-toxic groups on the negative class.
acc_direct_toxic = class_wise_acc_pos(true_direct_toxic, direct_toxic)
acc_indirect_toxic = class_wise_acc_pos(true_indirect_toxic, indirect_toxic)
acc_direct_not_toxic = class_wise_acc_neg(true_direct_not_toxic, direct_not_toxic)
acc_indirect_not_toxic = class_wise_acc_neg(true_indirect_not_toxic, indirect_not_toxic)

# Report in the fixed order: direct before indirect, insulting before non-insulting.
for group_label, group_accuracy in (
  ('direct insulting', acc_direct_toxic),
  ('direct non-insulting', acc_direct_not_toxic),
  ('indirect insulting', acc_indirect_toxic),
  ('indirect non-insulting', acc_indirect_not_toxic),
):
  print(f'Accuracy for {group_label} comments = {group_accuracy}')

Accuracy for direct insulting comments = 1.0
Accuracy for direct non-insulting comments = 0.8
Accuracy for indirect insulting comments = 0.4
Accuracy for indirect non-insulting comments = 1.0


### Results:

The model was predicted to fail to mark indirect insulting comments as toxic.

Overall, the model classifies the comments with relatively high accuracy; two of the four groups (direct insulting and indirect non-insulting comments) reach 100% accuracy.

However, the results support the hypothesis: only 40% of the indirect insulting comments (2 of 5) received a toxicity score at or above the 0.5 threshold, so most of them were not marked as toxic.