In [None]:
!pip install google-api-python-client
!pip install scikit-learn
!pip install pandas
!pip install datasets


In [3]:
from sklearn.metrics import classification_report
from getpass import getpass
import time

### Test the Google Perspective API

In [5]:
from googleapiclient import discovery
import json

# Ask for the key interactively so it is never written into the notebook.
API_KEY = getpass('Please enter the Perspective API key: ')

# Build the Perspective ("commentanalyzer") client. static_discovery=False
# tells the library not to use a bundled discovery document, so the API
# description is read from discoveryServiceUrl instead.
client = discovery.build(
  "commentanalyzer",
  "v1alpha1",
  developerKey=API_KEY,
  discoveryServiceUrl="https://commentanalyzer.googleapis.com/$discovery/rest?version=v1alpha1",
  static_discovery=False,
)

def call_perspective_api(text, attributes=None):
    """Score ``text`` with the Perspective API.

    Args:
        text: The comment text to analyze.
        attributes: Optional iterable of Perspective attribute names to
            request. When omitted, the six attributes evaluated in this
            notebook are requested.

    Returns:
        A dict mapping every requested attribute name to the ``value`` of its
        ``summaryScore`` in the API response, in the order the attributes
        were given.

    Raises:
        KeyError: If the response has no ``attributeScores`` entry for a
            requested attribute.
        Any exception raised by the client's ``execute()`` call propagates
        unchanged.
    """
    if attributes is None:
        attributes = (
            'SEVERE_TOXICITY',
            'TOXICITY',
            'IDENTITY_ATTACK',
            'INSULT',
            'PROFANITY',
            'THREAT',
        )
    else:
        # Materialize once so one-shot iterables (e.g. generators) can be used
        # both for building the request and for reading the response.
        attributes = tuple(attributes)

    # The same name list drives the request and the response parsing, so the
    # two can no longer drift apart.
    analyze_request = {
        'comment': {'text': text},
        'requestedAttributes': {name: {} for name in attributes},
        'doNotStore': True,
    }

    response = client.comments().analyze(body=analyze_request).execute()
    scores = response["attributeScores"]
    return {name: scores[name]["summaryScore"]['value'] for name in attributes}

In [6]:
# Smoke test: score a single sentence to confirm the key and client work.
call_perspective_api("This is a test sentence")

{'SEVERE_TOXICITY': 0.0012397766,
 'TOXICITY': 0.017843807,
 'IDENTITY_ATTACK': 0.0032928092,
 'INSULT': 0.008747884,
 'PROFANITY': 0.016479544,
 'THREAT': 0.0076507707}

### Load dataset

In [19]:
from datasets import load_dataset

# Fetch the "test" split of the DeTexD benchmark through the `datasets`
# library, then convert it to a pandas DataFrame for the cells below.
detexd_test_split = load_dataset("grammarly/detexd-benchmark", split='test')
dataset = detexd_test_split.to_pandas()

Found cached dataset csv (/Users/yichen/.cache/huggingface/datasets/grammarly___csv/grammarly--detexd-benchmark-bf0a011730149f79/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d)


In [9]:
# Seconds to pause before every request. Presumably chosen to stay within the
# Perspective API request quota -- TODO confirm against the project's quota.
PERSPECTIVE_REQUEST_INTERVAL_SECONDS = 3

# Score every benchmark text sequentially. API_result[i] is the score dict for
# the i-th entry of dataset["text"], so list order mirrors row order.
API_result = []

for text in dataset["text"]:
    time.sleep(PERSPECTIVE_REQUEST_INTERVAL_SECONDS)
    API_result.append(call_perspective_api(text))

In [13]:
ATTRIBUTES_LIST = ['SEVERE_TOXICITY', 'TOXICITY', 'IDENTITY_ATTACK', 'INSULT', 'PROFANITY', 'THREAT']

# Turn the list of per-text score dicts into one column per attribute and
# attach all of them to the benchmark frame in a single assign call.
score_columns = {
    name: [scores[name] for scores in API_result]
    for name in ATTRIBUTES_LIST
}
dataset = dataset.assign(**score_columns)

In [17]:
 # Flag an input when ANY attribute's score is strictly greater than the threshold.
FLAG_THRESHOLD = 0.5

# Vectorized equivalent of the row-wise check: compare every attribute column
# against the threshold, then reduce across columns for each row.
dataset['perspective-flag'] = dataset[ATTRIBUTES_LIST].gt(FLAG_THRESHOLD).any(axis=1)

In [18]:
# Build the full per-class report as a dict, then display only the entry for
# the class labelled '1'.
report_by_class = classification_report(
    y_true=dataset['label'],
    y_pred=dataset['perspective-flag'],
    output_dict=True,
)
report_by_class['1']

{'precision': 0.746268656716418,
 'recall': 0.2976190476190476,
 'f1-score': 0.425531914893617,
 'support': 336}

Note: the discrepancy between the number reported in the paper and the number here might be caused by the evaluation being run on a different date (and thus against a different version of the Perspective API); overall, the difference is not significant.