In [None]:
import torch
from transformers import pipeline
from transformers import AutoModelForCausalLM, AutoTokenizer
from openai import OpenAI

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

In [None]:
import pandas
import re

In [None]:
import os
import pickle

In [None]:
# Create an OpenAI-compatible client that talks to the DeepInfra endpoint.
# The token is read from the DEEPINFRA_TOKEN environment variable so the secret never has
# to be saved inside the notebook; the original placeholder is kept as the fallback value,
# so behaviour is unchanged when the variable is not set.
openai = OpenAI(
    api_key=os.environ.get("DEEPINFRA_TOKEN", "<key here>"),
    base_url="https://api.deepinfra.com/v1/openai",
)
# API spec here: https://deepinfra.com/mistralai/Mistral-7B-Instruct-v0.1/api?example=openai-python

In [None]:
def query_mistral(prompt, temperature=0.0):
    """Ask Mistral-7B-Instruct a single-turn question and return the reply text.

    Args:
        prompt: Text sent as the only message, with role "user".
        temperature: Sampling temperature forwarded to the API (default 0.0).

    Returns:
        The `content` of the first choice in the chat completion.

    Uses the module-level `openai` client.
    """
    user_turn = {"role": "user", "content": prompt}
    response = openai.chat.completions.create(
        temperature=temperature,
        messages=[user_turn],
        model="mistralai/Mistral-7B-Instruct-v0.1",
    )
    first_choice = response.choices[0]
    return first_choice.message.content

In [None]:
# Unique values of the `document` column for each of the three split files
# (tab-separated), unpacked in train / val / test order.
train_doc_ids, val_doc_ids, test_doc_ids = (
    pandas.read_csv(split_path, sep='\t')['document'].unique()
    for split_path in (
        'data/claudette_train.tsv',
        'data/claudette_val.tsv',
        'data/claudette_test.tsv',
    )
)

In [None]:
# Full dataset; rows are assigned to a split according to their `document` id.
df = pandas.read_csv('data/tos_dataset.csv')
df_train = df[df['document'].isin(train_doc_ids)]
df_val = df[df['document'].isin(val_doc_ids)]
df_test = df[df['document'].isin(test_doc_ids)]
# Training rows whose `label` is 0: the pool that negatives are drawn from later on.
df_train_neg = df_train[df_train['label'].eq(0)]
# One indicator column per category; the same codes key `legal_standards`.
unfairness_categories = ['A', 'CH', 'CR', 'J', 'LAW', 'LTD', 'TER', 'USE']

In [None]:
# Plain Python lists of the `text` column, in row order, for the vectorizer.
train_texts = df_train['text'].tolist()
train_neg_texts = df_train_neg['text'].tolist()
val_texts = df_val['text'].tolist()
test_texts = df_test['text'].tolist()

In [None]:
# Fit the TF-IDF vocabulary on the training texts only, then project the other
# text collections into that same feature space.
vectorizer = TfidfVectorizer(min_df=2, stop_words='english')
train_tfidf = vectorizer.fit_transform(train_texts)
train_neg_tfidf, val_tfidf, test_tfidf = map(
    vectorizer.transform, (train_neg_texts, val_texts, test_texts)
)

# Cosine nearest-neighbour index over the negative training rows only; each query
# returns its two closest negatives.
nbh = NearestNeighbors(metric='cosine', n_neighbors=2).fit(train_neg_tfidf)
train_neg_neighbors, val_neg_neighbors, test_neg_neighbors = map(
    nbh.kneighbors, (train_tfidf, val_tfidf, test_tfidf)
)

In [None]:
# Attach to every row the position (iloc within `df_train_neg`, the frame the neighbour
# index was fitted on) of a nearby negative training clause. `kneighbors` returns
# (distances, indices); element [1] is the index matrix.
# `assign` is used instead of `DataFrame.insert` so the cell can be re-run: `insert`
# raises ValueError once the column exists and mutates a frame that is itself a slice of `df`.
# NOTE(review): train uses neighbour column 1 while val/test use column 0 - presumably to
# skip the self-match for training rows that are themselves negatives; for positive
# training rows this selects the second-nearest negative. Confirm this is intended.
df_train = df_train.assign(nearest_neg_train_iloc_tfidf=train_neg_neighbors[1][:, 1])
df_val = df_val.assign(nearest_neg_train_iloc_tfidf=val_neg_neighbors[1][:, 0])
df_test = df_test.assign(nearest_neg_train_iloc_tfidf=test_neg_neighbors[1][:, 0])
# Training rows whose `label` is 1.
df_train_pos = df_train.loc[df_train['label'] == 1]

In [None]:
# Suffix appended after the category's fairness question in every prompt built by
# `label_experiment`; the yes/no wording is what the `response_res` regexes look for.
answer_instruction = 'Answer yes or no and justify your response in 50 words.'

In [None]:
def _yes_no_standard(fairness_q):
    """Build one category entry for a yes/no fairness question.

    `response_res` holds the regexes searched for in the model reply; when any of
    them matches, the clause gets `extract_value`, otherwise `default_value`.
    A fresh pattern list is created per call so entries never share state.
    """
    # extract value of 1 is unfair, 0 is fair
    return {
        'fairness_q': fairness_q,
        'response_res': [r'[Yy]es[\.,\s]', r'clause is unfair'],
        'extract_value': True,
        'default_value': False,
    }


# Per-category question posed to the model, keyed by the unfairness category code.
legal_standards = {
    'A': _yes_no_standard(
        'Does this clause describe an arbitration dispute resolution process that is not fully optional to the consumer?'
    ),
    'CH': _yes_no_standard(
        'Does this clause specify conditions under which the service provider could amend and modify the terms of service and/or the service itself?'
    ),
    'CR': _yes_no_standard(
        "Does this clause indicate conditions for content removal in the service provider's full discretion, and/or at any time for any or no reasons and/or without notice nor possibility to retrieve the content."
    ),
    # => f ~ .38
    'J': _yes_no_standard(
        "Does this clause state that any judicial proceeding is to be conducted in a place other than the consumer's residence (i.e. in a different city, different country)?"
    ),
    'LAW': _yes_no_standard(
        'Does the clause define the applicable law as different from the law of the consumer’s country of residence?'
    ),
    'LTD': _yes_no_standard(
        'Does this clause stipulate that duties to pay damages by the provider are limited or excluded?'
    ),
    'TER': _yes_no_standard(
        'Does this clause stipulate that the service provider may suspend or terminate the service at any time for any or no reasons and/or without notice?'
    ),
    'USE': _yes_no_standard(
        'Does this clause stipulate that the consumer is bound by the terms of use of a specific service, simply by using the service, without even being required to mark that he or she has read and accepted them?'
    ),
}

In [None]:
# Sanity check of the split sizes, printed as train / val / test:
# first the number of distinct document ids, then the number of rows per split frame.
print(f"{len(train_doc_ids)} / {len(val_doc_ids)} / {len(test_doc_ids)}")
print(f"{len(df_train)} / {len(df_val)} / {len(df_test)}")

40 / 40 / 20
7607 / 8279 / 3784


In [None]:
def _response_flags_unfair(gen_text, legal_standard):
    """Return `extract_value` if any `response_res` regex matches the reply, else `default_value`."""
    matched = any(re.search(rex, gen_text) is not None
                  for rex in legal_standard['response_res'])
    return legal_standard['extract_value'] if matched else legal_standard['default_value']


def _confusion_metrics(tp, fp, tn, fn):
    """Return (accuracy, precision, recall, f1); any ratio with a zero denominator is 0.0."""
    total = tp + fp + tn + fn
    acc = (tp + tn) / total if total else 0.0
    prec = tp / (tp + fp) if (tp + fp) else 0.0
    rec = tp / (tp + fn) if (tp + fn) else 0.0
    f1 = 2 * (prec * rec) / (prec + rec) if (prec + rec) else 0.0
    return acc, prec, rec, f1


def label_experiment(df, legal_standards, df_neg = None, categories = None, max_gen_len = 50, random_negatives = False):
    """Prompt the model on positive and negative clauses per category and score its answers.

    For every category, the rows of `df` whose category column equals 1 are the positives.
    Negatives are either a random sample from the global `df_train` (`random_negatives`)
    or the rows of `df_neg` referenced by the positives' `nearest_neg_train_iloc_tfidf`
    column. Each clause is sent to the model together with the category's `fairness_q`
    and the global `answer_instruction`; the reply is mapped to a verdict through the
    category's `response_res` regexes.

    Args:
        df: DataFrame with a `text` column, one indicator column per category and, when
            `df_neg` is used, a `nearest_neg_train_iloc_tfidf` column. Its first column
            is printed as the data point identifier.
        legal_standards: Mapping category -> dict with `fairness_q`, `response_res`,
            `extract_value` and `default_value`.
        df_neg: Frame that the `nearest_neg_train_iloc_tfidf` positions index into (via iloc).
        categories: Categories to evaluate; defaults to all keys of `legal_standards`.
        max_gen_len: Currently unused; kept so existing callers keep working.
        random_negatives: Sample as many negatives as there are positives from the global
            `df_train` (fixed `random_state=42`) instead of using `df_neg`.

    Returns:
        Dict category -> dict with `pos_n`, `neg_n`, `TP`, `FP`, `TN`, `FN`, `acc`,
        `prec`, `rec`, `f1`, and the lists `fps` / `fns` of
        `[row, fairness_q, generated_text]` for misclassified clauses. If neither
        negative source is available a message is printed and the results gathered so
        far (an empty dict) are returned.
    """
    if categories is None:
        categories = legal_standards.keys()
    results = {}
    for category in categories:
        print(f'=== Testing for category `{category}` ===')
        legal_standard = legal_standards[category]
        pos_dset = df.loc[df[category] == 1]
        if random_negatives:
            # NOTE(review): relies on the notebook-global `df_train`, not on an argument.
            neg_dset = df_train.loc[df_train[category] == 0].sample(len(pos_dset), random_state=42)
        elif df_neg is not None:
            # Several positives can share a nearest negative, so deduplicate; sorting
            # keeps the evaluation order deterministic.
            neg_dset = df_neg.iloc[sorted(set(pos_dset.nearest_neg_train_iloc_tfidf))]
        else:
            print('either allow random negatives or provide `df_neg`')
            break
        print(f'{len(pos_dset)} positive samples')
        print(f'{len(neg_dset)} negative samples')
        tp, fp, tn, fn = 0, 0, 0, 0
        fps = []
        fns = []
        # The loop variable must not be called `df`: rebinding the parameter would make
        # every later category select its positives from the previous negative set.
        for dset, label in ((pos_dset, 1), (neg_dset, 0)):
            for _, ex in dset.iterrows():
                index = ex.iloc[0]
                print(f'data point at index {index}')
                # NOTE(review): the opening double quote before the clause text is never
                # closed; left as is so results stay comparable with earlier runs.
                prompt = ('Consider the following online terms of service clause: "'
                           + ex['text']
                           + '\n'
                           + legal_standard['fairness_q']
                           + ' ' + answer_instruction)
                print("P: "+prompt)
                # Mistral 7B Instruct
                # NOTE(review): no temperature is passed, so the endpoint's default
                # sampling applies and replies may differ between runs.
                chat_completion = openai.chat.completions.create(
                    model="mistralai/Mistral-7B-Instruct-v0.1",
                    messages=[{"role": "user", "content": prompt}],
                )
                gen_text = chat_completion.choices[0].message.content
                print("R: "+gen_text)
                unfair = _response_flags_unfair(gen_text, legal_standard)
                if unfair:
                    print(f'=> Unfair: {gen_text[:50]}')
                    if label == 1:
                        tp += 1
                    else:
                        fp += 1
                        print(f'false positive! {index}')
                        fps.append([ex, legal_standard['fairness_q'], gen_text])
                else:
                    print(f'Fair: {gen_text[:50]}')
                    if label == 1:
                        fn += 1
                        print(f'false negative! {index}')
                        fns.append([ex, legal_standard['fairness_q'], gen_text])
                    else:
                        tn += 1
                print('===')
        print(f"TP: {tp}, FP: {fp}, TN: {tn}, FN: {fn}")
        # Accuracy is correct predictions over all predictions.
        acc, prec, rec, f1 = _confusion_metrics(tp, fp, tn, fn)
        results[category] = {
            'pos_n': len(pos_dset),
            'neg_n': len(neg_dset),
            'TP': tp,
            'FP': fp,
            'TN': tn,
            'FN': fn,
            'acc': acc,
            'prec': prec,
            'rec': rec,
            'f1': f1,
            'fps': fps,
            'fns': fns,
        }
    return results

In [None]:
# Run the experiment one category at a time on the positive training rows, with negatives
# looked up in `df_train_neg`. `label_experiment` itself returns a dict keyed by category,
# so `results[uc]` has the nested shape {uc: {...metrics...}}, which the reporting cell
# below relies on. Filling `results` inside an explicit loop keeps the categories that
# already finished if a later API call fails.
results = {}
for uc in unfairness_categories:
    results[uc] = label_experiment(df_train_pos, legal_standards, df_neg = df_train_neg, random_negatives= False, categories=[uc])

=== Testing for category `A` ===
39 positive samples
30 negative samples
data point at index 601
P: Consider the following online terms of service clause: "disputes arising hereunder will be resolved by binding arbitration .
Does this clause describe an arbitration dispute resolution process that is not fully optional to the consumer? Answer yes or no and justify your response in 50 words.
R:  No, the clause describes a mandatory arbitration dispute resolution process. It states that any disputes "arising hereunder" will be resolved through arbitration, which means that arbitration is the only option available for resolving disputes, and it cannot be avoided.
Fair:  No, the clause describes a mandatory arbitration 
false negative! 601
===
data point at index 824
P: Consider the following online terms of service clause: "the company -lrb- and its respective corporate parents , subsidiaries , affiliates , predecessors in interest , successors , permitted assigns -rrb- , you and any other

In [None]:
def results_report(r):
    """Print a short summary for every category in a `label_experiment` result dict.

    Args:
        r: Mapping category -> stats dict containing at least `pos_n`, `neg_n`,
           `prec`, `rec`, `f1`, `TP`, `TN`, `FP` and `FN`.

    Prints the category name, one line per sample count and score, and a final line
    with the confusion-matrix counts. Accuracy is not included in the report.
    """
    for label, stats in r.items():
        print(label)
        print(f"- pos_n {stats['pos_n']}")
        print(f"- neg_n {stats['neg_n']}")
        print(f"- prec {stats['prec']}")
        print(f"- rec {stats['rec']}")
        print(f"- f1 {stats['f1']}")
        print(f"- TP {stats['TP']} TN {stats['TN']} FP {stats['FP']} FN {stats['FN']}")

In [None]:
#pickle.dump({'legal_standards': legal_standards,
#             'results': results},
#            open( "results/r1.pkl", "wb" ) )

In [None]:
# Each value of `results` is the per-category dict returned by one `label_experiment`
# call, so it can be passed to `results_report` as is.
for r in results.values():
    results_report(r)

A
- pos_n 39
- neg_n 30
- prec 0.6666666666666666
- rec 0.46153846153846156
- f1 0.5454545454545455
- TP 18 TN 21 FP 9 FN 21
CH
- pos_n 112
- neg_n 87
- prec 0.7983870967741935
- rec 0.8839285714285714
- f1 0.8389830508474576
- TP 99 TN 62 FP 25 FN 13
CR
- pos_n 79
- neg_n 63
- prec 0.6867469879518072
- rec 0.7215189873417721
- f1 0.7037037037037036
- TP 57 TN 37 FP 26 FN 22
J
- pos_n 47
- neg_n 31
- prec 0.8333333333333334
- rec 0.5319148936170213
- f1 0.6493506493506493
- TP 25 TN 26 FP 5 FN 22
LAW
- pos_n 54
- neg_n 43
- prec 0.631578947368421
- rec 0.6666666666666666
- f1 0.6486486486486486
- TP 36 TN 22 FP 21 FN 18
LTD
- pos_n 246
- neg_n 194
- prec 0.8235294117647058
- rec 0.34146341463414637
- f1 0.48275862068965514
- TP 84 TN 176 FP 18 FN 162
TER
- pos_n 137
- neg_n 115
- prec 0.6847826086956522
- rec 0.45985401459854014
- f1 0.5502183406113537
- TP 63 TN 86 FP 29 FN 74
USE
- pos_n 115
- neg_n 87
- prec 0.6149068322981367
- rec 0.8608695652173913
- f1 0.717391304347826
- TP 99 