In [None]:
!pip install transformers
!pip install accelerate
!pip install OpenAI

In [None]:
import concurrent.futures
import copy
import getpass
import math
import os
import pickle
import re
import sys
import time
from collections import Counter

import numpy as np
import pandas as pd
from openai import OpenAI


In [None]:
# Mount Google Drive so the dataset does not have to be re-uploaded on every
# Colab runtime. This cell only works inside Google Colab.
from google.colab import drive

# Makes the user's Drive available under /content/drive (prompts for authorisation).
drive.mount('/content/drive')
# Read the training CSV into a pandas DataFrame. Later cells read its "Index"
# and "Labels" columns and add a "LegalBert" column to it.
path='/content/drive/MyDrive/Data/claudette_train_multi_label.csv'
df_train = pd.read_csv(path)
# Display the first rows as a sanity check that the right file was loaded.
df_train.head()

In [None]:
# Name of the precomputed embedding file to load from Drive (without extension).
feature_name = 'legalbert' # accepted values: 'sbert' or 'legalbert'
path='/content/drive/MyDrive/Data/'+feature_name+'.pkl'
# SECURITY NOTE: allow_pickle=True lets np.load unpickle the file, and unpickling
# can execute arbitrary code. Only load feature files from a trusted source.
# NOTE(review): `features` is later indexed by the values of df_train["Index"],
# so it is presumably an array with one embedding per dataset row - confirm.
features = np.load(path, allow_pickle=True)

In [None]:
# Keep only the embeddings of the training rows: `features` is indexed with the
# values of df_train's "Index" column, then wrapped in a DataFrame.
selected_features_train = features[df_train["Index"].tolist()]
selected_features_train=pd.DataFrame(selected_features_train)

# Turn the embedding table into one Python list per row and store it as a
# single column. The column is always called "LegalBert", whatever feature
# file was loaded, because downstream functions read it under that name.
dummy=selected_features_train.values.tolist()
df_train["LegalBert"]=dummy

In [None]:
def get_closest_negative(embedding1, df, label):
    """Return the negative row whose embedding is most similar to a query.

    A row is "negative" when its "Labels" string does not contain `label`.
    Similarity is the cosine similarity between `embedding1` and the row's
    "LegalBert" embedding.

    Parameters
    ----------
    embedding1 : sequence of float
        Query embedding (typically the embedding of a positive clause).
    df : pandas.DataFrame
        Frame with a string column "Labels" and an embedding column "LegalBert".
    label : str
        Label code that negatives must NOT carry.

    Returns
    -------
    pandas.Series
        The most similar negative row, taken from the negatives-only subset.

    Raises
    ------
    ValueError
        If `df` contains no negative row for `label`.
    """
    # NOTE(review): this is a substring/regex match on the label string, so a
    # short code such as "A" also matches inside "LAW" - confirm the format of
    # the "Labels" column before relying on it for short codes.
    df_train_neg = df[~df["Labels"].str.contains(label)]
    if df_train_neg.empty:
        raise ValueError(f"no negative examples available for label {label!r}")

    query = np.asarray(embedding1)

    # Stack the per-row embeddings into a 2D array (one row per negative).
    embeddings_stack = np.stack(df_train_neg['LegalBert'].to_numpy())

    # Cosine similarity between the query and every negative embedding.
    dot_products = np.dot(query, embeddings_stack.T)
    query_norm = np.linalg.norm(query)
    features_norms = np.linalg.norm(embeddings_stack, axis=1)
    similarities = dot_products / (query_norm * features_norms)

    # The argmax is a position inside the negatives-only frame, so the row
    # must be looked up there. Indexing the full frame with it would return
    # an unrelated row, possibly a positive one.
    top_index = np.argmax(similarities)

    return df_train_neg.iloc[top_index]

In [None]:
def sample_n_clauses(df, n, label):
    """Build a balanced minibatch of positives and their hardest negatives.

    Samples `n // 2` rows whose "Labels" value contains `label`, then pairs
    each with the negative row returned by `get_closest_negative` for its
    "LegalBert" embedding. The same negative can be chosen for several
    positives, so negatives are not guaranteed to be unique.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with "Labels" and "LegalBert" columns.
    n : int
        Target minibatch size; `n // 2` positives are drawn.
    label : str
        Label code that defines the positive class.

    Returns
    -------
    pandas.DataFrame
        Sampled positives followed by their negatives, with a fresh
        0..k-1 index.

    Raises
    ------
    ValueError
        Propagated from `DataFrame.sample` when fewer than `n // 2`
        positives exist.
    """
    df_pos = df[df["Labels"].apply(lambda x: label in x)]

    sampled_pos = df_pos.sample(n//2, replace=False)

    # Collect one single-row frame per positive and concatenate once at the
    # end, instead of re-copying a growing DataFrame on every iteration.
    neg_frames = [
        get_closest_negative(pos_embedding, df, label).to_frame().T
        for _, pos_embedding in sampled_pos['LegalBert'].items()
    ]
    if neg_frames:
        sampled_neg = pd.concat(neg_frames, ignore_index=True)
    else:
        # n // 2 == 0: nothing was sampled, so there are no negatives either.
        sampled_neg = pd.DataFrame()

    result_df = pd.concat([sampled_pos, sampled_neg], axis=0, ignore_index=True)

    return result_df
# sampled_df= sample_n_clauses(df_train, 4,'J')
# print(sampled_df)

In [None]:
# Client for the Mistral model served through DeepInfra's OpenAI-compatible API.
# The API key is deliberately NOT stored in the notebook: it is read from the
# DEEPINFRA_API_KEY environment variable, with an interactive prompt as a
# fallback. Any key that was ever committed in this notebook is exposed and
# should be revoked.
_deepinfra_api_key = os.environ.get("DEEPINFRA_API_KEY") or getpass.getpass("DeepInfra API key: ")
openai = OpenAI(
    api_key=_deepinfra_api_key,
    base_url="https://api.deepinfra.com/v1/openai",
)

def query_mistral(prompt, temperature=0.0, max_tokens = 50, role='user'):
    """Send one prompt to the Mistral chat endpoint and return the reply text.

    Parameters
    ----------
    prompt : str
        Content of the message sent under `role`.
    temperature : float
        Sampling temperature forwarded to the API.
    max_tokens : int
        Maximum number of tokens to generate.
    role : str
        Role attached to `prompt` (defaults to 'user').

    Returns
    -------
    The `content` of the first choice's message.
    """
    # NOTE(review): the system message is sent after the prompt message and
    # uses "[INST]" as both opening and closing tag - confirm this is intended.
    system_message = {
        "role": "system",
        "content": "[INST] Limit your response to 30 words.[INST]",
    }
    conversation = [{"role": role, "content": prompt}, system_message]

    response = openai.chat.completions.create(
        model="mistralai/Mistral-7B-Instruct-v0.1",
        messages=conversation,
        temperature=temperature,
        max_tokens=max_tokens,
    )
    first_choice = response.choices[0]
    return first_choice.message.content

print(query_mistral("Hello"))

 Hello! How can I assist you today?


In [None]:
# Compare the classification LLM's answer with the true label and report whether it is a true/false positive or a true/false negative
def is_fpnp(answer,true_label):
  """Classify an LLM answer against the ground truth.

  The verdict is taken from the leading "yes"/"no" word of the answer, since
  the model is asked to start its answer with one of them. Only when the
  answer does not start with either word does the function fall back to
  looking for "yes", then "no", anywhere in the text.

  Parameters
  ----------
  answer : str or None
      Raw model answer. A non-string answer is treated as unparseable.
  true_label : str
      "positive" or "negative".

  Returns
  -------
  str
      "tp", "fp", "fn" or "tn"; the string "None" when no verdict can be
      extracted or `true_label` is neither "positive" nor "negative".
  """
  text = answer.lower() if isinstance(answer, str) else ""

  # Prefer the leading word: a plain substring test would let "yes" inside
  # "yesterday" or a quoted 'yes' override an answer that starts with "No".
  leading = re.match(r"\W*(yes|no)\b", text)
  if leading:
    verdict = leading.group(1)
  elif "yes" in text:
    verdict = "yes"
  elif "no" in text:
    verdict = "no"
  else:
    return "None"

  if true_label == "positive":
    return "tp" if verdict == "yes" else "fn"
  if true_label == "negative":
    return "fp" if verdict == "yes" else "tn"
  return "None"
#Tests
print("tp expected : ",is_fpnp("YES","positive"))
print("fp expected : ",is_fpnp("YES","negative"))
print("fn expected : ",is_fpnp("NO","positive"))
print("tn expected : ",is_fpnp("NO","negative"))



tp expected :  tp
fp expected :  fp
fn expected :  fn
tn expected :  tn


In [None]:
def get_GT(row,label_class):
  """Return the ground-truth class of a row for one label.

  Parameters
  ----------
  row : pandas.Series or sequence
      A data row whose element at position 2 holds the labels.
      NOTE(review): position 2 is presumably the "Labels" column - confirm
      against the CSV column order.
  label_class : str
      Label code to look for.

  Returns
  -------
  str
      'positive' if `label_class` is contained in the labels, else 'negative'.
  """
  # Use .iloc for pandas rows: plain `row[2]` on a string-indexed Series
  # relies on a positional fallback that pandas has deprecated.
  labels = row.iloc[2] if hasattr(row, "iloc") else row[2]
  if label_class in labels:
    return 'positive'
  return 'negative'
#test
# print(get_GT(sampled_df.iloc[3],'J'))

In [None]:
def get_f1(tp, fp, fn):
    """Compute the F1 score from raw counts.

    Uses F1 = 2*tp / (2*tp + fp + fn) and returns 0 when the denominator
    is zero (no true positives, false positives or false negatives).
    """
    denominator = 2 * tp + fp + fn
    if denominator == 0:
        return 0
    return 2 * tp / denominator

#test
print("F1 Score (direct calculation):",get_f1(tp=10, fp=0, fn=0))


F1 Score (direct calculation): 1.0


# Expand & Select





In [None]:
example_beam=[{'prompt':"Does this jurisdiction clause imposed a requirement for judicial proceedings to take place in a location other than the consumer's residence (i.e. in a different city, different country)?",'score':0.88},{'prompt':" Is the online terms of service clause 'You agree to...bring any judicial proceedings....in the state courts in the city and county of San Francisco, California, or federal court for the northern district of California' likely to present a challenge to residents of other countries who may not live in San Francisco or be arbitrating?",'score':0.5},{'prompt':"Examine this online terms of service clause and identify any provision that stipulates that any judicial proceeding is to be conducted in a place other than the consumer's residence (i.e. in a different city, different country).</instr> Is the online terms of service clause unfair under European Law, given that it specifies the governing law and jurisdiction without regard to any potential inconvenience to the consumer?",'score':0}]

In [None]:
example_beam2=[{'prompt':"Does this jurisdiction clause imposed a requirement for judicial proceedings to take place in a location other than the consumer's residence (i.e. in a different city, different country)?",'score':0.88},{'prompt':" Is the online terms of service clause 'You agree to...bring any judicial proceedings....in the state courts in the city and county of San Francisco, California, or federal court for the northern district of California' likely to present a challenge to residents of other countries who may not live in San Francisco or be arbitrating?",'score':0.5},{'prompt':"Examine this online terms of service clause and identify any provision that stipulates that any judicial proceeding is to be conducted in a place other than the consumer's residence (i.e. in a different city, different country).</instr> Is the online terms of service clause unfair under European Law, given that it specifies the governing law and jurisdiction without regard to any potential inconvenience to the consumer?",'score':0}]

In [None]:
########### OUTDATED ##############
# #Expand the beam by creating new prompts using scores as gradients
# def expand(beam,new_prompts=10,label="J"):

#   LABELS={"A":"arbitration","CH":"unilateral change","CR":"content removal","J":"jurisdiction","LAW":"choice of law","LTD":"limitation of liability","TER":"unilateral termination","USE":"contract by using"}

#   candidates = beam

#   meta_prompt = f'Online service contracts can contain clauses that may be deemed potentially unfair under European Law."\n'\
#                 +f'<INS> Your task is to generate the instruction for a ' + LABELS[label] +f' clause classification task. <\INS> Below are some previous instructions with their scores, ranging from 0 to 1.\n'\

#   for index in range(len(beam)):
#     prompt=beam[index]['prompt']
#     score=beam[index]['score']

#     meta_prompt = meta_prompt + f'<InstructionScorePair{index}>' + "Instruction: "+ prompt + "F1-Score : "+ f'{score}' + f'<\\InstructionScorePair{index}>, \n'

# #expansion at each step
#   for i in range(new_prompts):

#     candidate={}

#     #get new prompts from LLM usinf META prompt
#     new_prompt=query_mistral(meta_prompt,1,max_tokens=50)


#     candidate['prompt']=new_prompt
#     candidate['score']=0

#     #append new prompt,score pair
#     candidates.append(candidate)

#   print(f'Candidates : ',candidates)
#   return candidates


# #test
# print(expand([{'prompt':",Does this jurisdiction clause imposed a requirement for judicial proceedings to take place in a location other than the consumer's residence (i.e. in a different city, different country)?",'score':0}]))

In [None]:


def expand_parallel(beam, new_prompts_number, label):
    """Expand the beam with new candidate prompts generated by the LLM.

    A meta-prompt is built from the task description and every
    (instruction, score) pair currently in `beam`; the LLM is then queried
    `new_prompts_number` times in parallel (temperature 1, 50 tokens) and
    each reply becomes a new candidate with score 0.

    Parameters
    ----------
    beam : list of dict
        Current prompts, each with keys 'prompt' and 'score'.
    new_prompts_number : int
        Number of new candidates to generate.
    label : str
        Label code; must be a key of the internal LABELS table.

    Returns
    -------
    list of dict
        A new list holding the original beam items followed by the generated
        candidates, in the order the API calls completed.

    Raises
    ------
    KeyError
        If `label` is not a known label code.
    """
    LABELS={"A":"arbitration","CH":"unilateral change","CR":"content removal","J":"jurisdiction","LAW":"choice of law","LTD":"limitation of liability","TER":"unilateral termination","USE":"contract by using"}

    # Shallow copy: the caller's list is left untouched, but the item dicts
    # themselves are shared with it.
    candidates = list(beam)

    # The backslash in "<\INS>" is part of the prompt text; it is written as
    # "\\" because "\I" is not a valid escape sequence in a normal string.
    meta_prompt = (
        "Online service contracts can contain clauses that may be deemed potentially unfair under European Law. "
        "<INS> Your task is to generate the instruction for a " + LABELS[label]
        + " clause classification task. <\\INS> Below are some previous instructions with their scores, ranging from 0 to 1.\n"
    )
    # The meta-prompt is identical for every request, so build it once here
    # rather than inside each worker.
    for index, item in enumerate(beam):
        prompt = item['prompt']
        score = item['score']
        meta_prompt += f'<InstructionScorePair{index}>Instruction: {prompt} F1-Score: {score}<\\InstructionScorePair{index}>, \n'

    def generate_new_prompt(_):
        """Ask the LLM for one new instruction based on the meta-prompt."""
        new_prompt = query_mistral(meta_prompt,1,max_tokens=50)
        return {'prompt': new_prompt, 'score': 0}

    # Fire all requests concurrently; candidates are appended as each request
    # finishes, so their relative order is not deterministic.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [executor.submit(generate_new_prompt, i) for i in range(new_prompts_number)]
        for future in concurrent.futures.as_completed(futures):
            candidates.append(future.result())

    print('Candidates: ', candidates)
    return candidates


# #SPEED TEST
# import time
# start_time = time.time()
# result=expand([{'prompt':"Does this jurisdiction clause imposed a requirement for judicial proceedings to take place in a location other than the consumer's residence (i.e. in a different city, different country)?",'score':0.88},{'prompt':" Is the online terms of service clause 'You agree to...bring any judicial proceedings....in the state courts in the city and county of San Francisco, California, or federal court for the northern district of California' likely to present a challenge to residents of other countries who may not live in San Francisco or be arbitrating?",'score':0.5},{'prompt':"Examine this online terms of service clause and identify any provision that stipulates that any judicial proceeding is to be conducted in a place other than the consumer's residence (i.e. in a different city, different country).</instr> Is the online terms of service clause unfair under European Law, given that it specifies the governing law and jurisdiction without regard to any potential inconvenience to the consumer?",'score':0}])
# print("RESULTT", len(result))
# end_time = time.time()
# select_time = end_time - start_time
# print(f"The basic selection function took {select_time} s to complete its task")

# start_time = time.time()
# result=expand_parallel([{'prompt':"Does this jurisdiction clause imposed a requirement for judicial proceedings to take place in a location other than the consumer's residence (i.e. in a different city, different country)?",'score':0.88},{'prompt':" Is the online terms of service clause 'You agree to...bring any judicial proceedings....in the state courts in the city and county of San Francisco, California, or federal court for the northern district of California' likely to present a challenge to residents of other countries who may not live in San Francisco or be arbitrating?",'score':0.5},{'prompt':"Examine this online terms of service clause and identify any provision that stipulates that any judicial proceeding is to be conducted in a place other than the consumer's residence (i.e. in a different city, different country).</instr> Is the online terms of service clause unfair under European Law, given that it specifies the governing law and jurisdiction without regard to any potential inconvenience to the consumer?",'score':0}],3,'J')
# print("RESULT", len(result))
# end_time = time.time()
# select_time = end_time - start_time
# print(f"The parallelized selection function took {select_time} s to complete its task")

In [None]:
test_dict = [{"prompt":"Does this clause state that any judicial proceeding is to be conducted in a place other than the consumer's residence (i.e. in a different city, different country)?",
              "score": 0},
             {"prompt":"Is this clause fair in that it states that any judicial proceeding must take place in a location other than the customer\'s residence?",
              "score":0},
             {"prompt":"Under what circumstances may a clause require that a judicial proceeding be conducted in a place other than the consumer's residence?",
              "score":0},
              {"prompt":"Comprehensive instruction for a jurisdiction clause classification task: Assess whether each clause specifies a location for any legal proceedings in a manner deemed",
               "score":0},
               {"prompt":">Does this clause require the consumer to undergo judicial proceedings in a location other than their residence, and is it considered unfair under your standard?",
                "score":0}]

In [None]:
#SELECT K top performing prompts

# def select(beam,top=3,minibatchsize=20, shortened_label='J'):

#   print(f'\n SELECTING top {top} from the beam \n')

#   minibatch=sample_n_clauses(df_train,minibatchsize,label=shortened_label)

#   for index in range(len(beam)):

#     #update scores for each prompt
#     prompt=beam[index]['prompt']
#     beam[index]['score']=get_score(prompt,minibatch)

#   #sort descending and select k best
#   top_k=sorted(beam,key=lambda x:x['score'], reverse=True)

#   print(f'The top {top} selected prompts from beam are: ',top_k[:top])
#   return top_k[:top]

# #test
# result=select(example_beam,1)
# print(result)




In [None]:
def get_score_parallel(prompt, minibatch_df, shortened_label):
    """Score a prompt with the F1 of the LLM's answers on a minibatch.

    Every row of `minibatch_df` is sent to the LLM (temperature 0) together
    with `prompt`; each answer is compared with the row's ground truth and
    the outcomes are aggregated into an F1 score.

    Parameters
    ----------
    prompt : str
        Classification instruction to evaluate.
    minibatch_df : pandas.DataFrame
        Evaluation rows. The element at position 3 of each row is used as
        the clause text.
        NOTE(review): position 3 is presumably the clause-text column -
        confirm against the CSV column order.
    shortened_label : str
        Label code used to derive the ground truth of each row.

    Returns
    -------
    The F1 score computed from the tp / fp / fn counts. Answers that cannot
    be parsed are counted under "None" and do not enter the score.
    """
    def process_minibatch_item(row):
        """Query the LLM for one (index, row) pair and return tp/fp/fn/tn/None."""
        data = row[1]  # iterrows() yields (index, Series) tuples
        # .iloc keeps the lookup positional without relying on the deprecated
        # integer fallback of Series.__getitem__.
        clause = data.iloc[3]
        # NOTE(review): the clause and the prompt are concatenated without any
        # separator - confirm this is intended.
        query_string = f'Consider the following online terms of service clause: {clause}'\
                       f'{prompt} \n Start your answer with "yes" or "no" and then justify your response in no more than 30 words.'

        answer = query_mistral(query_string,0)
        return is_fpnp(answer, get_GT(data, shortened_label))

    # Workers only return their outcome; counting happens afterwards in the
    # calling thread, so no shared state is mutated concurrently.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        outcomes = list(executor.map(process_minibatch_item, minibatch_df.iterrows()))

    response_counter = Counter(outcomes)

    # Missing keys read as 0 on a Counter.
    score = get_f1(response_counter['tp'], response_counter['fp'], response_counter['fn'])
    return score

#SPEED TEST
# import time

# start_time = time.time()
# result=get_score("Does this clause state that any judicial proceeding is to be conducted in a place other than the consumer's residence (i.e. in a different city, different country)?",sampled_df, "J")
# print("Result", result)
# end_time = time.time()
# select_time = end_time - start_time
# print(f"The basic selection function took {select_time} s to complete its task")

# start_time = time.time()
# result=get_score_parallel("Does this clause state that any judicial proceeding is to be conducted in a place other than the consumer's residence (i.e. in a different city, different country)?",sampled_df, "J")
# print("Result", result)
# end_time = time.time()
# select_time = end_time - start_time
# print(f"The parallelized selection function took {select_time} s to complete its task")

In [None]:
def select_parallel(beam, top, minibatchsize, shortened_label):
    """Re-score every prompt in the beam and keep the `top` best ones.

    A fresh minibatch is sampled from `df_train`, every beam item is scored
    on it in parallel, the items' 'score' fields are updated in place, and
    the `top` highest-scoring items are returned (ties keep beam order).
    """
    print(f'\nSELECTING top {top} from the beam\n')

    # One shared minibatch so that all prompts are compared on the same data.
    minibatch_df = sample_n_clauses(df_train, minibatchsize, shortened_label)

    def score_entry(entry):
        """Return the F1 score of one beam item on the shared minibatch."""
        return get_score_parallel(entry['prompt'], minibatch_df, shortened_label)

    # executor.map yields results in submission order, i.e. beam order.
    with concurrent.futures.ThreadPoolExecutor() as pool:
        scores = list(pool.map(score_entry, beam))

    # Write the new scores back onto the caller's beam items.
    for entry, new_score in zip(beam, scores):
        entry['score'] = new_score

    ranked = sorted(beam, key=lambda entry: entry['score'], reverse=True)
    top_k = ranked[:top]

    print(f'The top {top} selected prompts from beam are:', top_k)
    return top_k

# Assuming 'example_beam' and 'get_score' function are defined elsewhere along with 'sample_n_clauses' and 'df_train'
# result = select_parallel(example_beam, 1)
# print(result)


#SPEED TEST
# import time

# start_time = time.time()
# result=select(test_dict)
# end_time = time.time()
# select_time = end_time - start_time
# print(f"The basic selection function took {select_time} s to complete its task")

# start_time = time.time()
# result=select_parallel(test_dict,3,4,'J')
# end_time = time.time()
# select_time = end_time - start_time
# print(f"The parallelized selection function took {select_time} s to complete its task")

In [None]:
# def get_score(prompt,minibatch_df, shortened_label):

#   response_dict={'tp':0,'tn':0,'fp':0,'fn':0}

#   #evaluate prompts on entire minibatch
#   for data in minibatch_df.iterrows():
#     #print(data[1][3])
#     query_string=f'Consider the following online terms of service clause: {data[1][3]}'\
#                 +f'{prompt} \n Start your answer with "yes" or "no" and then justify your response in no more than 30 words.'

#     answer=query_mistral(query_string)
#     #print(answer)
#     res=is_fpnp(answer,get_GT(data[1],shortened_label))
#     response_dict[res]+=1

#   #copmute F1 score
#   score=get_f1(response_dict['tp'],response_dict['fp'],response_dict['fn'])
#   #print(score)
#   return score

#test
#print(get_score("Does this clause state that any judicial proceeding is to be conducted in a place other than the consumer's residence (i.e. in a different city, different country)?",sampled_df))

In [None]:
import json

def save_to_file(dict_res,label,method):
  """Dump `dict_res` as indented JSON to result_<label>_<method>.json.

  The file is written to the current working directory and a confirmation
  message is printed afterwards.
  """
  # Target file name is derived from the label and the method name.
  file_path = f"result_{label}_{method}.json"

  # Serialise before opening the file, so a non-serialisable dictionary
  # fails before anything is written.
  payload = json.dumps(dict_res, indent=2)

  with open(file_path, 'w') as handle:
      handle.write(payload)

  print(f"Dictionary has been saved to {file_path}")

# Main function

In [None]:
#main function : ALGORITHM 1

def optimization(OGprompt,stopping_criterion,max_iter, shortened_label,
                 minibatch_size=40, n_expansions=7, beam_width=3):
  """Beam-search prompt optimisation using scores as the feedback signal.

  Starting from `OGprompt`, each iteration expands the beam with new
  LLM-generated prompts, re-scores all of them on a fresh minibatch and
  keeps the best ones. The history and the final best prompt are saved
  with `save_to_file` under the method name "Scores_as_gradients".

  Parameters
  ----------
  OGprompt : str
      Initial hand-written prompt.
  stopping_criterion : float
      The loop continues while the best score of the last iteration is
      less than or equal to this value.
  max_iter : int
      Maximum number of expand/select iterations.
  shortened_label : str
      Label code of the clause category being optimised.
  minibatch_size : int, optional
      Number of clauses sampled for every scoring round (default 40).
  n_expansions : int, optional
      New prompts generated per iteration (default 7).
  beam_width : int, optional
      Prompts kept after each selection (default 3).

  Returns
  -------
  list of dict
      Single-element list holding the best prompt and its score.
  """
  result_dict={}
  # The beam starts with the original prompt only, scored once up front.
  beam=[{'prompt':OGprompt,'score':0}]
  minibatch_df=sample_n_clauses(df_train,minibatch_size,shortened_label)
  beam[0]['score']=get_score_parallel(beam[0]['prompt'],minibatch_df, shortened_label)

  # Starts at 0 so that at least one iteration runs whenever max_iter > 0
  # and the stopping criterion is non-negative.
  best_score=0

  print("Initial beam: ", beam)
  i=0

  while(best_score<=stopping_criterion and  i<max_iter):

    print(f'****************Iteration {i+1} of {max_iter}****************\n')
    beam=expand_parallel(beam,n_expansions, shortened_label)
    beam=select_parallel(beam,beam_width,minibatch_size,shortened_label)
    # Store a snapshot: select_parallel re-scores the beam dicts in place on
    # later iterations, which would otherwise overwrite the scores recorded
    # for this iteration.
    result_dict[f"iteration_{i}"]=[dict(item) for item in beam]
    print(f'****************End of iteration {i+1} ****************\n')

    best_score=beam[0]['score']

    i+=1
  # Final re-scoring on a fresh minibatch to pick the single best prompt.
  best=select_parallel(beam,1,minibatch_size,shortened_label)
  result_dict["best_prompt"]=[dict(item) for item in best]
  save_to_file(result_dict,shortened_label,"Scores_as_gradients")
  return best

#test
# result=optimization("Does this clause stipulate that duties to pay damages by the provider are limited or excluded?",0.98,10, "LTD")
# print(result)

In [None]:
# Run the full prompt optimisation once per clause category.
# Every call starts from a hand-written seed prompt, uses a stopping criterion
# of 0.98 and at most 10 iterations, and prints the best prompt it found.
# optimization() also saves each run via save_to_file under the method name
# "Scores_as_gradients". This cell issues many LLM API calls.
# The result_* names are read again by the following cell.

# A: arbitration
result_A=optimization('Does this clause describe an arbitration dispute resolution process that is not fully optional to the consumer?',0.98,10,"A")
print(f"RESULT ARBITRATION : {result_A}")
# CH: unilateral change
result_CH=optimization('Does this clause specify conditions under which the service provider could amend and modify the terms of service and/or the service itself?',0.98,10,"CH")
print(f"RESULT UNILATERAL CHANGE : {result_CH}")
# CR: content removal
result_CR=optimization("Does this clause indicate conditions for content removal in the service provider's full discretion, and/or at any time for any or no reasons and/or without notice nor possibility to retrieve the content.",0.98,10,"CR")
print(f"RESULT CONTENT REMOVAL : {result_CR}")
# J: jurisdiction
result_J=optimization("Does this clause state that any judicial proceeding is to be conducted in a place other than the consumer's residence (i.e. in a different city, different country)?",0.98,10,"J")
print(f"RESULT JURISDICTION : {result_J}")
# LAW: choice of law
result_LAW=optimization('Does the clause define the applicable law as different from the law of the consumer’s country of residence?',0.98,10,"LAW")
print(f"RESULT CHOICE OF LAW : {result_LAW}")
# LTD: limitation of liability
result_LTD=optimization('Does this clause stipulate that duties to pay damages by the provider are limited or excluded?',0.98,10,"LTD")
print(f"RESULT LIMITATION OF LIABILITY : {result_LTD}")
# TER: unilateral termination
result_TER=optimization('Does this clause stipulate that the service provider may suspend or terminate the service at any time for any or no reasons and/or without notice?',0.98,10,"TER")
print(f"RESULT UNILATERAL TERMINATION : {result_TER}")
# USE: contract by using
result_USE=optimization('Does this clause stipulate that the consumer is bound by the terms of use of a specific service, simply by using the service, without even being required to mark that he or she has read and accepted them?',0.98,10,"USE")
print(f"RESULT AGREEMENT BY USING : {result_USE}")

Initial beam:  [{'prompt': 'Does this clause stipulate that the consumer is bound by the terms of use of a specific service, simply by using the service, without even being required to mark that he or she has read and accepted them?', 'score': 0.7241379310344828}]
****************Iteration 1 of 10****************

Candidates:  [{'prompt': 'Does this clause stipulate that the consumer is bound by the terms of use of a specific service, simply by using the service, without even being required to mark that he or she has read and accepted them?', 'score': 0.7241379310344828}, {'prompt': ' Classify the following clause: "By accessing our online platform, you agree to our terms and conditions." <F1-score: 0.6', 'score': 0}, {'prompt': ' The instruction provided should specify the use of the clause in the online service contract, with no mention of any terms or conditions required for the consumer to be aware of when using the service.', 'score': 0}, {'prompt': " <InstructionScorePair1>Instru

In [None]:
# TEST
print(f"RESULT ARBITRATION : {result_A}")
print(f"RESULT UNILATERAL CHANGE : {result_CH}")
print(f"RESULT CONTENT REMOVAL : {result_CR}")
print(f"RESULT JURISDICTION : {result_J}")
print(f"RESULT CHOICE OF LAW : {result_LAW}")
print(f"RESULT LIMITATION OF LIABILITY : {result_LTD}")
print(f"RESULT UNILATERAL TERMINATION : {result_TER}")
print(f"RESULT AGREEMENT BY USING : {result_USE}")

RESULT ARBITRATION : [{'prompt': ' Suggested Task: Identify and classify arbitration clauses according to their type (expedited, default, etc.).', 'score': 0.8292682926829268}]
RESULT UNILATERAL CHANGE : [{'prompt': 'Does this clause specify conditions under which the service provider could amend and modify the terms of service and/or the service itself?', 'score': 0.8717948717948718}]
RESULT CONTENT REMOVAL : [{'prompt': " <InstructionScorePair3>Instruction:  Task: Evaluate online service contracts for content removal practices.\nInstructions:\n\n1. Does this clause allow for content removal without prior notice, and/or at the service provider's discret", 'score': 0.926829268292683}]
RESULT JURISDICTION : [{'prompt': ' Instruction: Identify clauses in online service contracts that indicate a jurisdiction requirement for judicial proceedings outside of the consumer\'s primary residence. Use the examples below to test your F1-Score: \n\n- "In the event', 'score': 1.0}]
RESULT CHOICE OF 