In [1]:
import glob
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from openai import OpenAI

# The chdir must stay ahead of the project-local import below
# (API_key is presumably resolved relative to the working directory -- confirm)
os.chdir('/users/sgdbareh/volatile/ECHR_Importance')
from API_key import openai_key
client = OpenAI(api_key=openai_key)

### Read in Data

In [2]:
data_directory = './Data/'

In [3]:
# Directories holding the question and subject-matter .txt files
questions = data_directory +'questions/'
subject_matter = data_directory + 'subject_matter/'


def _read_text_files(directory):
    """Return a list of [filename, file contents] pairs, one per entry in `directory`."""
    rows = []
    for file in os.listdir(directory):
        # utf-8 is given explicitly so decoding does not depend on the machine's locale
        with open(os.path.join(directory, file), 'r', encoding='utf-8') as text:
            rows.append([file, text.read()])
    return rows


# Raw [filename, text] pairs for each document type
subject_matter_list = _read_text_files(subject_matter)
questions_list = _read_text_files(questions)

questions_df = pd.DataFrame(questions_list,columns=['Filename','Questions'])
subject_matter_df = pd.DataFrame(subject_matter_list,columns=['Filename','Subject Matter'])

In [4]:
# Drop the '.txt' extension from the filenames.
# regex=False makes the match literal; under regex matching the '.' would match any character.
subject_matter_df['Filename'] = subject_matter_df['Filename'].str.replace('.txt', '', regex=False)
questions_df['Filename'] = questions_df['Filename'].str.replace('.txt', '', regex=False)

### Word Count

In [5]:
# Whitespace-delimited token count for each text column
for frame, text_column in ((questions_df, 'Questions'), (subject_matter_df, 'Subject Matter')):
    frame['Word Count'] = frame[text_column].str.split().str.len()

In [10]:
subject_matter_df[subject_matter_df['Word Count'] <100].sort_values(by='Word Count', ascending=False)

Unnamed: 0,Filename,Subject Matter,Word Count
794,001-115941,THE FACTS\nThe applicant is a Libyan national ...,99
6689,001-214177,The application concerns the domestic authorit...,99
5595,001-196071,By a Supreme Court judgment of 12 February 201...,99
3321,001-164840,The application concerns the right of the appl...,99
5114,001-188508,The application concerns the refusal of the ap...,99
...,...,...,...
5008,001-187206,The application concerns the applicant’s inabi...,18
4390,001-179588,The application concerns five Sudanese nationa...,18
6970,001-223412,The applications concern the length of crimina...,17
5148,001-189301,The application concerns the deportation to Af...,17


### Link Data

In [7]:
# create merged df
df = pd.merge(questions_df, subject_matter_df, on='Filename')

In [8]:
#create unmatched df to check for missing data
unmatched_df = pd.merge(questions_df, subject_matter_df, on='Filename', how='outer', indicator=True)
unmatched_rows = unmatched_df[unmatched_df['_merge'] != 'both']
len(unmatched_rows)

273

In [9]:
# Load the '*.txt' lists in the data directory to see how missingness is distributed
missing_data = []

# os.listdir returns entries in arbitrary order; sorting keeps the positions in
# `missing_data` stable (questions_missing.txt before subject_missing.txt)
for file in sorted(os.listdir(data_directory)):
    if file.endswith('.txt'):
        # header=None keeps the first line of the file as a data row
        data = pd.read_csv(data_directory + file, sep=' ', header=None)
        missing_data.append(data)
        print(f'filename {file}; count {len(data)}')

filename questions_missing.txt; count 141
filename subject_missing.txt; count 339


In [None]:
# missing_data holds one frame per '*.txt' file, in the order the files were listed.
# Position 0 is taken to be questions_missing.txt and position 1 subject_missing.txt
# (the order shown in the listing output) -- confirm if the directory contents change.
questions_missing = missing_data[0]
subject_missing = missing_data[1]

In [None]:
# merge the two missing-case lists to see whether a case is missing from both files or just one
x = pd.merge(subject_missing, questions_missing, on=0, how='outer', indicator=True)

In [None]:
x.rename(columns={0: 'Filename'}, inplace=True)

In [None]:
#use these lists to manually check data quality
both = x[x['_merge'] == 'both']
right = x[x['_merge'] == 'right_only']
left = x[x['_merge'] == 'left_only']

### Data Processing

In [None]:
df = df.rename(columns={'Word Count_x': 'Question_Count', 'Word Count_y': 'Subject_Matter_Count'})


In [None]:
#remove whitespace
df['Questions'] = df['Questions'].str.strip()
df['Subject Matter'] = df['Subject Matter'].str.strip()

In [None]:
# remove the repeated 'THE FACTS' heading and all line breaks
# regex=False keeps every pattern a literal string whatever the pandas version's default is
# NOTE(review): line breaks are removed without inserting a space, so words separated only
# by a newline end up joined -- confirm this is intended
df['Subject Matter'] = df['Subject Matter'].str.replace('THE FACTS\n', '', regex=False)
df['Subject Matter'] = df['Subject Matter'].str.replace('\n', '', regex=False)
df['Questions'] = df['Questions'].str.replace('\n', '', regex=False)

In [None]:
#link outcome label
labels = pd.read_csv(data_directory + 'importance_labels.csv')
labels = labels.rename(columns={'itemid':'Filename'})

In [None]:
df = pd.merge(df, labels, on='Filename')

In [None]:
df.to_pickle('full_data_w_court')

In [None]:
# check importance counts
importance_counts = df['importance'].value_counts()


In [None]:
importance_counts = pd.DataFrame(importance_counts)

In [None]:
# Share of cases per importance level. Only the counts column (the first one) is used,
# so re-running the cell after 'pct' has been added still works.
counts_column = importance_counts.iloc[:, 0]
importance_counts['pct'] = counts_column / counts_column.sum()

In [None]:
importance_counts['sample_cases'] = importance_counts['pct'].apply(lambda x: round(x * 50))

In [None]:
importance_counts

In [None]:
def sampleHelper(df, importance, size, random_state=154):
    '''
    Draw a reproducible random sample of the cases with one importance level.

    Parameters:
    df: pd.DataFrame
        Data containing an 'importance' column.
    importance:
        Value of the 'importance' column to select.
    size: int
        Number of rows to draw (without replacement).
    random_state: int
        Seed passed to DataFrame.sample; the default of 154 reproduces the
        previously hard-coded behaviour.

    Returns:
    pd.DataFrame
        `size` rows of `df` whose 'importance' equals `importance`.

    Raises:
    ValueError
        Raised by pandas when fewer than `size` rows have that importance.
    '''
    candidates = df[df['importance'] == importance]
    return candidates.sample(size, random_state=random_state)

In [None]:
def sample(df):
    '''
    Build the stratified sample: a fixed number of cases per importance level.

    Levels 1 and 2 contribute 5 cases each, level 3 contributes 11, and any
    other level found in the 'importance' column contributes 31.

    Returns the per-level samples concatenated into one DataFrame, in the
    order the levels are returned by `unique()`.
    '''
    cases_per_level = {1: 5, 2: 5, 3: 11}
    fallback_size = 31

    return pd.concat([
        sampleHelper(df, level, cases_per_level.get(level, fallback_size))
        for level in df['importance'].unique()
    ])

In [None]:
sample_df = sample(df)

In [None]:
sample_df

In [None]:
valid_data = sample_df[['Filename','Questions','Subject Matter','importance']]

In [None]:
test_data = df.drop(sample_df.index)
test_data = test_data[['Filename','Questions','Subject Matter','importance']]

In [None]:
valid_data

### GPT Tests

#### Experiments

In [None]:
# read in data
df = pd.read_pickle('valid_data.pkl')

In [None]:
df_main = df[:-4]
df_examples = df[-4:]

In [None]:
df_main

In [None]:
JSON_SCHEMAS = [{"Case Importance":"int (1-4)","Summary":"string (description of the case)","Reasoning":"string (give your reason for the importance)" },
                {"Case Importance":"int (1-4)"}
                ]

In [None]:
#To Do:
#- explore different roles
#- explore different prompts both few-shot and zero-shot
#- explore different temperature and max_tokens
#- 

### Experiment 1: What does GPT-4 know about the ECtHR? Give it the case id and appNo and see if it knows the importance of the case already...
#TO DO:
## Write the classification code
## Write the code
## COST it

### Experiment 2: Prediction of the importance of the case based on the questions and subject matter
#TO DO:
## Write the code - inc processing the JSON responses and saving them
## COST it

##### Experiment 1

In [None]:
metadata = pd.read_json('/users/sgdbareh/volatile/ECHR_Importance/Data/overlap_cases/pruned_COMMUNICATEDCASES_meta.json',lines=True)

In [None]:
metadata.rename(columns={'itemid':'Filename'}, inplace=True)

In [None]:
exp1_data = pd.merge(df_main, metadata, on='Filename')

In [None]:
exp1_data.columns

In [None]:
exp1_data = exp1_data[['Filename','importance_x','appno','docname']]

In [None]:
# Assign the result rather than renaming in place: exp1_data is a column selection of
# another frame, and in-place changes on such a selection can trigger SettingWithCopyWarning
exp1_data = exp1_data.rename(columns={'importance_x':'importance'})

In [None]:
def exp1_get_prompt(schema, name, appnos, prompt_type='first'):

    match prompt_type:
        #option to say don't know, no info on HUDOC given
        case 'first':
            exp1_prompt = f''' You are going to tell me the importance of the cases in the European Court of Human Rights. These values are given to every case after a judgment
                or decision has been received. 
                Using the information given to you tell me the case importance giving a response of either key case, 1, 2 or 3. 
                If you do not know the importance, state that you do not have enough information.
                The output should be given directly in JSON format, with the following schema: {schema}.
                The name of the case is {name} and the application number/s is/are {appno}.
                '''
        #option to say don't know, info on HUDOC given
        case 'second':
            exp1_prompt = f''' You are going to tell me the importance of the cases in the European Court of Human Rights. These values are given to every case after a judgment
                or decision has been received. The case importance is part of the metadata on HUDOC.
                Using the information given to you tell me the importance of the case giving a response of either key case, 1, 2 or 3. 
                If you do not know the importance, state that you do not have enough information.
                The output should be given directly in JSON format, with the following schema: {schema}.
                The name of the case is {name} and the application number/s is/are {appno}.
                '''
        #no option to say don't know, info on HUDOC given
        case 'third':
            exp1_prompt = f''' You are going to tell me the importance of the cases in the European Court of Human Rights. These values are given to every case after a judgment
                or decision has been received. The case importance is part of the metadata on HUDOC.
                Using the information given to you tell me the importance of the case giving a response of either key case, 1, 2 or 3. 
                The output should be given directly in JSON format, with the following schema: {schema}.
                The name of the case is {name} and the application number/s is/are {appno}.
                '''
        #no option to say don't know, no info on HUDOC given
        case 'fourth':
            exp1_prompt = f''' You are going to tell me the importance of the cases in the European Court of Human Rights. These values are given to every case after a judgment
                or decision has been received. 
                Using the information given to you tell me the importance of the case giving a response of either key case, 1, 2 or 3. 
                The output should be given directly in JSON format, with the following schema: {schema}.
                The name of the case is {name} and the application number/s is/are {appno}.
                '''
        #demands LLM to use HUDOC information, no option to say don't know
        case 'fifth':
            exp1_prompt = f''' You are going to tell me the importance of the cases in the European Court of Human Rights.
                Find the information from HUDOC and tell me the importance of the case giving a response of either key case, 1, 2 or 3. 
                The output should be given directly in JSON format, with the following schema: {schema}.
                The name of the case is {name} and the application number/s is/are {appno}.
                '''

    return exp1_prompt

In [None]:
exp1_data.index[1]

In [None]:
test_case = exp1_data.iloc[0]

In [None]:
test_case

In [None]:
prompt_exp1 = exp1_get_prompt(JSON_SCHEMAS[1], test_case['docname'], test_case['appno'],'first')

In [None]:
prompt_exp1

In [None]:
response = client.chat.completions.create(
  model="gpt-4o",
  messages=[{"role": "user", "content": prompt_exp1}],
  response_format={'type': 'json_object'},
  max_tokens=500,
  temperature=0
)

In [None]:
response

In [None]:
x = response.choices[0].message.content

In [None]:
# Parse the model's reply; a malformed reply is reported together with the raw text
try:
    data = json.loads(x)
except json.JSONDecodeError as err:
    # ValueError (the parent class of JSONDecodeError) describes the failure; chaining keeps the parser's detail
    raise ValueError(f'Error in decoding JSON response: {x}') from err

In [None]:
test_df = pd.DataFrame()

In [None]:
# Create a DataFrame from the loaded JSON data
temp_df = pd.DataFrame(data,index=[0])

# Concatenate the DataFrame to 'test_df'
test_df = pd.concat([test_df, temp_df], ignore_index=True)


In [None]:

pd.DataFrame(data, index=[0])

In [None]:
#evaluation of json files
pd.read_pickle('exp_1_output.pkl')

##### Experiment 2

In [None]:
from GPT_Experiments import create_examples

In [None]:
JSON_SCHEMAS = [{
    "Case Importance": {"Importance":"int (1-4)","Summary":"string (description of the case)","Reasoning":"string (give your reason for the importance)" }},
    {"Case Importance": {"Importance":"int (1-4)"}}]

In [None]:
examples = create_examples(df[-4:],text=3)


In [None]:
def get_prompt(row, zero_shot:bool =True, text:int = 3, examples:list = [], schema:dict = JSON_SCHEMAS[0]):

    '''
    Function to generate a prompt for the GPT-4o model.
    
    Parameters: 
    row: pd.Series
        A row from the dataframe containing the data.
    zero_shot: bool
        A boolean to determine if the prompt is for zero-shot learning.
    text: int
        The section/s of the text to include in the prompt:
            1 = Subject Matter
            2 = Questions
            3 = Both
    examples: list
        A list of the examples to include in the prompt.
        
    Returns:
    prompt: str
        The prompt to be used for the GPT-4o model.
    '''

    match text:
        case 1:
            text = row['Subject Matter']
            text_amount = 'subject matter of the case'
        case 2:
            text = row['Questions']
            text_amount = 'questions asked to the parties'
        case 3:
            text = row['Subject Matter'] + ' ' + row['Questions']
            text_amount = 'subject matter of the case and the questions asked to the parties'
        case _:
            raise ValueError('Invalid text value. Please enter a value between 1 and 3.')

    if zero_shot:
        additional_context = ''
    else:
        #examples = [f'Importance: {i}\n{e}' for i, e in zip(row['importance'], examples)]
        additional_context = f'''You are also given a number of examples for each level of importance. 
                                Level 1: {examples[0]}; Level 2: {examples[1]}; Level 3: {examples[2]}; Level 4: {examples[3]}'''

    importance_levels = '''1: These are the most important and have been selected as key cases and have been selected for publication in the Court\'s official reports; 
                           2: The case is of high importance. The case makes a significant contribution to the development, clarification or modification of its case law, either generally or in relation to a particular case; 
                           3: The case is of medium importance. The case while not making a significant contribution to the case-law, nevertheless it goes beyond merely applying existing case law; 
                           4: The case is of low importance. The case is of limited interest and simply applies existing case law'''
    
    prompt = f''' You are a lawyer in the European Court of Human Rights, and your goal is to predict the importance of a case, based on information provided from a communicated case. Importance in a legal setting refers to the significance of a case in terms of its impact on the development of case law.
    The following information is provided to you:
    You will be given a communicated case, including the {text_amount}.
    You are given a description of the different levels of importance: {importance_levels}. 1 is the highest and 4 is the lowest.
    We would expect that there would be fewer cases of higher importance than lower importance.
    {additional_context}.
    Based only on the information given to you predict the importance of the case according to the criteria given, giving a response of either 1, 2, 3 or 4. 
    If you do not have enough information to make a prediction, state that you do not have enough information.
    The output should be given directly in JSON format, with the following schema: {schema}.
    The communicated case information you should base your judgement on is as follows: {text}.
    '''

    return prompt

In [None]:
#basic test
prompt = get_prompt(df.iloc[0],True,3,examples=examples,schema=JSON_SCHEMAS[0])

In [None]:
sub = df.iloc[0]['Subject Matter']
q = df.iloc[0]['Questions']

In [None]:
second_prompt = f'The communicated case information you should base your judgement on is as follows: {sub} {q}.'

In [None]:
second_prompt

In [None]:
prompt.strip()

In [None]:
response = client.chat.completions.create(
  model="gpt-4o",
  messages=[{"role": "system", "content": prompt}],
  response_format={'type': 'json_object'},
  seed=42,
  max_tokens=500,
  temperature=0,
  top_p=1
  
)

In [None]:
print(response.choices[0].message.content)


#### MONITOR BATCH API

In [3]:
client.batches.list(limit=3)

SyncCursorPage[Batch](data=[Batch(id='batch_iERO0PhrkfTR0wDP6ZJRC7n7', completion_window='24h', created_at=1720518920, endpoint='/v1/chat/completions', input_file_id='file-8Fl8zcTcNKjUUuwkXI3fBDum', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1720518930, error_file_id=None, errors=None, expired_at=None, expires_at=1720605320, failed_at=None, finalizing_at=1720518928, in_progress_at=1720518921, metadata={'description': 'Experiment chamber_GC:  experiment_chamber_GC_False_3_10_.jsonl'}, output_file_id='file-1ht4nTfQImfl45JfxHh9DZFe', request_counts=BatchRequestCounts(completed=54, failed=0, total=54)), Batch(id='batch_ubqHnFqHNdZBmgB4uAe9oSib', completion_window='24h', created_at=1720518919, endpoint='/v1/chat/completions', input_file_id='file-mi3tYuwgs51vws86KXvTpSyL', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1720518930, error_file_id=None, errors=None, expired_at=None, expires_at=1720605319, fail

### RESULTS CHECK

In [3]:
from results import combine_results, process_results, confusion_matrix, data_process, process_as_binary, score_results

data = pd.read_pickle('valid_data.pkl')
data = data_process(data[:-4])   

results = combine_results('./Results/experiment_2_valid','Case Importance')
for name,result in results.items():
    print(name)
    y_pred, y_true = process_results(result,data,experiment=2)
    score_results(y_true,y_pred)
    #confusion_matrix(y_true,y_pred)
    print('\n')

{'  experiment_2_False_3_1.jsonl': {'001-179979': 3, '001-180761': 2, '001-169580': 2, '001-165054': 2, '001-209011': 3, '001-110558': 2, '001-187172': 4, '001-118892': 3, '001-200157': 3, '001-220247': 2, '001-168613': 4, '001-147363': 2, '001-189701': 4, '001-158667': 4, '001-202689': 3, '001-175367': 2, '001-200394': 4, '001-178986': 3, '001-145890': 4, '001-167965': 2, '001-177312': 2, '001-127205': 3, '001-111096': 2, '001-208396': 3, '001-193615': 2, '001-110357': 3, '001-184561': 2, '001-189564': 2, '001-163792': 4, '001-126480': 4, '001-159740': 3, '001-175020': 3, '001-200399': 2, '001-203593': 2, '001-192530': 2, '001-209204': 2, '001-145320': 2, '001-115227': 4, '001-191601': 3, '001-111408': 4, '001-187541': 1, '001-155132': 2, '001-149172': 2, '001-157677': 3, '001-112348': 2, '001-114705': 2, '001-193400': 2, '001-175517': 2, '001-117778': 2, '001-112158': 2}, '  experiment_2_True_3_1.jsonl': {'001-179979': 2, '001-180761': 2, '001-169580': 2, '001-165054': 2, '001-209011

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
data = pd.read_pickle('valid_data.pkl')
data = data_process(data[:-4])   

results = combine_results('./Results/experiment_2_valid_binary')
for name,result in results.items():
    print(name)
    y_pred, y_true = process_results(result,data,experiment=2)
    y_true = process_as_binary(y_true)
    #y_pred, y_true = process_as_binary(y_pred,y_true)
    #confusion_matrix(y_true,y_pred)
    print('\n')

In [None]:
data = pd.read_pickle('valid_data.pkl')
data = data_process(data[:-4])   

results = combine_results('./Results/experiment_1_valid')
for name,result in results.items():
    print(name)
    y_pred, y_true = process_results(result,data,experiment=1)
    #y_true = process_as_binary(y_true)
    #y_pred, y_true = process_as_binary(y_pred,y_true)
    #confusion_matrix(y_true,y_pred)
    print('\n')

In [None]:
data = pd.read_pickle('valid_data.pkl')
data = data_process(data[:-4])   

results = combine_results('./Results/experiment_sample')

results_list = []

for name,result in results.items():
    print(name)
    y_pred, y_true = process_results(result,data,experiment=1)
    #y_true = process_as_binary(y_true)
    #y_pred, y_true = process_as_binary(y_pred,y_true)
    scores,mcc = score_results(y_true,y_pred)
    results_list.append([name,scores[2],mcc])
    print('\n')

In [None]:
data = pd.read_pickle('valid_data.pkl')
data = data_process(data[:-4])

results = combine_results('./Results/experiment_key_case')

# Scoring is switched off for this experiment. The whole loop, including its trailing
# print, is commented out so the cell parses (a lone indented print is an IndentationError).
#results_list = []

#for name,result in results.items():
#    print(name)
#    y_pred, y_true = process_results(result,data,experiment=1)
#    #y_true = process_as_binary(y_true)
#    #y_pred, y_true = process_as_binary(y_pred,y_true)
#    scores,mcc = score_results(y_true,y_pred)
#    results_list.append([name,scores[2],mcc])
#    print('\n')

In [None]:
data = pd.read_pickle('valid_data.pkl')
data = data_process(data[:-4])   

results = combine_results('./Results/experiment_summ_pred')

results_list = []

for name,result in results.items():
    print(name)
    y_pred, y_true = process_results(result,data,experiment=1)
    #y_true = process_as_binary(y_true)
    #y_pred, y_true = process_as_binary(y_pred,y_true)
    scores,mcc = score_results(y_true,y_pred)
    results_list.append([name,scores[2],mcc])
    print('\n')

In [None]:
data

### Produce Summarisations

In [None]:
from GPT_Experiments import save_file, Experiment_2

df = pd.read_pickle('valid_data.pkl')
exp = Experiment_2(data=df,content='both')
data = exp.data

# The requested output format is the same for every case
schema = {"Case Summary": "string (description of the case)"}
output = []

# Build one batch request per case, keyed by the case's Filename
for row_label, case in data.iterrows():
    sub, q, filename = case['Subject Matter'], case['Questions'], case['Filename']

    prompt = f"""You are a lawyer in the European Court of Human Rights and you need to summarise a communicated case. A communicated case is an inter-state application that has been communicated to the respondent Government. It typically outlines the facts of the case, legal issues, alleged violations, relevant legal arguments and questions to the parties.
    I will provide you both parts of a communicated case, the subject matter and the questions to the parties. Please provide a summary of the case in your own words.
    The output should be given directly in JSON format, with the following schema: {schema} .
    The communicated case information you should base your summary on is as follows: {sub} {q}."""

    request_body = {
        "model": "gpt-4o",
        "messages": [{"role": "user", "content": prompt}],
        'response_format': {'type': 'json_object'},
        'max_tokens': 500,
        'temperature': 0,
        'top_p': 0.5,
        'seed': 42,
    }
    output.append({"custom_id": f'{filename}', "method": "POST", "url": "/v1/chat/completions", "body": request_body})

save_file(output,'./Batches/experiment_summarise',batch_name='experiment_summarise')



In [None]:
os.getcwd()

In [None]:
import json

# Directory holding the batch output files of the summarisation experiment
summarise_results_dir = '/users/sgdbareh/volatile/ECHR_Importance/Results/experiment_summarise'

# Maps each output file name to a {custom_id: case summary} dictionary
results = {}

for file in os.listdir(summarise_results_dir):

    if file.endswith('.jsonl'):

        # The batch output gets its own name so the experiment dataframe `data`,
        # which the merge cells below still need, is not overwritten
        batch_output = pd.read_json(f'{summarise_results_dir}/{file}',lines=True)

        individual_result = {}
        for custom_id, response in zip(batch_output['custom_id'], batch_output['response']):
            # The message content is itself a JSON string with a 'Case Summary' key
            content = json.loads(response['body']['choices'][0]['message']['content'])
            individual_result[f'{custom_id}'] = content['Case Summary']

        results[file] = individual_result

In [None]:
for name,summaries in results.items():
    print(summaries)
    print('\n')
    df = pd.DataFrame(summaries.items(), columns=['Filename','Case Summary'])

In [None]:
data.reset_index(drop=True, inplace=True)

In [None]:
merged_df = df.merge(data, left_index=True, right_index=True)

In [None]:
merged_df = merged_df[['Case Summary','Filename_y']]

In [None]:
# Assign the result rather than renaming in place: merged_df is a column selection of
# another frame, and in-place changes on such a selection can trigger SettingWithCopyWarning
merged_df = merged_df.rename(columns={'Filename_y':'Filename'})

In [None]:
x = pd.read_pickle('valid_data.pkl')

merged_2_df = x.merge(merged_df, on='Filename')

In [None]:
merged_2_df = merged_2_df[['Filename','importance','Case Summary']]

In [None]:
merged_2_df.to_pickle('summarise_data.pkl')

In [None]:
merged_2_df

### Decision vs Judgment Experiment

In [4]:
os.getcwd()

'/mnt/data1/users/sgdbareh/ECHR_Importance'

In [6]:
# Load the labelled data explicitly, as the later court-mapping cells do, so this cell
# does not depend on whatever `data` holds from earlier cells
# NOTE(review): assumes './full_data_w_court' is the frame that carries 'source_file' -- confirm
data = pd.read_pickle('./full_data_w_court')

# Map each metadata source file to the kind of ruling: 'Decision' or 'Judgment'
file_mapping = {
    'pruned_ADMISSIBILITY_meta.json': 'Decision',
    'pruned_CHAMBER_meta.json': 'Judgment',
    'pruned_GRANDCHAMBER_meta.json': 'Judgment',
    'pruned_COMMITTEE_meta.json': 'Judgment',
    'pruned_DECGRANDCHAMBER_meta.json': 'Decision',
    'pruned_ADMISSIBILITYCOM_meta.json': 'Decision'
    }

# Source files missing from the mapping get NaN in 'Court'
data['Court'] = data['source_file'].map(file_mapping)

In [None]:
data.to_pickle('Court_Labelled_Data.pkl')

In [25]:
from results import combine_results, process_results, confusion_matrix, data_process, process_as_binary, score_results

df = pd.read_pickle('valid_data.pkl')
court_label_data = pd.read_pickle('Court_Labelled_Data.pkl')
df = pd.merge(df, court_label_data, on=['Filename','Questions','Subject Matter','importance'],how='left')
#print(df_main)
df_main = df[:-4]

df_main = df_main[['Filename','Court']]
df_main = df_main.rename(columns={'Court':'real_court'})   

results = combine_results('./Results/experiment_court_pred_2',keyword='Court')
results_list = []

for name,result in results.items():
    print(name)
    y_pred, y_true = process_results(result,df_main,experiment=2,y_true_keyword='real_court',y_pred_keyword='Court')

    #print(y_pred.value_counts(),'\n', y_true.value_counts())
    print(y_pred,'\n', y_true)

    #y_true = process_as_binary(y_true)
    #y_pred, y_true = process_as_binary(y_pred,y_true)
    #scores,mcc = score_results(y_true,y_pred)
    #results_list.append([name,scores[2],mcc])
    #confusion_matrix(y_true,y_pred)
    print('\n')

{'  experiment_court_pred_3_False_3_8_.jsonl': {'001-179979': 'Judgment', '001-180761': 'Judgment', '001-169580': 'Judgment', '001-165054': 'Judgment', '001-209011': 'Judgment', '001-110558': 'Judgment', '001-187172': 'Judgment', '001-118892': 'Judgment', '001-200157': 'Judgment', '001-220247': 'Judgment', '001-168613': 'Judgment', '001-147363': 'Judgment', '001-189701': 'Judgment', '001-158667': 'Judgment', '001-202689': 'Judgment', '001-175367': 'Judgment', '001-200394': 'Judgment', '001-178986': 'Judgment', '001-145890': 'Decision', '001-167965': 'Judgment', '001-177312': 'Judgment', '001-127205': 'Judgment', '001-111096': 'Judgment', '001-208396': 'Judgment', '001-193615': 'Judgment', '001-110357': 'Judgment', '001-184561': 'Judgment', '001-189564': 'Judgment', '001-163792': 'Judgment', '001-126480': 'Judgment', '001-159740': 'Judgment', '001-175020': 'Judgment', '001-200399': 'Judgment', '001-203593': 'Judgment', '001-192530': 'Judgment', '001-209204': 'Judgment', '001-145320': 'J

### Chamber v GC v Com

In [11]:
#data prep
data = pd.read_pickle('./full_data_w_court')
file_mapping = {
    'pruned_ADMISSIBILITY_meta.json': 'Chamber',
    'pruned_CHAMBER_meta.json': 'Chamber',
    'pruned_GRANDCHAMBER_meta.json': 'Grand Chamber',
    'pruned_COMMITTEE_meta.json': 'Committee',
    'pruned_DECGRANDCHAMBER_meta.json': 'Grand Chamber',
    'pruned_ADMISSIBILITYCOM_meta.json': 'Committee'
    }

data['Court'] = data['source_file'].map(file_mapping)

In [14]:
data.to_pickle('Chamber_Data_1.pkl')

In [11]:
from results import combine_results, process_results, confusion_matrix, data_process, process_as_binary, score_results
from GPT_Experiments import generate_example_candidates

df = pd.read_pickle('valid_data.pkl')
court_label_data = pd.read_pickle('Chamber_Data_1.pkl')
df = pd.merge(df, court_label_data, on=['Filename','Questions','Subject Matter','importance'],how='left')
#print(df_main)
#df_main = df[:-4]

df_example = generate_example_candidates(df,keyword='Court',labels =['Committee','Chamber','Grand Chamber'])
df_main = df.drop(df_example.index)

df_main = df_main[['Filename','Court']]
df_main = df_main.rename(columns={'Court':'real_court'})   

results = combine_results('./Results/experiment_chamber_GC',keyword='Court')
results_list = []

for name,result in results.items():
    print(name)
    y_pred, y_true = process_results(result,df_main,experiment=2,y_true_keyword='real_court',y_pred_keyword='Court')

    print(y_pred.value_counts(),'\n', y_true.value_counts())
    #print(y_pred,'\n', y_true)

    #y_true = process_as_binary(y_true)
    #y_pred, y_true = process_as_binary(y_pred,y_true)
    #scores,mcc = score_results(y_true,y_pred)
    #results_list.append([name,scores[2],mcc])
    #confusion_matrix(y_true,y_pred)
    print('\n')

{'  experiment_chamber_GC_False_3_10_.jsonl': {'001-179979': 'Chamber', '001-180761': 'Grand Chamber', '001-169580': 'Chamber', '001-165054': 'Chamber', '001-209011': 'Chamber', '001-110558': 'Chamber', '001-187172': 'Chamber', '001-118892': 'Chamber', '001-200157': 'Chamber', '001-220247': 'Grand Chamber', '001-168613': 'Committee', '001-147363': 'Chamber', '001-189701': 'Committee', '001-158667': 'Committee', '001-202689': 'Chamber', '001-175367': 'Grand Chamber', '001-200394': 'Chamber', '001-178986': 'Chamber', '001-145890': 'Committee', '001-167965': 'Chamber', '001-177312': 'Grand Chamber', '001-127205': 'Chamber', '001-111096': 'Grand Chamber', '001-208396': 'Chamber', '001-193615': 'Chamber', '001-110357': 'Chamber', '001-184561': 'Chamber', '001-189564': 'Chamber', '001-163792': 'Chamber', '001-126480': 'Chamber', '001-159740': 'Chamber', '001-175020': 'Chamber', '001-200399': 'Chamber', '001-203593': 'Chamber', '001-192530': 'Grand Chamber', '001-209204': 'Chamber', '001-1453

### Chamber v Com

In [15]:
#data prep: two-way label, with the Grand Chamber source files mapped to 'Chamber'
data = pd.read_pickle('./full_data_w_court')
file_mapping = {
    'pruned_ADMISSIBILITY_meta.json': 'Chamber',
    'pruned_CHAMBER_meta.json': 'Chamber',
    'pruned_GRANDCHAMBER_meta.json': 'Chamber',
    'pruned_COMMITTEE_meta.json': 'Committee',
    'pruned_DECGRANDCHAMBER_meta.json': 'Chamber',
    'pruned_ADMISSIBILITYCOM_meta.json': 'Committee'
    }

# source files missing from the mapping get NaN in 'Court'
data['Court'] = data['source_file'].map(file_mapping)

In [16]:
data.to_pickle('Chamber_Data_2.pkl')

In [16]:
from results import combine_results, process_results, confusion_matrix, data_process, process_as_binary, score_results
from GPT_Experiments import generate_example_candidates

df = pd.read_pickle('valid_data.pkl')
court_label_data = pd.read_pickle('Chamber_Data_2.pkl')
df = pd.merge(df, court_label_data, on=['Filename','Questions','Subject Matter','importance'],how='left')
#print(df_main)
#df_main = df[:-4]

df_example = generate_example_candidates(df,keyword='Court',labels =['Committee','Chamber'])
df_main = df.drop(df_example.index)

df_main = df_main[['Filename','Court']]
df_main = df_main.rename(columns={'Court':'real_court'})   

results = combine_results('./Results/experiment_chamber_com',keyword='Court')
results_list = []

for name,result in results.items():
    print(name)
    y_pred, y_true = process_results(result,df_main,experiment=2,y_true_keyword='real_court',y_pred_keyword='Court')

    #print(y_pred.value_counts(),'\n', y_true.value_counts())
    print(y_pred,'\n', y_true)

    #y_true = process_as_binary(y_true)
    #y_pred, y_true = process_as_binary(y_pred,y_true)
    #scores,mcc = score_results(y_true,y_pred)
    #results_list.append([name,scores[2],mcc])
    #confusion_matrix(y_true,y_pred)
    print('\n')

{'  experiment_chamber_com_False_3_12_.jsonl': {'001-179979': 'Chamber', '001-180761': 'Chamber', '001-169580': 'Chamber', '001-165054': 'Chamber', '001-209011': 'Committee', '001-110558': 'Chamber', '001-187172': 'Chamber', '001-118892': 'Chamber', '001-200157': 'Chamber', '001-220247': 'Chamber', '001-168613': 'Committee', '001-147363': 'Chamber', '001-189701': 'Committee', '001-158667': 'Committee', '001-202689': 'Chamber', '001-175367': 'Chamber', '001-200394': 'Committee', '001-178986': 'Chamber', '001-145890': 'Committee', '001-167965': 'Chamber', '001-177312': 'Chamber', '001-127205': 'Chamber', '001-111096': 'Chamber', '001-208396': 'Chamber', '001-193615': 'Chamber', '001-110357': 'Chamber', '001-184561': 'Chamber', '001-189564': 'Chamber', '001-163792': 'Committee', '001-126480': 'Chamber', '001-159740': 'Chamber', '001-175020': 'Chamber', '001-200399': 'Chamber', '001-203593': 'Chamber', '001-192530': 'Chamber', '001-209204': 'Chamber', '001-145320': 'Chamber', '001-115227':

### CoT Reasoning

In [7]:
val_data = pd.read_pickle('valid_data.pkl')

In [8]:
other_data = pd.read_json('/users/sgdbareh/volatile/ECHR_Importance/Data/overlap_cases/pruned_COMMUNICATEDCASES_meta.json',lines=True)

In [10]:
other_data.rename(columns={'itemid':'Filename'}, inplace=True)

In [14]:
combined = pd.merge(val_data, other_data, on='Filename')

In [16]:
combined_sub = combined[['Filename','appno','docname']]

In [21]:
combined_sub

Unnamed: 0,Filename,appno,docname
0,001-179979,73487/12,TAŞ (ÇAKAR) v. TURKEY
1,001-180761,79065/13,KRDŽALIJA AND OTHERS v. MONTENEGRO
2,001-169580,43529/15,BAȘTUREA v. ROMANIA
3,001-165054,18320/09,KHADZHYRADOVY v. UKRAINE
4,001-209011,52299/14,LYSAK v. UKRAINE
5,001-110558,77407/11,CRACIUNEAC v. MOLDOVA
6,001-187172,38549/07,ÖZDEMIR v. TURKEY
7,001-118892,15275/11;76058/12,COLLOREDO MANNSFELD v. THE CZECH REPUBLIC and ...
8,001-200157,1666/19,Y AND OTHERS v. BULGARIA
9,001-220247,8793/19,RIPILOSKI v. NORTH MACEDONIA


In [20]:
combined_sub[combined_sub['appno']=='19866/21']

Unnamed: 0,Filename,appno,docname
52,001-217348,19866/21,SARAC v. DENMARK


In [7]:
from results import combine_results, process_results, confusion_matrix, data_process, process_as_binary, score_results

data = pd.read_pickle('valid_data.pkl')
data = data_process(data[:-4])   

results = combine_results('./Results/experiment_2_CoT','Case Importance')
for name,result in results.items():
    print(name)
    y_pred, y_true = process_results(result,data,experiment=1)
    score_results(y_true,y_pred)
    #confusion_matrix(y_true,y_pred)
    print('\n')

{'  experiment_2_CoT_False_3_13_.jsonl': {'001-179979': '2', '001-180761': '1', '001-169580': '2', '001-165054': '1', '001-209011': '2', '001-110558': '2', '001-187172': '2', '001-118892': '2', '001-200157': '2', '001-220247': '2', '001-168613': '3', '001-147363': '1', '001-189701': '2', '001-158667': '2', '001-202689': '1', '001-175367': '2', '001-200394': '2', '001-178986': '2', '001-145890': '3', '001-167965': 'key_case', '001-177312': '1', '001-127205': '2', '001-111096': '1', '001-208396': '2', '001-193615': '2', '001-110357': '2', '001-184561': '2', '001-189564': '1', '001-163792': '2', '001-126480': '2', '001-159740': '2', '001-175020': '2', '001-200399': '2', '001-203593': '2', '001-192530': 'key_case', '001-209204': '1', '001-145320': 'key_case', '001-115227': '2', '001-191601': '2', '001-111408': '2', '001-187541': 'key_case', '001-155132': '2', '001-149172': '1', '001-157677': '2', '001-112348': 'key_case', '001-114705': '1', '001-193400': 'key_case', '001-175517': '2', '001

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
