In [2]:
import numpy as np
import pandas as pd
from convokit import Corpus, download

**Team:** Amari Bauer, Matt Ryan, Francesca Vescia

**Data:** We will train our model on transcripts from the 1,024 Roberts Court cases with clear winners and available utterance data. We chose to focus on Roberts Court cases because we hope to eventually use our model to predict the outcomes of new cases before rulings are issued. John Roberts is the current Chief Justice of the United States, so upcoming Supreme Court cases will be argued before his court. We expect a model trained exclusively on past Roberts Court cases to have higher predictive accuracy for these cases than a model trained on the full corpus, which includes transcripts from older cases heard before other courts in notably different eras. Our goal is to predict whether a case will be decided in favor of the petitioner, so we exclude cases where information about the winning side is unclear or altogether unavailable. We will use features of case utterances to predict case outcomes, so we also exclude cases with no available utterance data.

In [3]:
# Fetch the ConvoKit 'supreme-corpus' dataset; `download` returns the value
# that is handed to Corpus as `filename`.
corpus_path = download('supreme-corpus')

# Load the downloaded dataset into a Corpus object.
corpus = Corpus(filename=corpus_path)

Dataset already exists at /Users/fvescia/.convokit/downloads/supreme-corpus


In [4]:
# All cases (the file holds one JSON record per line).
# Kept under its own name so that `cases` below refers to exactly one thing.
all_cases = pd.read_json(path_or_buf='data/cases.jsonl', lines=True)

# Cases with clear winners: keep rows whose win_side is 1 or 0
df = all_cases.loc[all_cases['win_side'].isin([1, 0])]

# Roberts court cases with clear winners
roberts = df.loc[df['court'] == 'Roberts Court', :]

# All utterances in the corpus, as a DataFrame
all_utts = corpus.get_utterances_dataframe()

# Roberts court case utterances: keep utterances whose case id is among
# the Roberts court cases selected above
ids = roberts['id'].unique()
utts = all_utts.loc[all_utts['meta.case_id'].isin(ids)]

# Roberts court cases with clear winners and utterance data.
# Grouping by case id means len() counts distinct cases that have at least
# one utterance, not the number of utterances.
cases = utts.groupby(['meta.case_id'])
len(cases)

1024

In [5]:
# Utterance counts per speaker type. `utts` is already restricted to the
# selected Roberts court case ids, so it is not filtered by `ids` again here.
speaker_types = utts.groupby(['meta.speaker_type']).size()
print(speaker_types)

# Derive the shares from the counts themselves instead of from numbers copied
# out of a previous run, so they stay correct if the data or filters change.
speaker_shares = speaker_types / speaker_types.sum()
print('Advocates:', speaker_shares['A'])
print('Judges:', speaker_shares['J'])

meta.speaker_type
A    116108
J    125594
dtype: int64
Advocates: 0.4803766621707723
Judges: 0.5196233378292278


In [6]:
# Utterance counts per side. `utts` is already restricted to the selected
# Roberts court case ids, so it is not filtered by `ids` again here.
sides = utts.groupby(['meta.side']).size()
print(sides)

# Labels in the order of the sorted side codes (0, 1, 2, 3) produced by groupby.
side_labels = ['Respondent', 'Petitioners', 'Amicus', 'Unknown']
# Guard the positional label mapping: a missing or extra code would otherwise
# silently shift every label.
assert len(sides) == len(side_labels), 'unexpected number of meta.side codes'

# Derive the shares from the counts themselves instead of from numbers copied
# out of a previous run. groupby drops rows with a missing key by default, so
# these are shares among utterances that have a side.
# NOTE(review): the total equals the advocate utterance count, so presumably
# judge utterances carry no side; confirm against the corpus documentation.
side_shares = sides / sides.sum()
for label, share in zip(side_labels, side_shares):
    print(f'{label}:', share)

meta.side
0    52498
1    53130
2     8043
3     2437
dtype: int64
Respondent: 0.4521480001378027
Petitioners: 0.4575912081854825
Amicus: 0.06927171254349399
Unknown: 0.020989079133220793
