# Supreme Court Judgement

In [1]:
import os
import numpy as np
import pandas as pd
import re

# Dataset folder under the current user's home directory.
# os.path.expanduser("~") is used rather than os.environ["HOME"] because HOME
# is not guaranteed to be set (notably on Windows), in which case the direct
# environment lookup raises KeyError before anything else can run.
DATA_DIR = os.path.join(
    os.path.expanduser("~"), "Datasets", "HTX_Hackathon", "Supreme_Court_Judgement"
)

# Render up to 100 columns when displaying wide DataFrames.
pd.set_option("display.max_columns", 100)

# Read Judgement

In [2]:
# Load the raw judgement table from the dataset folder.
judge = pd.read_csv(
    filepath_or_buffer=os.path.join(DATA_DIR, "Supreme_Court_Judgement.csv"),
)

# Show the frame's dimensions first, then a preview of its first rows.
display(judge.shape, judge.head())

(3303, 15)

Unnamed: 0,ID,name,href,docket,term,first_party,second_party,facts,facts_len,majority_vote,minority_vote,first_party_winner,decision_type,disposition,issue_area
0,50606,Roe v. Wade,https://api.oyez.org/cases/1971/70-18,70-18,1971,Jane Roe,Henry Wade,"<p>In 1970, Jane Roe (a fictional name used in...",501,7,2,True,majority opinion,reversed,
1,50613,Stanley v. Illinois,https://api.oyez.org/cases/1971/70-5014,70-5014,1971,"Peter Stanley, Sr.",Illinois,<p>Joan Stanley had three children with Peter ...,757,5,2,True,majority opinion,reversed/remanded,Civil Rights
2,50623,Giglio v. United States,https://api.oyez.org/cases/1971/70-29,70-29,1971,John Giglio,United States,<p>John Giglio was convicted of passing forged...,495,7,0,True,majority opinion,reversed/remanded,Due Process
3,50632,Reed v. Reed,https://api.oyez.org/cases/1971/70-4,70-4,1971,Sally Reed,Cecil Reed,"<p>The Idaho Probate Code specified that ""male...",378,7,0,True,majority opinion,reversed/remanded,Civil Rights
4,50643,Miller v. California,https://api.oyez.org/cases/1971/70-73,70-73,1971,Marvin Miller,California,"<p>Miller, after conducting a mass mailing cam...",305,5,4,True,majority opinion,vacated/remanded,First Amendment


# Text Functions

In [3]:
import nltk
from nltk import sent_tokenize
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string

# Patterns and lookup tables are built once instead of on every call.
_MENTION_RE = re.compile(r"@[A-Za-z0-9_]+")
_HASHTAG_RE = re.compile(r"#[A-Za-z0-9_]+")
# A complete tag, opening or closing, attributes included: <p>, </p>, <a href="...">.
_HTML_TAG_RE = re.compile(r"</?[A-Za-z][^<>]*>")
# The dot after "www" is escaped so only a literal "www." prefix is treated as a URL.
_URL_RE = re.compile(r"http\S+|www\.\S+")
_PUNCT_TABLE = str.maketrans("", "", string.punctuation)
_STOP_WORDS_CACHE = {}


def _english_stop_words():
    """Return the NLTK English stop-word list as a frozenset, read only once."""
    if "english" not in _STOP_WORDS_CACHE:
        _STOP_WORDS_CACHE["english"] = frozenset(stopwords.words("english"))
    return _STOP_WORDS_CACHE["english"]


def clean_text_all(input_text, stem=False):
    """Reduce a raw text snippet to a space-separated string of content words.

    Processing steps, in order:
      1. remove @mentions and #hashtags;
      2. remove HTML tags (opening and closing) and URLs;
      3. lower-case and tokenize with ``word_tokenize``;
      4. strip punctuation characters from every token;
      5. keep purely alphabetic tokens that are not English stop words;
      6. optionally stem each remaining token.

    Parameters
    ----------
    input_text : str
        Raw text, possibly containing HTML markup.
    stem : bool, default False
        When True, each surviving token is reduced with the English Snowball
        stemmer. The default keeps the words unstemmed.

    Returns
    -------
    str
        The cleaned words joined by single spaces. An empty string is
        returned when ``input_text`` is not a string (for example NaN or
        None coming from a DataFrame column).
    """
    if not isinstance(input_text, str):
        return ""

    text = _MENTION_RE.sub("", input_text)
    text = _HASHTAG_RE.sub("", text)
    # Tags become a space so the words on either side are not glued together.
    text = _HTML_TAG_RE.sub(" ", text)
    text = _URL_RE.sub("", text)

    # The stemmer works on single words, so it is never handed the whole
    # document; lower-casing is done explicitly before tokenizing.
    tokens = word_tokenize(text.lower())
    stripped = (token.translate(_PUNCT_TABLE) for token in tokens)

    stop_words = _english_stop_words()
    words = [w for w in stripped if w.isalpha() and w not in stop_words]

    if stem:
        stemmer = nltk.stem.SnowballStemmer("english")
        words = [stemmer.stem(w) for w in words]

    return " ".join(words)

# Clean Facts Text and Relabel First Party Winner

In [4]:
# Build the working frame: a copy of the raw table with one extra column,
# FACTS, holding the cleaned version of each row's "facts" text.
judge_df = judge.assign(FACTS=judge["facts"].apply(clean_text_all))

# Dimensions first, then a preview of the first rows.
display(judge_df.shape, judge_df.head())

(3303, 16)

Unnamed: 0,ID,name,href,docket,term,first_party,second_party,facts,facts_len,majority_vote,minority_vote,first_party_winner,decision_type,disposition,issue_area,FACTS
0,50606,Roe v. Wade,https://api.oyez.org/cases/1971/70-18,70-18,1971,Jane Roe,Henry Wade,"<p>In 1970, Jane Roe (a fictional name used in...",501,7,2,True,majority opinion,reversed,,jane roe fictional name used court documents p...
1,50613,Stanley v. Illinois,https://api.oyez.org/cases/1971/70-5014,70-5014,1971,"Peter Stanley, Sr.",Illinois,<p>Joan Stanley had three children with Peter ...,757,5,2,True,majority opinion,reversed/remanded,Civil Rights,joan stanley three children peter stanley stan...
2,50623,Giglio v. United States,https://api.oyez.org/cases/1971/70-29,70-29,1971,John Giglio,United States,<p>John Giglio was convicted of passing forged...,495,7,0,True,majority opinion,reversed/remanded,Due Process,john giglio convicted passing forged money ord...
3,50632,Reed v. Reed,https://api.oyez.org/cases/1971/70-4,70-4,1971,Sally Reed,Cecil Reed,"<p>The Idaho Probate Code specified that ""male...",378,7,0,True,majority opinion,reversed/remanded,Civil Rights,idaho probate code specified males must prefer...
4,50643,Miller v. California,https://api.oyez.org/cases/1971/70-73,70-73,1971,Marvin Miller,California,"<p>Miller, after conducting a mass mailing cam...",305,5,4,True,majority opinion,vacated/remanded,First Amendment,miller conducting mass mailing campaign advert...


In [5]:
# Encode the outcome label as 1 for "True" and 0 for "False".
# The raw column of `judge` is the source (judge_df shares its index), so
# re-running this cell gives the same result instead of re-encoding values
# that were already converted to integers.
# Values that are neither True nor False (e.g. a missing outcome) are left as
# <NA> rather than being counted as 0, so the isin([0, 1]) filter applied
# afterwards can actually remove them.
winner_text = judge["first_party_winner"].astype("str")
judge_df["first_party_winner"] = winner_text.map({"True": 1, "False": 0}).astype("Int64")

judge_df["first_party_winner"].value_counts()

1    2140
0    1163
Name: first_party_winner, dtype: int64

In [6]:
# Drop the raw "facts" column now that the cleaned FACTS column exists.
# Reassigning (instead of inplace=True) with errors="ignore" keeps the cell
# re-runnable: a second run no longer fails with KeyError on the missing column.
judge_df = judge_df.drop(columns=["facts"], errors="ignore")

# Keep only rows whose label is one of the two valid classes.
judge_df = judge_df.loc[judge_df["first_party_winner"].isin([0, 1])]

# number of chars must exceed 10
judge_df = judge_df.loc[judge_df["FACTS"].str.len() > 10]

display(judge_df.shape)
display(judge_df.head())

(3303, 15)

Unnamed: 0,ID,name,href,docket,term,first_party,second_party,facts_len,majority_vote,minority_vote,first_party_winner,decision_type,disposition,issue_area,FACTS
0,50606,Roe v. Wade,https://api.oyez.org/cases/1971/70-18,70-18,1971,Jane Roe,Henry Wade,501,7,2,1,majority opinion,reversed,,jane roe fictional name used court documents p...
1,50613,Stanley v. Illinois,https://api.oyez.org/cases/1971/70-5014,70-5014,1971,"Peter Stanley, Sr.",Illinois,757,5,2,1,majority opinion,reversed/remanded,Civil Rights,joan stanley three children peter stanley stan...
2,50623,Giglio v. United States,https://api.oyez.org/cases/1971/70-29,70-29,1971,John Giglio,United States,495,7,0,1,majority opinion,reversed/remanded,Due Process,john giglio convicted passing forged money ord...
3,50632,Reed v. Reed,https://api.oyez.org/cases/1971/70-4,70-4,1971,Sally Reed,Cecil Reed,378,7,0,1,majority opinion,reversed/remanded,Civil Rights,idaho probate code specified males must prefer...
4,50643,Miller v. California,https://api.oyez.org/cases/1971/70-73,70-73,1971,Marvin Miller,California,305,5,4,1,majority opinion,vacated/remanded,First Amendment,miller conducting mass mailing campaign advert...


# Write to CSV

In [7]:
# Write the cleaned table into the dataset folder, without the index column.
judge_df.to_csv(
    path_or_buf=os.path.join(DATA_DIR, "SupremeCourtJudgement_Cleaned.csv"),
    index=False,
)