In [53]:
import gensim
import gensim.downloader as api
nlp = api.load('word2vec-google-news-300')

In [54]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report, f1_score, confusion_matrix, recall_score, precision_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import KFold
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE

from sklearn.model_selection import train_test_split

In [55]:
import pandas as pd
import numpy as np

In [56]:
import matplotlib.pyplot as plt


In [57]:
doc = pd.read_csv('cleaned_legal_case.csv')

In [58]:
doc

Unnamed: 0.1,Unnamed: 0,DocumentId,Postures,HeadText,Paragraph
0,0,31944,On Appeal,other,Plaintiff Dwight Watson (“Husband”) appeals fr...
1,1,31944,On Appeal,facts,Husband and Wife were married in November 1989...
2,2,31944,On Appeal,reasoning/analysis,Husband argues that the trial court erred in v...
3,3,31944,On Appeal,issues,Although Husband does not clearly identify an ...
4,4,31944,On Appeal,other,Husband contends that the trial court’s findin...
...,...,...,...,...,...
39924,39924,39930,Motion to Compel Arbitration,other,Because Romero’s STELA claim is governed by th...
39925,39925,39931,Motion for Attorney's Fees,other,The legal question at the core of this appeal ...
39926,39926,39932,Motion to Dismiss,other,"Order and judgment (one paper), Supreme Court,..."
39927,39927,39933,Motion for Relief from Order or Judgment,other,U.S. Bank National Association (USBNA) appeals...


In [59]:
legal_case = doc.loc[(doc['Paragraph'].notna()) & (doc['HeadText'] != 'other'),:]

In [60]:
legal_case.HeadText.value_counts()

facts                 3223
oder and decision     2435
reasoning/analysis    2211
issues                1322
rules                 1126
procedural history     818
Name: HeadText, dtype: int64

In [61]:
drop_duplicate_df = legal_case.drop_duplicates()

In [62]:
drop_duplicate_df.shape

(11135, 5)

# Convert text to vector

# revmove stopwords from the documents

In [63]:
#Remove stopwords from the documents
import nltk
from nltk.corpus import stopwords

In [64]:
myFile = open('cleaned_documents_stopwords.txt','r')

In [65]:
documents = []
for line in myFile:
    documents.append(line.rstrip('\n'))
myFile.close()    

In [66]:
len(documents)

11135

# a. Word2Vec Approach

In [67]:
# Download and import essential libraries
#!pip install stop-words
#from stop_words import get_stop_words
#stopwords = get_stop_words('en')
# from textblob import Word
import re
import nltk
# nltk.download('wordnet')
# nltk.download('punkt')
import logging

nlp.init_sims(replace=True) # calling for using syn0norm


# Tokenizing the document text, return a list of word for each document
def w2v_tokenize_text(text):
    tokens = []
    for sent in nltk.sent_tokenize(text, language='english'):
        for word in nltk.word_tokenize(sent, language='english'):
            if len(word) < 2:
                continue
            tokens.append(word)
    return tokens

# Defining a function to get pretrained embedding for each token
# and take an average of word2vec embeddings of all tokens in the document 
# as vector representation for that document.
def word_averaging(wv, words):
    all_words, mean = set(), []
    for word in words:
        if isinstance(word, np.ndarray):
            mean.append(word)
        elif word in wv:
            mean.append(wv.vectors[wv.key_to_index[word]]) # appending each 300-D embedding for each word
            all_words.add(wv.key_to_index[word])
    if not mean:
        logging.warning("no input %s", words)
        return np.zeros(wv.vector_size,)
    # computing the mean of the embedding list
    mean = gensim.matutils.unitvec(np.array(mean).mean(axis=0)).astype(np.float32)
    return mean

# Defining a function to stack together all document embeddings into one
def  word_averaging_list(wv, text_list):
   stack = np.vstack([word_averaging(wv, post) for post in text_list ])
   return stack


  nlp.init_sims(replace=True) # calling for using syn0norm


In [68]:
x_tokens = []
for doc in documents:
    x_tokens.append(w2v_tokenize_text(doc))

In [69]:
x_w2v_vectors = word_averaging_list(nlp,x_tokens)



In [70]:
np.shape(x_w2v_vectors)

(11135, 300)

# b. TF-IDF Approach

In [71]:
from sklearn.feature_extraction.text import TfidfVectorizer
def get_tfidf_vectors(x):
    vectorizer = TfidfVectorizer()
    vectors = vectorizer.fit_transform(x)
    #feature_name = vectorizer.get_feature_names()
    dense = vectors.todense()
    denselist = dense.tolist()
    x_tfidf_vector = np.array(denselist)
    return x_tfidf_vector

In [72]:
x_tfidf_vectors = get_tfidf_vectors(documents)

In [73]:
np.shape(x_tfidf_vectors)

(11135, 27264)

# Run and train the model

In [74]:
def print_performance_metric(actual_y,predict_y):
    accuracy_on_test_dataset = accuracy_score(actual_y, predict_y)
    f1_score_test_dataset = f1_score(actual_y, predict_y, average='macro',zero_division=1)
    recall_score_test_dataset = recall_score(actual_y, predict_y, average = 'macro', zero_division=1)
    precision_score_test_dataset = precision_score(actual_y, predict_y, average ='macro', zero_division=1)
    confu_matrix = confusion_matrix(actual_y, predict_y)
    classify_report = classification_report(actual_y, predict_y)
    print('Accuracy score on the test dataset: ',accuracy_on_test_dataset)
    print('Recall score on the test dataset:', recall_score_test_dataset)
    print('Precision score on the test dataset:', precision_score_test_dataset)
    print('F1 score on the test dataset:', f1_score_test_dataset)
    print(confu_matrix)
    print(classify_report)
    return accuracy_on_test_dataset, f1_score_test_dataset,recall_score_test_dataset,precision_score_test_dataset
    

In [76]:
def model_score(name, model,x_train_vectors,x_test_vectors,y_train,y_test):
    cv = StratifiedKFold(n_splits = 5, random_state = 42, shuffle = True)
    scores = []
    f1_scores = []
    oversampler = SMOTE(random_state=42)
    for train_fold_index, val_fold_index in cv.split(x_train_vectors,y_train):
        x_train_fold = x_train_vectors[train_fold_index]
        y_train_fold = y_train.iloc[train_fold_index]
        x_val_fold = x_train_vectors[val_fold_index]
        y_val_fold = y_train.iloc[val_fold_index]

        #upsample on the training dataset
        X_train_fold_upsample, y_train_fold_upsample = oversampler.fit_resample(x_train_fold,y_train_fold)
        model.fit(X_train_fold_upsample,y_train_fold_upsample)
        y_predict = model.predict(x_val_fold)
        score = accuracy_score(y_val_fold,y_predict)
        scores.append(score)
        f1 = f1_score(y_val_fold, y_predict,  average = 'macro', zero_division=1)
        f1_scores.append(f1)
        
    #validate on the test dataset
    y_test_predict = model.predict(x_test_vectors)
    accuracy_test,f1_score_test,recall_score_test,precision_score_test = print_performance_metric(y_test,y_test_predict)
    
    
    report_score = {'macro_f1_folds':np.array(f1_scores), 'accuracy_test':accuracy_test,'macro_f1_test':f1_score_test,
                   'recall_score_test':recall_score_test,'precision_score_test':precision_score_test}
    return report_score

In [77]:
#create a function to split the dataset
def split_train_test(x_vectors, labels):
    X = x_vectors #list type
    y = labels #Seires type
    X_train, X_test, y_train, y_test = train_test_split (X, y, train_size = 0.8, random_state = 42, shuffle = True, stratify=y)
    
    return X_train, X_test, y_train, y_test

In [78]:
models = []
models.append(('LR', LogisticRegression(solver='lbfgs', max_iter=700)))
models.append(('RF', RandomForestClassifier()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('DT', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC(decision_function_shape='ovo', probability=True)))
models.append(('LGBM', LGBMClassifier(objective= 'binary')))
models.append(('XGB', XGBClassifier(eval_metric="mlogloss", objective = "reg:logistic") ))

# Run and train model on w2v 

In [79]:
X_w2v_train, X_w2v_test,y_w2v_train,y_w2v_test = split_train_test(x_w2v_vectors,drop_duplicate_df['HeadText'])

In [81]:

f1_score_folds = {}
all_model_reports = {}

for name, model in models:
    
    report = model_score(name, model,X_w2v_train, X_w2v_test,y_w2v_train,y_w2v_test) 
    f1_score_folds[name] = report['macro_f1_folds']
   
    all_model_reports[name] = report
    print("{name} : {f1_folds_mean} ({std})".format(name = name,f1_folds_mean =report['macro_f1_folds'].mean(),
                                                   std = report['macro_f1_folds'].std()))
    
    
    

Accuracy score on the test dataset:  0.590929501571621
Recall score on the test dataset: 0.5855499218842913
Precision score on the test dataset: 0.5607988501434482
F1 score on the test dataset: 0.5620150398977694
[[321  11  19 192  56  46]
 [  4 191   6   3  30  30]
 [ 12  12 399   7  42  15]
 [ 66   2   6  74   7   9]
 [ 32  70  33  15 206  86]
 [ 16  17   9  10  48 125]]
                    precision    recall  f1-score   support

             facts       0.71      0.50      0.59       645
            issues       0.63      0.72      0.67       264
 oder and decision       0.85      0.82      0.83       487
procedural history       0.25      0.45      0.32       164
reasoning/analysis       0.53      0.47      0.50       442
             rules       0.40      0.56      0.47       225

          accuracy                           0.59      2227
         macro avg       0.56      0.59      0.56      2227
      weighted avg       0.63      0.59      0.60      2227

LR : 0.55167413881974



Accuracy score on the test dataset:  0.608890884598114
Recall score on the test dataset: 0.5368928386415034
Precision score on the test dataset: 0.5648767455806984
F1 score on the test dataset: 0.5453351392513166
[[473   7  15  46  70  34]
 [ 13 162   7   1  58  23]
 [ 33  11 374   2  56  11]
 [107   0   5  36   7   9]
 [ 56  52  37   3 227  67]
 [ 37   8   9   2  85  84]]
                    precision    recall  f1-score   support

             facts       0.66      0.73      0.69       645
            issues       0.68      0.61      0.64       264
 oder and decision       0.84      0.77      0.80       487
procedural history       0.40      0.22      0.28       164
reasoning/analysis       0.45      0.51      0.48       442
             rules       0.37      0.37      0.37       225

          accuracy                           0.61      2227
         macro avg       0.56      0.54      0.55      2227
      weighted avg       0.61      0.61      0.61      2227

XGB : 0.5443801805351

In [82]:
all_model_reports

{'LR': {'macro_f1_folds': array([0.54100735, 0.53965206, 0.55646264, 0.56628724, 0.5549614 ]),
  'accuracy_test': 0.590929501571621,
  'macro_f1_test': 0.5620150398977694,
  'recall_score_test': 0.5855499218842913,
  'precision_score_test': 0.5607988501434482},
 'RF': {'macro_f1_folds': array([0.49033496, 0.4930537 , 0.52913921, 0.52109453, 0.51419095]),
  'accuracy_test': 0.5954198473282443,
  'macro_f1_test': 0.5268275798802475,
  'recall_score_test': 0.5211907290907565,
  'precision_score_test': 0.5452704138587413},
 'KNN': {'macro_f1_folds': array([0.41102845, 0.40259654, 0.41512126, 0.41321369, 0.40484589]),
  'accuracy_test': 0.42703188145487203,
  'macro_f1_test': 0.4208857224189824,
  'recall_score_test': 0.5062602726726508,
  'precision_score_test': 0.49120552111622257},
 'DT': {'macro_f1_folds': array([0.345109  , 0.33465125, 0.34863344, 0.36860928, 0.35643941]),
  'accuracy_test': 0.3762909744050292,
  'macro_f1_test': 0.34364356733588086,
  'recall_score_test': 0.3499591723

In [86]:
def write_peformance_to_csv(file_name, header, report_dict):
    #header = ['ModelName','Accuracy','MacroF1', 'Recall','Precision']
    with open(file_name,'w',encoding='utf-8',newline="") as file:
        csvWriter = csv.writer(file)
    
        csvWriter.writerow(header)
    
        for name in report_dict:
            temp_data = []
            temp_data.append(name)
            for p in report_dict[name]:
                if p != 'macro_f1_folds':
                    temp_data.append(report_dict[name][p])
            csvWriter.writerow(temp_data)
    file.close()

In [87]:
import csv
header = ['name','accuracy_test','macro_f1_test','recall_score_test','precision_score_test']
write_peformance_to_csv('classicML_performance_w2v.csv',header,all_model_reports)

# Run and train model on TF-IDF

In [32]:
X_tfidf_train, X_tfidf_test, y_tfidf_train, y_tfidf_test =split_train_test(x_tfidf_vectors,drop_duplicate_df['HeadText'])

In [41]:
np.shape(X_tfidf_train)

(8908, 27264)

In [None]:
models = []
models.append(('LR', LogisticRegression(solver='lbfgs', max_iter=700)))
models.append(('RF', RandomForestClassifier()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('DT', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC(decision_function_shape='ovo', probability=True)))
models.append(('LGBM', LGBMClassifier(objective= 'binary')))
models.append(('XGB', XGBClassifier(eval_metric="mlogloss", objective = "reg:logistic") ))

f1_score_folds = {}
accuracy_scores = {}
f1_score_tests = {}
all_model_reports = {}

for name, model in models:
    report = model_score(name, model,X_tfidf_train, X_tfidf_test, y_tfidf_train, y_tfidf_test)
    f1_score_folds[name] = report['macro_f1_folds']
    accuracy_scores[name] = report['accuracy_test']
    f1_score_tests[name] = report['macro_f1_test']
    all_model_reports[name] = report
    print("{name} : {f1_folds_mean} ({std})".format(name = name,f1_folds_mean =report['macro_f1_folds'].mean(),
                                                   std = report['macro_f1_folds'].std()))

Accuracy score on the test dataset:  0.6695105523125281
Recall score on the test dataset: 0.6126737616792765
Precision score on the test dataset: 0.6172534675567778
F1 score on the test dataset: 0.6141315546460848
[[477   8  13  73  47  27]
 [  5 188   2   2  41  26]
 [ 25   9 414   3  23  13]
 [ 97   0   5  51  11   0]
 [ 52  55  12   8 248  67]
 [ 32  10   4   5  61 113]]
                    precision    recall  f1-score   support

             facts       0.69      0.74      0.72       645
            issues       0.70      0.71      0.70       264
 oder and decision       0.92      0.85      0.88       487
procedural history       0.36      0.31      0.33       164
reasoning/analysis       0.58      0.56      0.57       442
             rules       0.46      0.50      0.48       225

          accuracy                           0.67      2227
         macro avg       0.62      0.61      0.61      2227
      weighted avg       0.67      0.67      0.67      2227

LR : 0.6093060907629

In [33]:
f1_score_folds = {}
accuracy_scores = {}
f1_score_tests = {}
all_model_reports = {}

for name, model in models[5:]:
    report = model_score(name, model,X_tfidf_train, X_tfidf_test, y_tfidf_train, y_tfidf_test)
    f1_score_folds[name] = report['macro_f1_folds']
    accuracy_scores[name] = report['accuracy_test']
    f1_score_tests[name] = report['macro_f1_test']
    all_model_reports[name] = report
    print("{name} : {f1_folds_mean} ({std})".format(name = name,f1_folds_mean =report['macro_f1_folds'].mean(),
                                                   std = report['macro_f1_folds'].std()))

Accuracy score on the test dataset:  0.6699595868881904
Recall score on the test dataset: 0.5468254020690922
Precision score on the test dataset: 0.7428890830063071
F1 score on the test dataset: 0.5504187844300278
[[582   2  11   0  47   3]
 [ 18 163   1   0  73   9]
 [ 45   1 401   0  36   4]
 [147   0   4   4   9   0]
 [ 89  38  10   0 278  27]
 [ 65   3   1   0  92  64]]
                    precision    recall  f1-score   support

             facts       0.62      0.90      0.73       645
            issues       0.79      0.62      0.69       264
 oder and decision       0.94      0.82      0.88       487
procedural history       1.00      0.02      0.05       164
reasoning/analysis       0.52      0.63      0.57       442
             rules       0.60      0.28      0.39       225

          accuracy                           0.67      2227
         macro avg       0.74      0.55      0.55      2227
      weighted avg       0.71      0.67      0.64      2227

SVM : 0.555017016070



Accuracy score on the test dataset:  0.6686124831612034
Recall score on the test dataset: 0.6021301509520721
Precision score on the test dataset: 0.6139833061052088
F1 score on the test dataset: 0.6056744502101026
[[489  10  13  59  47  27]
 [  9 181   5   0  54  15]
 [ 22   7 430   1  23   4]
 [110   1   2  41   9   1]
 [ 60  60  25   5 234  58]
 [ 33  10   8   5  55 114]]
                    precision    recall  f1-score   support

             facts       0.68      0.76      0.71       645
            issues       0.67      0.69      0.68       264
 oder and decision       0.89      0.88      0.89       487
procedural history       0.37      0.25      0.30       164
reasoning/analysis       0.55      0.53      0.54       442
             rules       0.52      0.51      0.51       225

          accuracy                           0.67      2227
         macro avg       0.61      0.60      0.61      2227
      weighted avg       0.66      0.67      0.66      2227

XGB : 0.594335277768

In [None]:
header = ['name','accuracy_test','macro_f1_test','recall_score_test','precision_score_test']
write_peformance_to_csv('classicML_performance_w.csv',header,all_model_reports)

# Train and Run the model