In [1]:
import nltk
from textblob import TextBlob, Word
from nltk.corpus import stopwords
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('court_cases_df.csv', encoding='utf-8')

In [3]:
def lower_case(text):
    return text.apply(lambda x: ' '.join(x.lower() for x in x.split()))


def remove_punctuation(text):
    return text.str.replace(r'[^\w\s]', '')


def remove_stopwords(text):
    stop_words = stopwords.words('english')
    return text.apply(lambda x: ' '.join(x for x in x.split() if x not in stop_words))


def correct_spelling(text):
    return text.apply(lambda x: str(TextBlob(x).correct()))


def lemmatize(text):
    return text.apply(lambda x: ' '.join(
        [Word(word).lemmatize() for word in x.split()]))


def process_column(column_name):
    return remove_stopwords(remove_punctuation(lower_case(df[column_name])))


def extract_sentence(text):
    for lst in TextBlob(text).ngrams(2):
        if 'months' in lst or 'years' in lst or 'life' in lst:
            return ' '.join(lst)

In [4]:
def analyse_text(processed_text):

    # There could be a lot of words between `sentenced` and verdict such as
    # effective, accused, etc. so widen the ngrams
    for lst in TextBlob(processed_text).ngrams(6):
        if is_sentence_text(lst):
            s = extract_sentence(processed_text)
            print(lst)
            print(s)
            print
            return s, s

        # Check for other types of verdicts

    return '', ''

In [5]:
def is_sentence_text(lst):
    return 'sentenced' in lst and ('years' in lst or 'months' in lst or 'life' in lst)

In [12]:
df['Verdict'] = process_column('Summary').apply(analyse_text)

['theft', 'accused', '1', 'sentenced', '15', 'years']
15 years
['312', 'years', 'accused', 'one', 'two', 'sentenced']
21 years
['mitigating', 'factors', 'accused', 'sentenced', '15', 'years']
15 years
['half', 'years', 'accused', '1', '2', 'sentenced']
21 years
['sentence', 'sentenced', 'effective', 'terms', '20', 'years']
20 years
['appellant', 'convicted', 'rape', 'sentenced', 'ten', 'years']
ten years
['two', 'minor', 'children', 'sentenced', '5', 'years']
5 years
['transitional', 'provisions', '136', 'actthose', 'sentenced', 'life']
sentenced life
['high', 'court', 'murdering', 'husband', 'sentenced', 'life']
sentenced life
['supported', '96', 'sentenced', 'r1500', '6', 'months']
6 months
['factors', 'taken', 'account', 'accused', 'sentenced', 'life']
sentenced life
['slightly', 'r5', '000', 'sentenced', '18', 'months']
eight years
['amount', 'r215', '000', 'sentenced', 'six', 'years']
six years
['circumstances', 'found', 'accused', 'sentenced', '10', 'years']
15 years
['brick', 'r

['fine', 'imposed', 'sentenced', 'accused', 'six', 'months']
six months
['lawful', 'custody', 'accused', 'sentenced', '3', 'years']
3 years
['maximum', 'two', 'years', 'imprisonment', 'court', 'sentenced']
two years
['lawful', 'custody', 'appellant', 'sentenced', '24', 'months']
24 months
['amended', 'accused', 'convicted', 'sentenced', 'three', 'years']
three years
['n17', '800', 'cumulatively', 'sentenced', '17', 'years']
17 years
['lawful', 'custody', 'appellant', 'sentenced', 'three', 'years']
three years
['aggravating', 'circumstances', 'accused', 'sentenced', 'nine', 'years']
nine years
['weight', 'personal', 'circumstances', 'sentenced', 'accused', 'life']
accused life
['regional', 'court', 'sentence', 'sentenced', 'eight', 'years']
eight years
['punishment', 'accused', 'accused', 'sentenced', '40', 'years']
40 years
['deceased', 'enlarged', 'liver', 'sentenced', '10', 'years']
10 years
['senseless', 'crimes', 'accused', 'sentenced', '35', 'years']
35 years
['prospects', 'rehabi

['upheld', 'count', '1', 'sentenced', '15', 'years']
15 years
['single', 'count', 'rape', 'sentenced', '15', 'years']
15 years
['sentence', 'imposition', 'appellant', 'sentenced', '25', 'years']
25 years
['sentenced', 'regional', 'magistrates', 'court', '25', 'years']
25 years
['robbery', 'sentenced', 'effective', 'term', '25', 'years']
25 years
['robbery', 'aggravating', 'circumstances', 'sentenced', '20', 'years']
20 years
['two', 'offences', 'sentenced', 'punishment', 'consisting', 'life']
consisting life
['committed', 'gunpoint', 'night', 'sentenced', '15', 'years']
15 years
['1997', 'act', 'appellant', 'sentenced', '15', 'years']
15 years
['act', '62', '1955', 'sentenced', '7', 'years']
7 years
['r649', '827', 'sentenced', 'imprisonment', 'five', 'years']
five years
['court', 'high', 'court', 'person', 'sentenced', 'life']
sentenced life
['sentence', 'appeal', 'upheld', 'sentenced', '20', 'years']
20 years
['restorative', 'justice', 'respondent', 'sentenced', '10', 'years']
10 yea

In [20]:
df.sort_values('Verdict', ascending=False)

Unnamed: 0.1,Unnamed: 0,Column,Applicant,Defendant,Case No,Judges,Summary,Date Heard,Date Judgement,Court,Type,Case Category,Verdict
18273,18273,18273,City of Tshwane Metropolitan Municipality,Beukes 2009 JDR 0951 (GNP),37742/2006,Mothle A J,Contempt of court. Disobedience. To court orde...,2009-09-22,2009-09-22,"North Gauteng High Court, Pretoria",unreported,Contempt of court,"(two years, two years)"
12920,12920,12920,S,Duvenhage 2011 JDR 0773 (Nm),CA 63/2010,Ndauendapo J and Siboleka J,Criminal procedure. Sentence. Appeal against. ...,2011-06-13,2011-06-23,Namibia High Court,unreported,Criminal procedure,"(two years, two years)"
6337,6337,6337,S,Fourie 2015 JDR 1483 (GP),A319/2015,EM Kubushi J and NM Mavundla J,Criminal law. Assault with intent to do grievo...,,,"Gauteng Division, Pretoria",unreported,Criminal law,"(two years, two years)"
4978,4978,4978,S,Lethoko 2012 JDR 0466 (FB),P38/12,Rampai AJP et Phalatsi AJ,Criminal procedure. From magistrates' court to...,2012-03-01,2012-03-01,"Free State High Court, Bloemfontein",unreported,Criminal procedure,"(two years, two years)"
20975,20975,20975,S,ZL 2014 JDR 2126 (SCA),1049/2013,"Mpati P, Bosielo JA and Willis JA",Criminal law. Appeal. Application for hearing ...,2014-09-12,2014-10-01,Supreme Court of Appeal,unreported,Criminal law,"(two years, two years)"
12899,12899,12899,S,Ganuseb 2013 JDR 1033 (Nm),CC 8/2011,Van Niekerk J,Criminal procedure. Sentence. Imposition of. F...,2013-05-07,2013-05-17,Namibia High Court,unreported,Criminal procedure,"(two years, two years)"
13289,13289,13289,S,Harmasen 2017 JDR 0211 (Nm),CR 08/2016,Liebenberg J and Shivute J,Criminal law. Escaping from lawful custody. Se...,2017-01-30,2017-01-30,"Namibia High Court, Main Division",unreported,Criminal law,"(two years, two years)"
13839,13839,13839,S,Moshana 2014 JDR 0635 (Nm),CA 63/2013,Hoff J and Siboleka J,Criminal procedure. Sentence. Composite senten...,2014-01-31,2014-03-14,"Namibia High Court, Windhoek",unreported,Criminal procedure,"(twelve years, twelve years)"
6034,6034,6034,S,Ndarila 2014 JDR 1579 (GP),A144/2014,Strauss AJ and TJ Raulinga J,Criminal law. Theft. Sentence. Too much emphas...,,,"Gauteng Division, Pretoria",unreported,Criminal law,"(three years, three years)"
16332,16332,16332,S,Mabitsi And Mahloko 2012 JDR 1272 (GNP),RCM 79/09,Potterill J and Van der Byl AJ,Criminal law. Review. Special review in terms ...,2012-02-27,2012-02-27,"North Gauteng High Court, Pretoria",unreported,Criminal law,"(three years, three years)"


In [38]:
df.sort_values('Verdict', ascending=False)['Summary'][4517]

"Criminal law. Sentence. Primary caregiver of teenager. Post office employee committed 14 counts of fraud totalling R13 000. Previous conviction. Mother of teenage child. Sentenced to three years' imprisonment. Social worker to visit child once a month."

In [35]:
# convert months into years (e.g. 1 month = 0,0833334 year)
# convert life sentence to 25 if murder, 15 if rape, 10 if robbery

# if sentence and each counts / each count / count 1, count 2 in ngram, return list of sentences for each
# if defendant 1 / defendant 2 etc. / accused 1 / accused 2 etc. / both accused / respectively in summary, return list

# if sentence and suspended in ngram, (value * -1) + value(s)
# if sentence and set aside in ngram, (value * -1) + value(s)

# if sentence and concurrent / concurrently / along side in ngram, return [value, other_value].max()

'This is simply a test, you see. Because I want to see how regex works!!'