# This step will merge the sentence data we received in Dec. 2017 with new sentence data from May 2018

In [51]:
import pandas as pd
import numpy as np
import datetime
import csv

In [52]:
# Read the older sentence workbook, then preview its first five rows.
old_sentences = pd.read_excel(
    "../data/inputs/Sentences_12_04_2017.xlsx"
)
old_sentences.head(n=5)

Unnamed: 0,CaseNumber,ChargeNumber,ChargeID,Disposition,DispositionDate,Sentence,SentenceDate,SentenceTerms,SentenceConditions,Judge
0,12-1990-0271,1,3886180.0,Dismissed/Remand To District Court,1990-03-20,,NaT,,,"Judge, Unassigned"
1,12-1990-0271,2,3886181.0,Information Signed,1990-03-12,,NaT,,,"Judge, Unassigned"
2,21-1994-03664,1,3896710.0,,NaT,,NaT,,,
3,21-2001-01044,1,3907943.0,Nolo Contendere Plea - Filed,2001-05-10,Criminal Sentence,2001-05-10,,Indemnity Fund 0D 100.00 Active: 05/10/2001;...,"Cenerini (Retired), Frank J."
4,21-2001-01639,1,3908725.0,Rule 23 Transfer to Superior Court,2001-07-26,,NaT,,,"Pirraglia (Retired), Robert K."


In [53]:
# Read the newer sentence workbook, then preview its first five rows.
new_sentences = pd.read_excel(
    "../data/inputs/Sentences_New_5_6_2018.xlsx"
)
new_sentences.head(n=5)

Unnamed: 0,Case Number,Associated Case,Charge Number,Charge Description,Disposition,Disposition Date,Sentence,Sentence Date,Sentence Terms,Sentence Conditions,Judge
0,21-2001-01639,N3-2001-0212A,1,LARCENY UNDER $500/DOMESTIC,Plea of Nolo Contendere,2002-02-28,Criminal Sentence,2002-02-28,,Probation 1Y Judge: JUDGE PROCACCINI\n Activ...,"Procaccini, Associate Justice Daniel A."
1,21-2001-02246,N3-2001-0323A,1,LARCENY UNDER $500/DOMESTIC,Dismissed 48A,2002-01-04,,NaT,,,"Procaccini, Associate Justice Daniel A."
2,21-2001-02246,N3-2001-0323A,2,LARCENY UNDER $500/DOMESTIC,Dismissed 48A,2002-01-04,,NaT,,,"Procaccini, Associate Justice Daniel A."
3,21-2002-00683,N3-2002-0216A,1,VIOLATION NO CONTACT ORDER,Dismissed 48A,2002-05-20,,NaT,,,"Procaccini, Associate Justice Daniel A."
4,21-2002-00683,N3-2002-0216A,2,LARCENY UNDER $500/DOMESTIC,Dismissed 48A,2002-05-20,,NaT,,,"Procaccini, Associate Justice Daniel A."


## Sub-step 1: Create mappings from charge keys (Case Number + Charge Number) to sentence records, for both the old and the new sentencing data

In [84]:
# (CaseNumber, ChargeNumber) -> the old sentence row kept for that charge.
old_sentence_mapping = dict()

#### There are some collisions in the old sentencing data (several rows share the same key). We want to keep the most recent record, by `SentenceDate`, for each key

In [91]:
def get_max_sentence_record(key, row):
    """Pick the record to store for `key`: the one with the later SentenceDate.

    Parameters
    ----------
    key : hashable
        Lookup key into the module-level `old_sentence_mapping`.
    row : tuple
        An `(index, record)` pair as produced by `DataFrame.iterrows()`;
        only `row[1]` (the record) is used.

    Returns
    -------
    The incoming record (`row[1]`) when `key` has not been seen yet, or when
    its `SentenceDate` is not earlier than the stored record's. Otherwise the
    record already stored under `key`.

    Notes
    -----
    A missing date (NaT/NaN/None) is treated as older than any real date.
    Every ordering comparison against NaT is False, so a plain
    `max(..., key=...)` would let an undated incoming row replace a dated
    record that is already stored.
    """
    candidate = row[1]
    if key not in old_sentence_mapping:
        return candidate

    current = old_sentence_mapping[key]
    candidate_date = candidate['SentenceDate']
    current_date = current['SentenceDate']

    if pd.isnull(candidate_date):
        # An undated row only wins when the stored one is undated as well
        # (then the incoming row is kept, as before).
        return candidate if pd.isnull(current_date) else current
    if pd.isnull(current_date):
        return candidate
    # On equal dates the incoming row wins, matching the previous tie-break.
    return current if current_date > candidate_date else candidate

In [92]:
# Walk every old row and store, under its (CaseNumber, ChargeNumber) key,
# whichever record get_max_sentence_record selects.
for row in old_sentences.iterrows():
    record = row[1]
    key = record['CaseNumber'], record['ChargeNumber']
    old_sentence_mapping[key] = get_max_sentence_record(key, row)

#### Need to make 2 mappings, one for the `Case Number` column in the new data and one for the `Associated Case` column

In [93]:
# Two independent lookups over the new data:
#   (Case Number, Charge Number)     -> row
#   (Associated Case, Charge Number) -> row
new_dc_sentence_mapping, new_sup_sentence_mapping = {}, {}

In [94]:
# For each new row, record its (case, associated case, charge) triple and index
# the row under both the `Case Number` key and the `Associated Case` key.
# A later row with the same key overwrites the earlier one.
all_keys = []
for row in new_sentences.iterrows():
    record = row[1]
    case_number = record['Case Number']
    associated_case = record['Associated Case']
    charge_number = record['Charge Number']

    dc_key = (case_number, charge_number)
    sup_key = (associated_case, charge_number)

    all_keys.append((case_number, associated_case, charge_number))
    new_dc_sentence_mapping[dc_key] = record
    new_sup_sentence_mapping[sup_key] = record

## Sub-step 2: Ensure new sentencing information adds no new charges, only updates existing charges

#### We want to confirm that every charge key (case number + charge number) in the new sentencing data is present in the old sentencing data, to simplify analysis

In [95]:
# Three separate, initially empty result lists.
old_only, new_only, both = [], [], []

In [96]:
# Collect the new-data rows whose charge matches no old record under either the
# `Case Number` key or the `Associated Case` key.
#
# The rows themselves are iterated so that the row that failed the lookup is
# the one stored. Appending a loop variable left over from an earlier cell
# would store the same (last) row once per miss, which makes any later
# inspection of `new_only` meaningless.
for row in new_sentences.iterrows():
    new_record = row[1]
    charge_number = new_record['Charge Number']
    dc_key = (new_record['Case Number'], charge_number)
    sup_key = (new_record['Associated Case'], charge_number)
    if dc_key not in old_sentence_mapping and sup_key not in old_sentence_mapping:
        new_only.append(new_record)
# Single-argument parenthesised form prints the same text on Python 2 and 3.
print('There are {} charges that are only in the new data.'.format(len(new_only)))

There are 162 charges that are only in the new data.


#### So our assumption is incorrect: there are some charges in the new data that didn't exist in the old. Now, let's check whether any of them a) have non-null sentence conditions and b) relate to elder abuse

In [97]:
# Print every new-only record whose `Sentence Conditions` text is present and
# contains one of the marker substrings (case-sensitive substring match).
elder_markers = ('elder', '65', '60')
for k in new_only:
    conditions = k['Sentence Conditions']
    if not pd.isnull(conditions) and any(marker in conditions for marker in elder_markers):
        print(k)

#### They are all Null/not elder, so we can ignore them. Therefore, the old sentencing data contains all relevant keys.

## Sub-step 3: Create merged sentencing data, using new sentencing data if available, and old otherwise

In [98]:
# Accumulates one merged-sentence dict per charge.
merged_sentences = list()

In [99]:
def get_sentence_info(new, old):
    """Build one merged sentence dict for a charge.

    `charge_id` always comes from `old['ChargeID']`. The remaining seven
    fields are copied from `new` when it is not None, and from `old`
    otherwise. The two sources use different column spellings
    ('Disposition Date' vs. 'DispositionDate', etc.).

    Parameters
    ----------
    new : mapping or None
        Record indexed by the spaced column names, or None when there is no
        newer record for the charge.
    old : mapping
        Record indexed by the unspaced column names; must provide 'ChargeID'.

    Returns
    -------
    dict
        Keys: charge_id, disposition, disposition_date, sentence,
        sentence_date, sentence_terms, sentence_conditions, judge.
    """
    # (output field, column name in new data, column name in old data)
    field_table = (
        ('disposition', 'Disposition', 'Disposition'),
        ('disposition_date', 'Disposition Date', 'DispositionDate'),
        ('sentence', 'Sentence', 'Sentence'),
        ('sentence_date', 'Sentence Date', 'SentenceDate'),
        ('sentence_terms', 'Sentence Terms', 'SentenceTerms'),
        ('sentence_conditions', 'Sentence Conditions', 'SentenceConditions'),
        ('judge', 'Judge', 'Judge'),
    )

    result = {'charge_id': old['ChargeID']}
    use_new = new is not None
    source = new if use_new else old
    for out_field, new_column, old_column in field_table:
        result[out_field] = source[new_column if use_new else old_column]
    return result

In [100]:
# For every old charge, look for a newer record: first under the
# `Case Number` mapping, then under the `Associated Case` mapping.
# `new` stays None when neither mapping has the key.
for key, old in old_sentence_mapping.items():
    new = new_dc_sentence_mapping.get(key)
    if new is None:
        new = new_sup_sentence_mapping.get(key)
    merged_sentences.append(get_sentence_info(new, old))

In [101]:
# Display how many merged sentence records were accumulated.
len(merged_sentences)

15132

In [102]:
# Turn the list of merged-sentence dicts into a DataFrame (one row per dict).
to_write = pd.DataFrame(data=merged_sentences)

## Sub-step 4: Write data to `steps` folder

In [103]:
# Write the merged sentences to sheet 'Sheet1' of the step-2 output workbook.
# The writer is used as a context manager so the workbook is saved and the
# file closed on exit, even if `to_excel` raises. It also avoids an explicit
# `writer.save()` call, which newer pandas releases no longer provide.
with pd.ExcelWriter('../data/steps/Step2Output_sentences.xlsx') as writer:
    to_write.to_excel(writer, sheet_name='Sheet1')