# This is the first step in the analysis: Filter cases to the ones in which we are interested

In [3]:
import pandas as pd
import numpy as np
import datetime
import csv

In [4]:
# Load the raw charge report export; the filters below all read from this frame.
crim_charge = pd.read_excel(
    io="../data/inputs/ChargeReport12_04_2017.xlsx",
)
# Preview the first five rows to sanity-check the parsed columns.
crim_charge.head(n=5)

Unnamed: 0,Case ID,Case No,Charge ID,Charge Number,Offense Description,Offense Date,Charge Filing Date,Arresting PD
0,7403240,12-1990-0271,3886180,1,ASSLT PERSON OVER 60,1990-02-22,1990-02-23,
1,7403240,12-1990-0271,3886181,1,MALICIOUS DESTRUCTION PROPERTY,1990-02-22,NaT,
2,7413463,21-1994-03664,3896710,1,LARCENY UNDER $500/DOMESTIC,1994-12-13,1994-12-14,
3,7422050,21-2001-01044,3907943,1,Simple Assault and/or Battery,2001-04-18,2001-05-10,PORTSMOUTH POLICE DEPARTMENT
4,7422569,21-2001-01639,3908725,1,LARCENY UNDER $500/DOMESTIC,2001-06-27,2001-07-12,PORTSMOUTH POLICE DEPARTMENT


## Sub-step 1: Filter to cases with at least one offense dated after January 1, 2000

In [5]:
# Collect the case numbers that have at least one charge whose offense date is
# strictly after the cutoff. The comparison runs on the whole column at once
# instead of looping over rows with iterrows(); rows whose offense date is
# missing (NaT) never satisfy the comparison, so they are left out.
date_filtered_case_nums = set(
    crim_charge.loc[
        crim_charge['Offense Date'] > pd.Timestamp('2000-01-01 00:00:00'),
        'Case No',
    ]
)
# Number of distinct cases that passed the date filter.
len(date_filtered_case_nums)

2638

## Sub-step 2: Filter to cases with at least one elder-abuse charge

In [6]:
def is_elder_case(charge):
    """Return True when a charge's offense description contains an elder keyword.

    Parameters
    ----------
    charge : tuple
        An ``(index, row)`` pair as produced by ``DataFrame.iterrows()``; only
        ``row['Offense Description']`` is read.

    Returns
    -------
    bool
        True if the lower-cased description contains any of the keywords.
        False otherwise, including when the description is missing or is not
        a string.
    """
    elder_charge_keywords = ('elder', '60', '65')
    description = charge[1]['Offense Description']
    # A blank spreadsheet cell arrives as NaN (a float), which has no .lower();
    # treat it as "no keyword present" instead of raising AttributeError.
    if not isinstance(description, str):
        return False
    # NOTE(review): keywords are matched as plain substrings, so '60'/'65' also
    # hit descriptions that merely contain those digits (e.g. a dollar amount
    # such as "$600") -- confirm that is acceptable for this analysis.
    description = description.lower()
    return any(kw in description for kw in elder_charge_keywords)

In [12]:
# Collect the case number of every charge that is_elder_case() flags; using a
# set collapses cases that have several matching charges into one entry.
elder_case_nums = {
    charge[1]['Case No']
    for charge in crim_charge.iterrows()
    if is_elder_case(charge)
}
# Display the number of distinct matching cases, mirroring the date filter cell,
# so the count shown under this cell is reproduced on a fresh run.
len(elder_case_nums)

2006


## Sub-step 3: Combine filters

In [8]:
# Keep every charge row whose case passed both filters: the case number must
# appear in the date-filtered set and in the elder-keyword set.
filtered_charges = [
    row
    for _, row in crim_charge.iterrows()
    if row['Case No'] in date_filtered_case_nums
    and row['Case No'] in elder_case_nums
]

In [9]:
# Reassemble the surviving per-charge rows into a single table for export.
to_write = pd.DataFrame(data=filtered_charges)

## Sub-step 4: Write data to `steps` folder

In [11]:
# Write the filtered charges to the Step 1 output workbook. The context manager
# saves and closes the workbook when the block exits, replacing the explicit
# writer.save() call, which newer pandas releases no longer provide. The sheet
# name is passed by keyword because positional use is deprecated.
with pd.ExcelWriter('../data/steps/Step1Output_charges.xlsx') as writer:
    to_write.to_excel(writer, sheet_name='Sheet1')