In [1]:
import json
import gspread
from oauth2client.client import SignedJwtAssertionCredentials
import pandas as pd
pd.options.mode.chained_assignment = None
import numpy as np
import re
from datetime import date

### Import relevant tables from DB

In [2]:
import psycopg2
from config import config

def _query_to_frame(cursor, sql):
    """Run ``sql`` on ``cursor`` and return all result rows as a DataFrame.

    Column labels come from ``cursor.description`` in result order, so a
    ``SELECT *`` over a join keeps same-named columns from both tables.
    """
    cursor.execute(sql)
    labels = [desc[0] for desc in cursor.description]
    return pd.DataFrame(cursor.fetchall(), columns=labels)


params = config()
conn = psycopg2.connect(**params)
cur = conn.cursor()
try:
    # CIF forms joined to their person boxes.
    dfcif = _query_to_frame(
        cur,
        "SELECT * FROM dataentry_cifnepal as CIF inner join dataentry_personboxnepal as PB on CIF.id = PB.cif_id;")
    # People joined to the CIF whose main_pv_id points at them.
    dfv = _query_to_frame(
        cur,
        "SELECT * FROM public.dataentry_person as p inner join dataentry_cifnepal as CIF on p.id = CIF.main_pv_id;")
    # Person boxes joined to the person each one references.
    dfs = _query_to_frame(
        cur,
        "SELECT * FROM public.dataentry_personboxnepal as pb inner join public.dataentry_person as p on pb.person_id = p.id;")
    # address1 rows joined to the address2 rows that reference them.
    add = _query_to_frame(
        cur,
        "SELECT * FROM public.dataentry_address1 as ad1 inner join public.dataentry_address2 as ad2 on ad1.id = ad2.address1_id;")
finally:
    # Release the cursor even when one of the queries raises.
    cur.close()
# NOTE(review): `conn` is left open exactly as before; close it once it is
# confirmed that no later cell still needs the connection.

### Subset data from CIFs and create suspect and victim IDs

In [3]:
def _attach_address(people, addresses):
    """Return a copy of ``people`` with a combined ``Address`` text column.

    Missing address ids are filled with 0 and cast to int, the frame is
    left-joined to ``addresses`` on ``address2_id``, and ``Address`` is built
    as "<address_2>, <address_1>".
    """
    people = people.copy()
    for id_col in ('address1_id', 'address2_id'):
        people[id_col] = people[id_col].fillna(0).astype(int)
    people = pd.merge(people, addresses, how='left', on='address2_id')
    people['Address'] = people['address_2'].map(str) + ", " + people['address_1']
    return people


# Keep only the columns needed to link a person box to its CIF.
dfcif = dfcif[['cif_number','person_id','pb_number']]

# Address lookup table: positional columns 1, 6 and 7 of the address join
# (presumably address1 name, address2 id, address2 name -- confirm against the schema).
add = add.iloc[:,[1,6,7]]
acols = ['address_1','address2_id','address_2']
add.columns = acols

# Victims: one row per main PV. The former bare `dfv.infer_objects` /
# `dfs.infer_objects` lines were attribute look-ups without a call and did nothing.
dfv = _attach_address(dfv, add)
CIF_Victims = dfv[['cif_number','full_name','phone_contact','Address']].copy()

# Suspects: one row per person box, linked to its CIF via person_id.
dfs = _attach_address(dfs, add)
dfs = dfs[['person_id','full_name','phone_contact','Address']]
CIF_Suspects = pd.merge(dfs, dfcif, how='outer',on='person_id', sort=True,
         suffixes=('x', 'y'), copy=True)
# Assign the column directly rather than through `.loc[:, ...]`: a `.loc`
# write can be applied in place to the existing (possibly float) column, which
# would turn the id suffix into e.g. ".PB3.0" instead of ".PB3".
CIF_Suspects['pb_number'] = CIF_Suspects['pb_number'].fillna(0).astype(int)
# Suspect_ID = cif_number without dots and without its last character, plus
# ".PB<pb_number>". regex=False makes the literal-dot replacement explicit.
suspect_case = CIF_Suspects['cif_number'].str.replace('.', '', regex=False).str[:-1]
CIF_Suspects['Suspect_ID'] = suspect_case + ".PB" + CIF_Suspects['pb_number'].map(str)
CIF_Suspects = CIF_Suspects.drop_duplicates(subset='Suspect_ID')

# Victim_ID starts as the cif_number and has its suffix rewritten to ".V<n>".
CIF_Victims['Victim_ID'] = CIF_Victims['cif_number']
replacements = {
   'Victim_ID': {
      r'(\.1|A$)': '.V1',r'B$': '.V2',r'C$': '.V3', r'D$': '.V4',r'E$': '.V5',
      r'F$': '.V6',r'G$': '.V7',r'H$': '.V8',r'I$': '.V9',r'J$': '.V10'}
}
CIF_Victims = CIF_Victims.replace(replacements, regex=True)
CIF_Victims = CIF_Victims.sort_values('full_name')
CIF_Victims = CIF_Victims.drop_duplicates(subset='Victim_ID')
# Drop victims whose name is an empty string; copy so the column writes below
# act on an independent frame rather than on a slice.
non_blanks = CIF_Victims['full_name'] != ""
CIF_Victims = CIF_Victims[non_blanks].copy()

# Reduce cif_number on both frames to the bare case number: no dots, last character removed.
CIF_Suspects['cif_number'] = CIF_Suspects['cif_number'].str.replace('.', '', regex=False).str[:-1]
CIF_Victims['cif_number'] = CIF_Victims['cif_number'].str.replace('.', '', regex=False).str[:-1]

### Get current Case Dispatcher data from Google Sheets

In [20]:
# Getting latest CD data from Google Sheets

# Read the service-account key inside a context manager so the file handle is
# closed right away (the previous json.load(open(...)) never closed it).
with open('creds.json') as key_file:
    json_key = json.load(key_file)
scope = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']

# NOTE(review): SignedJwtAssertionCredentials appears to exist only in old
# oauth2client releases -- confirm the pinned version before upgrading.
credentials = SignedJwtAssertionCredentials(json_key['client_email'], json_key['private_key'].encode(), scope)

# Authorise gspread with those credentials and open the spreadsheet by title.
file = gspread.authorize(credentials)
CD = file.open("Case Dispatcher 2.0")

In [21]:
def Get_Sheet_Names(GS):
    """Returns a list of all the sheet names in a Google spreadsheet.

    Args:
        GS: An object whose ``worksheets()`` method returns worksheet objects
            exposing a ``title`` attribute (e.g. a gspread spreadsheet).

    Returns:
        list: The worksheet titles, in the order ``worksheets()`` yields them.
    """
    # Read the title attribute directly instead of regex-parsing the
    # worksheet's repr, which mangled any title containing a quote character.
    return [s.title for s in GS.worksheets()]

Sheet_names = Get_Sheet_Names(CD)

# Load every worksheet into its own DataFrame, keyed "<sheet name>_GS".
d = {
    '{0}_GS'.format(sn): pd.DataFrame(CD.worksheet(sn).get_all_values())
    for sn in Sheet_names
}

# Promote each frame's first row to column labels, then drop that row from the data.
for frame in d.values():
    frame.columns = frame.iloc[0]
    frame.drop(0, inplace=True)

# Publish each frame as a notebook-level variable (Suspects_GS, Victims_GS, ...).
# This relies on the cell running at module scope, where locals() is the
# global namespace.
locals().update(d)

In [22]:
# Add new cases from CIFs to CD data

# Positional pick from CIF_Suspects; given how that frame is built above these
# should be full_name, phone_contact, Address, cif_number and Suspect_ID
# (NOTE(review): confirm, the selection is by position, not by name).
New_Suspects = CIF_Suspects.iloc[:,[1,2,3,4,6]].rename(columns={
    'full_name': 'Name', 'phone_contact': 'Phone_Number(s)', 'cif_number': 'Case_ID'})

# Police rows start from the same suspect data with the name column relabelled.
# The previous code called rename() without keeping its result, so the
# relabelling never took effect and Suspect_Name was never filled for new rows.
New_Police = New_Suspects.rename(columns={'Name': 'Suspect_Name'})

# Conform the new rows to the sheet's column layout (columns missing from the
# CIF data become NaN), append them, and keep the first row seen per Suspect_ID
# so rows already on the sheet win over freshly imported ones.
New_Suspects = New_Suspects.reindex(columns=list(Suspects_GS.columns))
Suspects = pd.concat([Suspects_GS,New_Suspects])
Suspects = Suspects.drop_duplicates(subset='Suspect_ID')

# NOTE: New_Victims is the same object as CIF_Victims, so the relabelling
# below also renames CIF_Victims' columns (kept as-is in case later cells
# rely on it).
New_Victims = CIF_Victims
vcols = ['Case_ID','Name','Phone_Number(s)','Address','Victim_ID']
New_Victims.columns = vcols
New_Victims = New_Victims.reindex(columns=list(Victims_GS.columns))
Victims = pd.concat([Victims_GS,New_Victims])
Victims = Victims.drop_duplicates(subset='Victim_ID')

New_Police = New_Police.reindex(columns=list(Police_GS.columns))
Police = pd.concat([Police_GS,New_Police])
Police = Police.drop_duplicates(subset='Suspect_ID')

### Move closed cases to closed sheets

In [23]:
def _closed_rows(sheet):
    """Return the rows of ``sheet`` whose Case_Status contains "Closed"."""
    is_closed = sheet.Case_Status.str.contains("Closed", na=False)
    return sheet[is_closed]


# Rows already marked as closed on each active sheet.
Closed_Suspects = _closed_rows(Suspects)
Closed_Victims = _closed_rows(Victims)
Closed_Police = _closed_rows(Police)

Closed_Cases = [Closed_Suspects, Closed_Victims, Closed_Police]

def add_cdate_var(Sheets):
    """Stamp every dataframe in ``Sheets`` with a ``Date_Closed`` column.

    Frames that have rows get today's date (MM/DD/YYYY) in every row; frames
    without rows just gain the column, filled with an empty string. The frames
    are modified in place and nothing is returned.
    """
    stamp = date.today().strftime("%m/%d/%Y")
    for frame in Sheets:
        if len(frame) == 0:
            # No rows to stamp: only make sure the column exists.
            frame['Date_Closed'] = ""
            continue
        frame.loc[:, 'Date_Closed'] = stamp

# Stamp the rows selected as "Closed" above with today's date (modifies those frames in place).
add_cdate_var(Closed_Cases)

# Append the newly closed rows to the existing closed-sheet frames.
# Closed_*_GS are presumably the worksheet frames published by the
# sheet-loading cell -- confirm the worksheet names.
Closed_Sus = pd.concat([Closed_Sus_GS,Closed_Suspects], sort=False)
Closed_Pol = pd.concat([Closed_Pol_GS,Closed_Police], sort=False)
Closed_Vic = pd.concat([Closed_Vic_GS,Closed_Victims], sort=False)

# Next step: remove the closed rows from the active sheets, matching on ID.
Suspects = Suspects[~Suspects.Suspect_ID.isin(Closed_Suspects.Suspect_ID)]
Police = Police[~Police.Suspect_ID.isin(Closed_Police.Suspect_ID)]
Victims = Victims[~Victims.Victim_ID.isin(Closed_Victims.Victim_ID)]

# Second pass over the remaining active rows. Statement order matters here:
# each selection reads the frames as they stand at that point.
# Suspects: Suspect_ID was closed on the Police sheet, or Case_ID has no remaining victim.
Closed_Suspects = Suspects[(Suspects.Suspect_ID.isin(Closed_Police.Suspect_ID)) |
                            (~Suspects.Case_ID.isin(Victims.Case_ID))]
# Police: Suspect_ID is in the Closed_Suspects just recomputed above, or Case_ID has no remaining victim.
Closed_Police = Police[(Police.Suspect_ID.isin(Closed_Suspects.Suspect_ID)) |
                            (~Police.Case_ID.isin(Victims.Case_ID))]
# Victims: Case_ID is missing from the active Police rows or from the active Suspects rows.
Closed_Victims = Victims[(~Victims.Case_ID.isin(Police.Case_ID)) |
                           (~Victims.Case_ID.isin(Suspects.Case_ID))]

# Date-stamp the second batch as well.
Closed_Cases = [Closed_Suspects,Closed_Victims,Closed_Police]
add_cdate_var(Closed_Cases)

# Merge the second batch into the closed frames, keeping the first row seen per ID.
Closed_Sus = pd.concat([Closed_Sus,Closed_Suspects], sort=False).drop_duplicates(subset='Suspect_ID')
Closed_Pol = pd.concat([Closed_Pol,Closed_Police], sort=False).drop_duplicates(subset='Suspect_ID')
Closed_Vic = pd.concat([Closed_Vic,Closed_Victims], sort=False).drop_duplicates(subset='Victim_ID')

# Remove the second batch from the active sheets too.
Suspects = Suspects[~Suspects.Suspect_ID.isin(Closed_Suspects.Suspect_ID)]
Police = Police[~Police.Suspect_ID.isin(Closed_Police.Suspect_ID)]
Victims = Victims[~Victims.Victim_ID.isin(Closed_Victims.Victim_ID)]

### Calculate Case Priority

In [24]:
# Victims whose Case_Status contains "Step Complete" get their own name in
# Willing_to_Testify; all other rows are left as NaN by index alignment.
Victims['Willing_to_Testify'] = Victims.Name[Victims.Case_Status.str.contains("Step Complete", na=False)]

# One comma-separated string of willing victims per Case_ID.
Vics_Willing = Victims[pd.notnull(Victims['Willing_to_Testify'])]
Vics_Willing = Vics_Willing.groupby(
    'Case_ID',sort=False)['Willing_to_Testify'].apply(lambda x: ', '.join(x.astype(str)))

# Bring that string onto each Police row for the same case.
Police = pd.merge(Police, Vics_Willing, how='left',on='Case_ID')
# Use item assignment, not attribute assignment: `Police.Victims_Willing_to_Testify = ...`
# only sets a Python attribute when the column does not already exist, and the
# merged values are then lost when the helper column is dropped below.
Police['Victims_Willing_to_Testify'] = Police['Willing_to_Testify']
Police = Police.drop(columns=['Willing_to_Testify'])

# Weights read by position from the Parameters sheet: rows 0-4 of column 1 ...
# (values are used exactly as they come from the sheet; NOTE(review): they are
# presumably still strings at this point -- confirm where they are converted.)
Weight_Victims_Willing = Parameters_GS.iloc[0,1]
Weight_Bio_and_Location = Parameters_GS.iloc[1,1]
Weight_Other_Suspects = Parameters_GS.iloc[2,1]
Weight_Police_Willing = Parameters_GS.iloc[3,1]
Weight_Recency_of_Case = Parameters_GS.iloc[4,1]

# ... and rows 0-2 of column 5. "Weight_Emience" (sic) keeps its original
# spelling in case later cells reference the name.
Weight_Emience = Parameters_GS.iloc[0,5]
Weight_Solvability = Parameters_GS.iloc[1,5]
Weight_Strength_of_Case = Parameters_GS.iloc[2,5]

In [25]:
## Organize Arrest data from Case Dispatcher

# NOTE(review): depending on the pandas version, pd.DataFrame(Arrests_GS) may
# share data with Arrests_GS, so the column write below could also show up
# there. Kept as-is to avoid changing what later cells see.
Arrests = pd.DataFrame(Arrests_GS)
# Blank sheet cells presumably arrive as '' rather than NaN (the frames are
# built from get_all_values()), so fillna(0) alone would not catch them and
# astype(int) would raise; map '' to 0 explicitly first. The former bare
# `Arrests.infer_objects()` call discarded its result and has been removed.
Arrests['Outcome (Arrest)'] = (
    Arrests['Outcome (Arrest)'].replace('', 0).fillna(0).astype(int)
)
# Keep only rows whose arrest outcome is 1; copy so the new *_ID columns are
# added to an independent frame rather than to a slice.
Arrests = Arrests.loc[Arrests['Outcome (Arrest)'] == 1].copy()

# Person-box labels PB1..PB7 and a per-box id column "<IRF#>.<PBn>".
PBs = ['PB' + str(n) for n in range(1, 8)]
for PB in PBs:
    Arrests[PB + '_ID'] = Arrests['IRF#'] + '.' + PB

# Split the wide sheet into one frame per person box. Each box is expected to
# contribute exactly four columns whose names contain its label; they are
# relabelled by position (NOTE(review): assumes the sheet orders them as name,
# arrested flag, arrest date, with the generated *_ID column last -- confirm).
newcn = ['Name','Arrested','Arrest_Date','PB_ID']
dpb = {}
for PB in PBs:
    cnames = [col for col in Arrests.columns if PB in col]
    box_frame = Arrests[cnames].copy()
    box_frame.columns = newcn
    dpb['df{0}'.format(PB)] = box_frame
# Still publish dfPB1..dfPB7 as notebook variables for any later cell that
# uses them (relies on module scope, where locals() is the global namespace).
locals().update(dpb)

# Build the list from the dict so it no longer depends on the injected names.
df_list = [dpb['df{0}'.format(PB)] for PB in PBs]

# Stack all boxes into one long frame and keep the rows marked as arrested.
dfPBAll = pd.concat(df_list)

# na=False treats missing values as "not arrested" instead of raising,
# matching the Case_Status filters used for the closed-case selection.
Arrests = dfPBAll[dfPBAll['Arrested'].str.contains("Yes", na=False)]