# Dataframe Generation for Model Input
* Author: Callie Clark
* Date updated: 4/3/2021

In [1]:
import pandas as pd
import requests
import datetime as dt
import numpy as np

import dataframe_generation_files as gen

## Format Incident Data

In [3]:
# Read the 2014 Chicago crime extract: the first argument is the zip archive,
# the second is the CSV member name, and timestamps use the Chicago time zone.
# The preview below shows a tz-aware 'Timestamp' column in the result.
df = gen.format_file(
    'Chicago_Data/Crimes_-_2014.zip',
    'Crimes_-_2014.csv',
    timezone="America/Chicago",
)
df.head()

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,Community Area,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location,Timestamp
0,9470797,HX123824,2014-01-22,031XX S ASHLAND AVE,460,BATTERY,SIMPLE,RESTAURANT,True,False,...,59,08B,1166178.0,1883833.0,2014,05/24/2017 03:50:24 PM,41.836816,-87.665724,"(41.836816162, -87.665724279)",2014-01-22 16:02:00-06:00
5,9729405,HX379028,2014-08-08,044XX W JACKSON BLVD,4651,OTHER OFFENSE,SEX OFFENDER: FAIL REG NEW ADD,RESIDENCE,True,False,...,26,26,1146739.0,1898315.0,2014,05/21/2017 03:53:28 PM,41.876949,-87.736685,"(41.876949072, -87.736685059)",2014-08-08 10:50:00-05:00
6,9510600,HX165568,2014-02-26,037XX N MONTICELLO AVE,610,BURGLARY,FORCIBLE ENTRY,RESIDENCE,False,False,...,16,05,1151395.0,1924539.0,2014,05/19/2017 03:49:49 PM,41.94882,-87.7189,"(41.948820056, -87.718899701)",2014-02-26 07:40:00-06:00
12,9539289,HX192580,2014-03-20,111XX S STATE ST,460,BATTERY,SIMPLE,GAS STATION,True,False,...,49,08B,1178229.0,1831287.0,2014,05/15/2017 03:51:52 PM,41.692359,-87.623097,"(41.692358646, -87.623096746)",2014-03-20 19:25:00-05:00
15,9801642,HX450761,2014-09-13,070XX S MERRILL AVE,1725,OFFENSE INVOLVING CHILDREN,CONTRIBUTE CRIM DELINQUENCY JUVENILE,OTHER,True,True,...,43,20,1191762.0,1858706.0,2014,05/09/2017 03:51:05 PM,41.767282,-87.572664,"(41.767282129, -87.572663824)",2014-09-13 14:00:00-05:00


In [4]:
# Sanity check on the coordinate columns; for this file the helper prints
# "Data has Latitude and Longitude" (see the cell output).
gen.check_lat_long(df)

Data has Latitude and Longitude


In [5]:
# Groupings of 'FBI Code' values used to decide which incidents get a responder.
violent_crime = ['01A', '02', '03', '04A', '04B']
property_crime = ['05', '06', '07', '09']
nonindex_crime = [
    '01B', '08A', '08B', '10', '11', '12', '13', '14', '15',
    '16', '17', '18', '19', '20', '22', '24', '26',
]

# Composite groupings are derived from the base lists so they cannot drift apart.
index_crime = violent_crime + property_crime
nonviolent_crime = nonindex_crime + property_crime

# Adds a column named after crime_responder ('Police'). In the rows previewed
# below it is 1 where 'FBI Code' is in crime_types and 0 otherwise.
df = gen.classify_response_type(
    df,
    crime_responder='Police',
    crime_types=index_crime,
    UCRcol='FBI Code',
)
# TODO: consider indicating the crime type in the CSV file name.
df.head()

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,FBI Code,X Coordinate,Y Coordinate,Year,Updated On,Latitude,Longitude,Location,Timestamp,Police
0,9470797,HX123824,2014-01-22,031XX S ASHLAND AVE,460,BATTERY,SIMPLE,RESTAURANT,True,False,...,08B,1166178.0,1883833.0,2014,05/24/2017 03:50:24 PM,41.836816,-87.665724,"(41.836816162, -87.665724279)",2014-01-22 16:02:00-06:00,0
5,9729405,HX379028,2014-08-08,044XX W JACKSON BLVD,4651,OTHER OFFENSE,SEX OFFENDER: FAIL REG NEW ADD,RESIDENCE,True,False,...,26,1146739.0,1898315.0,2014,05/21/2017 03:53:28 PM,41.876949,-87.736685,"(41.876949072, -87.736685059)",2014-08-08 10:50:00-05:00,0
6,9510600,HX165568,2014-02-26,037XX N MONTICELLO AVE,610,BURGLARY,FORCIBLE ENTRY,RESIDENCE,False,False,...,05,1151395.0,1924539.0,2014,05/19/2017 03:49:49 PM,41.94882,-87.7189,"(41.948820056, -87.718899701)",2014-02-26 07:40:00-06:00,1
12,9539289,HX192580,2014-03-20,111XX S STATE ST,460,BATTERY,SIMPLE,GAS STATION,True,False,...,08B,1178229.0,1831287.0,2014,05/15/2017 03:51:52 PM,41.692359,-87.623097,"(41.692358646, -87.623096746)",2014-03-20 19:25:00-05:00,0
15,9801642,HX450761,2014-09-13,070XX S MERRILL AVE,1725,OFFENSE INVOLVING CHILDREN,CONTRIBUTE CRIM DELINQUENCY JUVENILE,OTHER,True,True,...,20,1191762.0,1858706.0,2014,05/09/2017 03:51:05 PM,41.767282,-87.572664,"(41.767282129, -87.572663824)",2014-09-13 14:00:00-05:00,0


In [6]:
# Add per-incident dispatch attributes. A later preview of df shows the
# resulting 'Priority' and '#Responders' columns.
df=gen.set_priority(df)
# Every call is assigned the same number of responders (2).
df=gen.set_number_responders(df,number_per_call=2)

In [7]:
# Attach a 'Service Time' to each incident; the helper prints min/max/mean of
# the generated values (see the cell output).
# NOTE(review): average_time is presumably in minutes and s presumably the
# log-normal shape parameter -- confirm against the helper's definition.
# NOTE(review): the values look randomly sampled and no seed is set in this
# notebook, so reruns may not reproduce the printed stats -- confirm.
df=gen.log_norm_service_time(df, average_time=23.2,s=0.18)

Stats of dist: min 9.149819917224818 max 52.67560394940913 mean 23.57975622245566


## Create Seasonal Profiles
* Crime frequency and type vary by season

In [8]:
# An empty season map is handed to create_seasons.
# NOTE(review): presumably the helper falls back to (or fills in) a default
# mapping when the dict is empty -- confirm against its definition.
season_map = {}

# The preview below shows the 'season' and 'Frequency' columns added here.
df = gen.create_seasons(df, season_map)
df = gen.calculate_frequency(df)
df.head()

Unnamed: 0,ID,Case Number,Date,Block,IUCR,Primary Type,Description,Location Description,Arrest,Domestic,...,Latitude,Longitude,Location,Timestamp,Police,Priority,#Responders,Service Time,season,Frequency
0,9470797,HX123824,2014-01-22,031XX S ASHLAND AVE,460,BATTERY,SIMPLE,RESTAURANT,True,False,...,41.836816,-87.665724,"(41.836816162, -87.665724279)",2014-01-22 16:02:00-06:00,0,3,2,0 days 00:19:00,winter,622
5,9729405,HX379028,2014-08-08,044XX W JACKSON BLVD,4651,OTHER OFFENSE,SEX OFFENDER: FAIL REG NEW ADD,RESIDENCE,True,False,...,41.876949,-87.736685,"(41.876949072, -87.736685059)",2014-08-08 10:50:00-05:00,0,3,2,0 days 00:23:00,summer,807
6,9510600,HX165568,2014-02-26,037XX N MONTICELLO AVE,610,BURGLARY,FORCIBLE ENTRY,RESIDENCE,False,False,...,41.94882,-87.7189,"(41.948820056, -87.718899701)",2014-02-26 07:40:00-06:00,1,2,2,0 days 00:33:00,winter,659
12,9539289,HX192580,2014-03-20,111XX S STATE ST,460,BATTERY,SIMPLE,GAS STATION,True,False,...,41.692359,-87.623097,"(41.692358646, -87.623096746)",2014-03-20 19:25:00-05:00,0,3,2,0 days 00:24:00,spring,771
15,9801642,HX450761,2014-09-13,070XX S MERRILL AVE,1725,OFFENSE INVOLVING CHILDREN,CONTRIBUTE CRIM DELINQUENCY JUVENILE,OTHER,True,True,...,41.767282,-87.572664,"(41.767282129, -87.572663824)",2014-09-13 14:00:00-05:00,0,3,2,0 days 00:23:00,summer,665


In [9]:
# Start from an empty dict and let the helper return the populated mapping.
# The output maps keys such as 'summer_0.5' and 'summer_worst_case' to lists
# of dates.
seasonal_profiles = {}
seasonal_profiles = gen.generate_seasonal_profiles(
    df,
    seasonal_profiles,
    quantile_list=[0.5],
    iterations=1,
    number_profiles=1,
)
seasonal_profiles


{'summer_0.5': [datetime.date(2014, 7, 13)],
 'fall_0.5': [datetime.date(2014, 10, 22)],
 'spring_0.5': [datetime.date(2014, 3, 29)],
 'winter_0.5': [datetime.date(2014, 2, 9)],
 'summer_worst_case': [datetime.date(2014, 8, 1)],
 'fall_worst_case': [datetime.date(2014, 10, 1)],
 'spring_worst_case': [datetime.date(2014, 6, 1)],
 'winter_worst_case': [datetime.date(2014, 1, 1)]}

In [10]:
# Both helpers are called for their effect on daily_profiles (return values are
# not used); the output shows 'all_data_<quantile>' and 'all_data_worst_case'
# keys, each mapped to a list of dates.
daily_profiles = {}
gen.quantile_daily_profiles(
    df,
    daily_profiles,
    quantiles=[0.25, 0.50, 0.75, 0.95],
    iterations=1,
)
gen.worst_case_profiles(df, daily_profiles, number_profiles=1)
daily_profiles

{'all_data_0.25': [datetime.date(2014, 3, 24)],
 'all_data_0.5': [datetime.date(2014, 5, 7)],
 'all_data_0.75': [datetime.date(2014, 6, 28)],
 'all_data_0.95': [datetime.date(2014, 7, 27)],
 'all_data_worst_case': [datetime.date(2014, 8, 1)]}

## Generate Incident Profiles and Write Them to CSV Files

In [11]:
# Export the incidents for the selected daily profile dates, targeting the
# 'Chicago_Data' folder.
# NOTE(review): output file naming is decided inside create_csv -- confirm there.
gen.create_csv(daily_profiles,df,folder='Chicago_Data')

In [12]:
# Export the incidents for the selected seasonal profile dates, targeting the
# 'Chicago_Data' folder.
# NOTE(review): output file naming is decided inside create_csv -- confirm there.
gen.create_csv(seasonal_profiles,df,folder='Chicago_Data')