In [None]:
import pandas as pd
import sqlite3

Import the full census block group data, then generate one row per 3-hour time block per day for each census block group

In [None]:
# Load the full census block-group data from CSV.
# NOTE(review): this path is rooted at "DC-Criminalistics/", while the SQLite
# database read later in this notebook lives under "DC-Criminalistics_06012019/"
# — confirm both locations resolve from the notebook's working directory.
df_census = pd.read_csv(
    filepath_or_buffer="DC-Criminalistics/data/census-data/FinalBlockGroupData.csv",
)

In [None]:
# First and last calendar years (inclusive) covered by the time grid.
start_year = '2013'
end_year = '2017'

# Half-open window: [Jan 1 of start_year, Jan 1 of the year after end_year).
first_block_start = pd.Timestamp(start_year)
window_end_exclusive = pd.Timestamp(end_year) + pd.offsets.YearEnd(0) + pd.Timedelta(days=1)

# `date_range(closed=...)` was deprecated in pandas 1.4 and removed in 2.0, and
# its replacement `inclusive=` does not exist before 1.4. Generating the closed
# range and dropping the end point yields the same left-closed grid of 3-hour
# block starts on every pandas version.
block_starts = pd.date_range(start=first_block_start, end=window_end_exclusive, freq='3h')

# One row per 3-hour block start; the exclusive end point itself is filtered out.
df_time = pd.DataFrame({'date': block_starts[block_starts < window_end_exclusive]})

Add columns for year, month, day, and the 3-hour time block category

In [None]:
# Calendar parts used later as join keys. `date` already holds datetimes because
# it was produced by pd.date_range, so it is not re-parsed here.
df_time['year'] = df_time['date'].dt.year
df_time['month'] = df_time['date'].dt.month
df_time['day'] = df_time['date'].dt.day

# Label each row with its 3-hour block. The edges are spelled out as left-closed
# intervals [0,3), [3,6), ..., [21,24) rather than asking pd.cut for "8 bins":
# an integer bin count derives the edges from the observed min/max hour, so the
# labels would only line up by coincidence of the data present.
df_time['time_block'] = pd.cut(
    df_time['date'].dt.hour,
    bins=[0, 3, 6, 9, 12, 15, 18, 21, 24],
    right=False,
    labels=['12am-3am','3am-6am','6am-9am','9am-12pm','12pm-3pm','3pm-6pm','6pm-9pm','9pm-12am'],
)

In [None]:
# Preview the first ten rows of the time grid with its derived columns.
df_time.head(10)

Merge with census on Year

In [None]:
# Pair every 3-hour block with the census block groups recorded for the same
# year. Only the identifying census columns are carried along, and the join is
# pandas' default inner join, so years present on just one side are dropped.
full_date_time_census = pd.merge(
    left=df_time,
    right=df_census.loc[:, ['Tract', 'BlockGroup', 'Year']],
    left_on='year',
    right_on='Year',
)

In [None]:
# Preview the first ten rows of the time-grid/census merge.
full_date_time_census.head(10)

In [None]:
# Non-null count per column — a quick size and missing-value check on the merge.
full_date_time_census.count()

Import Census/Crime/Weather to figure out how to merge

In [None]:
# Open the SQLite database that holds the combined crime/census/weather table.
conn = sqlite3.connect('DC-Criminalistics_06012019/data/crime_census_weather.db')

# NOTE(review): no cell visible here uses this cursor — confirm a later cell needs it.
c = conn.cursor()

# Pull only the 2013-2017 rows, the same year span as the time grid built above.
df = pd.read_sql(
    sql='''select * from crime_census_weather where year >=2013 and year <= 2017''',
    con=conn,
)

In [None]:
# Preview the first rows loaded from the crime_census_weather table.
df.head()

In [None]:
# List every column name in the crime/census/weather frame.
df.columns.tolist()

Add date/time/3-hr time fields to crime/census/weather df

In [None]:
# Parse the start timestamps so the .dt accessors below are available.
df['START_DATE'] = pd.to_datetime(df['START_DATE'])

# Calendar parts of the start timestamp, used as join keys against the time grid.
df['START_YEAR'] = df['START_DATE'].dt.year
df['START_MONTH'] = df['START_DATE'].dt.month
df['START_DAY'] = df['START_DATE'].dt.day

# Label each row with its 3-hour block, using the same labels as the time grid's
# `time_block` column. The edges are explicit left-closed intervals [0,3), [3,6),
# ..., [21,24): passing an integer bin count would derive equal-width edges from
# the observed min/max hour, so a result set lacking hour 0 or hour 23 would
# shift every boundary and mislabel events.
df['START_TIME_CATEGORY'] = pd.cut(
    df['START_DATE'].dt.hour,
    bins=[0, 3, 6, 9, 12, 15, 18, 21, 24],
    right=False,
    labels=['12am-3am','3am-6am','6am-9am','9am-12pm','12pm-3pm','3pm-6pm','6pm-9pm','9pm-12am'],
)

In [None]:
# Preview the frame again now that the START_* columns have been added.
df.head()

Build a block group key in the census/date/time DataFrame (zero-padded tract plus block group) so it joins correctly to the crime/census/weather data

In [None]:
# Render each tract as text and left-pad it with zeros to six characters,
# in a single pass over the column.
full_date_time_census['Tract'] = full_date_time_census['Tract'].map(
    lambda tract: str(tract).zfill(6)
)

In [None]:
# Join key in "<tract> <block group>" form, matched against BLOCK_GROUP in the
# crime data. Requires Tract to already be text (see the zero-padding cell).
full_date_time_census['BlockGroup_clean'] = full_date_time_census['Tract'].str.cat(
    full_date_time_census['BlockGroup'],
    sep=" ",
)

In [None]:
# Preview the frame with the padded Tract and the new BlockGroup_clean key.
full_date_time_census.head()

Joining crime/weather to full date/timeblock data

In [None]:
# Left join: keep every (3-hour block, block group) row from the grid and attach
# the crime/weather records that fall in the same day, time block and block
# group. Grid rows without a match keep NaN in the crime columns.
time_blocks_and_crime = pd.merge(
    left=full_date_time_census,
    right=df,
    how='left',
    left_on=['year', 'month', 'day', 'time_block', 'BlockGroup_clean'],
    right_on=['START_YEAR', 'START_MONTH', 'START_DAY', 'START_TIME_CATEGORY', 'BLOCK_GROUP'],
)

In [None]:
# Preview the first rows of the grid-to-crime left join.
time_blocks_and_crime.head()

In [None]:
# Grid rows that found no crime record in the left join: their OCTO_RECORD_ID
# (a column contributed by the crime data) is missing.
no_crime_time_census = time_blocks_and_crime.loc[
    lambda frame: frame['OCTO_RECORD_ID'].isna()
]

In [None]:
# Non-null count per column for the rows that have no matching crime record.
no_crime_time_census.count()