# Aggregating crime data

`crime_agg_category.csv`: 23 columns (`year`, `county`, `geo`, plus crime counts broken down by offense **category**)

`crime_agg_name.csv`: 45 columns (`year`, `county`, `geo`, plus crime counts broken down by offense **name**)

### Creates two aggregated datasets. Both include:
- First, ALL values in `crime_against`, `offense_name`, and `offense_category` are renamed to shorter alternatives in *snake_case*. This was done in preparation for dummifying those columns, to make for friendly column names.
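- Data is grouped by year and county, and includes the following aggregated columns (in the current version these shared columns are written to the separate `crime_agg_base_count.csv` and `crime_agg_base_rate.csv` files):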
  - Crime count
  - Average age
  - Mode quarter (which quarter had the most crimes?)
  - Mode month (which month had the most crimes?)
  - Mode day of week (mon-sun => 0-6)
  - Mode hour of day (military time)
  - The original `crime_against` column was dummified, and summed during aggregation, to show total crime counts for each:
    - `against_person`
    - `against_society`
    - `against_property`
    - `not_a_crime`

#### Additional columns in `crime_agg_category` data:
- The original `offense_category` column was dummified, and then summed during aggregation, showing total crime counts broken up by offense category

#### Additional columns in `crime_agg_name` data:
- The original `offense_name` column was dummified, and then summed during aggregation, showing total crime counts broken up by offense name.

In [1]:
import crime as cr
import pandas as pd, numpy as np

# Row-level crime records (one row per reported offense)
df_raw = pd.read_csv("output/all.csv")

# County geometries: keep a single (first non-null) geometry per county
geo = (
    pd.read_csv('../geo/output/geo_county_school.csv')
    .rename(columns={'geo_county': 'geo'})
    .groupby('county')
    .first()
    .reset_index()[['county', 'geo']]
)

# County population, restricted to the 1997-2019 window and trimmed to
# the three columns used downstream ('total' is renamed to 'pop')
pop = pd.read_csv('../county_stats/output/county_population.csv')
in_window = (pop.year >= 1997) & (pop.year <= 2019)
pop = (
    pop[in_window]
    .reset_index(drop=True)[['year', 'county', 'total']]
    .rename(columns={'total': 'pop'})
)

display(df_raw.shape, df_raw.head(3), geo.shape, geo.head(3), pop.shape, pop.head(3))

(6770666, 11)

Unnamed: 0,year,county,date,quarter,month,day_of_week,hour,age,crime_against,offense_name,offense_category
0,1997,BOULDER,1997-03-14,1,3,4,,15.0,Person,Fondling,Sex Offenses
1,1997,BOULDER,1997-07-02,3,7,2,21.0,14.0,Property,Arson,Arson
2,1997,KIT CARSON,1997-01-20,1,1,0,22.0,58.0,Person,Simple Assault,Assault Offenses


(63, 2)

Unnamed: 0,county,geo
0,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...
1,ALAMOSA,MULTIPOLYGON (((-105.59917426201822 37.7521648...
2,ARAPAHOE,MULTIPOLYGON (((-103.70653410023402 39.7398580...


(1472, 3)

Unnamed: 0,year,county,pop
0,1997.0,ADAMS,332750.0
1,1997.0,ALAMOSA,14478.0
2,1997.0,ARAPAHOE,459061.0


#### Remap all values in categorical columns based on excel sheet
- We created 3 tables by hand in Excel to rename EACH value in `offense_name`, `offense_category` and `crime_against`. This was needed in order to create dummy columns with friendly names.

In [2]:
# Load the three hand-made renaming tables (columns OLD -> NEW). The context
# manager closes the workbook handle as soon as the sheets have been read.
with pd.ExcelFile("crime_renaming_map.xlsx") as xl:
    name = pd.read_excel(xl, sheet_name='offense_name')
    cat = pd.read_excel(xl, sheet_name='offense_category')
    against = pd.read_excel(xl, sheet_name='crime_against')

df = df_raw.copy()
# Build an OLD -> NEW dict from each table and pass it to Series.map().
# Series.map() turns every value that is missing from the dict into NaN, and
# NaN rows get no dummy column later on, so report unmapped values instead of
# letting them disappear silently.
for col, table in (('offense_name', name),
                   ('offense_category', cat),
                   ('crime_against', against)):
    mapping = dict(zip(table.OLD, table.NEW))
    unmapped = set(df[col].dropna().unique()) - set(mapping)
    if unmapped:
        print(f"WARNING: {len(unmapped)} value(s) in '{col}' have no entry "
              f"in the renaming map and will become NaN: {unmapped}")
    df[col] = df[col].map(mapping)
df_refactored = df
display(name, cat, against, df_refactored)

Unnamed: 0,OLD,NEW
0,Simple Assault,assault_simple
1,Intimidation,intimidation
2,Fondling,fondling
3,Rape,rape
4,Impersonation,impersonation
5,Robbery,robbery
6,Arson,arson
7,Destruction/Damage/Vandalism of Property,property_damage
8,Theft From Motor Vehicle,theft_from_vehicle
9,Burglary/Breaking & Entering,burglary


Unnamed: 0,OLD,NEW
0,Assault Offenses,assault
1,Sex Offenses,sex_offense
2,Fraud Offenses,fraud
3,Robbery,robbery
4,Arson,arson
5,Destruction/Damage/Vandalism of Property,property_damage
6,Larceny/Theft Offenses,larceny_theft
7,Burglary/Breaking & Entering,burglary
8,Homicide Offenses,homicide
9,Drug/Narcotic Offenses,drug


Unnamed: 0,OLD,NEW
0,Person,against_person
1,Property,against_property
2,Society,against_society
3,Not a Crime,not_a_crime


Unnamed: 0,year,county,date,quarter,month,day_of_week,hour,age,crime_against,offense_name,offense_category
0,1997,BOULDER,1997-03-14,1,3,4,,15.0,against_person,fondling,sex_offense
1,1997,BOULDER,1997-07-02,3,7,2,21.0,14.0,against_property,arson,arson
2,1997,KIT CARSON,1997-01-20,1,1,0,22.0,58.0,against_person,assault_simple,assault
3,1997,KIT CARSON,1997-01-18,1,1,5,,21.0,against_property,other_larceny,larceny_theft
4,1997,KIT CARSON,1997-03-31,1,3,0,,,against_property,property_damage,property_damage
...,...,...,...,...,...,...,...,...,...,...,...
6770661,2019,BOULDER,2019-10-16,4,10,2,21.0,,against_property,other_larceny,larceny_theft
6770662,2019,BOULDER,2019-10-16,4,10,2,21.0,,against_property,other_larceny,larceny_theft
6770663,2019,ARAPAHOE,2019-06-01,2,6,5,18.0,20.0,against_property,shoplifting,larceny_theft
6770664,2019,ADAMS,2019-01-21,1,1,0,12.0,15.0,against_property,property_damage,property_damage


### Aggregated datasets
1. Version 1: includes `offense_category` dummy sums
2. Version 2: includes `offense_name` dummy sums

In [3]:
# Grouping keys shared by every aggregation below
INDEX = ['year', 'county']
# Work on a copy so df_refactored keeps the remapped row-level data
df = df_refactored.copy()

def dummies_special(df, include, exclude, index=None) -> pd.DataFrame:
    """Sum one-hot encoded crime columns per group.

    `crime_against` and the `include` column are one-hot encoded without a
    prefix (each distinct value becomes its own column), the row-level columns
    that must not be summed are dropped, and the dummies are summed per group.

    Parameters
    ----------
    df : pd.DataFrame
        Row-level crime data containing `crime_against`, `include`, `exclude`,
        the grouping keys and the columns 'date', 'quarter', 'month',
        'day_of_week', 'hour' and 'age'.
    include : str
        Categorical column to one-hot encode alongside `crime_against`.
    exclude : str
        Categorical column to drop before aggregating.
    index : list of str, optional
        Grouping keys. Defaults to the notebook-level `INDEX`; pass it
        explicitly to avoid depending on the current value of that global.

    Returns
    -------
    pd.DataFrame
        One row per group: the grouping keys followed by the summed dummies.
    """
    keys = INDEX if index is None else list(index)
    encoded = pd.get_dummies(
        df,
        columns=['crime_against', include],
        prefix="", prefix_sep=""
    )
    # Everything left besides the keys must be a dummy column, otherwise it
    # would be summed as well.
    encoded = encoded.drop(
        columns=[exclude, 'date', 'quarter', 'month', 'day_of_week', 'hour', 'age']
    )
    return encoded.groupby(keys).sum().reset_index()

# One summed-dummy table per breakdown; the other categorical column is dropped
dum_cat = dummies_special(df, include='offense_category', exclude='offense_name')
dum_name = dummies_special(df, include='offense_name', exclude='offense_category')

In [4]:
# Keep the grouping keys plus the time fields that get reduced to their mode
time_fields = ['quarter', 'month', 'day_of_week', 'hour']
df_modes = df[INDEX + time_fields]

# Aggregating with pd.Series.mode leaves mixed cell types: a plain scalar when
# a group has a single mode, a numpy.ndarray holding all of them when several
# values tie, and sometimes an EMPTY array, which makes blind indexing unsafe.
# The helper below collapses every such cell to a single float.
def first_in_list(x):
    """Collapse one aggregated mode cell to a single float.

    Parameters
    ----------
    x : scalar or array-like
        A scalar, or a numpy array (including subclasses), pandas Series,
        list or tuple of candidate values.

    Returns
    -------
    float
        The first element of an array-like `x`, `np.nan` if it is empty,
        or `float(x)` for a scalar.
    """
    # isinstance (rather than an exact type comparison) also accepts ndarray
    # subclasses and the other containers a mode result may arrive in.
    if isinstance(x, (np.ndarray, pd.Series, list, tuple)):
        values = np.asarray(x).ravel()
        if values.size > 0:
            return float(values[0])
        return np.nan
    return float(x)

# Reduce every time field to one mode per group. Series.mode() returns the
# modes in sorted order (possibly none), so the first entry is taken inside
# the aggregator itself. This yields a single float per cell directly and
# avoids a cell-wise pass with DataFrame.applymap, which is deprecated since
# pandas 2.1.
df_modes = (
    df_modes
    .groupby(INDEX)
    .agg(lambda s: first_in_list(s.mode().to_numpy()))
    .reset_index()
)

# Append '_mode' to the end of each aggregated column name in a single rename
df_modes = df_modes.rename(
    columns={c: f'{c}_mode' for c in df_modes.columns if c not in INDEX}
)

In [5]:
# Crime count per group: number of rows with a non-null 'date'
df_count = df.groupby(INDEX)['date'].count().reset_index(name='cr_count')

# Average age per group (missing ages are skipped by mean())
df_avg = df.groupby(INDEX)['age'].mean().reset_index(name='age_avg')

#### Stitching everything together

In [6]:
# Numerical aggregations joined on the grouping keys: counts, then averages, then modes
df = df_count.merge(df_avg, on=INDEX).merge(df_modes, on=INDEX)

In [7]:
# Summed aggregations for dummies: 2 versions, built the same way
def _attach_population(dummy_sums):
    """Join the numeric aggregates with `dummy_sums` and the county population.

    Both joins are inner merges; the 'pop' column is then moved to position 2,
    right after the grouping keys.
    """
    merged = df.merge(dummy_sums, on=INDEX).merge(pop, on=['year', 'county'])
    merged.insert(2, 'pop', merged.pop('pop'))
    return merged


df_cat = _attach_population(dum_cat)
df_name = _attach_population(dum_name)

### Add geo

In [8]:
# From here on the key columns also include the county geometry
INDEX = ['year', 'county', 'geo']


def _attach_geo(frame):
    """Inner-merge the county geometry onto `frame` and move 'geo' to position 2."""
    merged = frame.merge(geo, on='county')
    merged.insert(2, 'geo', merged.pop('geo'))
    return merged


df_cat = _attach_geo(df_cat)
df_name = _attach_geo(df_name)

# Columns that are identical in both breakdowns
BASE_COLS = [
    'pop', 'cr_count', 'age_avg',
    'quarter_mode', 'month_mode', 'day_of_week_mode', 'hour_mode',
    'against_person', 'against_property', 'against_society', 'not_a_crime',
]

# Split the shared columns off into their own table (select first, then copy)
df_base = df_cat[INDEX + BASE_COLS].copy()
# Rates are expressed per 100,000 residents; 'cr_rate' goes right after 'cr_count'
df_base.insert(5, 'cr_rate', df_base['cr_count'] / df_base['pop'] * 100_000)
for against_col in ('against_person', 'against_property', 'against_society'):
    df_base[f'{against_col}_rate'] = df_base[against_col] / df_base['pop'] * 100_000

# The breakdown tables keep only the keys and their own dummy sums
df_cat = df_cat.drop(columns=BASE_COLS)
df_name = df_name.drop(columns=BASE_COLS)

display(df_base.shape, df_base.head(3), df_cat.shape, df_cat.head(3), df_name.shape, df_name.head(3))

(1378, 18)

Unnamed: 0,year,county,geo,pop,cr_count,cr_rate,age_avg,quarter_mode,month_mode,day_of_week_mode,hour_mode,against_person,against_property,against_society,not_a_crime,against_person_rate,against_property_rate,against_society_rate
0,1997,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,332750.0,22947,6896.168295,24.582071,1.0,3.0,0.0,17.0,3047.0,17766.0,2134.0,0.0,915.702479,5339.143501,641.322314
1,1998,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,344025.0,26748,7775.016351,24.822389,2.0,5.0,1.0,17.0,3457.0,21065.0,2224.0,2.0,1004.868832,6123.101519,646.464646
2,1999,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,355309.0,24733,6960.983257,25.124482,3.0,8.0,0.0,18.0,2902.0,19315.0,2513.0,3.0,816.753868,5436.113355,707.271699


(1378, 23)

Unnamed: 0,year,county,geo,arson,assault,bribery,burglary,drug,embezzlement,extortion,...,kidnapping,larceny_theft,porn,property_damage,prostitution,robbery,sex_offense,stolen_property,vehicle_theft,weapon_law
0,1997,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,86.0,2672.0,2.0,1931.0,1845.0,26.0,5.0,...,50.0,8023.0,1.0,5467.0,14.0,189.0,316.0,245.0,1317.0,274.0
1,1998,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,97.0,3019.0,1.0,2444.0,1875.0,29.0,3.0,...,54.0,9258.0,0.0,6301.0,15.0,273.0,377.0,272.0,1575.0,334.0
2,1999,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,97.0,2510.0,1.0,2070.0,2163.0,52.0,2.0,...,47.0,9064.0,0.0,5453.0,14.0,190.0,330.0,276.0,1438.0,336.0


(1378, 45)

Unnamed: 0,year,county,geo,arson,assault_aggravated,assault_simple,bribery,burglary,credit_card_machine_fraud,drug_equipment,...,shoplifting,sodomy,stolen_property,theft_from_building,theft_from_vehicle,theft_from_vending_machine,vehicle_part_theft,vehicle_theft,weapon_law,wire_fraud
0,1997,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,86.0,602.0,1954.0,2.0,1931.0,44.0,725.0,...,1323.0,3.0,245.0,1232.0,1733.0,53.0,1293.0,1317.0,274.0,0.0
1,1998,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,97.0,735.0,2202.0,1.0,2444.0,120.0,628.0,...,1813.0,3.0,272.0,1215.0,2203.0,38.0,1397.0,1575.0,334.0,1.0
2,1999,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,97.0,584.0,1868.0,1.0,2070.0,104.0,711.0,...,1641.0,2.0,276.0,1190.0,2115.0,57.0,1480.0,1438.0,336.0,2.0


### Calculate rates and separate them

In [9]:
def add_rates(df, population=None, index=None):
    """Convert the count columns of `df` into rates per 100,000 residents.

    Parameters
    ----------
    df : pd.DataFrame
        Key columns plus one count column per offense.
    population : pd.Series, optional
        Population per row, matched to `df` by index label. Defaults to
        `df_base['pop']` from the notebook namespace; in that case every
        column that also exists in `df_base` is left out of the conversion.
    index : list of str, optional
        Key columns copied unchanged into the result. Defaults to the
        notebook-level `INDEX`.

    Returns
    -------
    pd.DataFrame
        The key columns followed by one `<column>_rate` column per count
        column, with the same rows as `df`. `df` itself is not modified.
    """
    keys = INDEX if index is None else list(index)
    skip = set(keys)
    if population is None:
        population = df_base['pop']
        skip.update(df_base.columns)
    count_cols = [c for c in df.columns if c not in skip]
    # Match the population to df's rows explicitly so that labels present only
    # in `population` cannot add rows to the result.
    population = population.reindex(df.index)
    rates = (
        df[count_cols]
        .div(population, axis=0)
        .mul(100_000)
        .add_suffix('_rate')
    )
    return pd.concat([df[keys], rates], axis=1)

# Build both rate tables first: add_rates() reads the 'pop' column of df_base
df_cat_rate, df_name_rate = (add_rates(frame) for frame in (df_cat, df_name))
# The population column is no longer needed once the rates exist
df_base = df_base.drop(columns=['pop'])

In [10]:
# Split the base table into a per-capita view and a raw-count view
rate_cols = [
    'cr_rate', 'age_avg',
    'quarter_mode', 'month_mode', 'day_of_week_mode', 'hour_mode',
    'against_person_rate', 'against_property_rate', 'against_society_rate',
]
count_cols = ['cr_count', 'against_person', 'against_property', 'against_society', 'not_a_crime']

df_base_rate = df_base[INDEX + rate_cols]
df_base_count = df_base[INDEX + count_cols]

In [11]:
# Inspect the base table of rates, average age and mode columns
df_base_rate

Unnamed: 0,year,county,geo,cr_rate,age_avg,quarter_mode,month_mode,day_of_week_mode,hour_mode,against_person_rate,against_property_rate,against_society_rate
0,1997,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,6896.168295,24.582071,1.0,3.0,0.0,17.0,915.702479,5339.143501,641.322314
1,1998,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,7775.016351,24.822389,2.0,5.0,1.0,17.0,1004.868832,6123.101519,646.464646
2,1999,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,6960.983257,25.124482,3.0,8.0,0.0,18.0,816.753868,5436.113355,707.271699
3,2000,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,6508.611621,25.131533,1.0,1.0,0.0,17.0,741.185100,5057.230748,710.195773
4,2001,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,6347.634243,24.902283,4.0,10.0,0.0,17.0,745.045779,4873.957061,728.631403
...,...,...,...,...,...,...,...,...,...,...,...,...
1373,2010,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,500.357398,23.000000,3.0,9.0,0.0,3.0,357.398142,71.479628,71.479628
1374,2011,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,434.782609,28.000000,1.0,2.0,2.0,16.0,289.855072,0.000000,144.927536
1375,2012,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,796.524258,35.363636,1.0,1.0,3.0,11.0,434.467777,144.822592,217.233888
1376,2013,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,642.857143,27.666667,2.0,4.0,5.0,20.0,71.428571,285.714286,285.714286


In [12]:
# Inspect the base table of raw counts
df_base_count

Unnamed: 0,year,county,geo,cr_count,against_person,against_property,against_society,not_a_crime
0,1997,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,22947,3047.0,17766.0,2134.0,0.0
1,1998,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,26748,3457.0,21065.0,2224.0,2.0
2,1999,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,24733,2902.0,19315.0,2513.0,3.0
3,2000,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,22893,2607.0,17788.0,2498.0,0.0
4,2001,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,22816,2678.0,17519.0,2619.0,0.0
...,...,...,...,...,...,...,...,...
1373,2010,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,7,5.0,1.0,1.0,0.0
1374,2011,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,6,4.0,0.0,2.0,0.0
1375,2012,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,11,6.0,2.0,3.0,0.0
1376,2013,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,9,1.0,4.0,4.0,0.0


In [13]:
# Inspect the counts per offense category
df_cat

Unnamed: 0,year,county,geo,arson,assault,bribery,burglary,drug,embezzlement,extortion,...,kidnapping,larceny_theft,porn,property_damage,prostitution,robbery,sex_offense,stolen_property,vehicle_theft,weapon_law
0,1997,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,86.0,2672.0,2.0,1931.0,1845.0,26.0,5.0,...,50.0,8023.0,1.0,5467.0,14.0,189.0,316.0,245.0,1317.0,274.0
1,1998,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,97.0,3019.0,1.0,2444.0,1875.0,29.0,3.0,...,54.0,9258.0,0.0,6301.0,15.0,273.0,377.0,272.0,1575.0,334.0
2,1999,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,97.0,2510.0,1.0,2070.0,2163.0,52.0,2.0,...,47.0,9064.0,0.0,5453.0,14.0,190.0,330.0,276.0,1438.0,336.0
3,2000,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,82.0,2291.0,1.0,1752.0,2178.0,29.0,5.0,...,43.0,7893.0,2.0,5426.0,10.0,182.0,264.0,251.0,1451.0,308.0
4,2001,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,122.0,2344.0,0.0,1684.0,2333.0,20.0,4.0,...,58.0,7358.0,2.0,5468.0,33.0,195.0,264.0,317.0,1486.0,251.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1373,2010,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.0,5.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1374,2011,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.0,3.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
1375,2012,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.0,6.0,0.0,0.0,3.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1376,2013,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.0,1.0,0.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
# Inspect the rates per offense category (per 100,000 residents)
df_cat_rate

Unnamed: 0,year,county,geo,arson_rate,assault_rate,bribery_rate,burglary_rate,drug_rate,embezzlement_rate,extortion_rate,...,kidnapping_rate,larceny_theft_rate,porn_rate,property_damage_rate,prostitution_rate,robbery_rate,sex_offense_rate,stolen_property_rate,vehicle_theft_rate,weapon_law_rate
0,1997,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,25.845229,803.005259,0.601052,580.315552,554.470323,7.813674,1.502630,...,15.026296,2411.119459,0.300526,1642.975207,4.207363,56.799399,94.966191,73.628850,395.792637,82.344102
1,1998,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,28.195625,877.552503,0.290677,710.413487,545.018531,8.429620,0.872030,...,15.696534,2691.083497,0.000000,1831.552939,4.360148,79.354698,109.585059,79.064022,457.815566,97.085968
2,1999,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,27.300181,706.427363,0.281445,582.591491,608.765891,14.635149,0.562890,...,13.227923,2551.018972,0.000000,1534.720483,3.940232,53.474581,92.876904,77.678865,404.718147,94.565575
3,2000,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,23.313072,651.344482,0.284306,498.103681,619.217932,8.244867,1.421529,...,12.225147,2244.025315,0.568612,1542.643020,2.843058,51.743647,75.056719,71.360744,412.527649,87.566172
4,2001,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,33.941593,652.123714,0.000000,468.505262,649.063407,5.564196,1.112839,...,16.136167,2047.067530,0.556420,1521.251054,9.180923,54.250906,73.447381,88.192499,413.419727,69.830654
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1373,2010,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.000000,357.398142,0.000000,71.479628,71.479628,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1374,2011,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.000000,217.391304,0.000000,0.000000,72.463768,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,72.463768,0.000000,0.000000,72.463768
1375,2012,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.000000,434.467777,0.000000,0.000000,217.233888,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,72.411296,0.000000
1376,2013,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.000000,71.428571,0.000000,0.000000,285.714286,0.000000,0.000000,...,0.000000,285.714286,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [15]:
# Inspect the counts per offense name
df_name

Unnamed: 0,year,county,geo,arson,assault_aggravated,assault_simple,bribery,burglary,credit_card_machine_fraud,drug_equipment,...,shoplifting,sodomy,stolen_property,theft_from_building,theft_from_vehicle,theft_from_vending_machine,vehicle_part_theft,vehicle_theft,weapon_law,wire_fraud
0,1997,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,86.0,602.0,1954.0,2.0,1931.0,44.0,725.0,...,1323.0,3.0,245.0,1232.0,1733.0,53.0,1293.0,1317.0,274.0,0.0
1,1998,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,97.0,735.0,2202.0,1.0,2444.0,120.0,628.0,...,1813.0,3.0,272.0,1215.0,2203.0,38.0,1397.0,1575.0,334.0,1.0
2,1999,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,97.0,584.0,1868.0,1.0,2070.0,104.0,711.0,...,1641.0,2.0,276.0,1190.0,2115.0,57.0,1480.0,1438.0,336.0,2.0
3,2000,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,82.0,618.0,1591.0,1.0,1752.0,123.0,901.0,...,1264.0,0.0,251.0,919.0,1738.0,41.0,1387.0,1451.0,308.0,1.0
4,2001,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,122.0,634.0,1616.0,0.0,1684.0,94.0,1079.0,...,1030.0,3.0,317.0,654.0,1725.0,29.0,1714.0,1486.0,251.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1373,2010,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.0,0.0,4.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1374,2011,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.0,0.0,3.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1375,2012,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.0,3.0,3.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1376,2013,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0


In [16]:
# Inspect the rates per offense name (per 100,000 residents)
df_name_rate

Unnamed: 0,year,county,geo,arson_rate,assault_aggravated_rate,assault_simple_rate,bribery_rate,burglary_rate,credit_card_machine_fraud_rate,drug_equipment_rate,...,shoplifting_rate,sodomy_rate,stolen_property_rate,theft_from_building_rate,theft_from_vehicle_rate,theft_from_vending_machine_rate,vehicle_part_theft_rate,vehicle_theft_rate,weapon_law_rate,wire_fraud_rate
0,1997,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,25.845229,180.916604,587.227648,0.601052,580.315552,13.223140,217.881292,...,397.595793,0.901578,73.628850,370.247934,520.811420,15.927874,388.580015,395.792637,82.344102,0.000000
1,1998,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,28.195625,213.647264,640.069762,0.290677,710.413487,34.881186,182.544873,...,526.996585,0.872030,79.064022,353.172008,640.360439,11.045709,406.075140,457.815566,97.085968,0.290677
2,1999,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,27.300181,164.363976,525.739568,0.281445,582.591491,29.270297,200.107512,...,461.851515,0.562890,77.678865,334.919746,595.256523,16.042374,416.538844,404.718147,94.565575,0.562890
3,2000,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,23.313072,175.700956,452.330454,0.284306,498.103681,34.969608,256.159484,...,359.362473,0.000000,71.360744,261.276988,494.123400,11.656536,394.332080,412.527649,87.566172,0.284306
4,2001,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,33.941593,176.384998,449.586998,0.000000,468.505262,26.151719,300.188348,...,286.556069,0.834629,88.192499,181.949193,479.911863,8.068083,476.851556,413.419727,69.830654,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1373,2010,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.000000,0.000000,285.918513,0.000000,71.479628,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
1374,2011,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.000000,0.000000,217.391304,0.000000,0.000000,0.000000,72.463768,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,72.463768,0.000000
1375,2012,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.000000,217.233888,217.233888,0.000000,0.000000,0.000000,72.411296,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,72.411296,0.000000,0.000000
1376,2013,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.000000,0.000000,71.428571,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,71.428571,0.000000,0.000000,71.428571,0.000000,0.000000,0.000000


In [17]:
# Inspect the combined base table (counts and rates; 'pop' has been dropped)
df_base

Unnamed: 0,year,county,geo,cr_count,cr_rate,age_avg,quarter_mode,month_mode,day_of_week_mode,hour_mode,against_person,against_property,against_society,not_a_crime,against_person_rate,against_property_rate,against_society_rate
0,1997,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,22947,6896.168295,24.582071,1.0,3.0,0.0,17.0,3047.0,17766.0,2134.0,0.0,915.702479,5339.143501,641.322314
1,1998,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,26748,7775.016351,24.822389,2.0,5.0,1.0,17.0,3457.0,21065.0,2224.0,2.0,1004.868832,6123.101519,646.464646
2,1999,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,24733,6960.983257,25.124482,3.0,8.0,0.0,18.0,2902.0,19315.0,2513.0,3.0,816.753868,5436.113355,707.271699
3,2000,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,22893,6508.611621,25.131533,1.0,1.0,0.0,17.0,2607.0,17788.0,2498.0,0.0,741.185100,5057.230748,710.195773
4,2001,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,22816,6347.634243,24.902283,4.0,10.0,0.0,17.0,2678.0,17519.0,2619.0,0.0,745.045779,4873.957061,728.631403
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1373,2010,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,7,500.357398,23.000000,3.0,9.0,0.0,3.0,5.0,1.0,1.0,0.0,357.398142,71.479628,71.479628
1374,2011,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,6,434.782609,28.000000,1.0,2.0,2.0,16.0,4.0,0.0,2.0,0.0,289.855072,0.000000,144.927536
1375,2012,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,11,796.524258,35.363636,1.0,1.0,3.0,11.0,6.0,2.0,3.0,0.0,434.467777,144.822592,217.233888
1376,2013,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,9,642.857143,27.666667,2.0,4.0,5.0,20.0,1.0,4.0,4.0,0.0,71.428571,285.714286,285.714286


### Output

In [18]:
# Write every table to its CSV file, without the positional index column
for frame, path in (
    (df_base_count, 'output/crime_agg_base_count.csv'),
    (df_base_rate, 'output/crime_agg_base_rate.csv'),
    (df_cat, "output/crime_agg_category.csv"),
    (df_name, "output/crime_agg_name.csv"),
    (df_cat_rate, "output/crime_agg_category_rate.csv"),
    (df_name_rate, "output/crime_agg_name_rate.csv"),
):
    frame.to_csv(path, index=False)

In [19]:
# Show the category counts again, as written to output/crime_agg_category.csv
df_cat

Unnamed: 0,year,county,geo,arson,assault,bribery,burglary,drug,embezzlement,extortion,...,kidnapping,larceny_theft,porn,property_damage,prostitution,robbery,sex_offense,stolen_property,vehicle_theft,weapon_law
0,1997,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,86.0,2672.0,2.0,1931.0,1845.0,26.0,5.0,...,50.0,8023.0,1.0,5467.0,14.0,189.0,316.0,245.0,1317.0,274.0
1,1998,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,97.0,3019.0,1.0,2444.0,1875.0,29.0,3.0,...,54.0,9258.0,0.0,6301.0,15.0,273.0,377.0,272.0,1575.0,334.0
2,1999,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,97.0,2510.0,1.0,2070.0,2163.0,52.0,2.0,...,47.0,9064.0,0.0,5453.0,14.0,190.0,330.0,276.0,1438.0,336.0
3,2000,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,82.0,2291.0,1.0,1752.0,2178.0,29.0,5.0,...,43.0,7893.0,2.0,5426.0,10.0,182.0,264.0,251.0,1451.0,308.0
4,2001,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,122.0,2344.0,0.0,1684.0,2333.0,20.0,4.0,...,58.0,7358.0,2.0,5468.0,33.0,195.0,264.0,317.0,1486.0,251.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1373,2010,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.0,5.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1374,2011,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.0,3.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
1375,2012,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.0,6.0,0.0,0.0,3.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1376,2013,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.0,1.0,0.0,0.0,4.0,0.0,0.0,...,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
# Show the offense-name counts again, as written to output/crime_agg_name.csv
df_name

Unnamed: 0,year,county,geo,arson,assault_aggravated,assault_simple,bribery,burglary,credit_card_machine_fraud,drug_equipment,...,shoplifting,sodomy,stolen_property,theft_from_building,theft_from_vehicle,theft_from_vending_machine,vehicle_part_theft,vehicle_theft,weapon_law,wire_fraud
0,1997,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,86.0,602.0,1954.0,2.0,1931.0,44.0,725.0,...,1323.0,3.0,245.0,1232.0,1733.0,53.0,1293.0,1317.0,274.0,0.0
1,1998,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,97.0,735.0,2202.0,1.0,2444.0,120.0,628.0,...,1813.0,3.0,272.0,1215.0,2203.0,38.0,1397.0,1575.0,334.0,1.0
2,1999,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,97.0,584.0,1868.0,1.0,2070.0,104.0,711.0,...,1641.0,2.0,276.0,1190.0,2115.0,57.0,1480.0,1438.0,336.0,2.0
3,2000,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,82.0,618.0,1591.0,1.0,1752.0,123.0,901.0,...,1264.0,0.0,251.0,919.0,1738.0,41.0,1387.0,1451.0,308.0,1.0
4,2001,ADAMS,MULTIPOLYGON (((-103.70574149517748 39.9999110...,122.0,634.0,1616.0,0.0,1684.0,94.0,1079.0,...,1030.0,3.0,317.0,654.0,1725.0,29.0,1714.0,1486.0,251.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1373,2010,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.0,0.0,4.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1374,2011,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.0,0.0,3.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1375,2012,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.0,3.0,3.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
1376,2013,KIOWA,MULTIPOLYGON (((-102.11017149074574 38.2683561...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
