In [54]:
import json
import pandas as pd
import numpy as np
import requests
from sodapy import Socrata
from api_keys import MyAppToken

import dtale

from datetime import datetime, date, timedelta
import time

# Flip to True to print the informational messages.
debug = False

# Zero-padded day of the month as a string; used later to filter the dataframe.
today = date.today()
day = f"{today:%d}"

# Today's date (MM-DD-YYYY) and the CSV file name derived from it.
todaysDate = time.strftime("%m-%d-%Y")
todaysFile = f"{todaysDate}.csv"

if debug:
    print("Day =", day)

In [55]:
# Socrata dataset identifier passed to client.get() by GetData.
crime_data = "ijzp-q8t2"

# Shared Socrata client for the City of Chicago data portal, authenticated
# with the app token imported from api_keys.
client = Socrata(domain="data.cityofchicago.org", app_token=MyAppToken)

In [56]:
def GetData(start_date, end_date):
    """Fetch crime records for a date window and return a tidy DataFrame.

    Queries the dataset named by the module-level ``crime_data`` through the
    module-level Socrata ``client``, then normalizes the result.

    Parameters
    ----------
    start_date, end_date : str
        Bounds inserted verbatim into ``Date BETWEEN '<start>' AND '<end>'``.
        NOTE(review): a date-only end bound presumably compares as midnight,
        which would leave out most of ``end_date`` itself -- confirm against
        the SoQL documentation.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: city, primary_type, description, date
        (``YYYY-MM-DD`` string), day, month, year, location, latitude,
        longitude, domestic. When the query returns no rows the frame is
        empty but still has these columns.

    Notes
    -----
    The shared ``client`` is deliberately NOT closed here: the notebook calls
    this function more than once, and closing a shared client after the first
    call leaves later calls depending on a closed session. Close it once,
    after the last fetch, if an explicit close is wanted.
    """
    max_rows = 2000000
    city = "Chicago"
    output_columns = [
        "city",
        "primary_type",
        "description",
        "date",
        "day",
        "month",
        "year",
        "location",
        "latitude",
        "longitude",
        "domestic",
    ]
    where_clause = "Date BETWEEN '" + start_date + "' AND '" + end_date + "'"

    records = client.get(
        crime_data,
        where=where_clause,
        limit=max_rows,
        exclude_system_fields=True,
    )
    df = pd.DataFrame(records)

    # An empty response yields a frame with no columns at all, so every
    # df["..."] lookup below would raise KeyError; return the expected schema.
    if df.empty:
        return pd.DataFrame(columns=output_columns)

    df["city"] = city

    # Parse the timestamp once and derive all date parts from the same values.
    parsed = pd.to_datetime(df["date"])
    df["day"] = parsed.dt.day
    df["month"] = parsed.dt.month
    df["year"] = parsed.dt.year
    df["date"] = parsed.dt.strftime("%Y-%m-%d")

    # str.title() already lower-cases the non-initial letters, so a separate
    # str.lower() pass is unnecessary.
    df["primary_type"] = df["primary_type"].str.title()
    df["description"] = df["description"].str.title()
    df["location"] = df["location_description"].str.title()

    # reindex (rather than df[[...]]) so that a pass-through column missing
    # from the response comes back as all-NaN instead of raising KeyError,
    # and so the caller gets an independent frame rather than a slice of df.
    return df.reindex(columns=output_columns)

In [75]:
# Pull the same January 1 - April 7 window for 2020 and for 2019.
df2020, df2019 = (
    GetData('2020-01-01', '2020-04-07'),
    GetData('2019-01-01', '2019-04-07'),
)

In [77]:
# Show the first five rows of the 2019 frame.
df2019.head()

Unnamed: 0,city,primary_type,description,date,day,month,year,location,latitude,longitude,domestic
0,Chicago,Sex Offense,Aggravated Criminal Sexual Abuse,2019-01-01,1,1,2019,Residence,41.907072136,-87.731331357,False
1,Chicago,Offense Involving Children,Aggravated Criminal Sexual Abuse By Family Member,2019-01-01,1,1,2019,Apartment,41.853079711,-87.676856787,False
2,Chicago,Offense Involving Children,Child Abuse,2019-01-01,1,1,2019,Residence,41.875288057,-87.723999683,True
3,Chicago,Deceptive Practice,Financial Identity Theft Over $ 300,2019-01-01,1,1,2019,Apartment,41.929013312,-87.799805351,False
4,Chicago,Sex Offense,Agg Criminal Sexual Abuse,2019-01-01,1,1,2019,Apartment,41.896591951,-87.692793096,True


In [78]:
# Report (rows, columns) for each year's frame, 2019 first, one per line.
print(df2019.shape, df2020.shape, sep="\n")

(62515, 11)
(55650, 11)


In [79]:
# Stack the 2020 rows on top of the 2019 rows. pd.concat replaces
# DataFrame.append, which was deprecated in pandas 1.4 and removed in 2.0.
# As with append's default, each frame keeps its own row labels, so labels
# repeat across the two years.
final = pd.concat([df2020, df2019])
print(final.shape)

# Order chronologically; "date" is a YYYY-MM-DD string, so a plain string
# sort is also a date sort.
final = final.sort_values(["date"])

(118165, 11)


In [80]:
# Show five randomly chosen rows of the combined frame; no random_state is
# passed, so the rows shown differ from run to run.
final.sample(5)

Unnamed: 0,city,primary_type,description,date,day,month,year,location,latitude,longitude,domestic
17902,Chicago,Theft,$500 And Under,2019-01-28,28,1,2019,Cha Apartment,41.864278508,-87.659707941,False
8222,Chicago,Criminal Damage,To Vehicle,2020-01-13,13,1,2020,Street,41.731915135,-87.637421943,False
8865,Chicago,Narcotics,Solicit Narcotics On Publicway,2020-01-14,14,1,2020,Street,41.874936466,-87.748211202,False
38826,Chicago,Homicide,First Degree Murder,2020-03-03,3,3,2020,House,41.782573361,-87.669143476,False
21535,Chicago,Assault,Aggravated Po: Other Dang Weap,2020-02-03,3,2,2020,Street,41.773109884,-87.586294711,False


In [81]:
# Boolean row masks: incidents flagged as domestic, and incidents flagged as not.
domestic_crimes = final['domestic'].eq(True)
non_domestic_crimes = final['domestic'].eq(False)

# Split the combined frame with those masks.
domestic_crimes_df = final.loc[domestic_crimes]
non_domestic_crimes_df = final.loc[non_domestic_crimes]

domestic_crimes_df.head()

Unnamed: 0,city,primary_type,description,date,day,month,year,location,latitude,longitude,domestic
669,Chicago,Assault,Aggravated - Handgun,2019-01-01,1,1,2019,Street,41.862528811,-87.703929853,True
677,Chicago,Battery,Domestic Battery Simple,2019-01-01,1,1,2019,Street,41.785468964,-87.724176235,True
683,Chicago,Battery,Domestic Battery Simple,2019-01-01,1,1,2019,Residence,41.881955638,-87.699986249,True
686,Chicago,Other Offense,Telephone Threat,2019-01-01,1,1,2019,Apartment,41.770565763,-87.585004532,True
692,Chicago,Theft,$500 And Under,2019-01-01,1,1,2019,Apartment,41.767869148,-87.587271934,True


In [82]:
# Build dictionaries for MongoDB.
#
# to_dict() returns {column: {row_label: value}}. `final` stacks two frames
# whose row labels each start at 0, so labels repeat, and a repeated label
# silently overwrites the earlier row inside each inner dict. Renumbering the
# rows first gives every row a unique key, so none are lost.
#
# NOTE(review): MongoDB inserts usually take a list of per-row documents
# (to_dict("records")); the column-oriented shape is kept here so any later
# cell sees the same structure as before -- confirm what the insert expects.
non_domestic_chicago_data = non_domestic_crimes_df.reset_index(drop=True).to_dict()
domestic_chicago_data = domestic_crimes_df.reset_index(drop=True).to_dict()

In [83]:
# Show the first five rows of the combined frame, which an earlier cell
# sorted by date.
final.head()

Unnamed: 0,city,primary_type,description,date,day,month,year,location,latitude,longitude,domestic
666,Chicago,Weapons Violation,Unlawful Poss Of Handgun,2019-01-01,1,1,2019,Gas Station,41.793924612,-87.673580917,False
667,Chicago,Criminal Damage,To Vehicle,2019-01-01,1,1,2019,Auto / Boat / Rv Dealership,41.744337567,-87.652359252,False
668,Chicago,Theft,$500 And Under,2019-01-01,1,1,2019,Gas Station,41.8020092,-87.622063199,False
669,Chicago,Assault,Aggravated - Handgun,2019-01-01,1,1,2019,Street,41.862528811,-87.703929853,True
670,Chicago,Burglary,Forcible Entry,2019-01-01,1,1,2019,Residence,41.794352937,-87.734224585,False
