In [None]:
import pandas as pd
import numpy as np
import csv

from sqlalchemy import create_engine

import config
# Database password taken from the local `config` module (the same value is
# used when building the Postgres connection URL further down).
password=config.password

In [None]:
# Read the csv file, keeping only the columns used by this notebook and
# parsing 'Date' into datetimes.
#
# The code-like columns are forced to strings through `dtype` rather than
# `converters`: a converter is handed the raw field text, so a blank cell comes
# back as '' instead of NaN. Those blanks would then be invisible to the
# isnull() audit below and would be stored as empty strings in the database.
# `dtype=str` keeps the values as text while still marking blanks as NaN.
crime_df = pd.read_csv(
    "assets/data/cdata_raw.csv",
    encoding='utf-8',
    parse_dates=['Date'],
    usecols=['ID', 'Date', 'Primary Type', 'Description', 'Location Description',
             'Arrest', 'District', 'Year', 'Latitude', 'Longitude',
             'Historical Wards 2003-2015', 'Zip Codes', 'Police Districts'],
    dtype={'District': str, 'Historical Wards 2003-2015': str,
           'Zip Codes': str, 'Police Districts': str},
)

In [None]:
# Swap the space-separated CSV headers for underscore names (and shorten the
# wards / districts labels) so they are easier to reference later on.
column_renames = {
    "Primary Type": "Primary_Type",
    "Location Description": "Location_Description",
    "Historical Wards 2003-2015": "Historical_Wards",
    "Zip Codes": "Zip_Codes",
    "Police Districts": "Police_Districts",
}
crime_df.rename(columns=column_renames, inplace=True)

In [None]:
# Preview the first rows to confirm the load and the renamed columns.
crime_df.head()

In [None]:
# Number of rows loaded from the CSV
crime_df.shape[0]

In [None]:
# Report the earliest and latest timestamps present in the data
incident_dates = crime_df['Date']
least_recent_date, recent_date = incident_dates.min(), incident_dates.max()
print(f'Start date: {least_recent_date} and Recent date: {recent_date}')

In [None]:
# Bounds of the date window used to filter the incidents below
start_date, end_date = '2010-01-01 00:00:00', '2019-09-10 23:55:00'

In [None]:
# Boolean mask selecting incidents inside the date window, both bounds inclusive.
# A strict `>` on the lower bound silently dropped every record stamped exactly
# at start_date (midnight on the first day of the window) while the upper bound
# was already inclusive.
mask = (crime_df['Date'] >= start_date) & (crime_df['Date'] <= end_date)

In [None]:
# Keep only the rows selected by the date-window mask, then preview them
filtered_crime_df = crime_df[mask]
filtered_crime_df.head()

In [None]:
# Inspect the last rows of the date-filtered frame.
filtered_crime_df.tail()

In [None]:
# Number of rows left after the date filter
filtered_crime_df.shape[0]

In [None]:
# Distinct crime categories in the filtered data, followed by how many there are
primary_type_col = filtered_crime_df['Primary_Type']
types = primary_type_col.unique()
print(types, len(types), sep='\n')

In [None]:
# Total number of unique crime types.
# len() counts every distinct value in `types`; np.count_nonzero only counts
# truthy entries, so it is not a distinct-value count (an empty-string label,
# for example, would be skipped).
len(types)

In [None]:
# Count the missing values, restricted to the columns that have at least one
has_missing = filtered_crime_df.isna().any()
null_columns = filtered_crime_df.columns[has_missing]
filtered_crime_df.loc[:, null_columns].isna().sum()

In [None]:
# Discard incidents that lack either coordinate, then report how many rows remain
coordinate_columns = ['Latitude', 'Longitude']
new_crime_df = filtered_crime_df.dropna(subset=coordinate_columns)
new_crime_df.shape[0]

In [None]:
# Preview the rows that remain after dropping records without coordinates.
new_crime_df.head()

In [None]:
# Connect to Postgres database.
# The password is percent-encoded before being placed in the URL so that
# characters such as '@', ':', '/' or '%' inside it cannot be mistaken for URL
# delimiters when SQLAlchemy parses the connection string. Passwords made only
# of letters, digits and '_.-~' are left unchanged by the encoding.
encoded_password = quote(str(config.password), safe='')
engine = create_engine(f'postgresql://postgres:{encoded_password}@localhost:5432/crime_db')

In [None]:
# Save dataframe into the `chicago` table.
# if_exists='replace' keeps the notebook re-runnable from a fresh kernel: with
# the default ('fail') a second run raises ValueError because the table already
# exists. chunksize bounds how many rows are written per batch instead of
# sending the whole frame at once.
# NOTE(review): the DataFrame index is still written as an `index` column
# (pandas default); pass index=False if that column is not wanted.
new_crime_df.to_sql('chicago', engine, if_exists='replace', chunksize=10_000)

In [None]:
# Make "ID" the primary key of the loaded table.
# The SQL is wrapped in text() because passing a plain string to
# Connection.execute() is deprecated in SQLAlchemy 1.4 and rejected in 2.0.
# engine.begin() opens a transaction and commits it when the block exits, so
# the DDL is persisted without relying on autocommit.
with engine.begin() as con:
    con.execute(text('ALTER TABLE chicago ADD PRIMARY KEY ("ID");'))