In [None]:
import os
from datetime import date, timedelta

import pandas as pd
from dotenv import load_dotenv, find_dotenv
from sqlalchemy import create_engine
from sqlalchemy import inspect
from sqlalchemy import Table, Column, Integer, String, Float, DateTime, Date, MetaData
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy_utils import database_exists, create_database

### Extract CSVs into DataFrames

In [None]:
# Location of the 2019 NYPD complaint extract, relative to the working directory.
crime_file = "./Resources/2019_NYPD_Complaint_Data.csv"

# Read the raw complaint records into a DataFrame.
crime_df = pd.read_csv(filepath_or_buffer=crime_file)

# Preview the first five rows.
crime_df.head(5)

### Transform 2019 Crime DataFrame

In [None]:
# Source columns to keep, mapped to the readable names used from here on.
# Latitude/Longitude keep their original names.
crime_col_names = {
    "CMPLNT_NUM": "Complaint_ID",
    "BORO_NM": "Boro_Name",
    "CMPLNT_FR_DT": "Start_Date",
    "CMPLNT_TO_DT": "End_Date",
    "CRM_ATPT_CPTD_CD": "Complaint_Status",
    "KY_CD": "Complaint_Code",
    "LAW_CAT_CD": "Complaint_Cat",
    "OFNS_DESC": "Complaint_Desc",
    "SUSP_AGE_GROUP": "Suspect_Age",
    "SUSP_RACE": "Suspect_Race",
    "SUSP_SEX": "Suspect_Gender",
    "VIC_AGE_GROUP": "Victim_Age",
    "VIC_RACE": "Victim_Race",
    "VIC_SEX": "Victim_Gender",
    "Latitude": "Latitude",
    "Longitude": "Longitude",
}
crime_cols = list(crime_col_names)

# Select and rename the columns, then parse both date columns.
# errors="coerce" turns values that do not match MM/DD/YYYY into NaT instead of raising.
date_format = "%m/%d/%Y"
crime_transformed = (
    crime_df[crime_cols]
    .rename(columns=crime_col_names)
    .assign(
        Start_Date=lambda df: pd.to_datetime(df["Start_Date"], format=date_format, errors="coerce"),
        End_Date=lambda df: pd.to_datetime(df["End_Date"], format=date_format, errors="coerce"),
    )
)

# Keep complaints that both start and end in 2019 (rows with a missing/unparseable
# date fail the comparison and are dropped) ...
in_2019 = (
    (crime_transformed["Start_Date"].dt.year == 2019)
    & (crime_transformed["End_Date"].dt.year == 2019)
)
# ... and whose start date falls in July (7) or December (12).
in_target_month = crime_transformed["Start_Date"].dt.month.isin([7, 12])

# .copy() makes the filtered frame independent of crime_transformed, so adding
# columns below does not trigger SettingWithCopyWarning.
crime_transformed_filter = crime_transformed[in_2019 & in_target_month].copy()

# Rows that still contain at least one NaN, kept so the user can decide whether to
# drop them. Only a cell's last expression is rendered, so inspect this frame in
# its own cell if needed.
crime_transformed_filter_na = crime_transformed_filter[crime_transformed_filter.isna().any(axis=1)]

# Add coordinates rounded to 3, 2 and 1 decimal places (Lat3/Long3 ... Lat1/Long1).
cols = ["Latitude", "Longitude"]
for digits in (3, 2, 1):
    crime_transformed_filter[[f"Lat{digits}", f"Long{digits}"]] = crime_transformed_filter[cols].round(digits)

crime_transformed_filter.head()

In [None]:
# Location of the US zip-code latitude/longitude lookup, relative to the working directory.
lat_long = "./Resources/us_zip_lat_long.csv"

# Read the lookup into a DataFrame.
lat_long_df = pd.read_csv(filepath_or_buffer=lat_long)

# Preview the first five rows.
lat_long_df.head(5)

### Transform US Latitude/Longitude DataFrame

In [None]:
# Coordinate columns are named here so this cell does not depend on a variable
# defined in another cell.
coord_cols = ["Latitude", "Longitude"]

# Add coordinates rounded to 3, 2 and 1 decimal places (Lat3/Long3 ... Lat1/Long1).
for digits in (3, 2, 1):
    lat_long_df[[f"Lat{digits}", f"Long{digits}"]] = lat_long_df[coord_cols].round(digits)

# Drop the two unused columns. errors="ignore" keeps the cell re-runnable:
# a second run no longer raises KeyError because the columns are already gone.
lat_long_df = lat_long_df.drop(columns=["Timezone", "Daylight savings time flag"], errors="ignore")

# Keep only New York rows; .copy() makes the result independent of lat_long_df.
lat_long_NY_df = lat_long_df[lat_long_df["State"].eq("NY")].copy()

lat_long_NY_df.head()

### Create database connection

In [None]:
# Connect to postgres and make sure the nypd_crimes database exists, using sqlalchemy.
load_dotenv()
my_env_var = os.getenv('DB_CONNECTION_STRING')
if not my_env_var:
    # Fail early with a clear message instead of passing None to create_engine.
    raise RuntimeError("DB_CONNECTION_STRING is not set; define it in the environment or a .env file")

# DB_CONNECTION_STRING is the server-level URL; the database name is appended to it.
db_url = f'{my_env_var}/nypd_crimes'

# Create the database only when it is missing, so the cell can be re-run
# (an unconditional CREATE DATABASE fails once the database exists).
if not database_exists(db_url):
    create_database(db_url)

engine = create_engine(db_url)

# Connection bound to nypd_crimes (not the server default database), so later
# queries through `connection` can see the tables loaded below.
connection = engine.connect()

In [None]:
# Declarative base class. declarative_base is a factory and must be called;
# binding the bare function left `Base` unusable as a base class.
Base = declarative_base()

# Registry that collects the Core Table definitions for this notebook.
meta = MetaData()

# Schema for the cleaned complaint data; column names mirror the columns of
# crime_transformed_filter, including the rounded Lat*/Long* coordinates.
ny_crimes = Table('crimes', meta,
    Column("Complaint_ID", Integer, primary_key=True),
    Column("Boro_Name", String(255)),
    Column("Start_Date", DateTime),
    Column("End_Date", DateTime),
    Column("Complaint_Status", String(255)),
    Column("Complaint_Code", Integer),
    Column("Complaint_Cat", String(255)),
    Column("Complaint_Desc", String(255)),
    Column("Suspect_Age", String(255)),
    Column("Suspect_Race", String(255)),
    Column("Suspect_Gender", String(255)),
    Column("Victim_Age", String(255)),
    Column("Victim_Race", String(255)),
    Column("Victim_Gender",String(255)),
    Column("Latitude", Float),
    Column("Longitude", Float),
    Column("Lat3", Float),
    Column("Long3", Float),
    Column("Lat2", Float),
    Column("Long2", Float),
    Column("Lat1", Float),
    Column("Long1", Float),)

### Load DataFrames into database

In [None]:
# Recreate `crimes` from the declared ny_crimes schema, then append the rows.
# if_exists='replace' would drop the table and rebuild it from the DataFrame dtypes,
# discarding the declared column types and the Complaint_ID primary key.
# Dropping first keeps the cell re-runnable without duplicating rows.
# NOTE(review): the insert will fail if Complaint_ID contains duplicates - confirm the data is unique.
ny_crimes.drop(engine, checkfirst=True)
ny_crimes.create(engine)
crime_transformed_filter.to_sql(name='crimes', con=engine, if_exists='append', index=False)

In [None]:
# Columns of the zip-code lookup table: identifying fields first, then the raw
# coordinates, then the coordinates rounded to 3, 2 and 1 decimal places.
lat_long_columns = [
    Column("Zip", Integer, primary_key=True),
    Column("City", String(255)),
    Column("State", String(255)),
    Column("Latitude", Float),
    Column("Longitude", Float),
    Column("geopoint", String(255)),
    Column("Lat3", Float),
    Column("Long3", Float),
    Column("Lat2", Float),
    Column("Long2", Float),
    Column("Lat1", Float),
    Column("Long1", Float),
]

# Register the table on the shared `meta` registry.
ny_lat_long = Table('lat_long', meta, *lat_long_columns)

# Create every table registered on `meta` in the target database.
meta.create_all(engine)

In [None]:
# Confirm tables
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the inspector API returns the same list of table names.
inspect(engine).get_table_names()

In [None]:
# Recreate `lat_long` from the declared ny_lat_long schema, then append the rows.
# if_exists='replace' would drop the table and rebuild it from the DataFrame dtypes,
# discarding the declared column types and the Zip primary key.
# Dropping first keeps the cell re-runnable without duplicating rows.
# NOTE(review): the insert will fail if Zip contains duplicates - confirm the data is unique.
ny_lat_long.drop(engine, checkfirst=True)
ny_lat_long.create(engine)
lat_long_NY_df.to_sql(name='lat_long', con=engine, if_exists='append', index=False)

In [None]:
# Join crimes to the zip-code lookup using the Table objects defined earlier
# (ny_crimes / ny_lat_long); the bare names `crimes`, `lat_long` and `select`
# were never defined, so the original cell raised NameError.
# NOTE(review): this joins on exact equality of the unrounded Latitude floats, which
# may match very few rows; the rounded Lat*/Long* columns were presumably added
# to serve as the join key - confirm which columns are intended.
joined_data = ny_crimes.join(ny_lat_long, ny_crimes.c.Latitude == ny_lat_long.c.Latitude)

# SELECT the crimes columns FROM crimes JOIN lat_long.
stmt = ny_crimes.select().select_from(joined_data)

# Use a fresh connection from the nypd_crimes engine and release it once the rows are fetched.
with engine.connect() as conn:
    result = conn.execute(stmt)
    rows = result.fetchall()

rows