In [1]:
# Import Dependencies
import csv
import json
from urllib.parse import quote

import numpy as np
import pandas as pd
from sqlalchemy import create_engine

from config import db_password

In [2]:
# Read the cleaned Austin crime report CSV from the local Resources folder
# into crime_df and preview its first five rows.
# low_memory=False is forwarded to pandas so the file is not parsed in
# internal chunks.
file_path = "Resources/Austin_Crime_Report_Occurance_Reported_2017_to_2022_cleaned.csv"
crime_df = pd.read_csv(filepath_or_buffer=file_path, low_memory=False)
crime_df.head(n=5)

Unnamed: 0,Incident Number,Highest Offense Description,Highest Offense Code,Family Violence,Occurred Date Time,Occurred Date,Occurred Time,Report Date Time,Report Date,Report Time,...,Census Tract,Clearance Status,Clearance Date,UCR Category,Category Description,X-coordinate,Y-coordinate,Latitude,Longitude,Location
0,2017471291,THEFT,600,N,02/16/2017 06:00:00 PM,02/16/2017,1800,02/16/2017 06:22:00 PM,02/16/2017,1822,...,21.0,N,03/29/2017,23H,Theft,3128234.0,3128234.0,30.274788,-97.698514,"(30.27478825, -97.69851396)"
1,20172171183,THEFT BY SHOPLIFTING,607,N,08/05/2017 06:00:00 PM,08/05/2017,1800,08/05/2017 06:00:00 PM,08/05/2017,1800,...,19.1,C,08/05/2017,23C,Theft,3094135.0,3094135.0,30.257886,-97.807007,"(30.25788603, -97.80700704)"
2,20205047456,THEFT BY SHOPLIFTING,607,N,11/20/2020 01:30:00 PM,11/20/2020,1330,11/20/2020 03:22:00 PM,11/20/2020,1522,...,24.37,N,11/23/2020,23C,Theft,3099306.0,3099306.0,30.162911,-97.793123,"(30.16291061, -97.79312325)"
3,20191441181,AUTO THEFT,700,N,05/23/2019 08:00:00 PM,05/23/2019,2000,05/24/2019 01:43:00 PM,05/24/2019,1343,...,6.0,N,07/02/2019,240,Auto Theft,3112186.0,3112186.0,30.284284,-97.749112,"(30.28428417, -97.74911194)"
4,20175050403,BURGLARY OF VEHICLE,601,N,12/19/2017 10:00:00 PM,12/19/2017,2200,12/20/2017 06:26:00 AM,12/20/2017,626,...,23.13,N,12/21/2017,23F,Theft,3118326.0,3118326.0,30.228133,-97.731168,"(30.22813316, -97.73116767)"


In [3]:
# Check that all rows have been imported to the DataFrame.
# count() reports the number of non-null values per column, so equal counts
# across columns mean no column has missing values.
crime_df.count()

Incident Number                200912
Highest Offense Description    200912
Highest Offense Code           200912
Family Violence                200912
Occurred Date Time             200912
Occurred Date                  200912
Occurred Time                  200912
Report Date Time               200912
Report Date                    200912
Report Time                    200912
Location Type                  200912
Address                        200912
Zip Code                       200912
Council District               200912
APD Sector                     200912
APD District                   200912
PRA                            200912
Census Tract                   200912
Clearance Status               200912
Clearance Date                 200912
UCR Category                   200912
Category Description           200912
X-coordinate                   200912
Y-coordinate                   200912
Latitude                       200912
Longitude                      200912
Location    

In [4]:
# Display the column labels of the freshly loaded DataFrame.
crime_df.columns

Index(['Incident Number', 'Highest Offense Description',
       'Highest Offense Code', 'Family Violence', 'Occurred Date Time',
       'Occurred Date', 'Occurred Time', 'Report Date Time', 'Report Date',
       'Report Time', 'Location Type', 'Address', 'Zip Code',
       'Council District', 'APD Sector', 'APD District', 'PRA', 'Census Tract',
       'Clearance Status', 'Clearance Date', 'UCR Category',
       'Category Description', 'X-coordinate', 'Y-coordinate', 'Latitude',
       'Longitude', 'Location'],
      dtype='object')

In [5]:
# Remove the columns that are unnecessary or redundant for the analysis.
# crime_df is rebound to the reduced frame, so this cell raises KeyError if
# it is run a second time without reloading the data first.
crime_df = crime_df.drop(
    [
        "Highest Offense Description",
        "Occurred Date Time",
        "Report Date Time",
        "Address",
        "Category Description",
        "X-coordinate",
        "Y-coordinate",
        "Location",
    ],
    axis="columns",
)

In [6]:
# Display the column labels that remain after the drop above.
crime_df.columns

Index(['Incident Number', 'Highest Offense Code', 'Family Violence',
       'Occurred Date', 'Occurred Time', 'Report Date', 'Report Time',
       'Location Type', 'Zip Code', 'Council District', 'APD Sector',
       'APD District', 'PRA', 'Census Tract', 'Clearance Status',
       'Clearance Date', 'UCR Category', 'Latitude', 'Longitude'],
      dtype='object')

In [7]:
# Spot-check five randomly chosen rows of the reduced DataFrame.
# No random_state is passed, so a different set of rows is shown on each run.
crime_df.sample(5)

Unnamed: 0,Incident Number,Highest Offense Code,Family Violence,Occurred Date,Occurred Time,Report Date,Report Time,Location Type,Zip Code,Council District,APD Sector,APD District,PRA,Census Tract,Clearance Status,Clearance Date,UCR Category,Latitude,Longitude
196659,20185042919,601,N,10/25/2018,2030,10/26/2018,1109,RESIDENCE / HOME,78753.0,1.0,ED,7,223,438.0,N,11/23/2018,23F,30.387903,-97.661687
177870,20205036614,607,N,09/13/2020,1642,09/14/2020,1308,DEPARTMENT / DISCOUNT STORE,78723.0,4.0,ID,5,292,21.0,N,09/15/2020,23C,30.316393,-97.706147
105693,20202830089,500,N,10/09/2020,200,10/09/2020,200,RESIDENCE / HOME,78723.0,1.0,CH,4,324,21.0,N,10/11/2020,220,30.306427,-97.669436
173217,20182551286,502,N,09/12/2018,1911,09/12/2018,1911,RENTAL STORAGE FACILITY,78701.0,9.0,GE,4,404,11.0,N,09/27/2018,220,30.266274,-97.748257
18381,20213280061,700,N,11/24/2021,212,11/24/2021,229,RESIDENCE / HOME,78704.0,9.0,DA,1,452,13.12,N,11/30/2021,240,30.251726,-97.759389


In [8]:
# Write the reduced DataFrame to a CSV file in the Resources folder.
# index=False keeps the DataFrame index out of the file.
file_path = "Resources/Austin_Crime_Report_Occurance_Reported_2017_to_2022_dropped_columns.csv"
crime_df.to_csv(path_or_buf=file_path, index=False)

# Export the "Crime" Table to PostgreSQL

In [9]:
# Build the PostgreSQL connection URL for the local "crime_data" database.
# The password is percent-encoded so that characters such as "@", ":", "/"
# or "%" cannot be mistaken for URL delimiters when the string is parsed;
# a purely alphanumeric password produces exactly the same URL as before.
# str() keeps non-string passwords (e.g. an int in config.py) working.
# NOTE(review): if config.db_password was already percent-encoded by hand,
# store the raw password there instead to avoid double encoding.
db_string = f"postgresql://postgres:{quote(str(db_password), safe='')}@127.0.0.1:5432/crime_data"

In [10]:
# Create the SQLAlchemy engine from the connection string built above;
# it is passed as `con` to DataFrame.to_sql in the next cell.
engine = create_engine(db_string)

In [11]:
# Write the DataFrame to the PostgreSQL table "crime".
# if_exists='replace' drops and recreates the table when it already exists,
# so re-running the notebook no longer aborts with
# "ValueError: Table 'crime' already exists" (pandas' default is 'fail').
# The DataFrame index is still written as a column (pandas default
# index=True), which keeps the table schema the same as before.
# NOTE(review): the CSV export uses index=False; confirm whether the index
# column is actually wanted in the SQL table.
# The integer echoed under the cell is the rows-affected count reported by
# the driver and may not equal the number of rows written.
crime_df.to_sql(name='crime', con=engine, if_exists='replace')

912

# Load the dataset with dropped columns to create a new dataset without the Family Violence column

In [12]:
# Re-read the "dropped columns" CSV exported earlier in this notebook,
# rebinding crime_df to a fresh copy that still contains Family Violence,
# and preview its first five rows.
file_path = "Resources/Austin_Crime_Report_Occurance_Reported_2017_to_2022_dropped_columns.csv"
crime_df = pd.read_csv(filepath_or_buffer=file_path, low_memory=False)
crime_df.head(n=5)

Unnamed: 0,Incident Number,Highest Offense Code,Family Violence,Occurred Date,Occurred Time,Report Date,Report Time,Location Type,Zip Code,Council District,APD Sector,APD District,PRA,Census Tract,Clearance Status,Clearance Date,UCR Category,Latitude,Longitude
0,2017471291,600,N,02/16/2017,1800,02/16/2017,1822,PARKING /DROP LOT/ GARAGE,78721.0,1.0,CH,4,424,21.0,N,03/29/2017,23H,30.274788,-97.698514
1,20172171183,607,N,08/05/2017,1800,08/05/2017,1800,SHOPPING MALL,78746.0,8.0,DA,5,462,19.1,C,08/05/2017,23C,30.257886,-97.807007
2,20205047456,607,N,11/20/2020,1330,11/20/2020,1522,DEPARTMENT / DISCOUNT STORE,78748.0,5.0,FR,2,666,24.37,N,11/23/2020,23C,30.162911,-97.793123
3,20191441181,700,N,05/23/2019,2000,05/24/2019,1343,PARKING /DROP LOT/ GARAGE,78705.0,9.0,BA,2,358,6.0,N,07/02/2019,240,30.284284,-97.749112
4,20175050403,601,N,12/19/2017,2200,12/20/2017,626,PARKING /DROP LOT/ GARAGE,78741.0,3.0,HE,2,483,23.13,N,12/21/2017,23F,30.228133,-97.731168


In [13]:
# Remove the Family Violence column; every other column is kept.
# crime_df is rebound, so running this cell twice without reloading the data
# raises KeyError.
crime_df = crime_df.drop("Family Violence", axis="columns")

In [14]:
# Display the column labels that remain after removing Family Violence.
crime_df.columns

Index(['Incident Number', 'Highest Offense Code', 'Occurred Date',
       'Occurred Time', 'Report Date', 'Report Time', 'Location Type',
       'Zip Code', 'Council District', 'APD Sector', 'APD District', 'PRA',
       'Census Tract', 'Clearance Status', 'Clearance Date', 'UCR Category',
       'Latitude', 'Longitude'],
      dtype='object')

In [15]:
# Spot-check five randomly chosen rows of the DataFrame without Family Violence.
# No random_state is passed, so a different set of rows is shown on each run.
crime_df.sample(5)

Unnamed: 0,Incident Number,Highest Offense Code,Occurred Date,Occurred Time,Report Date,Report Time,Location Type,Zip Code,Council District,APD Sector,APD District,PRA,Census Tract,Clearance Status,Clearance Date,UCR Category,Latitude,Longitude
99634,20175011270,601,03/18/2017,1300,03/18/2017,1611,PARKING /DROP LOT/ GARAGE,78753.0,7.0,ED,4,205,412.0,N,03/21/2017,23F,30.402954,-97.675348
153383,20205047724,601,11/22/2020,100,11/23/2020,1025,AUTO DEALERSHIP NEW / USED,78751.0,9.0,BA,5,321,3.0,N,11/24/2020,23F,30.304357,-97.714069
32210,20201910180,700,07/08/2020,1800,07/09/2020,507,RESIDENCE / HOME,78617.0,2.0,HE,7,778,24.53,N,07/09/2020,240,30.168335,-97.612632
36965,20201750947,700,06/23/2020,1532,06/23/2020,1649,AUTO DEALERSHIP NEW / USED,78721.0,1.0,CH,4,425,21.11,C,06/29/2020,240,30.269098,-97.696261
186478,20205040716,600,10/08/2020,1300,10/09/2020,1547,RESIDENCE / HOME,78731.0,10.0,BA,6,326,1.0,N,10/12/2020,23H,30.308591,-97.753441


In [16]:
# Write the DataFrame without the Family Violence column to its own CSV file.
# index=False keeps the DataFrame index out of the file.
file_path = "Resources/Austin_Crime_Report_Occurance_Reported_2017_to_2022_dropped_family_violence.csv"
crime_df.to_csv(path_or_buf=file_path, index=False)

# Export the "Crime Reports" Table to PostgreSQL

In [17]:
# Build the PostgreSQL connection URL for the local "crime_data" database.
# The password is percent-encoded so that characters such as "@", ":", "/"
# or "%" cannot be mistaken for URL delimiters when the string is parsed;
# a purely alphanumeric password produces exactly the same URL as before.
# str() keeps non-string passwords (e.g. an int in config.py) working.
# NOTE(review): if config.db_password was already percent-encoded by hand,
# store the raw password there instead to avoid double encoding.
db_string = f"postgresql://postgres:{quote(str(db_password), safe='')}@127.0.0.1:5432/crime_data"

In [18]:
# Create the SQLAlchemy engine from the connection string built above,
# rebinding the notebook-level name `engine`; it is passed as `con` to
# DataFrame.to_sql in the next cell.
engine = create_engine(db_string)

In [19]:
# Write the DataFrame to the PostgreSQL table "crime reports".
# if_exists='replace' drops and recreates the table when it already exists,
# so re-running the notebook no longer aborts with a "Table ... already
# exists" ValueError (pandas' default is 'fail').
# The table name contains a space, so it must be written as a double-quoted
# identifier ("crime reports") in SQL queries; it is kept unchanged here so
# existing queries continue to work.
# The DataFrame index is still written as a column (pandas default
# index=True), which keeps the table schema the same as before.
# The integer echoed under the cell is the rows-affected count reported by
# the driver and may not equal the number of rows written.
crime_df.to_sql(name='crime reports', con=engine, if_exists='replace')

912

# Load the dataset with dropped columns to create a new dataset with only the Family Violence and Zip Code columns

In [20]:
# Re-read the "dropped columns" CSV exported earlier in this notebook,
# rebinding crime_df to a fresh copy that contains Family Violence again,
# and preview its first five rows.
file_path = "Resources/Austin_Crime_Report_Occurance_Reported_2017_to_2022_dropped_columns.csv"
crime_df = pd.read_csv(filepath_or_buffer=file_path, low_memory=False)
crime_df.head(n=5)

Unnamed: 0,Incident Number,Highest Offense Code,Family Violence,Occurred Date,Occurred Time,Report Date,Report Time,Location Type,Zip Code,Council District,APD Sector,APD District,PRA,Census Tract,Clearance Status,Clearance Date,UCR Category,Latitude,Longitude
0,2017471291,600,N,02/16/2017,1800,02/16/2017,1822,PARKING /DROP LOT/ GARAGE,78721.0,1.0,CH,4,424,21.0,N,03/29/2017,23H,30.274788,-97.698514
1,20172171183,607,N,08/05/2017,1800,08/05/2017,1800,SHOPPING MALL,78746.0,8.0,DA,5,462,19.1,C,08/05/2017,23C,30.257886,-97.807007
2,20205047456,607,N,11/20/2020,1330,11/20/2020,1522,DEPARTMENT / DISCOUNT STORE,78748.0,5.0,FR,2,666,24.37,N,11/23/2020,23C,30.162911,-97.793123
3,20191441181,700,N,05/23/2019,2000,05/24/2019,1343,PARKING /DROP LOT/ GARAGE,78705.0,9.0,BA,2,358,6.0,N,07/02/2019,240,30.284284,-97.749112
4,20175050403,601,N,12/19/2017,2200,12/20/2017,626,PARKING /DROP LOT/ GARAGE,78741.0,3.0,HE,2,483,23.13,N,12/21/2017,23F,30.228133,-97.731168


In [21]:
# Keep only the two columns needed for the family-violence table.
# Selecting the wanted columns (instead of dropping the other seventeen by
# name) yields the same frame, in the same column order, and is safe to
# re-run: a second execution simply selects the same two columns again
# rather than raising KeyError for columns that are already gone.
crime_df = crime_df[["Family Violence", "Zip Code"]]

In [22]:
# Display the column labels that remain after reducing the DataFrame.
crime_df.columns

Index(['Family Violence', 'Zip Code'], dtype='object')

In [23]:
# Spot-check five randomly chosen rows of the reduced DataFrame.
# No random_state is passed, so a different set of rows is shown on each run.
crime_df.sample(5)

Unnamed: 0,Family Violence,Zip Code
17169,N,78729.0
134416,N,78744.0
14756,N,78717.0
50765,N,78704.0
135296,N,78753.0


In [24]:
# Write the two-column DataFrame to its own CSV file.
# index=False keeps the DataFrame index out of the file.
file_path = "Resources/Austin_Crime_Report_Occurance_Reported_2017_to_2022_only_family_violence.csv"
crime_df.to_csv(path_or_buf=file_path, index=False)

# Export the "Family Violence" Table to PostgreSQL

In [25]:
# Create a connection string
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5432/crime_data"

In [26]:
# Create database engine
engine = create_engine(db_string)

In [27]:
# Write the DataFrame to the PostgreSQL table "family violence".
# if_exists='replace' drops and recreates the table when it already exists,
# so re-running the notebook no longer aborts with a "Table ... already
# exists" ValueError (pandas' default is 'fail').
# The table name contains a space, so it must be written as a double-quoted
# identifier ("family violence") in SQL queries; it is kept unchanged here
# so existing queries continue to work.
# The DataFrame index is still written as a column (pandas default
# index=True), which keeps the table schema the same as before.
# The integer echoed under the cell is the rows-affected count reported by
# the driver and may not equal the number of rows written.
crime_df.to_sql(name='family violence', con=engine, if_exists='replace')

912