In [7]:
from IPython.display import Image
from IPython.core.display import HTML 
# Banner image for the top of the notebook, referenced by URL and shown at a
# fixed 1000x250 size. Kept as the last expression so the cell displays it.
Image(
    url="https://heartspringhealth.com/wp-content/uploads/2014/05/cars-c.jpg",
    width=1000,
    height=250,
)

The Motor Vehicle Collisions crash table contains details on the crash event. Each row represents a crash event. The Motor Vehicle Collisions data tables contain information from all police-reported motor vehicle collisions in NYC. The police report (MV104-AN) is required to be filled out for collisions where someone is injured or killed, or where there is at least $1,000 worth of damage.

Below are the steps to perform our ETL processes:
1. Extract data daily from NYC Open Data (https://opendata.cityofnewyork.us/) via the Socrata API.
2. For our initial load, we will only use data from 2021 to the present to populate our table.
3. For our daily load, we will upsert against our existing data so that only newer records are inserted into the table.
4. Data will be stored in an Amazon S3 bucket.
5. We then perform the necessary transformations and load the data into our Data Warehouse in AWS SQL Server.

In [9]:
#Install required libraries
!pip install sodapy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting sodapy
  Downloading sodapy-2.1.1-py2.py3-none-any.whl (14 kB)
Installing collected packages: sodapy
Successfully installed sodapy-2.1.1


In [10]:
#Import required Libraries 
import pandas as pd 
from sodapy import Socrata 

# **Data Extraction**

In [35]:
# Extraction settings, named so they can be tuned in one place.
SOCRATA_DOMAIN = "data.cityofnewyork.us"
COLLISIONS_DATASET_ID = "h9gi-nx95"  # dataset identifier passed to client.get
EXTRACT_ROW_LIMIT = 1000  # maximum number of rows requested per call

# No app token is supplied (second argument is None).
client = Socrata(SOCRATA_DOMAIN, None)

# Request at most EXTRACT_ROW_LIMIT rows, ordered by crash_date descending
# (most recent crash dates first). The API returns JSON, which sodapy converts
# to a Python list of dictionaries.
results = client.get(
    COLLISIONS_DATASET_ID,
    order='crash_date DESC',
    limit=EXTRACT_ROW_LIMIT,
)

# Convert to pandas DataFrame
results_df = pd.DataFrame.from_records(results)



In [36]:
# Preview the first five rows to check the columns returned by the API.
results_df.head(n=5)

Unnamed: 0,crash_date,crash_time,borough,zip_code,latitude,longitude,location,cross_street_name,number_of_persons_injured,number_of_persons_killed,...,vehicle_type_code1,vehicle_type_code2,contributing_factor_vehicle_3,contributing_factor_vehicle_4,vehicle_type_code_3,vehicle_type_code_4,on_street_name,off_street_name,contributing_factor_vehicle_5,vehicle_type_code_5
0,2022-07-12T00:00:00.000,18:53,BROOKLYN,11230.0,40.6305,-73.96631,"{'latitude': '40.6305', 'longitude': '-73.9663...",1115 CONEY ISLAND AVENUE,0,0,...,Station Wagon/Sport Utility Vehicle,Sedan,,,,,,,,
1,2022-07-12T00:00:00.000,15:00,BRONX,10472.0,40.831768,-73.86671,"{'latitude': '40.831768', 'longitude': '-73.86...",1792 WESTCHESTER AVENUE,2,0,...,Sedan,Sedan,Unspecified,Unspecified,Sedan,Sedan,,,,
2,2022-07-12T00:00:00.000,2:00,STATEN ISLAND,10308.0,40.549232,-74.15342,"{'latitude': '40.549232', 'longitude': '-74.15...",4111 AMBOY ROAD,0,0,...,Station Wagon/Sport Utility Vehicle,,,,,,,,,
3,2022-07-12T00:00:00.000,11:00,MANHATTAN,10033.0,40.846138,-73.93424,"{'latitude': '40.846138', 'longitude': '-73.93...",,3,0,...,Taxi,Taxi,Unspecified,Unspecified,Station Wagon/Sport Utility Vehicle,Station Wagon/Sport Utility Vehicle,AUDUBON AVENUE,WEST 177 STREET,,
4,2022-07-12T00:00:00.000,16:30,,,,,,,2,0,...,Bike,E-Bike,,,,,WEST 155 STREET,HENRY HUDSON PARKWAY,,


In [18]:
# Show the records with the latest crash_date values first. The values shown
# in the preview look like ISO-8601 timestamps (e.g. 2022-07-12T00:00:00.000),
# for which string order and chronological order agree.
results_df.sort_values(by='crash_date', ascending=False)

Unnamed: 0,crash_date,crash_time,on_street_name,number_of_persons_injured,number_of_persons_killed,number_of_pedestrians_injured,number_of_pedestrians_killed,number_of_cyclist_injured,number_of_cyclist_killed,number_of_motorist_injured,...,longitude,location,off_street_name,contributing_factor_vehicle_3,vehicle_type_code_3,cross_street_name,contributing_factor_vehicle_4,vehicle_type_code_4,contributing_factor_vehicle_5,vehicle_type_code_5
1396,2022-07-01T00:00:00.000,9:15,,0,0,0,0,0,0,0,...,-73.84675,"{'latitude': '40.89911', 'longitude': '-73.846...",,,,1955 NEREID AVENUE,,,,
981,2022-07-01T00:00:00.000,15:00,WHITESTONE EXPRESSWAY,3,0,0,0,0,0,3,...,-73.824234,"{'latitude': '40.784996', 'longitude': '-73.82...",,,,,,,,
1719,2022-07-01T00:00:00.000,9:08,CHURCH STREET,0,0,0,0,0,0,0,...,-74.006256,"{'latitude': '40.717075', 'longitude': '-74.00...",WORTH STREET,,,,,,,
965,2022-07-01T00:00:00.000,14:45,43 STREET,0,0,0,0,0,0,0,...,-73.920135,"{'latitude': '40.750244', 'longitude': '-73.92...",37 AVENUE,,,,,,,
1700,2022-07-01T00:00:00.000,0:00,HOYT STREET,0,0,0,0,0,0,0,...,-73.99374,"{'latitude': '40.67784', 'longitude': '-73.993...",3 STREET,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
431,2020-04-15T00:00:00.000,15:20,GOWANUS EXPY (BQE),0,0,0,0,0,0,0,...,-73.99843,"{'latitude': '40.671585', 'longitude': '-73.99...",,,,,,,,
21,2020-01-21T00:00:00.000,15:49,BRUCKNER BLVD,0,0,0,0,0,0,0,...,,,�ST 138 STREET,,,,,,,
20,2019-05-21T00:00:00.000,22:50,GOLD STREET,0,0,0,0,0,0,0,...,-73.9831200,"{'latitude': '40.69754', 'longitude': '-73.983...",CONCORD STREET,,,,,,,
1061,2019-04-17T00:00:00.000,0:49,BELT PARKWAY,3,0,0,0,0,0,3,...,-73.86542,"{'latitude': '40.651974', 'longitude': '-73.86...",,,,,,,,
