In [None]:
# Dependencies and Setup
import pandas as pd
from zipfile import ZipFile, ZIP_DEFLATED
import os

# The dataset is too large to host on GitHub (100 MB file limit).
# To recreate these steps, download it from the link below and place the zipped file in the Resources folder:
# https://www.kaggle.com/sobhanmoosavi/us-accidents

# Anchor every path to the directory the notebook is run from, so all of
# them are absolute and stay valid even if the working directory changes later.
current_directory = os.getcwd()
resources_directory = os.path.join(current_directory, "Resources")

# path to the downloaded zip archive
zip_path = os.path.join(resources_directory, "199387_1319582_bundle_archive.zip")

# file we care about in the zip
file_name = "US_Accidents_June20.csv"

# output csv path
destination = os.path.join(resources_directory, "US_Accidents_Cleaned.csv")
# output zip path (absolute, like the other paths, instead of relative to the cwd)
destination_zip = os.path.join(resources_directory, "US_Accidents.zip")


In [None]:
# Read the accidents csv straight out of the zip archive
with ZipFile(zip_path) as zin:
    # Open the archive member in its own `with` block so its handle is closed
    # deterministically; read_csv leaves caller-supplied file objects open.
    with zin.open(file_name) as csv_member:
        accidents_df = pd.read_csv(csv_member)

# Columns removed from the frame before any further processing
drop_columns = ['Source', 'TMC', 'End_Time', 'End_Lat', 'End_Lng', 'Distance(mi)', 
                'Description', 'Number', 'Street', 'Side', 'Country', 'Timezone', 
                'Airport_Code', 'Weather_Timestamp', 'Temperature(F)', 'Wind_Chill(F)', 
                'Humidity(%)', 'Pressure(in)', 'Visibility(mi)', 'Wind_Direction', 
                'Wind_Speed(mph)', 'Precipitation(in)', 'Weather_Condition', 'Amenity', 
                'Bump', 'Crossing', 'Give_Way', 'Junction', 'No_Exit', 'Railway', 
                'Roundabout', 'Station', 'Stop', 'Traffic_Calming', 'Traffic_Signal', 
                'Turning_Loop', 'Civil_Twilight', 'Nautical_Twilight', 'Astronomical_Twilight']
# drop() raises KeyError if any listed column is missing from the csv
accidents_df = accidents_df.drop(columns=drop_columns)

# preview the remaining columns
accidents_df.head()

In [None]:
# Parse Start_Time into datetimes so it can be compared against a date
accidents_df["Start_Time"] = pd.to_datetime(accidents_df["Start_Time"])

# Keep only accidents before 1 Feb 2020: locate the rows starting on or
# after the cutoff and drop them by index label
cutoff_date = pd.to_datetime('2020-02-01 00:00:00')
starts_at_or_after_cutoff = accidents_df["Start_Time"] >= cutoff_date
late_labels = accidents_df.loc[starts_at_or_after_cutoff].index
accidents_df = accidents_df.drop(index=late_labels)

In [None]:
# Write the cleaned frame to the csv path held in `destination`;
# index=False keeps the DataFrame index out of the file.
accidents_df.to_csv(destination, index=False)

In [None]:
# Compress the cleaned csv into its own archive
archive_member = os.path.basename(destination)
with ZipFile(destination_zip, mode="w", compression=ZIP_DEFLATED) as zout:
    # store the csv under its bare file name rather than its full path
    zout.write(destination, arcname=archive_member)

In [None]:
# Delete the unzipped csv at `destination`.
# os.remove raises FileNotFoundError if the file is already gone, so
# re-running this cell on its own will fail.
os.remove(destination)