In [None]:
# PURPOSE OF THIS CELL: clean the raw accident data, save the result to its own
# csv, and then point `df` at that csv of cleaned data
import pandas as pd

# Values a reader is likely to tune, kept in one place.
# TIP: make sure the Car Accidents data file is called "raw-data.csv" locally
RAW_CSV = 'raw-data.csv'
CLEAN_CSV = 'clean-data.csv'
MIN_YEAR = 2023      # keep accidents that both start and end in this year or later
STATE_CODE = 'FL'    # value matched against the "State" column

df = pd.read_csv(RAW_CSV)

# Reformatting the start and end times to allow for filtering: keep only the
# text before the first '.' (presumably dropping fractional seconds -- confirm
# against the raw file), then parse what is left as datetimes
for time_col in ('Start_Time', 'End_Time'):
    df[time_col] = pd.to_datetime(df[time_col].str.split('.').str[0])

# One combined mask instead of three successive re-filters; the surviving rows
# and their order are the same:
#   - accident starts in MIN_YEAR or later
#   - accident ends in MIN_YEAR or later
#   - accident is in the selected state
keep_row = (
    (df['Start_Time'].dt.year >= MIN_YEAR)
    & (df['End_Time'].dt.year >= MIN_YEAR)
    & (df['State'] == STATE_CODE)
)
df = df[keep_row]

# Removes all records with an empty cell (a missing value in any column)
new_df = df.dropna()

# Saves the cleaned data to its own csv file
new_df.to_csv(CLEAN_CSV, index=False)

print('num rows after cleaning: ', len(new_df))

# Setting the data frame to the cleaned data.
# NOTE: read_csv is called without parse_dates here, so Start_Time / End_Time
# are not re-parsed as datetimes in the reloaded frame.
df = pd.read_csv(CLEAN_CSV)

num rows after cleaning:  21209


In [24]:
# Sanity check: list the distinct values present in the "State" column of the
# reloaded frame (displayed as this cell's output)
pd.unique(df["State"])

array(['FL'], dtype=object)

In [27]:
# Pull the starting latitude / longitude columns out of the frame and convert
# them to a NumPy array with one [lat, lng] row per record.
# Source: https://stackoverflow.com/questions/31789160/convert-select-columns-in-pandas-dataframe-to-numpy-array
latlng = df.loc[:, ['Start_Lat', 'Start_Lng']]
array = latlng.to_numpy()

# The row count should equal the number of rows in the cleaned frame
print('have all values?', len(array))

have all values? 21209


In [28]:
import json

# Build a GeoJSON FeatureCollection holding one Point feature per row of
# `array`. Rows are unpacked as (lat, lng) but written as [lng, lat], since
# GeoJSON positions list longitude first.
geojson = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [lng, lat],
            },
            "properties": {},
        }
        for lat, lng in array
    ],
}

# Write the collection to disk as indented JSON.
# Source: https://stackoverflow.com/questions/12309269/how-do-i-write-json-data-to-a-file
with open("data.json", "w") as out_file:
    json.dump(geojson, out_file, indent=2)

# Both counts should be equal: every input row produced exactly one feature
print('the length of array', len(array), 'should match length of features', len(geojson["features"]))

the length of array 21209 should match length of features 21209
