# Data Analysis and Visualization

In [None]:
# ((Codeblock One))

import pandas as pd
import folium
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# ((Codeblock Two))

# Load the collisions table from the clean_nyc_collisions.csv export.
collisions_csv = "/Users/Administrator/Documents/clean_nyc_collisions.csv"
df = pd.read_csv(collisions_csv)

In [None]:
# ((Codeblock Three))

# Yearly injury totals: all persons first, then one panel per category.
# Each entry pairs a dataframe column with the title of its bar chart.
injury_panels = [
    ('total_injured', 'Total number of PERSONS INJURED'),
    ('moto_injured', 'Total number of MOTORISTS INJURED'),
    ('cyc_injured', 'Total number of CYCLISTS INJURED'),
    ('ped_injured', 'Total number of PEDESTRIANS INJURED'),
]

injury_fig = plt.figure(figsize=(20, 25))
injury_fig.subplots_adjust(hspace=0.4)

# The panels fill the first four slots of a 4x2 grid, left to right.
for slot, (column, heading) in enumerate(injury_panels, start=1):
    plt.subplot(4, 2, slot)
    df.groupby('year')[column].sum().plot.bar()
    plt.title(heading, fontsize=16)
    plt.xlabel('Year', fontsize=13)

plt.show()

In [None]:
# ((Codeblock Four))

# Yearly fatality totals: overall first, then one panel per category.
# Each entry pairs a dataframe column with the title of its bar chart.
fatality_panels = [
    ('total_fatality', 'Total number of fatalities'),
    ('moto_fatality', 'Total number of MOTORIST fatalities'),
    ('cyc_fatality', 'Total number of CYCLIST fatalities'),
    ('ped_fatality', 'Total number of PEDESTRIAN fatalities'),
]

fatality_fig = plt.figure(figsize=(20, 25))
fatality_fig.subplots_adjust(hspace=0.4)

# The panels fill the first four slots of a 4x2 grid, left to right.
for slot, (column, heading) in enumerate(fatality_panels, start=1):
    plt.subplot(4, 2, slot)
    df.groupby('year')[column].sum().plot.bar()
    plt.title(heading, fontsize=16)
    plt.xlabel('Year', fontsize=13)

plt.show()

In [None]:
# ((Codeblock Five))

# Total number of injuries and fatalities per borough, largest first.

# A bare figure is created here (instead of plt.subplots) so that no extra
# full-size Axes is left sitting behind the two panels added with
# plt.subplot below.
fig = plt.figure(figsize=(25, 15))

borough_axis_label = 'Borough,   *NYC = unknown location incidents'

# Left panel: injuries. This must read total_injured to match its title;
# plotting total_fatality here would just duplicate the right-hand panel.
plt.subplot(2, 2, 1)
df.groupby('borough').total_injured.sum().sort_values(ascending=False).plot.bar()
plt.title('Number of people injured per borough', fontsize=18)
plt.xlabel(borough_axis_label, fontsize=14)

# Right panel: fatalities.
plt.subplot(2, 2, 2)
df.groupby('borough').total_fatality.sum().sort_values(ascending=False).plot.bar()
plt.title('Number of fatalities per borough', fontsize=18)
plt.xlabel(borough_axis_label, fontsize=14)

plt.show()

In [None]:
# ((Codeblock Six))

# Total number of injured and fatalities per quarter.

# A bare figure is created here (instead of plt.subplots) so that no extra
# full-size Axes is left sitting behind the two panels added with
# plt.subplot below.
fig = plt.figure(figsize=(25, 15))

# Left panel: injuries summed over each quarter.
plt.subplot(2, 2, 1)
df.groupby('quarter').total_injured.sum().plot.bar()
plt.title('Total number of PERSONS injured', fontsize=18)
plt.xlabel('Quarter', fontsize=14)

# Right panel: fatalities summed over each quarter.
plt.subplot(2, 2, 2)
df.groupby('quarter').total_fatality.sum().plot.bar()
plt.title('Total number of fatalities', fontsize=18)
plt.xlabel('Quarter', fontsize=14)

plt.show()

# Geographic Visualization

In [None]:
# ((Codeblock Seven))

# Keep only rows whose latitude and longitude are not the 'EMPTY' placeholder.
# NOTE: this rebinds df, so re-running the earlier plotting cells after this
# one will chart the reduced sample rather than the full dataset.
df = df[(df['latitude'] != 'EMPTY') & (df['longitude'] != 'EMPTY')]

# Map at most 1000 randomly chosen collisions. The cap at len(df) avoids the
# ValueError that DataFrame.sample raises when asked for more rows than exist.
df = df.sample(n=min(1000, len(df)))

In [None]:
# ((Codeblock Eight))

# Base map, initially centred on New York City at zoom level 10.
# NOTE: the name `map` shadows Python's builtin of the same name; it is kept
# because the following cells attach markers and layers to it under that name.
nyc_center = [40.7128, -74.0060]
map = folium.Map(location=nyc_center, zoom_start=10)

In [None]:
# ((Codeblock Nine))

# Marker colour for each borough. Keys are lower-case because the marker
# loop lower-cases the borough value before looking it up here.
boroughs = {
    'manhattan': 'red',
    'brooklyn': 'blue',
    'queens': 'green',
    'bronx': 'orange',
    'staten island': 'purple',
    'nyc': 'black',
}

In [None]:
# ((Codeblock Ten))

# Drop one colour-coded pin on the map for every collision row in df.
for _, collision in df.iterrows():
    borough_key = collision['borough'].lower()
    # Rows whose borough has no colour assigned are left off the map.
    if borough_key in boroughs:
        details = f"Date: {collision['date']}<br>Time: {collision['time']}<br>Location: {collision['location']}"
        # Coordinates are converted with float() before being handed to folium.
        coords = [float(collision['latitude']), float(collision['longitude'])]
        folium.Marker(
            coords,
            popup=details,
            icon=folium.Icon(color=boroughs[borough_key]),
        ).add_to(map)

In [None]:
# ((Codeblock Eleven))

# Overlay the borough boundaries, loaded as GeoJSON from data.cityofnewyork.us,
# as a map layer named 'boroughs'.
borough_outline_url = 'https://data.cityofnewyork.us/api/geospatial/tqmj-j8zm?method=export&format=GeoJSON'
borough_layer = folium.GeoJson(borough_outline_url, name='boroughs')
borough_layer.add_to(map)

In [None]:
# ((Codeblock Twelve))

# Attach a layer control so the borough boundaries can be toggled on and off.
layer_toggle = folium.LayerControl()
layer_toggle.add_to(map)

In [None]:
# ((Codeblock Thirteen))

# Display the map in the Jupyter Notebook: the bare expression on the last
# line of the cell is what the notebook renders as the cell's output.
map