In [42]:
%matplotlib notebook
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import requests
import json

In [43]:
#***************SAN DIEGO***************

In [44]:

#San Diego County List of fatal crashes by location
url_SD = "https://crashviewer.nhtsa.dot.gov/CrashAPI/crashes/GetCrashesByLocation?fromCaseYear=2014&toCaseYear=2018&state=6&county=73&format=json"
   

In [45]:
#Pull data with API, in json format, and check output
url_SD
response = requests.get(url_SD).json()
response

{'Count': 892,
 'Message': 'Results returned successfully',
 'Results': [[{'CITY': '3260',
    'CITYNAME': 'SAN DIEGO',
    'COUNTY': '73',
    'COUNTYNAME': 'SAN DIEGO (73)',
    'CaseYear': '2014',
    'FATALS': '1',
    'LATITUDE': '32.76605000',
    'LONGITUD': '-117.150869440',
    'STATE': '6',
    'STATENAME': 'California',
    'ST_CASE': '60005',
    'TOTALVEHICLES': '2',
    'TWAY_ID': 'I-8',
    'TWAY_ID2': '',
    'VE_FORMS': '2'},
   {'CITY': '1080',
    'CITYNAME': 'EL CAJON',
    'COUNTY': '73',
    'COUNTYNAME': 'SAN DIEGO (73)',
    'CaseYear': '2014',
    'FATALS': '1',
    'LATITUDE': '32.79522222',
    'LONGITUD': '-116.935727780',
    'STATE': '6',
    'STATENAME': 'California',
    'ST_CASE': '60008',
    'TOTALVEHICLES': '2',
    'TWAY_ID': 'JAMACHA ROAD',
    'TWAY_ID2': '',
    'VE_FORMS': '2'},
   {'CITY': '3260',
    'CITYNAME': 'SAN DIEGO',
    'COUNTY': '73',
    'COUNTYNAME': 'SAN DIEGO (73)',
    'CaseYear': '2015',
    'FATALS': '1',
    'LATITUDE': '32.7

In [46]:
# Validate length of data
RSD=(len(response["Results"][0]))
RSD


892

In [47]:
# Create empty columns to prepare for dataframe
SD_ST_CASE = []
SD_CaseYear = []
SD_TOTALVEHICLES = []
SD_FATALS = []
SD_LATITUDE =[]
SD_LONGITUD = []
SD_fulldata = []
# Create for loop to prepare for dataframe population
for i in range(0,RSD):
    SD_ST_CASE.append(response["Results"][0][i]["ST_CASE"])
    SD_CaseYear.append(response["Results"][0][i]["CaseYear"])
    SD_TOTALVEHICLES.append(response["Results"][0][i]["TOTALVEHICLES"])
    SD_FATALS.append(response["Results"][0][i]["FATALS"])
    SD_LATITUDE.append(response["Results"][0][i]["LATITUDE"])
    SD_LONGITUD.append(response["Results"][0][i]["LONGITUD"])

In [48]:
# Create actual dataframe and check output
SD_stats = pd.DataFrame({'CASEYEAR':SD_CaseYear , 
                         'ST_CASE': SD_ST_CASE ,
                         'TOTALVEHICLES': SD_TOTALVEHICLES , 
                         'FATALS': SD_FATALS , 
                         'LATITUDE': SD_LATITUDE , 
                         'LONGITUD':SD_LONGITUD})


SD_stats.head()

Unnamed: 0,CASEYEAR,ST_CASE,TOTALVEHICLES,FATALS,LATITUDE,LONGITUD
0,2014,60005,2,1,32.76605,-117.15086944
1,2014,60008,2,1,32.79522222,-116.93572778
2,2015,60009,1,1,32.742625,-117.25468889
3,2016,60011,1,1,33.11413056,-117.10489722
4,2016,60014,1,1,32.75183056,-117.01482778


In [49]:
# Sort the data by year and reset index, check output
SD_grouped=SD_stats.sort_values('CASEYEAR')

# Reset Index
SD_data=SD_grouped.reset_index(drop=True)

# Convert FATALS from object to integer
SD_data['FATALS']= SD_data['FATALS'].astype(int)
SD_data.head()

Unnamed: 0,CASEYEAR,ST_CASE,TOTALVEHICLES,FATALS,LATITUDE,LONGITUD
0,2014,60005,2,1,32.76605,-117.15086944
1,2014,60991,4,1,32.75531111,-117.1589
2,2014,60978,1,1,32.76893611,-117.20908333
3,2014,62532,1,1,33.15081111,-117.19241111
4,2014,62537,1,1,32.75539722,-117.204775


In [50]:
# Group by Year
SD_data_group = SD_data.groupby (['CASEYEAR'])

# Get number of cases for each year
SD_data_group_ST_CASE = SD_data_group['ST_CASE'].count()

# Check output
SD_data_group_ST_CASE

CASEYEAR
2014    220
2015    228
2016    229
2017    215
Name: ST_CASE, dtype: int64

In [51]:
# Get number of fatalities for each year
SD_data_group_FATALS = SD_data_group['FATALS'].sum()
SD_data_group_FATALS

CASEYEAR
2014    233
2015    246
2016    243
2017    231
Name: FATALS, dtype: int64

In [52]:
SD_Acc_df= pd.concat([SD_data_group_ST_CASE,SD_data_group_FATALS],axis=1).reset_index()
SD_Acc_df= SD_Acc_df.rename(columns={"CASEYEAR":"Year","ST_CASE": "San Diego Cases", "FATALS": "San Diego Fatalities"})
SD_Acc_df

Unnamed: 0,Year,San Diego Cases,San Diego Fatalities
0,2014,220,233
1,2015,228,246
2,2016,229,243
3,2017,215,231


In [53]:
#***************TEXAS***************

In [54]:
#Dallas County list of fatal crashes by location
url_TX = "https://crashviewer.nhtsa.dot.gov/CrashAPI/crashes/GetCrashesByLocation?fromCaseYear=2014&toCaseYear=2018&state=48&county=113&format=json"
 

In [55]:
url_TX
response = requests.get(url_TX).json()
response

{'Count': 1013,
 'Message': 'Results returned successfully',
 'Results': [[{'CITY': '1730',
    'CITYNAME': 'DALLAS',
    'COUNTY': '113',
    'COUNTYNAME': 'DALLAS (113)',
    'CaseYear': '2017',
    'FATALS': '1',
    'LATITUDE': '32.69296111',
    'LONGITUD': '-96.845794440',
    'STATE': '48',
    'STATENAME': 'Texas',
    'ST_CASE': '480002',
    'TOTALVEHICLES': '1',
    'TWAY_ID': 'US-67 MARVIN D LOVE FWY',
    'TWAY_ID2': '',
    'VE_FORMS': '1'},
   {'CITY': '1730',
    'CITYNAME': 'DALLAS',
    'COUNTY': '113',
    'COUNTYNAME': 'DALLAS (113)',
    'CaseYear': '2014',
    'FATALS': '1',
    'LATITUDE': '32.68372222',
    'LONGITUD': '-96.814405560',
    'STATE': '48',
    'STATENAME': 'Texas',
    'ST_CASE': '480002',
    'TOTALVEHICLES': '1',
    'TWAY_ID': 'SL 12',
    'TWAY_ID2': 'PALODURO LN',
    'VE_FORMS': '1'},
   {'CITY': '2590',
    'CITYNAME': 'GARLAND',
    'COUNTY': '113',
    'COUNTYNAME': 'DALLAS (113)',
    'CaseYear': '2014',
    'FATALS': '2',
    'LATITUDE'

In [56]:
RTX=(len(response["Results"][0]))
RTX

1013

In [57]:
TX_ST_CASE = []
TX_CaseYear = []
TX_TOTALVEHICLES = []
TX_FATALS = []
TX_LATITUDE =[]
TX_LONGITUD = []


for i in range(0,RTX):
    TX_ST_CASE.append(response["Results"][0][i]["ST_CASE"])
    TX_CaseYear.append(response["Results"][0][i]["CaseYear"])
    TX_TOTALVEHICLES.append(response["Results"][0][i]["TOTALVEHICLES"])
    TX_FATALS.append(response["Results"][0][i]["FATALS"])
    TX_LATITUDE.append(response["Results"][0][i]["LATITUDE"])
    TX_LONGITUD.append(response["Results"][0][i]["LONGITUD"])

In [58]:
TX_stats = pd.DataFrame({'CASEYEAR':TX_CaseYear ,
                         'ST_CASE': TX_ST_CASE ,
                         'TOTALVEHICLES': TX_TOTALVEHICLES , 
                         'FATALS': TX_FATALS , 
                         'LATITUDE': TX_LATITUDE , 
                         'LONGITUD':TX_LONGITUD})


TX_stats.head()

Unnamed: 0,CASEYEAR,ST_CASE,TOTALVEHICLES,FATALS,LATITUDE,LONGITUD
0,2017,480002,1,1,32.69296111,-96.84579444
1,2014,480002,1,1,32.68372222,-96.81440556
2,2014,480003,2,2,32.97686389,-96.66528056
3,2015,480003,1,1,32.67020278,-96.94210278
4,2016,480010,1,1,32.96082222,-96.91518333


In [59]:
TX_grouped=TX_stats.sort_values('CASEYEAR')
TX_data=TX_grouped.reset_index(drop=True)
TX_data['FATALS']= TX_data['FATALS'].astype(int)
TX_data.head()

Unnamed: 0,CASEYEAR,ST_CASE,TOTALVEHICLES,FATALS,LATITUDE,LONGITUD
0,2014,480811,1,1,32.97722778,-96.83873056
1,2014,482087,1,1,32.81483056,-96.8269
2,2014,482661,1,1,32.72734722,-96.85866667
3,2014,480918,1,2,32.77762222,-97.00679722
4,2014,482664,1,1,32.94088056,-96.76884722


In [60]:
# Group by Year
TX_data_group = TX_data.groupby (['CASEYEAR'])

# Get number of cases for each year
TX_data_group_ST_CASE = TX_data_group['ST_CASE'].count()

# Check output
TX_data_group_ST_CASE

CASEYEAR
2014    222
2015    239
2016    288
2017    264
Name: ST_CASE, dtype: int64

In [61]:
TX_data_group_FATALS = TX_data_group['FATALS'].sum()
TX_data_group_FATALS

CASEYEAR
2014    238
2015    259
2016    315
2017    282
Name: FATALS, dtype: int64

In [62]:
TX_Acc_df= pd.concat([TX_data_group_ST_CASE,TX_data_group_FATALS],axis=1).reset_index()
TX_Acc_df= TX_Acc_df.rename(columns={"CASEYEAR":"Year","ST_CASE": "Dallas Cases", "FATALS": "Dallas Fatalities"})
TX_Acc_df

Unnamed: 0,Year,Dallas Cases,Dallas Fatalities
0,2014,222,238
1,2015,239,259
2,2016,288,315
3,2017,264,282


In [63]:
Combined_ACC_df = pd.merge(SD_Acc_df, TX_Acc_df, how='left' , on='Year')
Combined_ACC_df

Unnamed: 0,Year,San Diego Cases,San Diego Fatalities,Dallas Cases,Dallas Fatalities
0,2014,220,233,222,238
1,2015,228,246,239,259
2,2016,229,243,288,315
3,2017,215,231,264,282


In [64]:
Combined_ACC_df.count()


Year                    4
San Diego Cases         4
San Diego Fatalities    4
Dallas Cases            4
Dallas Fatalities       4
dtype: int64

In [65]:
Combined_ACC_df.plot(x="Year", y=["San Diego Cases", "Dallas Cases","San Diego Fatalities", "Dallas Fatalities"], kind="bar")
plt.show()

<IPython.core.display.Javascript object>

In [32]:
Combined_ACC_df.plot(x="Year", y=["San Diego Cases", "Dallas Cases"], kind="bar")
plt.xlabel('Year')
plt.ylabel('Number of Cases')
plt.title('Case Comparison')
plt.show()

<IPython.core.display.Javascript object>

In [66]:
Combined_ACC_df.plot(x="Year", y=["San Diego Fatalities", "Dallas Fatalities"], kind="bar")
plt.xlabel('Year')
plt.ylabel('Number of Fatalities')
plt.title('Fatalities Comparison')
plt.show()


<IPython.core.display.Javascript object>

In [33]:
Combined_ACC_df = pd.merge(SD_Acc_df, TX_Acc_df, how='left' , on='Year')
Combined_ACC_df

Unnamed: 0,Year,San Diego Cases,San Diego Fatalities,Dallas Cases,Dallas Fatalities
0,2014,220,233,222,238
1,2015,228,246,239,259
2,2016,229,243,288,315
3,2017,215,231,264,282


In [73]:
years = ['2014', '2015','2016', '2017']
sd_case = np.array([220, 228, 229, 215])
sd_fatal = np.array([233, 246, 243, 231])
ind = [year for year in years]

plt.bar(ind, sd_case, width=0.6, label='Case Count', color='blue')
plt.bar(ind, sd_fatal, width=0.6, label='Fatality Count', color='red', bottom=sd_case)

plt.xticks(ind, years)
plt.ylabel('Number of Cases')
plt.xlabel('Years')
plt.legend(loc='best')
plt.title('Comparison of Fatalities')
plt.show();

In [74]:
countries = ['Norway', 'Germany', 'Canada', 'United States', 'Netherlands']
bronzes = np.array([10,7,10,6,6])
silvers = np.array([14,10,8,8,6])
golds = np.array([14,14,11,9,8])
ind = [country for country in countries]
 
plt.bar(ind, golds, width=0.6, label='golds', color='gold', bottom=silvers+bronzes)
plt.bar(ind, silvers, width=0.6, label='silvers', color='silver', bottom=bronzes)
plt.bar(ind, bronzes, width=0.6, label='bronzes', color='#CD7F32')
 
plt.xticks(ind, countries)
plt.ylabel("Medals")
plt.xlabel("Countries")
plt.legend(loc="upper right")
plt.title("2018 Winter Olympics Top Scorers")
plt.show();


In [75]:
# Make a DataFrame to plot data for graph
raw_data = {'years': ['2014', '2015', '2016', '2017'],
        'sdcase': [220, 228, 229, 215],
        'txcase': [294, 257, 262, 170],
        'sdfatal': [233, 246, 243, 231],
        'txfatal': [300, 295, 250, 200]}
df = pd.DataFrame(raw_data, columns = ['years', 'sdcase', 'txcase', 'sdfatal', 'txfatal'])
df

Unnamed: 0,years,sdcase,txcase,sdfatal,txfatal
0,2014,220,294,233,300
1,2015,228,257,246,295
2,2016,229,262,243,250
3,2017,215,170,231,200


In [78]:
# Setting the positions, colors and width for the bars
pos = list(range(len(df['sdcase']))) 
width = 0.20 
    
# Plotting the bars
fig, ax = plt.subplots(figsize=(10,6))

# Create a bar with sdcase data,
# in position pos,
plt.bar(pos, 
        #using df['sdcase'] data,
        df['sdcase'], 
        # of width
        width,  
        # with color
        color='black', 
        # with label the first value in years
        label=df['years'][0]) 

# Create a bar with txcase data,
# in position pos + some width buffer,
plt.bar([p + width for p in pos], 
        #using df['txcase'] data,
        df['txcase'],
        # of width
        width,  
        # with color
        color='blue', 
        # with label the second value in first_name
        label=df['years'][1]) 

# Create a bar with post_score data,
# in position pos + some width buffer,
plt.bar([p + width*2 for p in pos], 
        #using df['sdfatal'] data,
        df['sdfatal'], 
        # of width
        width, 
        # with color
        color='grey', 
        # with label the third value in years
        label=df['years'][2]) 
# Create a bar with post_score data,
# in position pos + some width buffer,
plt.bar([p + width*3 for p in pos], 
        #using df['yxfatal'] data,
        df['txfatal'], 
        # of width
        width, 
        # with color
        color='skyblue', 
        # with label the third value in years
        label=df['years'][3]) 


# Set the y axis label
ax.set_ylabel('Number of Crashes and Fatalities')

# Set the chart's title
ax.set_title('San Deigo vs Dallas Fatalities Data')

# Set the position of the x ticks
ax.set_xticks([p + 1.5 * width for p in pos])

# Set the labels for the x ticks
ax.set_xticklabels(df['years'])

# Setting the x-axis and y-axis limits
plt.xlim(min(pos)-width, max(pos)+width*4)
plt.ylim([0,350])

# Adding the legend and showing the plot
plt.legend(['San Diego Cases', 'Dallas Cases', 'San Diego Fatalities', 'Dallas Fatalities'], loc='upper right')
plt.grid()
plt.savefig('Cases and Fatalities.png')
plt.show()

<IPython.core.display.Javascript object>