# Congestion Analysis
Akshitha Ramachandran <br>

In [None]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
import seaborn as sns
import statsmodels.api as sm
# plt.style.use('fivethirtyeight')
%matplotlib inline

# Import/Clean Data
- Convert all times to datetime stamps
- Add column for years
- Add column for months
- Add column for hour of day

In [None]:
# Load the raw accident records. Later cells read the dispatch_ts, lat, long,
# mode_type, location_type, street, xstreet1 and xstreet2 columns from it.
accident_data = pd.read_csv("updated_accident_data.csv")

In [None]:
# Parse the dispatch timestamps so the .dt accessor can be used below.
accident_data['dispatch_ts'] = pd.to_datetime(accident_data['dispatch_ts'])

In [None]:
# Derive calendar columns from the dispatch timestamp.
# Each pair is (new column name, attribute of the .dt accessor).
for new_column, dt_attribute in (('year', 'year'),
                                 ('month', 'month'),
                                 ('hour', 'hour'),
                                 ('weekday', 'dayofweek'),
                                 ('day', 'day')):
    accident_data[new_column] = getattr(accident_data['dispatch_ts'].dt, dt_attribute)

In [None]:
# Distinct values of each categorical / calendar column.
years, months, hours, modes, loc_types, weekdays = (
    accident_data[column].unique()
    for column in ('year', 'month', 'hour', 'mode_type', 'location_type', 'weekday')
)

# Identify Most Accident Prone Locations

In [None]:
# Order the records by coordinate (latitude first, then longitude).
accident_data = accident_data.sort_values(by=['lat', 'long'])

In [None]:
def num_accidents_df(margin, df):
    """Count, for every row of `df`, the rows of `df` that lie close to it.

    A square box of half-width `margin` is centred on each row's
    (`lat`, `long`) and the rows strictly inside that box are counted.
    For a positive margin the row itself is part of its own count.

    Parameters
    ----------
    margin : float
        Half-width of the box, applied to both `lat` and `long`.
    df : pandas.DataFrame
        Must contain numeric `lat` and `long` columns.

    Returns
    -------
    pandas.DataFrame
        The same `df` object, modified in place: a `num_accidents` column
        holding the per-row counts is added (or overwritten).
    """
    # name of the new column holding the counts
    title = "num_accidents"

    # Work on plain arrays: the counts are taken from `df` itself (not from a
    # notebook-level global), and positional writes stay correct even if the
    # index of `df` is not unique.
    lats = df['lat'].values
    longs = df['long'].values
    counts = np.zeros(len(df), dtype=int)

    for pos, (lat, lon) in enumerate(zip(lats, longs)):
        in_box = ((lats > lat - margin) & (lats < lat + margin) &
                  (longs > lon - margin) & (longs < lon + margin))
        counts[pos] = np.count_nonzero(in_box)

    df[title] = counts
    return df

In [None]:
# num_accidents_df returns the frame it was given, with the
# num_accidents column added to it.
accident_cluster = num_accidents_df(margin=0.00001, df=accident_data)

In [None]:
# Rows with the most nearby accidents first.
accident_cluster_trim = accident_cluster.sort_values(by='num_accidents', ascending=False)

# Mapping Locations to Streets

In [None]:
# Number of accident records sharing exactly the same lat/long pair,
# most frequent coordinates first.
location_counts = accident_cluster_trim.groupby(['lat', 'long']).size()
dangerous_locs = location_counts.reset_index(name='freq')
dangerous_locs = dangerous_locs.sort_values(by='freq', ascending=False)

In [None]:
def map_loc_to_street(name_df, loc_df):
    """Annotate each location in `loc_df` with street names and accident types.

    For every row of `loc_df`, the records of `name_df` with exactly the same
    `lat` and `long` are looked up and summarised.

    Parameters
    ----------
    name_df : pandas.DataFrame
        Accident records with `lat`, `long`, `xstreet1`, `xstreet2`,
        `street`, `mode_type` and `location_type` columns.
    loc_df : pandas.DataFrame
        One row per location, with `lat` and `long` columns.

    Returns
    -------
    pandas.DataFrame
        The same `loc_df` object, modified in place with three new columns:
        `streets` (list of distinct non-null names found in `xstreet1`,
        `xstreet2` and `street`), `modes` (distinct `mode_type` values) and
        `location_type` (distinct `location_type` values).
    """
    # Explicit object dtype: every cell below receives a list or an array,
    # not a plain string.
    for column in ("streets", "modes", "location_type"):
        loc_df[column] = pd.Series("", index=loc_df.index, dtype=object)

    for index, row in loc_df.iterrows():
        # All accident records at exactly this lat/long, selected once per
        # location and reused for every summary below.
        here = name_df[(name_df["lat"] == row['lat']) &
                       (name_df["long"] == row['long'])]

        # set of all streets involved in accidents at this point
        streets = np.concatenate([here[column].unique()
                                  for column in ("xstreet1", "xstreet2", "street")])
        streets = pd.Series(streets).dropna()

        # add to df
        loc_df.at[index, "streets"] = list(set(streets))
        loc_df.at[index, "modes"] = here["mode_type"].unique()
        loc_df.at[index, "location_type"] = here["location_type"].unique()

    return loc_df

In [None]:
# Attach street names, accident modes and location types to every location.
dangerous_locs = map_loc_to_street(name_df=accident_data, loc_df=dangerous_locs)
# dangerous_locs = dangerous_locs.reset_index().drop(columns=['index'])

In [None]:
# Export to csv: the full annotated location table is written here, before
# it is cut down to the first 15 rows in the next cell.
dangerous_locs.to_csv("dangerous_location.csv")

In [None]:
# Keep only the 15 most frequent locations (the table is sorted by freq).
dangerous_locs = dangerous_locs.iloc[:15]

In [None]:
# Show the 15-row table as the cell output.
dangerous_locs

# Aggregate All Accidents

In [None]:
# Number of accident records (non-null dispatch_ts) per hour of day, as a
# one-column frame indexed by hour.
# The grouped result is a Series, so the column is named through to_frame();
# Series.rename() does not take a `columns` argument.
agg_hour = accident_data.groupby("hour")['dispatch_ts'].count().to_frame(name='count')

# Load Intersection Data

In [None]:
def load_intersection(intersection_number):
    """Read the traffic-count CSV of one intersection.

    The file is looked up at
    ``Traffic Patterns/Location <n>/final_loc<n>.csv`` relative to the
    working directory. Its `Time` column is parsed to datetimes and an
    `hour` column (hour of day of `Time`) is added.

    Parameters
    ----------
    intersection_number : int or str
        Location number used in both the folder and the file name.

    Returns
    -------
    pandas.DataFrame
        The file contents with the parsed `Time` and the extra `hour` column.
    """
    loc_id = str(intersection_number)
    csv_path = "Traffic Patterns/Location {0}/final_loc{0}.csv".format(loc_id)

    counts = pd.read_csv(csv_path)
    counts['Time'] = pd.to_datetime(counts['Time'])
    counts['hour'] = counts['Time'].dt.hour
    return counts

In [None]:
def prep_intersection_data(df):
    """Sum the `Total` column of an intersection frame for each hour of day.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain `hour` and `Total` columns (as produced by
        `load_intersection`).

    Returns
    -------
    pandas.DataFrame
        Indexed by `hour`, with a single column labelled ``0`` holding the
        summed `Total`.
    """
    # Select `Total` before aggregating so that non-numeric columns such as
    # the datetime `Time` column are never summed.
    hourly_total = df.groupby("hour")["Total"].sum()

    # The column is deliberately labelled 0: the plotting cells of this
    # notebook read the result as `agg_hour_locN[0]`. (Renaming a Series with
    # `rename(columns=...)` is not supported and raises on current pandas.)
    return hourly_total.to_frame(name=0)

In [None]:
# Load the traffic counts of the eight intersections analysed below.
(loc0_data, loc3_data, loc6_data, loc9_data,
 loc11_data, loc12_data, loc14_data, loc16_data) = map(
    load_intersection, (0, 3, 6, 9, 11, 12, 14, 16))

# Prep Accident Data

In [None]:
def prep_accident_data(lat, long, margin, mode_type=None):
    """Hourly accident counts around one point, for hours 7 through 17.

    Reads the notebook-level `accident_data` frame.

    Parameters
    ----------
    lat, long : float
        Centre of the search box.
    margin : float
        Half-width of the box; records must lie strictly inside it in both
        `lat` and `long`.
    mode_type : str, optional
        When given (and truthy), only records with this `mode_type` are
        counted.

    Returns
    -------
    pandas.DataFrame
        Columns `hour` and `count`, one row per hour from 7 to 17 with a
        fresh 0..10 index. Hours without any record have a count of 0.
    """
    lat_lo, lat_hi = lat - margin, lat + margin
    long_lo, long_hi = long - margin, long + margin
    in_box = ((accident_data['lat'] > lat_lo) &
              (accident_data['lat'] < lat_hi) &
              (accident_data['long'] > long_lo) &
              (accident_data['long'] < long_hi))
    nearby = accident_data.loc[in_box]

    # optional restriction to a single accident mode
    if mode_type:
        nearby = nearby[nearby["mode_type"] == mode_type]

    # number of records (non-null dispatch_ts) per hour
    per_hour = nearby.groupby(["hour"]).count()['dispatch_ts'].reset_index()
    per_hour = per_hour.rename(columns={'dispatch_ts': 'count'})

    # left-join onto all 24 hours so that missing hours become 0
    every_hour = pd.DataFrame({"hour": list(range(24))}).set_index('hour')
    filled = every_hour.join(per_hour.set_index('hour')).fillna(0).reset_index()

    # rows 7..17 of the 24-hour table, renumbered from 0
    return filled[7:18].reset_index(drop=True)

# Location 0 Analysis

In [None]:
# Centre point (lat, long) and search margin shared by every accident query
# for location 0.
loc0_query = (42.332547, -71.072124, 0.00001)

agg_hour_loc0 = prep_intersection_data(loc0_data)
agg_hour_loc0_accident_trim = prep_accident_data(*loc0_query)
agg_hour_loc0_accident_mv_trim = prep_accident_data(*loc0_query, mode_type="mv")
agg_hour_loc0_accident_ped_trim = prep_accident_data(*loc0_query, mode_type="ped")
agg_hour_loc0_accident_bike_trim = prep_accident_data(*loc0_query, mode_type="bike")

In [None]:
# Figure 1: summed `Total` per hour at this intersection.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
ax.plot(agg_hour_loc0.index, agg_hour_loc0[0], 'o-', label="congestion")
ax.set(xlabel='Hour', ylabel='Frequency',
       title='Total Congestion By Hour of Day: Mass Ave & Melnea Cass Blvd')
ax.legend(loc="upper right")

# Figure 2: accident counts per hour, one line per accident type.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
for curve_label, hourly in (("All", agg_hour_loc0_accident_trim),
                            ("MV", agg_hour_loc0_accident_mv_trim),
                            ("Ped", agg_hour_loc0_accident_ped_trim),
                            ("Bike", agg_hour_loc0_accident_bike_trim)):
    ax.plot(hourly['hour'], hourly['count'], 'o-', label=curve_label)
ax.set(xlabel='Hour', ylabel='Frequency',
       title='Accident Counts by Type Across Hour of Day: Mass Ave & Melnea Cass Blvd')
ax.legend(loc="upper right")

# Location 3 Analysis

In [None]:
# Centre point (lat, long) and search margin shared by every accident query
# for location 3.
loc3_query = (42.328160, -71.096990, 0.00001)

agg_hour_loc3 = prep_intersection_data(loc3_data)
agg_hour_loc3_accident_trim = prep_accident_data(*loc3_query)
agg_hour_loc3_accident_mv_trim = prep_accident_data(*loc3_query, mode_type="mv")
agg_hour_loc3_accident_ped_trim = prep_accident_data(*loc3_query, mode_type="ped")
agg_hour_loc3_accident_bike_trim = prep_accident_data(*loc3_query, mode_type="bike")

In [None]:
# Figure 1: summed `Total` per hour at this intersection.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
ax.plot(agg_hour_loc3.index, agg_hour_loc3[0], 'o-', label="congestion")
ax.set(xlabel='Hour', ylabel='Frequency',
       title='Total Congestion By Hour of Day: Columbus Ave & Cedar St')
ax.legend(loc="upper right")

# Figure 2: accident counts per hour, one line per accident type.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
for curve_label, hourly in (("All", agg_hour_loc3_accident_trim),
                            ("MV", agg_hour_loc3_accident_mv_trim),
                            ("Ped", agg_hour_loc3_accident_ped_trim),
                            ("Bike", agg_hour_loc3_accident_bike_trim)):
    ax.plot(hourly['hour'], hourly['count'], 'o-', label=curve_label)
ax.set(xlabel='Hour', ylabel='Frequency',
       title='Accident Counts by Type Across Hour of Day: Columbus Ave & Cedar St')
ax.legend(loc="upper right")

# Location 6 Analysis

In [None]:
# Centre point (lat, long) and search margin shared by every accident query
# for location 6.
loc6_query = (42.30545103, -71.08057209, 0.00001)

agg_hour_loc6 = prep_intersection_data(loc6_data)
agg_hour_loc6_accident_trim = prep_accident_data(*loc6_query)
agg_hour_loc6_accident_mv_trim = prep_accident_data(*loc6_query, mode_type="mv")
agg_hour_loc6_accident_ped_trim = prep_accident_data(*loc6_query, mode_type="ped")
agg_hour_loc6_accident_bike_trim = prep_accident_data(*loc6_query, mode_type="bike")

In [None]:
# Figure 1: summed `Total` per hour at this intersection.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
ax.plot(agg_hour_loc6.index, agg_hour_loc6[0], 'o-', label="congestion")
ax.set(xlabel='Hour', ylabel='Frequency',
       title='Total Congestion By Hour of Day: Washington St & Columbia Rd')
ax.legend(loc="upper right")

# Figure 2: accident counts per hour, one line per accident type.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
for curve_label, hourly in (("All", agg_hour_loc6_accident_trim),
                            ("MV", agg_hour_loc6_accident_mv_trim),
                            ("Ped", agg_hour_loc6_accident_ped_trim),
                            ("Bike", agg_hour_loc6_accident_bike_trim)):
    ax.plot(hourly['hour'], hourly['count'], 'o-', label=curve_label)
ax.set(xlabel='Hour', ylabel='Frequency',
       title='Accident Counts by Type Across Hour of Day: Washington St & Columbia Rd')
ax.legend(loc="upper right")

# Location 9 Analysis

In [None]:
# Centre point (lat, long) and search margin shared by every accident query
# for location 9.
loc9_query = (42.33362614, -71.07336417, 0.00001)

agg_hour_loc9 = prep_intersection_data(loc9_data)
agg_hour_loc9_accident_trim = prep_accident_data(*loc9_query)
agg_hour_loc9_accident_mv_trim = prep_accident_data(*loc9_query, mode_type="mv")
agg_hour_loc9_accident_ped_trim = prep_accident_data(*loc9_query, mode_type="ped")
agg_hour_loc9_accident_bike_trim = prep_accident_data(*loc9_query, mode_type="bike")

In [None]:
# Figure 1: summed `Total` per hour at this intersection.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
ax.plot(agg_hour_loc9.index, agg_hour_loc9[0], 'o-', label="congestion")
ax.set(xlabel='Hour', ylabel='Frequency',
       title='Total Congestion By Hour of Day: Mass Ave & Albany St')
ax.legend(loc="upper right")

# Figure 2: accident counts per hour, one line per accident type.
fig, ax = plt.subplots(1, 1, figsize=(8, 6))
for curve_label, hourly in (("All", agg_hour_loc9_accident_trim),
                            ("MV", agg_hour_loc9_accident_mv_trim),
                            ("Ped", agg_hour_loc9_accident_ped_trim),
                            ("Bike", agg_hour_loc9_accident_bike_trim)):
    ax.plot(hourly['hour'], hourly['count'], 'o-', label=curve_label)
ax.set(xlabel='Hour', ylabel='Frequency',
       title='Accident Counts by Type Across Hour of Day: Mass Ave & Albany St')
ax.legend(loc="upper right")

# Location 11 Analysis

In [None]:
# Centre point (lat, long) and search margin shared by every accident query
# for location 11.
loc11_query = (42.350858, -71.089484, 0.00001)

agg_hour_loc11 = prep_intersection_data(loc11_data)
agg_hour_loc11_accident_trim = prep_accident_data(*loc11_query)
agg_hour_loc11_accident_mv_trim = prep_accident_data(*loc11_query, mode_type="mv")
agg_hour_loc11_accident_ped_trim = prep_accident_data(*loc11_query, mode_type="ped")
agg_hour_loc11_accident_bike_trim = prep_accident_data(*loc11_query, mode_type="bike")

In [None]:
# Figure 1: summed `Total` per hour at this intersection.
fig, ax = plt.subplots(1,1, figsize=(8,6))
ax.plot(agg_hour_loc11.index, agg_hour_loc11[0],'o-',label="congestion")
ax.set_xlabel('Hour')
ax.set_ylabel('Frequency')
ax.set_title('Total Congestion By Hour of Day: Beacon St & Mass Ave')
ax.legend(loc="upper right")

# Figure 2: accident counts per hour, one line per accident type.
# The plotted values are the `count` column, so the title says "Counts"
# (not "Rates") and the legend uses "MV", as in the plots for locations 0-9.
fig, ax = plt.subplots(1,1, figsize=(8,6))
ax.plot(agg_hour_loc11_accident_trim['hour'], agg_hour_loc11_accident_trim['count'],'o-', label="All")
ax.plot(agg_hour_loc11_accident_mv_trim['hour'], agg_hour_loc11_accident_mv_trim['count'],'o-', label="MV")
ax.plot(agg_hour_loc11_accident_ped_trim['hour'], agg_hour_loc11_accident_ped_trim['count'],'o-', label="Ped")
ax.plot(agg_hour_loc11_accident_bike_trim['hour'], agg_hour_loc11_accident_bike_trim['count'],'o-', label="Bike")
ax.set_xlabel('Hour')
ax.set_ylabel('Frequency')
ax.set_title('Accident Counts by Type Across Hour of Day: Beacon St & Mass Ave')
ax.legend(loc="upper right")

# Location 12 Analysis

In [None]:
# Centre point (lat, long) and search margin shared by every accident query
# for location 12.
loc12_query = (42.298123, -71.087048, 0.00001)

agg_hour_loc12 = prep_intersection_data(loc12_data)
agg_hour_loc12_accident_trim = prep_accident_data(*loc12_query)
agg_hour_loc12_accident_mv_trim = prep_accident_data(*loc12_query, mode_type="mv")
agg_hour_loc12_accident_ped_trim = prep_accident_data(*loc12_query, mode_type="ped")
agg_hour_loc12_accident_bike_trim = prep_accident_data(*loc12_query, mode_type="bike")

In [None]:
# Figure 1: summed `Total` per hour at this intersection.
fig, ax = plt.subplots(1,1, figsize=(8,6))
ax.plot(agg_hour_loc12.index, agg_hour_loc12[0],'o-',label="congestion")
ax.set_xlabel('Hour')
ax.set_ylabel('Frequency')
ax.set_title('Total Congestion By Hour of Day: Blue Hill Ave & American Legion Hwy')
ax.legend(loc="upper right")

# Figure 2: accident counts per hour, one line per accident type.
# The plotted values are the `count` column, so the title says "Counts"
# (not "Rates") and the legend uses "MV", as in the plots for locations 0-9.
fig, ax = plt.subplots(1,1, figsize=(8,6))
ax.plot(agg_hour_loc12_accident_trim['hour'], agg_hour_loc12_accident_trim['count'],'o-', label="All")
ax.plot(agg_hour_loc12_accident_mv_trim['hour'], agg_hour_loc12_accident_mv_trim['count'],'o-', label="MV")
ax.plot(agg_hour_loc12_accident_ped_trim['hour'], agg_hour_loc12_accident_ped_trim['count'],'o-', label="Ped")
ax.plot(agg_hour_loc12_accident_bike_trim['hour'], agg_hour_loc12_accident_bike_trim['count'],'o-', label="Bike")
ax.set_xlabel('Hour')
ax.set_ylabel('Frequency')
ax.set_title('Accident Counts by Type Across Hour of Day: Blue Hill Ave & American Legion Hwy')
ax.legend(loc="upper right")

# Location 14 Analysis

In [None]:
# Centre point (lat, long) and search margin shared by every accident query
# for location 14.
loc14_query = (42.33119415, -71.07481518, 0.00001)

agg_hour_loc14 = prep_intersection_data(loc14_data)
agg_hour_loc14_accident_trim = prep_accident_data(*loc14_query)
agg_hour_loc14_accident_mv_trim = prep_accident_data(*loc14_query, mode_type="mv")
agg_hour_loc14_accident_ped_trim = prep_accident_data(*loc14_query, mode_type="ped")
agg_hour_loc14_accident_bike_trim = prep_accident_data(*loc14_query, mode_type="bike")

In [None]:
# Figure 1: summed `Total` per hour at this intersection.
fig, ax = plt.subplots(1,1, figsize=(8,6))
ax.plot(agg_hour_loc14.index, agg_hour_loc14[0],'o-',label="congestion")
ax.set_xlabel('Hour')
ax.set_ylabel('Frequency')
ax.set_title('Total Congestion By Hour of Day: Hampden St & Melnea Cass Blvd')
ax.legend(loc="upper right")

# Figure 2: accident counts per hour, one line per accident type.
# The plotted values are the `count` column, so the title says "Counts"
# (not "Rates") and the legend uses "MV", as in the plots for locations 0-9.
fig, ax = plt.subplots(1,1, figsize=(8,6))
ax.plot(agg_hour_loc14_accident_trim['hour'], agg_hour_loc14_accident_trim['count'],'o-', label="All")
ax.plot(agg_hour_loc14_accident_mv_trim['hour'], agg_hour_loc14_accident_mv_trim['count'],'o-', label="MV")
ax.plot(agg_hour_loc14_accident_ped_trim['hour'], agg_hour_loc14_accident_ped_trim['count'],'o-', label="Ped")
ax.plot(agg_hour_loc14_accident_bike_trim['hour'], agg_hour_loc14_accident_bike_trim['count'],'o-', label="Bike")
ax.set_xlabel('Hour')
ax.set_ylabel('Frequency')
ax.set_title('Accident Counts by Type Across Hour of Day: Hampden St & Melnea Cass Blvd')
ax.legend(loc="upper right")

# Location 16 Analysis

In [None]:
# Centre point (lat, long) and search margin shared by every accident query
# for location 16.
loc16_query = (42.30340674, -71.08526661, 0.00001)

agg_hour_loc16 = prep_intersection_data(loc16_data)
agg_hour_loc16_accident_trim = prep_accident_data(*loc16_query)
agg_hour_loc16_accident_mv_trim = prep_accident_data(*loc16_query, mode_type="mv")
agg_hour_loc16_accident_ped_trim = prep_accident_data(*loc16_query, mode_type="ped")
agg_hour_loc16_accident_bike_trim = prep_accident_data(*loc16_query, mode_type="bike")

In [None]:
# Figure 1: summed `Total` per hour at this intersection.
fig, ax = plt.subplots(1,1, figsize=(8,6))
ax.plot(agg_hour_loc16.index, agg_hour_loc16[0],'o-',label="congestion")
ax.set_xlabel('Hour')
ax.set_ylabel('Frequency')
ax.set_title('Total Congestion By Hour of Day: Blue Hill Ave & Columbia Rd')
ax.legend(loc="upper right")

# Figure 2: accident counts per hour, one line per accident type.
# The plotted values are the `count` column, so the title says "Counts"
# (not "Rates") and the legend uses "MV", as in the plots for locations 0-9.
fig, ax = plt.subplots(1,1, figsize=(8,6))
ax.plot(agg_hour_loc16_accident_trim['hour'], agg_hour_loc16_accident_trim['count'],'o-', label="All")
ax.plot(agg_hour_loc16_accident_mv_trim['hour'], agg_hour_loc16_accident_mv_trim['count'],'o-', label="MV")
ax.plot(agg_hour_loc16_accident_ped_trim['hour'], agg_hour_loc16_accident_ped_trim['count'],'o-', label="Ped")
ax.plot(agg_hour_loc16_accident_bike_trim['hour'], agg_hour_loc16_accident_bike_trim['count'],'o-', label="Bike")
ax.set_xlabel('Hour')
ax.set_ylabel('Frequency')
ax.set_title('Accident Counts by Type Across Hour of Day: Blue Hill Ave & Columbia Rd')
ax.legend(loc="upper right")

# OLS

In [None]:
def prep_OLS_data(df):
    """Build the hourly regressor table of one intersection.

    `Time` and `Total` are discarded and every remaining column is summed
    per `hour`. If exactly 12 hourly rows result, the last one (row label
    11) is dropped. The `hour` column itself is not part of the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Intersection counts with `Time`, `Total` and `hour` columns.

    Returns
    -------
    pandas.DataFrame
        One row per hour, one column per remaining input column.
    """
    hourly = df.drop(columns=['Time', 'Total']).groupby("hour").sum().reset_index()

    # a 12-row table is cut down to its first 11 rows
    if len(hourly) == 12:
        hourly = hourly.drop(index=11)

    return hourly.drop(columns='hour')

In [None]:
def prep_OLS_data_group_turn(df):
    """Hourly counts of one intersection, grouped by movement type.

    `Time` and `Total` are discarded and the remaining columns are summed
    per `hour`; a 12-row result loses its last row (label 11). The
    ``<movement> - N/E/S/W`` columns are then added together for each of
    the movements Right, Left and Thru.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain `Time`, `Total`, `hour` and the twelve
        ``Right|Left|Thru - N|E|S|W`` columns.

    Returns
    -------
    pandas.DataFrame
        Columns `Right`, `Left` and `Thru`, one row per hour.
    """
    hourly = df.drop(columns=['Time', 'Total']).groupby("hour").sum().reset_index()
    if len(hourly) == 12:
        hourly = hourly.drop(index=11)

    def all_directions(movement):
        # N + E + S + W, added in that order
        combined = hourly[movement + " - N"]
        for direction in ("E", "S", "W"):
            combined = combined + hourly[movement + " - " + direction]
        return combined

    return pd.DataFrame({
        "Right": all_directions("Right"),
        "Left": all_directions("Left"),
        "Thru": all_directions("Thru"),
    })

In [None]:
def prep_OLS_data_group_cardinal(df):
    """Hourly counts of one intersection, grouped by direction suffix.

    `Time` and `Total` are discarded and the remaining columns are summed
    per `hour`; a 12-row result loses its last row (label 11). The
    ``Right/Left/Thru - <direction>`` columns are then added together for
    each of the directions N, S, E and W.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain `Time`, `Total`, `hour` and the twelve
        ``Right|Left|Thru - N|E|S|W`` columns.

    Returns
    -------
    pandas.DataFrame
        Columns `North`, `South`, `East` and `West`, one row per hour.
    """
    hourly = df.drop(columns=['Time', 'Total']).groupby("hour").sum().reset_index()
    if len(hourly) == 12:
        hourly = hourly.drop(index=11)

    def all_movements(direction):
        # Right + Left + Thru, added in that order
        combined = hourly["Right - " + direction]
        for movement in ("Left", "Thru"):
            combined = combined + hourly[movement + " - " + direction]
        return combined

    return pd.DataFrame({
        "North": all_movements("N"),
        "South": all_movements("S"),
        "East": all_movements("E"),
        "West": all_movements("W"),
    })

In [None]:
# Per-column regressor tables, one per intersection.
(ols_loc0, ols_loc3, ols_loc6, ols_loc9,
 ols_loc11, ols_loc12, ols_loc14, ols_loc16) = map(
    prep_OLS_data,
    (loc0_data, loc3_data, loc6_data, loc9_data,
     loc11_data, loc12_data, loc14_data, loc16_data))

In [None]:
# Regressor tables grouped by movement (Right / Left / Thru), one per intersection.
(ols_loc0_group_turn, ols_loc3_group_turn, ols_loc6_group_turn, ols_loc9_group_turn,
 ols_loc11_group_turn, ols_loc12_group_turn, ols_loc14_group_turn, ols_loc16_group_turn) = map(
    prep_OLS_data_group_turn,
    (loc0_data, loc3_data, loc6_data, loc9_data,
     loc11_data, loc12_data, loc14_data, loc16_data))

In [None]:
# Regressor tables grouped by direction (North / South / East / West),
# one per intersection.
(ols_loc0_group_cardinal, ols_loc3_group_cardinal,
 ols_loc6_group_cardinal, ols_loc9_group_cardinal,
 ols_loc11_group_cardinal, ols_loc12_group_cardinal,
 ols_loc14_group_cardinal, ols_loc16_group_cardinal) = map(
    prep_OLS_data_group_cardinal,
    (loc0_data, loc3_data, loc6_data, loc9_data,
     loc11_data, loc12_data, loc14_data, loc16_data))

In [None]:
# Regression targets: hourly accident counts (all accident types) per intersection.
y0, y3, y6, y9, y11, y12, y14, y16 = (
    hourly_accidents['count']
    for hourly_accidents in (agg_hour_loc0_accident_trim, agg_hour_loc3_accident_trim,
                             agg_hour_loc6_accident_trim, agg_hour_loc9_accident_trim,
                             agg_hour_loc11_accident_trim, agg_hour_loc12_accident_trim,
                             agg_hour_loc14_accident_trim, agg_hour_loc16_accident_trim)
)

In [None]:
# Regress hourly accident counts on the prep_OLS_data columns of all eight
# intersections, stacked on top of each other in the same order as y.
# pd.concat is used because DataFrame.append / Series.append were removed
# in pandas 2.0.
X = pd.concat([ols_loc0, ols_loc3, ols_loc6, ols_loc9, ols_loc11, ols_loc12, ols_loc14, ols_loc16])
# scale every column by its maximum over the stacked rows
X = X.apply(lambda x: x/x.max(), axis=0)

y = pd.concat([y0, y3, y6, y9, y11, y12, y14, y16])

# NOTE(review): no constant column is added to X, so the model is fitted
# without an intercept - confirm this is intended.
model = sm.OLS(y,X)
results = model.fit()
print(results.summary())

In [None]:
# Regress hourly accident counts on the Right / Left / Thru volumes of all
# eight intersections, stacked in the same order as y.
# pd.concat is used because DataFrame.append / Series.append were removed
# in pandas 2.0.
X = pd.concat([ols_loc0_group_turn, ols_loc3_group_turn, ols_loc6_group_turn,
               ols_loc9_group_turn, ols_loc11_group_turn, ols_loc12_group_turn,
               ols_loc14_group_turn, ols_loc16_group_turn])
# scale every column by its maximum over the stacked rows
X = X.apply(lambda x: x/x.max(), axis=0)

y = pd.concat([y0, y3, y6, y9, y11, y12, y14, y16])

# NOTE(review): no constant column is added to X, so the model is fitted
# without an intercept - confirm this is intended.
model = sm.OLS(y,X)
results = model.fit()
print(results.summary())

In [None]:
# Regress hourly accident counts on the North / South / East / West volumes
# of all eight intersections, stacked in the same order as y.
# The last block is location 16 (it was a second copy of location 6), so
# that every row of X lines up with the matching y16 row.
# pd.concat is used because DataFrame.append / Series.append were removed
# in pandas 2.0.
X = pd.concat([ols_loc0_group_cardinal, ols_loc3_group_cardinal, ols_loc6_group_cardinal,
               ols_loc9_group_cardinal, ols_loc11_group_cardinal, ols_loc12_group_cardinal,
               ols_loc14_group_cardinal, ols_loc16_group_cardinal])
# scale every column by its maximum over the stacked rows
X = X.apply(lambda x: x/x.max(), axis=0)

y = pd.concat([y0, y3, y6, y9, y11, y12, y14, y16])

# NOTE(review): no constant column is added to X, so the model is fitted
# without an intercept - confirm this is intended.
model = sm.OLS(y,X)
results = model.fit()
print(results.summary())

# Visualize Dangerous Locations

In [None]:
# import decartes
import geopandas as gpd
from shapely.geometry import Point, Polygon

In [None]:
# One shapely Point per dangerous location, built from (long, lat) pairs.
dangerous_geo = list(map(Point, zip(dangerous_locs['long'], dangerous_locs['lat'])))
# NOTE(review): the {'init': ...} CRS form may be reported as deprecated by
# newer geopandas / pyproj releases - confirm against the installed version.
geo_dangerous_df = gpd.GeoDataFrame(dangerous_locs,
                                    geometry=dangerous_geo,
                                    crs={'init':'epsg:4326'})

In [None]:
# Street network as a grey background layer.
street_map = gpd.read_file('Boston_Street_Segments/Boston_Street_Segments.shp')
fig, ax = plt.subplots(figsize=(15, 15))
street_style = dict(alpha=0.4, color="grey")
street_map.plot(ax=ax, **street_style)

# Dangerous locations drawn on top as red dots.
marker_style = dict(markersize=10, color="red", marker="o", label="Dangerous Locations")
geo_dangerous_df.plot(ax=ax, **marker_style)
plt.legend(prop={'size': 10})
plt.show()

# Insights
- Of the 17 most dangerous locations in Boston: 
    - 13 are intersections: of these we have data for 7
    - 4 are on streets: 3 on I-93 and one on JFK surface road