In [12]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import geopandas as gpd
from shapely.geometry import Point
import folium
import requests
from pathlib import Path
import os


In [2]:
# Configurations for better output
pd.set_option('display.max_columns', None)
pd.set_option('display.expand_frame_repr', False)
plt.style.use('seaborn-darkgrid')

In [6]:
# Function to load data from API
def load_data_from_api(url):
    response = requests.get(url)
    data = response.json()
    return pd.DataFrame(data)

# Loading data from APIs (if used)
# Example API for traffic stops
traffic_stops_url = 'https://opendata.maryland.gov/resource/tx73-47dk.json'
traffic_stops_data = load_data_from_api(traffic_stops_url)



In [16]:
# Define the base directory
base_dir = Path('projectdata')

# Specific files
aadt_points_file = base_dir / 'Maryland_Annual_Average_Daily_Traffic_-_Annual_Average_Daily_Traffic_(SHA_Statewide_AADT_Points).csv'
aadt_lines_file = base_dir / 'Maryland_Annual_Average_Daily_Traffic_-_Annual_Average_Daily_Traffic_(SHA_Statewide_AADT_Lines).csv'
pedestrian_injury_file = base_dir / 'SHIP_Pedestrian_Injury_Rate_on_Public_Roads_2009-2022_20240505.csv'


In [17]:
if not aadt_points_file.exists():
    print(f"The file {aadt_points_file} does not exist!")
else:
    aadt_points_data = pd.read_csv(aadt_points_file)

if not aadt_lines_file.exists():
    print(f"The file {aadt_lines_file} does not exist!")
else:
    aadt_lines_data = pd.read_csv(aadt_lines_file)

if not pedestrian_injury_file.exists():
    print(f"The file {pedestrian_injury_file} does not exist!")
else:
    pedestrian_injury_data = pd.read_csv(pedestrian_injury_file)


  aadt_points_data = pd.read_csv(aadt_points_file)
  aadt_lines_data = pd.read_csv(aadt_lines_file)


In [18]:
aadt_points_data = pd.read_csv(aadt_points_file)
aadt_lines_data = pd.read_csv(aadt_lines_file)
pedestrian_injury_data = pd.read_csv(pedestrian_injury_file)

  aadt_points_data = pd.read_csv(aadt_points_file)
  aadt_lines_data = pd.read_csv(aadt_lines_file)


In [19]:
# General cleaning for all datasets
def clean_data(df):
    # Example: Convert dates, handle missing values
    df.dropna(inplace=True)
    return df

aadt_points_data = clean_data(aadt_points_data)
aadt_lines_data = clean_data(aadt_lines_data)
pedestrian_injury_data = clean_data(pedestrian_injury_data)


In [20]:
# Merging data examples
# Assuming common columns 'Date' and 'Location' for simplicity
combined_data = pd.merge(aadt_points_data, aadt_lines_data, on=['Location'], how='inner')
combined_data = pd.merge(combined_data, pedestrian_injury_data, on=['Date', 'Location'], how='inner')


KeyError: 'Location'

In [21]:
print("Columns in aadt_points_data:", aadt_points_data.columns)
print("Columns in aadt_lines_data:", aadt_lines_data.columns)
print("Columns in pedestrian_injury_data:", pedestrian_injury_data.columns)


Columns in aadt_points_data: Index(['X', 'Y', 'OBJECTID', 'LOCATION_ID', 'COUNTY_ID', 'COUNTY_DESC',
       'MUN_SORT', 'MUNICIPALITY', 'ROADNAME', 'ID_PREFIX',
       ...
       'AAWDT_2017_ANNO', 'AAWDT_2018_ANNO', 'AAWDT_ANNO',
       'MOTORCYCLE_AADT_ANNO', 'CAR_AADT_ANNO', 'BUS_AADT_ANNO',
       'LIGHT_TRUCK_AADT_ANNO', 'SINGLE_UNIT_AADT_ANNO',
       'COMBINATION_UNIT_AADT_ANNO', 'TRUCK_AADT_ANNO'],
      dtype='object', length=102)
Columns in aadt_lines_data: Index(['OBJECTID', 'LOCATION_ID', 'COUNTY_ID', 'COUNTY_DESC', 'MUN_SORT',
       'MUNICIPALITY', 'ROADNAME', 'ID_PREFIX', 'ID_RTE_NO', 'MP_SUFFIX',
       ...
       'AAWDT_2018_ANNO', 'AAWDT_ANNO', 'MOTORCYCLE_AADT_ANNO',
       'CAR_AADT_ANNO', 'BUS_AADT_ANNO', 'LIGHT_TRUCK_AADT_ANNO',
       'SINGLE_UNIT_AADT_ANNO', 'COMBINATION_UNIT_AADT_ANNO',
       'TRUCK_AADT_ANNO', 'Shape_Length'],
      dtype='object', length=102)
Columns in pedestrian_injury_data: Index(['Jurisdiction', 'Value', 'Race/ ethnicity', 'Year', 'Measu

In [None]:
# Statistical summary and correlations
print(combined_data.describe())
sns.heatmap(combined_data.corr(), annot=True)
plt.show()


In [None]:
# More complex analyses like regression, clustering, etc.
from sklearn.linear_model import LinearRegression

# Example: Linear regression to predict AADT from other factors
model = LinearRegression()
model.fit(combined_data[['Num_Lanes']], combined_data['AADT'])
predictions = model.predict(combined_data[['Num_Lanes']])


In [None]:
# Geographic visualization of traffic data
map = folium.Map(location=[38.9072, -76.8569], zoom_start=10)
for idx, row in combined_data.iterrows():
    folium.CircleMarker([row['Latitude'], row['Longitude']], radius=5, popup=str(row['AADT']), color='red').add_to(map)
map.save('Traffic_Map.html')


In [None]:
# Summarize key findings and provide recommendations based on the analysis
print("Traffic volumes are highest on roads with X characteristic, suggesting Y policy interventions.")
