# Project type level analysis

In [182]:
import psycopg2
import json
import pandas as pd
import geopandas as gpd
from shapely.geometry import shape
from shapely.geometry import LineString, MultiLineString
import matplotlib.pyplot as plt
import re

from config import DB_VISION_ZERO, DB_MOPED
from helper import dict_factory, to_json_list


def get_data(query, cursor):
    """
    Get data from database
    """
    cursor.execute(query)
    data = cursor.fetchall()
    field_names = [i[0] for i in cursor.description]
    df = pd.DataFrame(data, columns=field_names)

    return df

conn_vz = psycopg2.connect(
    dbname = DB_VISION_ZERO['dbname'],
    user = DB_VISION_ZERO["user"],
    host = DB_VISION_ZERO["host"],
    password = DB_VISION_ZERO["password"],
    port=5432
)

conn_moped = psycopg2.connect(
    dbname = DB_MOPED["dbname"],
    user = DB_MOPED["user"],
    host = DB_MOPED["host"],
    password = DB_MOPED["password"],
    port = 5432
)

cursor_vz = conn_vz.cursor()
cursor_moped = conn_moped.cursor()

## Moped data

In [183]:
# Creating moped dataframe
QUERY_MOPED = """SELECT project_id, project_component_id, geometry, 
line_geometry, substantial_completion_date, component_name 
FROM component_arcgis_online_view"""

# Creating moped dataframe
df_moped = get_data(QUERY_MOPED, cursor_moped)

# Dropping observations where substantial completion date and component type are missing
df_moped_filter = df_moped.dropna(subset=['substantial_completion_date', 
                                          'component_name',
                                          'line_geometry'])

In [184]:
# Filtering down to Intersection moped projects
df_moped_intersections = df_moped_filter[df_moped_filter['component_name'] == "Intersection"]

# Apply the geometry transformation
df_moped_intersections.loc[:, "geometry"] = df_moped_intersections["geometry"].apply(lambda x: shape(x) if x is not None else None)
df_moped_intersections.loc[:, "line_geometry"] = df_moped_intersections["line_geometry"].apply(lambda x: shape(x) if x is not None else None)

# Adding a unique ID column
df_moped_intersections.insert(0, 'moped_unique_id', range(1, 1 + len(df_moped_intersections)))

# Creating geo data frame
gdf_moped_intersections = gpd.GeoDataFrame(df_moped_intersections, geometry="line_geometry")

In [185]:
gdf_moped_intersections

Unnamed: 0,moped_unique_id,project_id,project_component_id,geometry,line_geometry,substantial_completion_date,component_name
12,1,12,469.0,"MULTIPOINT (-97.734674 30.266826, -97.734306 3...","MULTILINESTRING ((-97.73460 30.26683, -97.7346...",2022-10-10 05:00:00+00:00,Intersection
372,2,278,11606.0,MULTIPOINT (-97.768806439 30.215194039),"LINESTRING (-97.76873 30.21519, -97.76874 30.2...",2021-12-01 06:00:00+00:00,Intersection
809,3,571,757.0,MULTIPOINT (-97.689956 30.216907),"LINESTRING (-97.68988 30.21691, -97.68988 30.2...",2023-07-31 05:00:00+00:00,Intersection
863,4,619,819.0,MULTIPOINT (-97.884885883 30.193983211),"LINESTRING (-97.88481 30.19398, -97.88481 30.1...",2008-01-01 06:00:00+00:00,Intersection
1032,5,749,1008.0,MULTIPOINT (-97.75144677999999 30.201474407),"LINESTRING (-97.75137 30.20147, -97.75138 30.2...",2020-12-17 06:00:00+00:00,Intersection
...,...,...,...,...,...,...,...
12048,222,3485,12301.0,MULTIPOINT (-97.75162047000001 30.30777717),"LINESTRING (-97.75155 30.30778, -97.75155 30.3...",2019-10-01 05:00:00+00:00,Intersection
12049,223,3486,12308.0,MULTIPOINT (-97.728465347 30.308568413),"LINESTRING (-97.72839 30.30857, -97.72839 30.3...",2020-08-01 05:00:00+00:00,Intersection
12072,224,3490,12349.0,MULTIPOINT (-97.70899967299999 30.265431859),"LINESTRING (-97.70893 30.26543, -97.70893 30.2...",2020-08-01 05:00:00+00:00,Intersection
12073,225,3490,12348.0,MULTIPOINT (-97.69046950000001 30.27354869),"LINESTRING (-97.69040 30.27355, -97.69040 30.2...",2020-08-01 05:00:00+00:00,Intersection


## INRIX data

In [186]:
inrix_intersections = pd.read_csv("../Data/intersections.csv")

# Keeping only those observations where latitude and longitude data is present
inrix_intersections = inrix_intersections[inrix_intersections['lat'].notnull() & inrix_intersections['lon'].notnull()]

In [187]:
inrix_intersections.columns

Index(['Unnamed: 0', 'id', 'name', 'lat', 'lon', 'totalVehicleVolume',
       'controlDelayAvg', 'levelOfService', 'Percent Arrival on Green',
       'Split Failure Percent', 'startDate', 'endDate', 'day count',
       'startTime', 'endTime', 'days_of_week', 'reportID', 'lookup', 'ID',
       'Month', 'Year', 'Time'],
      dtype='object')

In [188]:
inrix_intersections = inrix_intersections[['id', 'name', 'lat', 'lon', 'totalVehicleVolume', 'startDate', 'endDate', 'day count']]

In [189]:
# Converting latitude and longitude to geographical information
gdf_inrix_intersections = gpd.GeoDataFrame(inrix_intersections,
                          geometry=gpd.points_from_xy(inrix_intersections.lon,
                                                      inrix_intersections.lat),
                                                      crs='EPSG:4326')

gdf_inrix_intersections.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
RangeIndex: 58666 entries, 0 to 58665
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   id                  58666 non-null  object  
 1   name                58666 non-null  object  
 2   lat                 58666 non-null  float64 
 3   lon                 58666 non-null  float64 
 4   totalVehicleVolume  58666 non-null  float64 
 5   startDate           58666 non-null  object  
 6   endDate             58666 non-null  object  
 7   day count           58666 non-null  int64   
 8   geometry            58666 non-null  geometry
dtypes: float64(3), geometry(1), int64(1), object(4)
memory usage: 4.0+ MB


In [190]:
gdf_inrix_intersections.iloc[:, 4:14]

Unnamed: 0,totalVehicleVolume,startDate,endDate,day count,geometry
0,115272.0,2021-12-01,2022-01-01,23,POINT (-97.78691 30.16707)
1,37130.0,2021-12-01,2022-01-01,23,POINT (-97.74490 30.18447)
2,78719.0,2021-12-01,2022-01-01,23,POINT (-97.81631 30.18591)
3,18875.0,2021-12-01,2022-01-01,23,POINT (-97.74121 30.18958)
4,107775.0,2021-12-01,2022-01-01,23,POINT (-97.78782 30.19708)
...,...,...,...,...,...
58661,56625.0,2020-07-01,2020-08-01,23,POINT (-97.71781 30.36316)
58662,41727.0,2020-07-01,2020-08-01,23,POINT (-97.71480 30.36419)
58663,133314.0,2020-07-01,2020-08-01,23,POINT (-97.67667 30.42679)
58664,59872.0,2020-07-01,2020-08-01,23,POINT (-97.77042 30.43515)


## Spatial join

In [191]:
# Creating buffer on moped for joining
gdf_moped_intersections = gdf_moped_intersections.set_geometry('line_geometry')
gdf_moped_intersections.set_crs(epsg=4326, inplace=True)
gdf_moped_intersections_proj = gdf_moped_intersections.to_crs(epsg=32614)
buffer_distance = 5

gdf_moped_intersections_proj = gdf_moped_intersections.to_crs(epsg=32614)

gdf_moped_intersections_proj['buffered_geometry'] = gdf_moped_intersections_proj.geometry.buffer(buffer_distance)
buffered_moped_gdf = gdf_moped_intersections_proj.set_geometry('buffered_geometry').to_crs('EPSG:4326')

In [192]:
# Spatial join
moped_inrix_intersections = gpd.sjoin(gdf_inrix_intersections, buffered_moped_gdf, how='inner')

In [193]:
# Converting columns to date-time format
moped_inrix_intersections['startDate'] = pd.to_datetime(moped_inrix_intersections['startDate']).dt.tz_localize('UTC')
moped_inrix_intersections['endDate'] = pd.to_datetime(moped_inrix_intersections['endDate']).dt.tz_localize('UTC')

In [194]:
moped_inrix_intersections.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1462 entries, 42 to 57260
Data columns (total 17 columns):
 #   Column                       Non-Null Count  Dtype              
---  ------                       --------------  -----              
 0   id                           1462 non-null   object             
 1   name                         1462 non-null   object             
 2   lat                          1462 non-null   float64            
 3   lon                          1462 non-null   float64            
 4   totalVehicleVolume           1462 non-null   float64            
 5   startDate                    1462 non-null   datetime64[ns, UTC]
 6   endDate                      1462 non-null   datetime64[ns, UTC]
 7   day count                    1462 non-null   int64              
 8   geometry_left                1462 non-null   geometry           
 9   index_right                  1462 non-null   int64              
 10  moped_unique_id              1462 non-null   int64 

## Traffice volume

In [195]:
# Calculating traffic volume

results = []

for moped_id, group in moped_inrix_intersections.groupby("moped_unique_id"):
    
    # Getting the completition date for each unique moped component
    substantial_completion_date = group["substantial_completion_date"].iloc[0]

    # Calculating the total volume before and after the completion date
    total_volume_before = group[group["endDate"] < substantial_completion_date]["totalVehicleVolume"].sum()
    total_volume_after = group[group["startDate"] >= substantial_completion_date]["totalVehicleVolume"].sum()

    # Calculating the number of days before and after the completion date
    days_before = group[group["endDate"] < substantial_completion_date]["day count"].sum()
    days_after = group[group["startDate"] >= substantial_completion_date]["day count"].sum()

    # Annualizing the traffic volume
    annualized_volume_before = (total_volume_before / days_before) * 365 if days_before != 0 else 0
    annualized_volume_after = (total_volume_after / days_after) * 365 if days_after != 0 else 0

    # Creating a difference column
    annualized_volume_delta = annualized_volume_after - annualized_volume_before

    # Combining results together in the list
    results.append({
        "moped_unique_id": moped_id,
        "annualized_volume_before": annualized_volume_before,
        "annualized_volume_after": annualized_volume_after,
        "annualized_volume_delta": annualized_volume_delta
    })

annualized_volume = pd.DataFrame(results)

In [196]:
# Merging useful information
additional_info = moped_inrix_intersections[['moped_unique_id',
                                             'id',
                                             'name',
                                             'line_geometry',
                                             'substantial_completion_date',
                                             'component_name']].drop_duplicates()

annualized_volume = annualized_volume.merge(additional_info, on='moped_unique_id', how='left')

# Renaming columns
annualized_volume.rename(columns={"id": "inrix_id"}, inplace=True)

In [197]:
annualized_volume

Unnamed: 0,moped_unique_id,annualized_volume_before,annualized_volume_after,annualized_volume_delta,inrix_id,name,line_geometry,substantial_completion_date,component_name
0,1,1749061.0,0.0,-1749061.0,30.2677_-97.7343,North Interstate 35 & East 8th Street,"MULTILINESTRING ((621721.871 3349029.909, 6217...",2022-10-10 05:00:00+00:00,Intersection
1,1,1749061.0,0.0,-1749061.0,30.2666_-97.7339,North Interstate 35 & East 7th Street,"MULTILINESTRING ((621721.871 3349029.909, 6217...",2022-10-10 05:00:00+00:00,Intersection
2,2,1325175.0,1424946.0,99771.77,30.2152_-97.7688,South Congress Avenue & Sheraton Avenue,"LINESTRING (618500.269 3343271.742, 618500.134...",2021-12-01 06:00:00+00:00,Intersection
3,10,0.0,2128830.0,2128830.0,30.2977_-97.7089,Airport Boulevard & Aldrich Street,"LINESTRING (624148.929 3352495.726, 624148.795...",2018-05-07 05:00:00+00:00,Intersection
4,13,0.0,1820213.0,1820213.0,30.2760_-97.7360,Trinity Street & East 15th Street,"LINESTRING (621578.180 3350039.713, 621578.045...",2014-07-07 05:00:00+00:00,Intersection
5,16,0.0,1769235.0,1769235.0,30.2641_-97.7470,South 1st Street & West Cesar Chavez Street,"LINESTRING (620536.577 3348714.358, 620536.442...",2013-08-23 05:00:00+00:00,Intersection
6,18,0.0,1243831.0,1243831.0,30.2838_-97.7419,West 21st Street & Guadalupe Street,"LINESTRING (621003.981 3350904.575, 621003.846...",2013-10-29 05:00:00+00:00,Intersection
7,28,0.0,1949450.0,1949450.0,30.2940_-97.7079,Airport Boulevard & Zach Scott Street,"LINESTRING (624264.212 3352084.842, 624264.078...",2019-06-11 05:00:00+00:00,Intersection
8,28,0.0,1949450.0,1949450.0,30.2940_-97.7079,Airport Boulevard,"LINESTRING (624264.212 3352084.842, 624264.078...",2019-06-11 05:00:00+00:00,Intersection
9,32,1292227.0,1539647.0,247420.3,30.3071_-97.7475,Shoal Creek Boulevard & West 38th Street,"LINESTRING (620434.045 3353476.410, 620433.910...",2020-07-15 05:00:00+00:00,Intersection


In [198]:
annualized_volume.to_csv('../Output/annualized_volume.csv')