In [75]:
# parameters
# Default configuration for this notebook. YEAR and SAVE_OUTPUT are reassigned
# by the following "# Parameters" cell (presumably injected by a
# papermill-style runner — TODO confirm), so the values here act as fallbacks.

# Directory holding the per-dataset station files (read as
# INPUT_DATA_PATH/<dataset>/...).
INPUT_DATA_PATH = "../data/interim/"
# Directory holding the "<dataset>_aadbt<YEAR>.parquet" count files.
INPUT_PROCESSED_DATA_PATH = "../data/processed/"

# Year used to build the input file names and to fill a missing "Year" column.
YEAR = 2017
# Not read anywhere in the visible part of the notebook; presumably gates
# writing of outputs further down — verify.
SAVE_OUTPUT = True
# Output locations; not referenced in the visible part of the notebook.
OUTPUT_DATA_PATH = "../data/processed/"
OUTPUT_FIGURES_PATH = "../reports/figures/"

In [76]:
# Parameters
# Overrides the defaults set in the previous cell; the outputs recorded in this
# notebook show Year == 2024, i.e. they were produced with these values.
YEAR = 2024  # replaces the default YEAR = 2017
SAVE_OUTPUT = True


In [77]:
# Import necessary libraries
from pathlib import Path

import geopandas as gpd
import pandas as pd
import plotly.express as px


In [78]:
def read_and_merge_data(dataset, year, id_col):
    """Load the AADBT table and the station table of one dataset and inner-join them.

    Files are located through the module-level ``INPUT_PROCESSED_DATA_PATH``
    (counts) and ``INPUT_DATA_PATH`` (stations) constants.

    Parameters
    ----------
    dataset : str
        Dataset name; used to build ``{dataset}_aadbt{year}.parquet`` and the
        ``{dataset}/`` station sub-directory.
    year : int
        Year used in the file names. Also written into a ``Year`` column when
        the count file does not provide one.
    id_col : str
        Name of the station identifier column present in both tables; it is
        the join key.

    Returns
    -------
    The inner join of stations and counts with the columns ``id_col``,
    ``Desc_aforament``, ``direction_counter``, ``directionality``,
    ``geometry``, ``AADBT`` and ``Year``.

    Raises
    ------
    KeyError
        If a required column is missing from either table.
    """
    # Read count data
    aadbt_path = Path(INPUT_PROCESSED_DATA_PATH) / f"{dataset}_aadbt{year}.parquet"
    aadbt_data = pd.read_parquet(aadbt_path)
    # Only synthesise "Year" when it is really absent. A missing id/AADBT
    # column now surfaces directly as a KeyError from the selection below
    # instead of being caught and retried.
    if "Year" not in aadbt_data.columns:
        aadbt_data = aadbt_data.assign(Year=year)
    aadbt_data = aadbt_data[[id_col, "AADBT", "Year"]]
    print("AADBT", aadbt_data.shape)

    # Read station data: prefer the year-independent file, fall back to the
    # per-year one when the former does not exist.
    station_dir = Path(INPUT_DATA_PATH) / dataset
    try:
        station_data = gpd.read_parquet(station_dir / f"{dataset}_stations.parquet")
    except FileNotFoundError:
        station_data = gpd.read_parquet(station_dir / f"stations_data{year}.parquet")
    station_data = station_data[
        [id_col, "Desc_aforament", "direction_counter", "directionality", "geometry"]
    ]
    print("Station", station_data.shape)

    # Join the data; stations without a count (and vice versa) are dropped.
    merged_data = pd.merge(station_data, aadbt_data, how="inner", on=id_col)
    print("Joined:", merged_data.shape)
    print(merged_data.columns)

    return merged_data

# Load data

In [79]:
# BiciZen stations joined with their AADBT values for YEAR.
# NOTE(review): the recorded output shows geometries in projected coordinates
# here, whereas the BACC and counter outputs show lon/lat points — confirm the
# CRS before using these frames together spatially.
bicizen = read_and_merge_data(dataset="bicizen", year=YEAR, id_col="ROOT_ID")
bicizen.head()

AADBT (26, 3)
Station (55, 5)
Joined: (26, 7)
Index(['ROOT_ID', 'Desc_aforament', 'direction_counter', 'directionality',
       'geometry', 'AADBT', 'Year'],
      dtype='object')


Unnamed: 0,ROOT_ID,Desc_aforament,direction_counter,directionality,geometry,AADBT,Year
0,1048273,C/ de Pi i Margall 114,N,,POINT (430189.262 4584777.934),383.739921,2024
1,1048896,C/ Jocs Florals 175,SE,,POINT (427815.131 4580216.804),488.159634,2024
2,1047975,Pg. Sant Joan 8,NW,,POINT (431434.433 4582633.172),4052.869891,2024
3,1048955,C/Rosselló 161,NE,,POINT (429152.250 4582499.693),215.127711,2024
4,1050910,"Passeig Maragall, 242",SE,,POINT (430865.603 4586454.646),504.215104,2024


In [80]:
# BACC stations joined with their AADBT values for YEAR.
bacc = read_and_merge_data(dataset="bacc", year=YEAR, id_col="ROOT_ID")
bacc.head()

AADBT (5, 3)
Station (6, 5)
Joined: (5, 7)
Index(['ROOT_ID', 'Desc_aforament', 'direction_counter', 'directionality',
       'geometry', 'AADBT', 'Year'],
      dtype='object')


Unnamed: 0,ROOT_ID,Desc_aforament,direction_counter,directionality,geometry,AADBT,Year
0,1095689,Diagonal Plaza 5 Oros,NE,Unidirectional,POINT (2.15908 41.39633),4541.630416,2024
1,1095690,Diagonal Amb Ganduxer,SW,Bidirectional,POINT (2.13942 41.39164),5487.170484,2024
2,1095691,Meridiana Amb Aragó,N,Bidirectional,POINT (2.18686 41.40906),4877.750214,2024
3,1095692,Diagonal Amb Glories,NE,Bidirectional,POINT (2.18353 41.40272),2999.612144,2024
4,1095693,Gran Via Amb La Bordeta,SW,Bidirectional,POINT (2.14703 41.37364),1942.134539,2024


In [81]:
# Counters are loaded by hand rather than through read_and_merge_data: the
# station key is called "Id_aforament" and must be renamed and cast, only three
# station columns are kept, and "Year" is always set to YEAR.
dataset = "counter"

# Count data: stamp the year, keep the key and the AADBT value.
aadbt_data = (
    pd.read_parquet(Path(INPUT_PROCESSED_DATA_PATH) / f"{dataset}_aadbt{YEAR}.parquet")
    .assign(Year=YEAR)
    .loc[:, ["id", "AADBT", "Year"]]
)
print("AADBT", aadbt_data.shape)

# Station data: align the key name and dtype with the count table.
station_data = (
    gpd.read_parquet(Path(INPUT_DATA_PATH) / f"{dataset}/stations_data{YEAR}.parquet")
    .rename(columns={"Id_aforament": "id"})
    .assign(id=lambda stations: stations["id"].astype(int))
    .loc[:, ["id", "Desc_aforament", "geometry"]]
)
print("Station", station_data.shape)

# Inner join on the shared key.
merged_data = station_data.merge(aadbt_data, how="inner", on="id")
print("Joined:", merged_data.shape)
print(merged_data.columns)

counters = merged_data
counters.head()

AADBT (867, 3)
Station (381, 3)
Joined: (381, 5)
Index(['id', 'Desc_aforament', 'geometry', 'AADBT', 'Year'], dtype='object')


Unnamed: 0,id,Desc_aforament,geometry,AADBT,Year
0,20001,DIPUTACIÓ - COMTE D'URGELL (carril BICI),POINT (2.15709 41.38300),3886.0,2024
1,20002,DIPUTACIÓ - COMTE BORELL (carrril BICI),POINT (2.15709 41.38300),8083.0,2024
2,20003,GRAN VIA DE LES CORTS CATALANES - MUNTANER (ca...,POINT (2.16165 41.38459),6807.0,2024
3,20005,MUNTANER - GRAN VIA DE LES CORTS CATALANES (ca...,POINT (2.16310 41.38388),2953.0,2024
4,20006,MUNTANER - SEPULVEDA (carril BICI sentit munta...,POINT (2.16313 41.38388),2929.0,2024


# Manage data

## Scale BiciZen (BACC scaling is currently disabled)

In [82]:
# Linear rescaling of the BiciZen AADBT values: AADBT <- intercept + AADBT * slope.
# NOTE(review): the origin of these coefficients is not shown in this notebook —
# document where they come from.
# NOTE: the column is overwritten in place, so re-running this cell applies the
# transform a second time; reload `bicizen` before re-running.
BICIZEN_SCALE_INTERCEPT = -477.175
BICIZEN_SCALE_SLOPE = 3.266
bicizen["AADBT"] = BICIZEN_SCALE_INTERCEPT + bicizen["AADBT"] * BICIZEN_SCALE_SLOPE

# The same transform for BACC is currently disabled:
# bacc["AADBT"]= -477.175 + bacc["AADBT"] * 3.266


In [83]:
# Keep only counters whose AADBT lies in the closed interval [0, 10000]
# (rows with a missing AADBT are dropped as well).
aadbt_in_range = counters["AADBT"].between(0, 10000)
counters = counters[aadbt_in_range]


In [85]:
# Combine the three sources into one long-format frame for Plotly: one row per
# station with its source label, AADBT value and station identifier
# (ROOT_ID for BiciZen/BACC, id for the counters).
data = pd.concat(
    [
        pd.DataFrame({'Dataset': 'BiciZen', 'AADBT': bicizen['AADBT'], 'ID': bicizen['ROOT_ID']}),
        pd.DataFrame({'Dataset': 'BACC', 'AADBT': bacc['AADBT'], 'ID': bacc['ROOT_ID']}),
        pd.DataFrame({'Dataset': 'Counters', 'AADBT': counters['AADBT'], 'ID': counters['id']}),
    ],
    # The source frames carry overlapping row labels; renumber so that the
    # combined frame has a unique index.
    ignore_index=True,
)

# Create the boxplot (requires `import plotly.express as px` in the imports cell).
fig = px.box(
    data,
    x='Dataset',
    y='AADBT',
    hover_data=['ID'],
    title="Boxplot of AADBT for BiciZen, BACC, and Counters",
)
fig.show()
