In [1]:
# parameters
# Directory from which the per-dataset station files are read.
INPUT_DATA_PATH = "../data/interim/"
# Directory from which the AADBT tables and the oriented-stations file are read.
INPUT_PROCESSED_DATA_PATH = "../data/processed/"

# Year embedded in the AADBT input file names and in the output file name.
YEAR = 2023
# When True, the last cell writes the concatenated datasets to OUTPUT_DATA_PATH.
SAVE_OUTPUT = True
OUTPUT_DATA_PATH = "../data/processed/"
# Not referenced by any cell visible in this notebook.
OUTPUT_FIGURES_PATH = "../reports/figures/"

In [2]:
# Parameters
# These assignments replace the YEAR and SAVE_OUTPUT values set in the
# previous cell; every later cell runs with the values given here.
# NOTE(review): presumably injected by a parameterised run (e.g. papermill) — confirm.
YEAR = 2024
SAVE_OUTPUT = True


In [3]:
# Import necessary libraries
import geopandas as gpd
import pandas as pd
from pathlib import Path
import plotly.express as px
import numpy as np


In [4]:
def read_and_merge_data(dataset, year, id_col):
    """Load a dataset's AADBT table and its station table and inner-join them.

    Files are located through the notebook-level ``INPUT_PROCESSED_DATA_PATH``
    (AADBT table) and ``INPUT_DATA_PATH`` (station table) constants.

    Parameters
    ----------
    dataset : str
        Dataset name used to build the file names, e.g. ``"bicizen"``.
    year : int
        Year embedded in the AADBT file name and in the fallback station file
        name. Also used to fill ``Year`` when the AADBT file lacks that column.
    id_col : str
        Name of the station identifier column shared by both tables.

    Returns
    -------
    geopandas.GeoDataFrame
        Stations that have an AADBT value, with columns ``id_col``,
        ``Desc_aforament``, ``direction_counter``, ``directionality``,
        ``geometry``, ``AADBT`` and ``Year``.

    Raises
    ------
    FileNotFoundError
        If the AADBT file, or both candidate station files, do not exist.
    KeyError
        If the AADBT table lacks ``id_col`` or ``AADBT``, or the station table
        lacks one of the selected columns.
    """
    # Read count data
    aadbt_path = Path(INPUT_PROCESSED_DATA_PATH) / f"{dataset}_aadbt{year}.parquet"
    aadbt_data = pd.read_parquet(aadbt_path)

    # Only the Year column is optional. Check for it explicitly rather than
    # catching KeyError, so that a missing id/AADBT column is reported as such
    # and an existing Year column is never overwritten.
    if 'Year' not in aadbt_data.columns:
        aadbt_data["Year"] = year
    missing_cols = [col for col in (id_col, 'AADBT') if col not in aadbt_data.columns]
    if missing_cols:
        raise KeyError(f"{aadbt_path} is missing required column(s): {missing_cols}")
    aadbt_data = aadbt_data[[id_col, 'AADBT', 'Year']]
    print("AADBT", aadbt_data.shape)

    # Read station data: try the year-independent file first, then the
    # year-specific one.
    station_dir = Path(INPUT_DATA_PATH) / dataset
    try:
        station_data = gpd.read_parquet(station_dir / f"{dataset}_stations.parquet")
    except FileNotFoundError:
        station_data = gpd.read_parquet(station_dir / f"stations_data{year}.parquet")
    station_data = station_data[[id_col, 'Desc_aforament', 'direction_counter', 'directionality', 'geometry']]
    print("Station", station_data.shape)

    # Join the data; the inner join drops stations without an AADBT value and
    # AADBT rows without a station.
    merged_data = pd.merge(station_data, aadbt_data, how='inner', on=id_col)
    print("Joined:", merged_data.shape)
    print(merged_data.columns)

    return merged_data

# Load data

In [5]:
# Load BiciZen; if its input files are absent, fall back to an empty frame
# with the same column names so the remaining cells still run.
try:
    bicizen = read_and_merge_data(dataset="bicizen", year=YEAR, id_col="ROOT_ID")
except FileNotFoundError:
    bicizen_columns = [
        'ROOT_ID',
        'AADBT',
        'Year',
        'Desc_aforament',
        'direction_counter',
        'directionality',
        'geometry',
    ]
    bicizen = gpd.GeoDataFrame(columns=bicizen_columns)
bicizen.head()

AADBT (26, 3)
Station (55, 5)
Joined: (26, 7)
Index(['ROOT_ID', 'Desc_aforament', 'direction_counter', 'directionality',
       'geometry', 'AADBT', 'Year'],
      dtype='object')


Unnamed: 0,ROOT_ID,Desc_aforament,direction_counter,directionality,geometry,AADBT,Year
0,1048273,C/ de Pi i Margall 114,N,,POINT (430189.262 4584777.934),383.739921,2024
1,1048896,C/ Jocs Florals 175,SE,,POINT (427815.131 4580216.804),488.159634,2024
2,1047975,Pg. Sant Joan 8,NW,,POINT (431434.433 4582633.172),4052.869891,2024
3,1048955,C/Rosselló 161,NE,,POINT (429152.250 4582499.693),215.127711,2024
4,1050910,"Passeig Maragall, 242",SE,,POINT (430865.603 4586454.646),504.215104,2024


In [6]:
# Load BACC stations joined with their AADBT values for the selected year.
bacc = read_and_merge_data(dataset="bacc", year=YEAR, id_col="ROOT_ID")
bacc.head()

AADBT (5, 3)
Station (6, 5)
Joined: (5, 7)
Index(['ROOT_ID', 'Desc_aforament', 'direction_counter', 'directionality',
       'geometry', 'AADBT', 'Year'],
      dtype='object')


Unnamed: 0,ROOT_ID,Desc_aforament,direction_counter,directionality,geometry,AADBT,Year
0,1095689,Diagonal Plaza 5 Oros,NE,Unidirectional,POINT (429702.605 4583096.442),4541.630416,2024
1,1095690,Diagonal Amb Ganduxer,SW,Bidirectional,POINT (428053.412 4582591.463),5487.170484,2024
2,1095691,Meridiana Amb Aragó,N,Bidirectional,POINT (432038.016 4584486.767),4877.750214,2024
3,1095692,Diagonal Amb Glories,NE,Bidirectional,POINT (431752.814 4583786.194),2999.612144,2024
4,1095693,Gran Via Amb La Bordeta,SW,Bidirectional,POINT (428670.045 4580586.839),1942.134539,2024


In [7]:
# AADBT table of the "counter" dataset: stamp the year, normalise the
# identifier column name to "id" and keep only the columns needed downstream.
dataset = "counter"
aadbt_data = (
    pd.read_parquet(Path(INPUT_PROCESSED_DATA_PATH) / f"{dataset}_aadbt{YEAR}.parquet")
    .assign(Year=YEAR)
    .rename(columns={"Id_aforament": "id"})
    .loc[:, ["id", "AADBT", "Year"]]
)
print("AADBT", aadbt_data.shape)

# Station table: its own AADBT column is dropped so that the values come from
# the table loaded above.
# NOTE(review): the file name is fixed to 2023 regardless of YEAR — confirm this is intended.
station_data = gpd.read_parquet(
    Path(INPUT_PROCESSED_DATA_PATH) / "013_oriented_stations2023.parquet"
).drop(columns=["AADBT"])
print("Station", station_data.shape)

# Inner join on the shared "id" column.
merged_data = pd.merge(station_data, aadbt_data, how="inner", on="id")
print("Joined:", merged_data.shape)
print(merged_data.columns)
counters = merged_data
counters.head()

AADBT (327, 3)
Station (348, 5)
Joined: (299, 7)
Index(['id', 'Desc_aforament', 'direction_counter', 'directionality',
       'geometry', 'AADBT', 'Year'],
      dtype='object')


Unnamed: 0,id,Desc_aforament,direction_counter,directionality,geometry,AADBT,Year
0,20001,DIPUTACIÓ - COMTE D'URGELL (carril BICI),SW,Unidirectional,POINT (429521.266 4581618.187),3916.892128,2024
1,20002,DIPUTACIÓ - COMTE BORELL (carrril BICI),NE,Unidirectional,POINT (429521.266 4581618.187),7152.887387,2024
2,20003,GRAN VIA DE LES CORTS CATALANES - MUNTANER (ca...,SW,Bidirectional,POINT (429904.531 4581790.384),3816.694268,2024
3,20005,MUNTANER - GRAN VIA DE LES CORTS CATALANES (ca...,SE,Bidirectional,POINT (430025.461 4581710.624),3055.405109,2024
4,20006,MUNTANER - SEPULVEDA (carril BICI sentit munta...,NW,Bidirectional,POINT (430027.221 4581710.534),2599.98366,2024


# Manage data

## Scale BiciZen (BACC scaling is currently disabled)

In [8]:
# Linearly rescale BiciZen AADBT values, leaving two stations untouched.
# NOTE(review): the origin of the coefficients and the reason these two IDs
# are exempt are not shown in this notebook — confirm.
# Caution: this cell reads and overwrites the same column, so running it
# twice applies the rescaling twice.
unscaled_ids = [1055893, 1047975]
needs_scaling = ~bicizen["ROOT_ID"].isin(unscaled_ids)
rescaled_aadbt = -477.175 + bicizen["AADBT"] * 3.266
bicizen.loc[needs_scaling, "AADBT"] = rescaled_aadbt

# bicizen["AADBT"] = bicizen["AADBT"].clip(lower=1)
# bacc["AADBT"]= -477.175 + bacc["AADBT"] * 3.266
bicizen["AADBT"].describe()


count      26.000000
mean     2597.097832
std      2243.814939
min        75.255269
25%       794.627385
50%      1483.567717
75%      4381.662752
max      7606.183050
Name: AADBT, dtype: float64

## Concat data

In [9]:
# Harmonise the three frames in place: report the CRS, rename the identifier
# column to "id" and tag each row with its source dataset.
labelled_frames = zip(["BiciZen", "BACC", "Counters"], [bicizen, bacc, counters])
for name, df in labelled_frames:
    print(df.crs)
    # ROOT_ID takes precedence over Id_aforament, and at most one is renamed.
    legacy_id = next((col for col in ("ROOT_ID", "Id_aforament") if col in df.columns), None)
    if legacy_id is not None:
        df.rename(columns={legacy_id: "id"}, inplace=True)
    print(df.shape)
    df["Source"] = name  # Add a new column with the name of the dataframe

# Declare EPSG:25831 on every frame. set_crs labels the data, it does not
# reproject; per the geopandas docs it raises if a frame already carries a
# different CRS.
bicizen, bacc, counters = (
    frame.set_crs(epsg=25831) for frame in (bicizen, bacc, counters)
)

# Stack the three datasets into a single frame with a fresh index.
datasets = pd.concat([bicizen, bacc, counters], ignore_index=True)


{"$schema": "https://proj.org/schemas/v0.7/projjson.schema.json", "type": "ProjectedCRS", "name": "ETRS89 / UTM zone 31N", "base_crs": {"name": "ETRS89", "datum_ensemble": {"name": "European Terrestrial Reference System 1989 ensemble", "members": [{"name": "European Terrestrial Reference Frame 1989"}, {"name": "European Terrestrial Reference Frame 1990"}, {"name": "European Terrestrial Reference Frame 1991"}, {"name": "European Terrestrial Reference Frame 1992"}, {"name": "European Terrestrial Reference Frame 1993"}, {"name": "European Terrestrial Reference Frame 1994"}, {"name": "European Terrestrial Reference Frame 1996"}, {"name": "European Terrestrial Reference Frame 1997"}, {"name": "European Terrestrial Reference Frame 2000"}, {"name": "European Terrestrial Reference Frame 2005"}, {"name": "European Terrestrial Reference Frame 2014"}, {"name": "European Terrestrial Reference Frame 2020"}], "ellipsoid": {"name": "GRS 1980", "semi_major_axis": 6378137, "inverse_flattening": 298.257

## Visualize

In [10]:
# Build one long table (Dataset, AADBT, ID) for Plotly, one chunk per source.
data = pd.concat([
    pd.DataFrame({'Dataset': label, 'AADBT': frame['AADBT'], 'ID': frame['id']})
    for label, frame in (("BiciZen", bicizen), ("BACC", bacc), ("Counters", counters))
])

# Boxplot of AADBT per dataset; hovering over a point shows the station ID.
fig = px.box(
    data,
    x='Dataset',
    y='AADBT',
    hover_data=['ID'],
    title="Boxplot of AADBT for BiciZen, BACC, and Counters",
)
fig.show()


# Save output

In [11]:
if SAVE_OUTPUT:
    # Build the target with pathlib, as the loading cells do, so a trailing
    # slash in OUTPUT_DATA_PATH does not produce a "//" in the file path.
    output_dir = Path(OUTPUT_DATA_PATH)
    # Create the directory if it does not exist yet instead of failing on write.
    output_dir.mkdir(parents=True, exist_ok=True)
    datasets.to_parquet(output_dir / f"datasets{YEAR}.parquet")
    