## Config

In [17]:
# parameters
SAVE_OUTPUT = True
SIZE_PLOTS = (12,10)
BUFFER_SIZE = 402.336 

#Location of the data
INPUT_DATA_PATH = "../data/raw/bacc"
OUTPUT_DATA_PATH = "../data/interim/bacc"

In [18]:
import numpy as np
import pandas as pd
import geopandas as gpd
from pathlib import Path


# Load data

In [19]:
counts=pd.read_parquet(Path(INPUT_DATA_PATH) / "data_bacc.parquet")
counts['Location'] = counts['Location'].apply(lambda x: x.split('_')[1])

# Data management

## Explore data

In [20]:
#Explore counts
print(counts.shape)
print(counts.columns)
print(counts.dtypes)
counts.tail()

(352, 22)
Index(['Location', 'date', 'Procesado video', 'Start time', 'End time',
       'Total vehicles', 'Bicing Mechanical Men', 'Bicing Mechanical Women',
       'Bicing Electric Men', 'Bicing Electric Women', 'Urban Mechanical Men',
       'Urban Mechanical Women', 'Urban Electric Men', 'Urban Electric Women',
       'Scooter Electric Men', 'Scooter Electric Women',
       'Foldable Mechanical Men', 'Foldable Mechanical Women',
       'Foldable Electric Men', 'Foldable Electric Women', 'Cargo-Bike',
       'Others'],
      dtype='object')
Location                     object
date                         object
Procesado video              object
Start time                   object
End time                     object
Total vehicles                int64
Bicing Mechanical Men         int64
Bicing Mechanical Women       int64
Bicing Electric Men           int64
Bicing Electric Women         int64
Urban Mechanical Men          int64
Urban Mechanical Women        int64
Urban Electric Men

Unnamed: 0,Location,date,Procesado video,Start time,End time,Total vehicles,Bicing Mechanical Men,Bicing Mechanical Women,Bicing Electric Men,Bicing Electric Women,...,Urban Electric Men,Urban Electric Women,Scooter Electric Men,Scooter Electric Women,Foldable Mechanical Men,Foldable Mechanical Women,Foldable Electric Men,Foldable Electric Women,Cargo-Bike,Others
347,DiagonalAmbGanduxer,2024-09-26,4,18:30:00,18:45:00,182,1,1,36,5,...,26,5,34,13,7,3,4,7,2,3
348,DiagonalAmbGanduxer,2024-09-26,5,18:45:00,19:00:00,186,1,0,37,13,...,23,7,36,9,9,2,7,3,0,5
349,DiagonalAmbGanduxer,2024-09-26,6,19:00:00,19:15:00,166,1,0,31,14,...,25,7,28,8,2,2,8,3,1,2
350,DiagonalAmbGanduxer,2024-09-26,7,19:15:00,19:30:00,210,1,0,41,20,...,21,1,32,14,9,5,8,6,1,4
351,DiagonalAmbGanduxer,2024-09-26,8,19:30:00,19:45:00,135,0,1,24,10,...,12,3,26,8,4,0,7,1,2,5


In [21]:
counts.describe()

Unnamed: 0,Total vehicles,Bicing Mechanical Men,Bicing Mechanical Women,Bicing Electric Men,Bicing Electric Women,Urban Mechanical Men,Urban Mechanical Women,Urban Electric Men,Urban Electric Women,Scooter Electric Men,Scooter Electric Women,Foldable Mechanical Men,Foldable Mechanical Women,Foldable Electric Men,Foldable Electric Women,Cargo-Bike,Others
count,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0,352.0
mean,128.446023,3.315341,1.840909,19.423295,9.866477,23.303977,9.352273,6.295455,2.75,22.107955,9.721591,6.289773,3.715909,4.775568,3.678977,1.039773,0.96875
std,55.239313,2.667733,1.84367,11.561791,6.370052,9.839231,5.473666,4.741546,2.653815,9.915296,6.370497,3.635236,2.780473,3.647295,3.296689,1.38515,1.18232
min,27.0,0.0,0.0,1.0,0.0,4.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,87.75,1.0,0.0,11.0,5.0,16.0,5.0,3.0,1.0,15.0,5.0,4.0,2.0,2.0,1.0,0.0,0.0
50%,124.5,3.0,1.0,17.0,9.0,23.0,8.0,5.0,2.0,21.0,8.0,6.0,3.0,4.0,3.0,1.0,1.0
75%,166.0,5.0,3.0,27.0,14.0,29.25,13.0,8.0,4.0,28.0,13.0,9.0,5.0,7.0,5.0,2.0,2.0
max,278.0,16.0,11.0,52.0,31.0,61.0,31.0,26.0,18.0,53.0,34.0,20.0,15.0,17.0,17.0,8.0,7.0


## Add location & BiciZen ID

In [22]:
locations = {
    'DiagonalAmb5D': {'lat': 41.396333, 'lon': 2.159083, 'ROOT_ID': 1095689},
    'DiagonalAmbGanduxer': {'lat': 41.391639, 'lon': 2.139417, 'ROOT_ID': 1095690},
    'MeridianaAmbAragó': {'lat': 41.409056, 'lon': 2.186861, 'ROOT_ID': 1095691},
    'DiagonalAmbGlories': {'lat': 41.402722, 'lon': 2.183528, 'ROOT_ID': 1095692},
    'GranVíaAmbLaBordeta': {'lat': 41.373639, 'lon': 2.147028, 'ROOT_ID': 1095693},
    'AragoAmbRCatalunya': {'lat': 41.391139, 'lon': 2.163694, 'ROOT_ID': 1095694}
}

# Join count with locations
locations_df = pd.DataFrame.from_dict(locations, orient='index').reset_index()
locations_df.columns = ['Location', 'Latitud', 'Longitud','ROOT_ID']
counts = counts.merge(locations_df, on='Location', how='left')
counts.columns



Index(['Location', 'date', 'Procesado video', 'Start time', 'End time',
       'Total vehicles', 'Bicing Mechanical Men', 'Bicing Mechanical Women',
       'Bicing Electric Men', 'Bicing Electric Women', 'Urban Mechanical Men',
       'Urban Mechanical Women', 'Urban Electric Men', 'Urban Electric Women',
       'Scooter Electric Men', 'Scooter Electric Women',
       'Foldable Mechanical Men', 'Foldable Mechanical Women',
       'Foldable Electric Men', 'Foldable Electric Women', 'Cargo-Bike',
       'Others', 'Latitud', 'Longitud', 'ROOT_ID'],
      dtype='object')

### Join with location

In [24]:
df=counts[['ROOT_ID','Latitud', 'Longitud', 'date', 'Procesado video', 'Start time','Total vehicles']]
gdf = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.Longitud, df.Latitud))
print(gdf.isna().sum().sum())
print(gdf.shape)
gdf.head()

0
(352, 8)


Unnamed: 0,ROOT_ID,Latitud,Longitud,date,Procesado video,Start time,Total vehicles,geometry
0,1095694,41.391139,2.163694,2023-09-28,1,07:45:00,54,POINT (2.16369 41.39114)
1,1095694,41.391139,2.163694,2023-09-28,2,08:00:00,40,POINT (2.16369 41.39114)
2,1095694,41.391139,2.163694,2023-09-28,3,08:15:00,67,POINT (2.16369 41.39114)
3,1095694,41.391139,2.163694,2023-09-28,4,08:30:00,55,POINT (2.16369 41.39114)
4,1095694,41.391139,2.163694,2023-09-28,5,08:45:00,79,POINT (2.16369 41.39114)


## Adapt data


In [26]:
#Scale to 1 hour
gdf["Factor"]=4
gdf["Count"]=gdf["Total vehicles"]*gdf["Factor"]
gdf["Count"].describe()



count     352.000000
mean      513.784091
std       220.957252
min       108.000000
25%       351.000000
50%       498.000000
75%       664.000000
max      1112.000000
Name: Count, dtype: float64

In [27]:
# Account for bidirectionality except for ROOT_ID 1095689
gdf.loc[gdf['ROOT_ID'] != 1095689, 'Count'] = gdf['Count'] / 2
gdf["Count"].describe()


count    352.000000
mean     304.579545
std      158.437184
min       54.000000
25%      179.500000
50%      291.000000
75%      400.500000
max      868.000000
Name: Count, dtype: float64

## Save output

In [29]:
if SAVE_OUTPUT:
    gdf.to_parquet(f'{OUTPUT_DATA_PATH}/BACC.parquet')

## Watermark

In [30]:
!python -m pip install watermark --quiet

In [31]:
%load_ext watermark

In [32]:
%watermark

Last updated: 2025-03-21T20:12:44.654442+01:00

Python implementation: CPython
Python version       : 3.11.9
IPython version      : 8.25.0

Compiler    : MSC v.1938 64 bit (AMD64)
OS          : Windows
Release     : 10
Machine     : AMD64
Processor   : AMD64 Family 25 Model 68 Stepping 1, AuthenticAMD
CPU cores   : 16
Architecture: 64bit



In [33]:
%watermark --iversions

geopandas: 0.13.2
pandas   : 2.0.3
numpy    : 1.24.4



In [34]:
!lsb_release -a

"lsb_release" no se reconoce como un comando interno o externo,
programa o archivo por lotes ejecutable.
