In [1]:
import os
from urllib.parse import quote_plus

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine

# Load key/value pairs from a local .env file into the process environment so
# the DB_* settings read when building the database URL are available.
load_dotenv()

True

In [2]:

# Build the PostgreSQL connection URL from DB_* environment variables and load
# the diu.traffic_data table into a DataFrame.

# Fail fast with a clear message instead of silently formatting a missing
# variable as the literal string "None" inside the URL.
db_settings = {name: os.getenv(name) for name in ('DB_USER', 'DB_PASSWORD', 'DB_HOST')}
missing_settings = [name for name, value in db_settings.items() if value is None]
if missing_settings:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(missing_settings)}"
    )

# Fall back to the PostgreSQL default port when DB_PORT is not set.
db_port = os.getenv('DB_PORT') or '5432'

# Percent-encode the credentials so characters such as '@', ':', '/' or '#'
# in the user name or password cannot corrupt the URL. The values in the
# environment are expected to be raw (not already URL-encoded).
db_url = (
    f"postgresql://{quote_plus(db_settings['DB_USER'])}:{quote_plus(db_settings['DB_PASSWORD'])}"
    f"@{db_settings['DB_HOST']}:{db_port}/postgres"
)
engine = create_engine(db_url)


sql_query = """
    SELECT time, olr, name, length, speed, speeduncapped, freeflow, jamfactor, confidence, traversability, subsegments, day_of_week, is_peak_hour, time_to_traverse, congestion_level, is_anomaly
    FROM diu.traffic_data;
"""
traffic_data = pd.read_sql(sql_query, engine)

In [3]:
# Show each column's dtype and non-null count to spot missing values.
traffic_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 436925 entries, 0 to 436924
Data columns (total 16 columns):
 #   Column            Non-Null Count   Dtype              
---  ------            --------------   -----              
 0   time              436925 non-null  datetime64[ns, UTC]
 1   olr               436925 non-null  object             
 2   name              427112 non-null  object             
 3   length            436925 non-null  int64              
 4   speed             414692 non-null  float64            
 5   speeduncapped     414692 non-null  float64            
 6   freeflow          436925 non-null  float64            
 7   jamfactor         436925 non-null  float64            
 8   confidence        414692 non-null  float64            
 9   traversability    436925 non-null  object             
 10  subsegments       215051 non-null  object             
 11  day_of_week       436342 non-null  float64            
 12  is_peak_hour      436342 non-null  object   

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
traffic_data.describe()

Unnamed: 0,length,speed,speeduncapped,freeflow,jamfactor,confidence,day_of_week,time_to_traverse
count,424378.0,402779.0,402779.0,424378.0,424378.0,402779.0,424378.0,402779.0
mean,3999.979888,16.730117,16.940686,16.907105,1.220472,0.939379,3.493979,246.840192
std,3563.142465,7.891661,8.159425,6.169363,2.472936,0.075843,1.91018,244.454095
min,11.0,0.277778,0.277778,2.777778,0.0,0.7,0.0,0.792
25%,968.0,11.944445,11.944445,11.944445,0.0,0.91,2.0,58.079995
50%,3075.0,15.0,15.0,16.11111,0.1,0.98,4.0,144.747697
75%,6246.0,20.833334,21.38889,20.833334,1.2,0.99,5.0,377.999974
max,15182.0,36.11111,47.222225,29.166668,10.0,0.99,6.0,3564.719715


In [5]:
# Preview the first rows to sanity-check the loaded columns and value formats.
traffic_data.head()

Unnamed: 0,time,olr,name,length,speed,speeduncapped,freeflow,jamfactor,confidence,traversability,subsegments,day_of_week,is_peak_hour,time_to_traverse,congestion_level,is_anomaly
0,2024-12-19 07:44:21.632000+00:00,CD0BEAA5OAYYECTyQQAJBQQCAuQACgUEAqx8APcBANcACQ...,Anschluss B61/Bielefeld-Zentrum,7660,13.888889,13.888889,16.38889,1.4,0.99,open,"[{'speed': 15.833334, 'length': 4861, 'freeFlo...",4,True,551.519996,Low,False
1,2024-12-27 20:00:48.505000+00:00,CCoBEAAmJQYWPiT7GQAJBQQBAk8ACgUEAZ8uABVt/NUACQ...,Ostring,3539,14.444445,14.444445,14.722222,0.1,0.99,open,,5,False,245.007683,Low,False
2,2024-12-27 17:00:48.359000+00:00,CCkBEAAlJAYGeiT1zQAJBQQAAdYACgUEAKhjAO2iDaEACQ...,Steinhagen,5227,35.27778,35.27778,29.166668,0.0,0.99,open,,5,True,148.16692,Low,False
3,2024-12-27 17:00:48.359000+00:00,CCgBEAAkIwYTOyUAYQAJBQQBAwAACgQDAQsAAAAACgAJBQ...,Bielefeld,11,6.388889,6.388889,10.555556,3.0,0.99,open,,5,True,1.721739,Moderate,True
4,2024-12-28 02:20:48.509000+00:00,CCkBEAAlJAYXkiTxsQAJBQQAASEACgUEALcaABT0EnkACQ...,Bielefeld-Ost,7071,23.333334,23.333334,28.611113,2.7,0.99,open,,6,False,303.042848,Low,False


In [8]:
# Descriptive statistics per `olr`: one row per `olr` value and one column per
# (metric, statistic) pair.
numerical_columns = [
    'speed',
    'speeduncapped',
    'freeflow',
    'jamfactor',
    'confidence',
    'time_to_traverse',
]

# The two unnamed lambdas compute the 25th and 75th percentiles; pandas labels
# them `<lambda_0>` and `<lambda_1>` in the resulting column names.
summary_functions = [
    'mean',
    'std',
    'min',
    lambda x: x.quantile(0.25),
    'median',
    lambda x: x.quantile(0.75),
    'max',
]
per_olr = traffic_data.groupby('olr')[numerical_columns].agg(summary_functions)

# Collapse the two-level (metric, statistic) header into flat names such as
# `speed_mean`.
per_olr.columns = ['_'.join(pair).strip() for pair in per_olr.columns]

# Move `olr` out of the index and back into a regular column.
statistics = per_olr.reset_index()
statistics

Unnamed: 0,olr,speed_mean,speed_std,speed_min,speed_<lambda_0>,speed_median,speed_<lambda_1>,speed_max,speeduncapped_mean,speeduncapped_std,...,confidence_median,confidence_<lambda_1>,confidence_max,time_to_traverse_mean,time_to_traverse_std,time_to_traverse_min,time_to_traverse_<lambda_0>,time_to_traverse_median,time_to_traverse_<lambda_1>,time_to_traverse_max
0,CCgBEAAkIwYJ/CT21gAJBQQEA3gACgQDBEsA/9H/zQAJBQ...,1.661706,0.311089,1.111111,1.388889,1.666667,1.944444,2.222222,1.661706,0.311089,...,0.97,0.9725,0.99,46.911350,9.866132,33.749999,38.571427,44.999999,53.999996,67.499995
1,CCgBEAAkIwYKUiT6OgAJBQQBA7wACgQDAUsA/5P//wAJBQ...,0.833333,0.000000,0.833333,0.833333,0.833333,0.833333,0.833333,0.833333,0.000000,...,0.89,0.8900,0.89,91.199993,0.000000,91.199993,91.199993,91.199993,91.199993,91.199993
2,CCgBEAAkIwYOoiT9OwAJBQQCAu4ACgQDAiwA/9gAHAAJBQ...,10.076958,1.733994,3.611111,8.611112,10.000000,11.111112,13.888889,10.097770,1.789326,...,0.84,0.9200,0.99,4.509364,0.868432,3.168000,3.960000,4.400000,5.109677,12.184615
3,CCgBEAAkIwYP7iT9YwAJBQQEAwYACgQDBCAAAAcAHAAJBQ...,1.314815,0.426041,0.555556,1.111111,1.388889,1.666667,1.944444,1.314815,0.426041,...,0.96,0.9700,0.98,27.049712,12.492285,15.942857,18.600000,22.319998,27.899998,55.799996
4,CCgBEAAkIwYQICT+eQAJBQQBAlcACgQDAUMAAFL/3wAJBQ...,7.489167,2.426333,1.944444,5.833334,7.222222,10.000000,13.888889,7.491807,2.434549,...,0.90,0.9600,0.99,10.160769,4.174483,4.824000,6.700000,9.276923,11.485714,34.457142
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
308,CD0BEAA5OAYcxyUACAAJBQQCA28ACgUEAoNjAAEuAAQACQ...,3.333334,0.000000,3.333334,3.333334,3.333334,3.333334,3.333334,3.333334,0.000000,...,0.94,0.9400,0.94,210.599989,0.000000,210.599989,210.599989,210.599989,210.599989,210.599989
309,CD0BEAA5OAYlZiUFFQAJBQQCA44ACgUEApkiAOQ1/U0ACQ...,13.743477,1.337370,9.444445,13.055556,13.888889,14.722222,16.111110,13.753883,1.359262,...,0.98,0.9900,0.99,629.747861,66.254340,531.744864,581.909443,616.823995,656.195722,907.094064
310,CDsBEAA3NgYYsCT6ggAJBQQEA48ACgQDBAoAAEH/+gAJBQ...,0.771605,0.122488,0.555556,0.833333,0.833333,0.833333,0.833333,0.771605,0.122488,...,0.95,0.9500,0.97,155.999988,30.955288,140.399989,140.399989,140.399989,140.399989,210.599983
311,CGIBEABeXQX7NyUHiwAJBQQCAykACgQDAmIABGwEYQAJBQ...,14.848143,1.055523,11.111112,14.166667,15.000000,15.833334,16.666668,14.848919,1.057013,...,0.93,0.9700,0.99,1027.919453,76.700388,910.919927,958.863118,1012.133333,1071.670563,1366.379891
