In [None]:
import sys
# add parent directory to sys.path so that python finds the modules
sys.path.append('..')

import pandas as pd
import scipy.stats
import numpy as np
from matplotlib import pyplot as plt
from matplotlib import rc

from db_utils import DatabaseConnection

In [None]:
# Retrieval of v_max per ride. Note: instead of taking the single highest sample, the
# median of a ride's ten highest-ranked velocity samples is used so that isolated
# outliers do not determine the result.
#
# How the query works, innermost subquery first:
#   1. `flat`  - unnest(ride.velos) turns each ride's velocity array into one row per sample.
#   2. `clean` - drops NaN samples (NULLIF maps 'NaN' to NULL, which is then filtered out).
#   3. `foo`   - rank() numbers the samples of each filename from the highest velo downwards.
#   4. outer   - keeps samples with rank <= 10 and 0.1 < velo < 14, then takes the
#                continuous median (PERCENTILE_CONT(0.5)) per filename.
#
# NOTE(review): the 0.1 < velo < 14 filter is applied AFTER ranking, so ranks are assigned
# over all non-NaN samples. A ride whose ten highest samples include values >= 14
# contributes fewer than ten samples to its median, and a ride whose top-ranked samples
# are all out of range yields no row at all - confirm this is intended.
# NOTE(review): rank() gives tied samples the same rank, so more than ten rows can pass
# `rank <= 10` when velocities repeat.
# velo is presumably in m/s (the plot further down labels the axis that way) - TODO confirm.

with DatabaseConnection() as cur:
        # The whole aggregation runs inside the database; one row per filename comes back.
        cur.execute("""
        SELECT filename, PERCENTILE_CONT(0.5) WITHIN GROUP(ORDER BY foo.velo) 
        FROM (
                SELECT clean.filename, clean.velo, 
                        rank() OVER (
                                PARTITION BY filename
                                ORDER BY velo DESC
                        )
                FROM (
                        SELECT flat.filename filename, flat.velo velo
                        FROM (
                                SELECT ride.filename, unnest(ride.velos) velo
                                FROM ride
                        ) as flat
                        WHERE NULLIF(flat.velo, 'NaN') IS NOT NULL
                ) as clean
        ) as foo 
        WHERE velo < 14 AND velo > 0.1 AND foo.rank <= 10 
        GROUP BY filename
        """)

        # Rows of (filename, median of the retained top-ranked velos).
        res = cur.fetchall()

# One row per filename; `max_v` is the per-ride v_max estimate read by the plotting cell.
df = pd.DataFrame(res, columns=['filename', 'max_v'])


In [None]:
# Normal distribution
#
# Fits a normal distribution to the per-ride v_max values in `df.max_v`, plots the
# histogram together with the fitted density and SUMO's default value, saves the figure
# and prints summary statistics of the fit.

# Reference value the fitted distribution is compared against (same unit as df.max_v).
SUMO_DEFAULT_VALUE = 5.56

# Configure fonts / LaTeX rendering before any figure text is created.
rc('font', **{'family': 'serif', 'serif': ['Computer Modern'], 'size': 16})
rc('text', usetex=True)

plt.figure(figsize=(8,8))

# Empirical distribution; density=True scales the histogram so it is comparable to the PDF.
plt.hist(df.max_v, bins=100, density=True, label=r'$v_{max}^{SimRa}$', alpha=0.5, zorder=1)

# Vertical marker at the SUMO default; it spans y in [0, 1] and is clipped by ylim below.
plt.vlines(SUMO_DEFAULT_VALUE, 0, 1, colors='green', linewidth=3, label=r'$v_{max}^{SUMO}$', zorder=3)

# Maximum-likelihood fit of a normal distribution (returns location and scale).
mean, std = scipy.stats.norm.fit(df.max_v)
x = np.linspace(0, 16, 1000)
y = scipy.stats.norm.pdf(x, mean, std)

# The LaTeX part is a raw string so that "\m" is not parsed as a (invalid) Python escape
# sequence; it is kept separate from the f-string because "{N}" would otherwise be
# interpreted as a replacement field.
fit_label = r"$\mathcal{N}$" + f"$({mean:.2f}, {std:.2f})$"
plt.plot(x, y, color='orange', linewidth=3, label=fit_label, alpha=0.5, zorder=2)
# Shade the area between the fitted density and the x-axis.
plt.fill_between(x, y, 0, color='orange', alpha=0.5, zorder=2)


plt.xlabel(r'$v_{max}$ in m/s')
plt.ylabel('Relative frequency')
plt.xlim(1, 15)
plt.ylim(0, 0.3)

plt.title("Maximum Velocity")
plt.legend()
plt.savefig("max_velo_analysis.png", dpi=300, bbox_inches='tight')

print(f"Mean: {mean}, Std: {std}")
print(f"Median: {df.max_v.median()}")
# Cutoffs are the 5 % and 95 % quantiles of the fitted normal distribution.
print(f"Lower cutoff (0.05 percentile): {scipy.stats.norm.ppf(0.05, mean, std)}")
print(f"Upper cutoff (0.95 percentile): {scipy.stats.norm.ppf(0.95, mean, std)}")
# Evaluate the CDF at the SUMO default itself; a hard-coded 1.20 was used here before,
# which did not match the label or SUMO_DEFAULT_VALUE.
print(f"CDF at SUMO default value: {scipy.stats.norm(mean, std).cdf(SUMO_DEFAULT_VALUE)}")

