In [None]:
import sys
# Add the parent directory to sys.path so that Python can find the project modules
sys.path.append('..')

import pandas as pd
import scipy.stats
import numpy as np
from matplotlib import rc
from matplotlib import pyplot as plt

from db_utils import DatabaseConnection

In [None]:
# Load one row per (seg_id, filename) group of type-'a' rows from `accels`:
# the group's largest accel, its summed duration and its summed dist.
# The HAVING clause keeps only groups with max(accel) < 2, 20 < sum(dist) < 350
# and min(velo) = 0 (presumably accelerations starting from standstill --
# TODO confirm; units are not visible from here).
# Because grouping also uses `filename`, a seg_id can occur in several rows.
ACCEL_QUERY = """select seg_id, max(accel), sum(duration), sum(dist) from accels where type='a' group by seg_id, filename having max(accel) < 2 and sum(dist)<350 and sum(dist)>20 and min(velo) = 0"""
ACCEL_COLUMNS = ['seg_id', 'max_a', 'duration', 'distance']

with DatabaseConnection() as cur:
    cur.execute(ACCEL_QUERY)
    res = cur.fetchall()

# Column labels follow the order of the SELECT list above.
df = pd.DataFrame(res, columns=ACCEL_COLUMNS)

In [None]:
# Normal distribution
#
# Fit a normal distribution to the per-group maximum accelerations in
# `df.max_a`, draw it over the normalised histogram together with the SUMO
# reference value, save the figure and print summary statistics of the fit.

# Reference value drawn as a vertical line and evaluated against the fitted CDF
# (presumably SUMO's default maximum acceleration in m/s^2 -- TODO confirm).
SUMO_DEFAULT_VALUE = 1.2

# Set font / LaTeX options before the figure exists so that every text element
# created below picks them up.
rc('font', **{'family': 'serif', 'serif': ['Computer Modern'], 'size': 16})
rc('text', usetex=True)

plt.figure(figsize=(8, 8))
# The label is rendered by LaTeX (usetex=True), so the exponent is written in
# math mode instead of relying on a non-ASCII superscript character.
plt.xlabel(r'$a_{max}$ in m/s$^2$')
plt.ylabel('Relative frequency')

plt.hist(df.max_a, bins=100, density=True, alpha=1, label=r'$a_{max}^{SimRa}$')

# Maximum-likelihood fit; the frozen distribution is reused for pdf/ppf/cdf
# so the parameters are passed in exactly one place.
mean, std = scipy.stats.norm.fit(df.max_a)
fitted = scipy.stats.norm(mean, std)

x = np.linspace(0, 3, 1000)
y = fitted.pdf(x)
# Raw string: "\m" is not a valid escape sequence in a normal string literal.
plt.plot(x, y, color='orange', linewidth=3, label=r"$\mathcal{N}$" + f"$({mean:.2f}, {std:.2f})$", alpha=0.5, zorder=2)
# fill_between fills down to y=0 by default, so no explicit baseline is needed.
plt.fill_between(x, y, color='orange', alpha=0.5, zorder=2)

# The line is drawn up to y=4 and clipped by the y-limit set below.
plt.vlines(SUMO_DEFAULT_VALUE, 0, 4, colors='green', linewidth=3, label=r'$a_{max}^{SUMO}$')

plt.xlim(0, 2.2)
plt.ylim(0, 1.8)

plt.legend()
plt.savefig("max_accel_analysis.png", dpi=300, bbox_inches='tight')

print(f"Mean: {mean}, Std: {std}")
print(f"Median: {df.max_a.median()}")

# ppf(0.05) / ppf(0.95) are the 5th / 95th percentiles of the fitted normal.
print(f"Lower cutoff (5th percentile): {fitted.ppf(0.05)}")
print(f"Upper cutoff (95th percentile): {fitted.ppf(0.95)}")

print(f"CDF at SUMO default value: {fitted.cdf(SUMO_DEFAULT_VALUE)}")