In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from py_scripts.db_fcns import save_df_to_db, get_df_from_db
from py_scripts.data_fcns import select_data_for_train, interpolate_time
from py_scripts.trainlocations import StationsAndDates, TrainLocations

In [None]:
# Master switch for a full rebuild: it is passed below as both `do_limiting`
# and `force_reset` to tl.find_data(), and it selects "replace" (True) versus
# "fail" (False) as the if-exists action when the raw data is saved.
reset = False

In [None]:
# Route endpoints (Finnish: alkuasema = start station, loppuasema = end station).
# Presumably railway station short codes -- TODO confirm against StationsAndDates.
alkuasema = "TPE"
loppuasema = "JY"

# Date range for the analysis (alkupvm = start date, loppupvm = end date),
# given as YYYY-MM-DD strings.
alkupvm = "2018-01-01"
loppupvm = "2023-06-30"

# "asemat ja paivat" = stations and dates; this object is what TrainLocations
# is constructed from in the next cell.
asemat_ja_paivat = StationsAndDates(alkuasema, loppuasema, alkupvm, loppupvm)

In [None]:
# Central analysis object for the selected stations and date range; every
# later cell in this notebook works through methods and attributes of `tl`.
tl = TrainLocations(asemat_ja_paivat)

In [None]:
# Load the raw data into `tl`; the summary below reads tl.train_df,
# tl.timetables and tl.location_df_raw.
tl.load_raw_data_from_db()

# A single train run is identified by its (departureDate, trainNumber) pair.
# Count distinct pairs with a vectorised drop_duplicates() instead of a
# row-wise apply(): it avoids one Python call per row and also works on an
# empty frame, where apply(axis=1) returns a DataFrame that has no .unique().
train_key = ["departureDate", "trainNumber"]
n_trains_with_timetables = len(tl.timetables[train_key].drop_duplicates())
n_trains_with_locations = len(tl.location_df_raw[train_key].drop_duplicates())

print(f"Trains loaded from DB: {len(tl.train_df)}")
print(f"Trains with timetables: {n_trains_with_timetables}")
print(f"Trains with locations: {n_trains_with_locations}")

In [None]:
# `reset` drives both the limiting step and the forced reset of find_data().
tl.find_data(do_limiting=reset, force_reset=reset)

# Overwrite previously stored raw data only when a reset was requested;
# otherwise ask the save step to fail rather than touch existing data.
# NOTE(review): the values look like pandas `to_sql(if_exists=...)` options --
# confirm how save_raw_data_to_db() reacts to "fail" when the data already exists.
if reset:
    if_exists_action = "replace"
else:
    if_exists_action = "fail"

tl.save_raw_data_to_db(if_exists_action=if_exists_action)

## Katsotaan eri reittivaihtoehtoja

In [None]:
# Route overview from TrainLocations; presumably a count per route
# (indexed by route) -- TODO confirm against get_routes().
routes = tl.get_routes()

# Display only the entries whose value exceeds 10.
is_common_route = routes > 10
routes[is_common_route]

In [None]:
# Restrict the train table to the routes shown above (value > 10), then count
# the non-null `timetableType` entries per route ("stations") and train type.
common_route_index = routes[routes > 10].index
runs_on_common_route = tl.train_df["stations"].isin(common_route_index)

pd.pivot_table(
    tl.train_df[runs_on_common_route],
    values="timetableType",
    index=["stations"],
    columns=["trainType"],
    aggfunc=pd.Series.count,
)

## Valitaan reitti

In [None]:
# Pick the first entry of the routes index as the route to analyse.
# NOTE(review): this is the most frequent route only if get_routes() returns
# its result sorted by descending count -- confirm.
route = routes.index[0]
route

In [None]:
# Try to load previously stored checkpoint data for the chosen route first.
tl.load_checkpoint_data_from_db(route)

# The processing cells below are guarded by this flag: they run only when
# no checkpoint data is available after the load attempt.
do_processing = not tl.checkpoint_data_exists()

In [None]:
# Runs only when no stored checkpoint data was found for this route.
# The returned value is kept in `interval_dfs`; later cells read
# `tl.interval_dfs` instead (presumably the same data -- TODO confirm).
if do_processing:
    interval_dfs = tl.process_train_locations(route)

In [None]:
# Runs only when no stored checkpoint data was found for this route.
# NOTE(review): num_of_decimals=-2 presumably rounds the median estimate to
# the nearest hundred (round()-style negative digits) -- confirm in TrainLocations.
if do_processing:
    tl.calculate_best_distance_estimate(method="median", num_of_decimals=-2)

In [None]:
# One line per station-to-station interval: "<start>-<end>: <distance>".
for dc in tl.interval_dfs:
    interval_label = f"{dc.start_station}-{dc.end_station}"
    print(f"{interval_label}: \t {dc.distance}")

In [None]:
# Runs only when no stored checkpoint data was found for this route.
# The return value is assigned to `_` so the cell produces no output.
# NOTE(review): units of percentage=2 and min_error=500 are not visible here
# (presumably percent and metres) -- confirm in TrainLocations.
if do_processing:
    _ = tl.filter_data_based_on_distance(percentage=2, min_error=500)

In [None]:
# Per interval, print how many trains are still flagged `in_analysis`.
# The count is computed outside the f-string: reusing double quotes inside a
# double-quoted f-string is a SyntaxError before Python 3.12. Summing the
# boolean mask gives the same number as len() of the filtered frame without
# building a filtered copy.
for dc in tl.interval_dfs:
    in_analysis_mask = dc.trains["in_analysis"] == True  # noqa: E712 - keeps the original element-wise comparison
    trains_in_analysis = int(in_analysis_mask.sum())
    print(f"{dc.start_station}-{dc.end_station}: \t {trains_in_analysis}")

## Jaetaan kukin välimatka tasaisin välein oleviin "checkpointteihin" ja analysoidaan dataa niiden avulla

In [None]:
# Runs only when no stored checkpoint data was found for this route.
# NOTE(review): the unit of checkpoint_interval=50 is not visible here
# (presumably the same distance unit as the interval distances) -- confirm.
if do_processing:
    tl.focus_on_checkpoints(checkpoint_interval=50)

In [None]:
# Persist freshly computed checkpoint data; skipped when the data was loaded
# from the database above (do_processing is False in that case).
if do_processing:
    tl.save_checkpoint_data_to_db()

In [None]:
# Runs unconditionally, i.e. for both freshly processed and DB-loaded
# checkpoint data. Implementation lives in TrainLocations (not visible here).
tl.calculate_accelerations()

In [None]:
# Last expression of the cell, so any non-None return value is displayed.
# NOTE(review): whether this also stores the combined data on `tl` for the
# clustering steps below is not visible here -- confirm.
tl.get_checkpoint_data_for_full_route()

In [None]:
# Preparation step that must run before run_kmeans_clustering() in the next
# cell (judging by the name and cell order; details are in TrainLocations).
tl.setup_for_clustering()

In [None]:
# NOTE(review): the positional 5 is presumably the number of clusters -- confirm.
# No random seed is set in this notebook; check whether the method fixes one,
# otherwise cluster assignments may differ between runs.
tl.run_kmeans_clustering(5)

In [None]:
# Visualise the result of the k-means step above (plotting is done inside
# TrainLocations; nothing is configured from this notebook).
tl.draw_cluster_centroids()

In [None]:
# Final step: compare the clusters found above. Last expression of the cell,
# so any non-None return value is displayed.
tl.compare_clusters()