<h2>Load packages and environment variables</h2>

In [16]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from pathlib import Path
from dotenv import load_dotenv
import pytz

import os

# Load environment variables before any cell calls os.getenv (DATA_PATH and
# SQLDRIVER are read further down). override=True is passed explicitly; per the
# python-dotenv docs this lets values from the .env file replace variables that
# are already set in the process environment.
load_dotenv(override=True)


True

<h2>Load data path</h2>

In [17]:
# Resolve the data root once from the environment and fail with an actionable
# message when it is missing (Path(None) would otherwise raise a bare TypeError).
data_root = os.getenv("DATA_PATH")
if data_root is None:
    raise RuntimeError(
        "Environment variable DATA_PATH is not set; "
        "define it in the environment or the .env file before running this notebook."
    )
data_root = Path(data_root)

# Folders holding the realtime extracts and the static GTFS timetable files.
sample_realtime_data_path = data_root / 'Historical GTFS and GTFS Realtime - Metro'
sample_static_data_path = data_root / 'Timetables Complete GTFS'

# Show the realtime CSV files that are available; sorted so the listing is
# in a stable order regardless of how the filesystem enumerates entries.
sorted(sample_realtime_data_path.glob("*.csv"))


[WindowsPath('C:/Users/lng2/Workings/python_training/sample_data/Historical GTFS and GTFS Realtime - Metro/TripUpdate_20220601.csv'),
 WindowsPath('C:/Users/lng2/Workings/python_training/sample_data/Historical GTFS and GTFS Realtime - Metro/TripUpdate_20230601.csv'),
 WindowsPath('C:/Users/lng2/Workings/python_training/sample_data/Historical GTFS and GTFS Realtime - Metro/VehiclePosition_20220601.csv'),
 WindowsPath('C:/Users/lng2/Workings/python_training/sample_data/Historical GTFS and GTFS Realtime - Metro/VehiclePosition_20230819.csv'),
 WindowsPath('C:/Users/lng2/Workings/python_training/sample_data/Historical GTFS and GTFS Realtime - Metro/VehiclePosition_20230820.csv')]

In [18]:
# Source files: one day of realtime trip updates and the static stops table.
trip_update_csv = sample_realtime_data_path / 'TripUpdate_20220601.csv'
stops_txt = sample_static_data_path / 'stops.txt'

# Both files are comma-separated, so the default pandas CSV reader is used as-is.
df_trip = pd.read_csv(trip_update_csv)
df_stop = pd.read_csv(stops_txt)


<h2>Set up SQL Database</h2>

In [26]:
from sqlalchemy import create_engine, select, Table, MetaData
from sqlalchemy.orm import sessionmaker

# The connection string is kept out of the notebook and read from the
# SQLDRIVER environment variable.
con_str = os.environ.get("SQLDRIVER")

# One engine for the whole notebook, plus a session factory bound to it.
engine = create_engine(con_str)
Session = sessionmaker(bind=engine)


<h2>Load data into SQL Database</h2>

In [20]:
# Write the trip updates to the 'trip' table, replacing any existing table.
# The DataFrame index is not stored, and rows are written in batches of 20000.
df_trip.to_sql(
    name='trip',
    con=engine,
    if_exists="replace",
    index=False,
    chunksize=20000,
)


2632513

In [21]:
# Write the stops to the 'stop' table, replacing any existing table.
# The DataFrame index is not stored.
df_stop.to_sql(
    name='stop',
    con=engine,
    if_exists="replace",
    index=False,
)


116702

In [22]:
# Read the whole 'trip' table back through the same engine to check the load.
df_trip_in_sql = pd.read_sql('trip', engine)

# Shown for the reader: [shape read back from SQL, shape of the source frame].
print([df_trip_in_sql.shape, df_trip.shape])

# Enforce the check so a full re-run stops here if the round trip lost or
# gained rows/columns, instead of relying on someone eyeballing the printout.
assert df_trip_in_sql.shape == df_trip.shape, (
    f"'trip' round trip changed shape: {df_trip_in_sql.shape} != {df_trip.shape}"
)


[(2632513, 27), (2632513, 27)]


In [23]:
# Read the whole 'stop' table back through the same engine to check the load.
df_stop_in_sql = pd.read_sql('stop', engine)

# Shown for the reader: [shape read back from SQL, shape of the source frame].
print([df_stop_in_sql.shape, df_stop.shape])

# Enforce the check so a full re-run stops here if the round trip lost or
# gained rows/columns, instead of relying on someone eyeballing the printout.
assert df_stop_in_sql.shape == df_stop.shape, (
    f"'stop' round trip changed shape: {df_stop_in_sql.shape} != {df_stop.shape}"
)


[(116702, 10), (116702, 10)]


<h2>Run queries directly on SQLite</h2>

In [36]:
# Stop looked up in this example; named so the filter value is easy to find and change.
STOP_ID = 2155269

# Reflect the existing 'stop' table from the database so its columns can be
# referenced in a SQLAlchemy Core expression.
metadata = MetaData()
stop_in_sql = Table('stop', metadata, autoload_with=engine)

# Select only the id and name of the chosen stop; the filtering runs in the
# database rather than in pandas.
stmt = (
    select(stop_in_sql.c.stop_id, stop_in_sql.c.stop_name)
    .where(stop_in_sql.c.stop_id == STOP_ID)
)

with Session() as session:
    result = session.execute(stmt)
    # Take the column names from the result itself (before consuming the rows)
    # so the frame keeps its stop_id/stop_name columns even when no row matches;
    # inferring names from the first row would yield a column-less frame.
    column_names = list(result.keys())
    df_stop_in_sql_filtered = pd.DataFrame(result.all(), columns=column_names)

df_stop_in_sql_filtered


Unnamed: 0,stop_id,stop_name
0,2155269,"Tallawong Station, Platform 2"
