# Historical Trends: v1 vs v2

* Compare v1 and v2 service hours now that the workflow has changed
* How does v1 compare to v2 for the 3 dates in 2022?
* In Q2 2022, v1 < v2 by a lot (about 13,800 fewer service hours for `intersects_shn`). But by Q4 2022, v1 > v2 in every category.
* Within the same warehouse regime, the quarter-to-quarter fluctuations look reasonable

In [1]:
import altair as alt
import calitp.magics
import geopandas as gpd
import pandas as pd

from shared_utils import rt_dates, geography_utils
from shared_utils import calitp_color_palette as cp
from update_vars import BUS_SERVICE_GCS



In [2]:
# Look up the three 2022 quarter entries from the PMAC date mapping.
date_q2, date_q3, date_q4 = (
    rt_dates.PMAC[quarter] 
    for quarter in ("Q2_2022", "Q3_2022", "Q4_2022")
)

def hours_by_category(date):
    """
    Sum service hours by category for the v1 and v2 outputs of one date.
    
    Reads `routes_categorized_{date}_v1.parquet` and 
    `routes_categorized_{date}_v2.parquet` from BUS_SERVICE_GCS, totals
    `service_hours` within each `category`, and tags every row with `date`.
    
    Args:
        date: value interpolated into the parquet file names and stored 
            in the `date` column of both results.
    
    Returns:
        Tuple `(v1_hours, v2_hours)`; each is a dataframe with the columns
        `category`, `service_hours`, `date`.
    """
    def aggregate_version(version):
        # Both versions go through the same steps; only the file suffix differs.
        routes = gpd.read_parquet(
            f"{BUS_SERVICE_GCS}routes_categorized_{date}_{version}.parquet")
        
        hours = routes.groupby("category").agg(
            {"service_hours": "sum"}).reset_index()
        hours["date"] = date
        
        return hours
    
    # v1 is read and aggregated first, then v2, matching the returned order.
    return aggregate_version("v1"), aggregate_version("v2")

In [3]:
# One (v1, v2) pair of category totals per quarter, computed in Q2 -> Q4 order.
(q2_v1, q2_v2), (q3_v1, q3_v2), (q4_v1, q4_v2) = (
    hours_by_category(one_date) 
    for one_date in (date_q2, date_q3, date_q4)
)

In [4]:
# Stack the three quarters row-wise into one long table per version.
df1 = pd.concat([q2_v1, q3_v1, q4_v1], axis=0)  # v1 totals
df2 = pd.concat([q2_v2, q3_v2, q4_v2], axis=0)  # v2 totals

In [5]:
# Line up v1 and v2 totals on (date, category); validate guards against
# duplicate keys on either side. Then add the v1 minus v2 difference.
comparison = (
    pd.merge(
        df1.rename(columns = {"service_hours": "v1_service_hours"}),
        df2.rename(columns = {"service_hours": "v2_service_hours"}),
        on = ["date", "category"],
        how = "inner",
        validate = "1:1",
    )
    .assign(
        v1_diff_v2 = lambda x: x.v1_service_hours - x.v2_service_hours
    )
)
comparison

Unnamed: 0,category,v1_service_hours,date,v2_service_hours,v1_diff_v2
0,intersects_shn,52314.38,2022-05-04,66161.91,-13847.53
1,on_shn,15520.12,2022-05-04,18644.11,-3123.99
2,other,29510.79,2022-05-04,30316.55,-805.76
3,intersects_shn,54089.19,2022-08-17,56584.67,-2495.48
4,on_shn,15380.85,2022-08-17,15882.8,-501.95
5,other,26285.42,2022-08-17,25088.32,1197.1
6,intersects_shn,62629.75,2022-10-12,60287.32,2342.43
7,on_shn,16101.21,2022-10-12,15643.57,457.64
8,other,29107.18,2022-10-12,26756.03,2351.15


In [6]:
# Bar chart of the v1 minus v2 difference: one panel per category,
# one bar per date, colored by category.
chart = (
    alt.Chart(comparison)
    .mark_bar()
    .encode(
        x="date:N",
        y="v1_diff_v2:Q",
        column=alt.Column("category:N"),
        color=alt.Color(
            "category:N",
            scale=alt.Scale(range=cp.CALITP_CATEGORY_BRIGHT_COLORS),
        ),
    )
)
chart

In [7]:
def make_chart(df, y_col = "service_hours"): 
    """
    Bar chart of a quantitative column by date, one panel per category.
    
    Args:
        df: table passed to `alt.Chart`; the encodings reference the
            fields `date`, `category`, and `y_col`.
        y_col: name of the quantitative field plotted on the y-axis.
            Defaults to "service_hours", so existing `make_chart(df)`
            calls are unchanged; pass e.g. "v1_diff_v2" to plot a 
            different measure with the same layout.
    
    Returns:
        The Altair chart, with bars colored by category using
        `cp.CALITP_CATEGORY_BRIGHT_COLORS`.
    """
    chart = (alt.Chart(df)
             .mark_bar()
             .encode(
                 x="date:N",
                 # Altair shorthand: field name plus ":Q" for quantitative.
                 y=f"{y_col}:Q",
                 column = "category:N",
                 color = alt.Color("category:N", 
                               scale = alt.Scale(
                                   range=cp.CALITP_CATEGORY_BRIGHT_COLORS)
                              ),
             )
            )
    
    return chart
    

In [8]:
# v1 service hours: one panel per category, one bar per date.
make_chart(df1)

In [9]:
# v2 service hours: one panel per category, one bar per date.
make_chart(df2)