In [0]:
import pandas as pd 
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [0]:
# Booking-level extract for a fixed set of 16 routes (8 city pairs, both
# directions). The query sums `unt_net` and `rev_net` per
# flight / route / charge date / flight date / charge product / capacity /
# route type / region, restricted to 'Ticket' charges on flights dated within
# the last 390 days up to today.
#
# All string literals use single quotes: double-quoted tokens are identifiers
# in ANSI SQL (and in Spark when double-quoted identifiers are enabled), so
# mixing the two styles in the route list was fragile.
sql = """
    with rt_suite_aggregated as (
        select
            rt.flightkey,
            rt.route,
            rt.charge_dt,
            rt.flight_dt,
            rt.chargeproduct,
            flt.capacity,

            -- route characteristics
            rtmap.type,
            rtmap.region,

            -- statistics & metrics
            sum(rt.unt_net) as total_pax,
            sum(rt.rev_net) as total_rev

        from
            data_experience_commercial.cbt_1423_rtsuite.master rt
        join
            data_prod.silver_sanezdb.routemap rtmap on rt.route = rtmap.route
        join
            data_prod.silver_curated_eres.flight flt on rt.flightkey = flt.flightkey

        where 1=1
            and rt.chargeproduct = 'Ticket'
            and rt.flight_dt between current_date() - 390 and current_date()
            and rt.route in (
                'MADEDI', 'EDIMAD',
                'AMSNAP', 'NAPAMS',
                'BCNMAN', 'MANBCN',
                'NCELIS', 'LISNCE',
                'EDITFS', 'TFSEDI',
                'MANAGP', 'AGPMAN',
                'MXPSUF', 'SUFMXP',
                'MXPBRI', 'BRIMXP'
            )

        group by
            rt.flightkey,
            rt.route,
            rt.charge_dt,
            rt.flight_dt,
            rt.chargeproduct,
            flt.capacity,
            rtmap.type,
            rtmap.region
    )

    select * from rt_suite_aggregated
"""

# NOTE(review): `spark` is not defined in this notebook; it is assumed to be
# the SparkSession injected by the notebook runtime — confirm.
# The full result set is collected to the driver as a pandas DataFrame.
df = spark.sql(sql).toPandas()

In [0]:
# Parse both date columns of the extract into pandas datetimes so the `.dt`
# accessor can be used on them.
for date_col in ('charge_dt', 'flight_dt'):
    df[date_col] = pd.to_datetime(df[date_col])

In [0]:
# ISO week number of the flight date.
# `Series.dt.weekofyear` was deprecated in pandas 1.1 and removed in 2.0;
# `dt.isocalendar().week` returns the same ISO week. It comes back as a
# nullable UInt32 column, so it is cast to int64 to keep the dtype the old
# accessor produced. The cast requires `flight_dt` to contain no missing
# values, which the `between` filter on `rt.flight_dt` in the query ensures.
# NOTE(review): the query window spans 390 days, so one week number can hold
# flights from two different years — confirm that pooling them is intended.
df['week'] = df['flight_dt'].dt.isocalendar().week.astype('int64')

# Week parity flag: 1 for odd-numbered weeks, 0 for even-numbered weeks.
df['week_on'] = df['week'] % 2

In [0]:
# Collapse the extract over charge date: one row per distinct combination of
# the grouping keys below, keeping the last `capacity` seen in each group and
# summing passengers and revenue.
g = (
    df.groupby(
        [
            'flightkey', 'route', 'flight_dt', 'chargeproduct',
            'type', 'region', 'week_on', 'week',
        ]
    )
    .agg(
        capacity=('capacity', 'last'),
        total_pax=('total_pax', 'sum'),
        total_rev=('total_rev', 'sum'),
    )
    .reset_index()
)

In [0]:
# Unfinished exploration: builds a per-route GroupBy but applies no
# aggregation, so the cell only displays the GroupBy object itself.
df.groupby(['route'])

In [0]:
# Display the column labels of the working DataFrame.
df.columns

In [0]:
# Dataset work:
#   1. Gather the full dataset.
#   2. Create N random samples by route type.

In [0]:
# Mean `capacity` per week number across the aggregated rows in `g`.
weekly_capacity = g.groupby('week')['capacity'].mean()

# Use the explicit Axes API and label the figure so it can be read on its own.
fig, ax = plt.subplots()
ax.plot(weekly_capacity.index, weekly_capacity.values)
ax.set_title('Mean capacity by week of year')
ax.set_xlabel('Week of year (flight date)')
ax.set_ylabel('Mean capacity')
plt.show()