In [59]:
import calitp
from calitp.tables import tbl
from calitp import get_engine
from siuba import *

import pandas as pd
import numpy as np
import geopandas as gpd

Querying `mst_ridership` for `MST` and pulling the result into a pandas DataFrame using `collect()`

In [60]:
# Keep only the rows whose participant_id is 'mst', then collect() the
# query result into a local object.
participant_mst = collect(
    filter(tbl.views.mst_ridership(), _.participant_id == 'mst')
)

Turning `participant_mst` into a DataFrame named `df`

In [61]:
# copy=True gives `df` its own data, so columns assigned onto `df` cannot
# leak back into `participant_mst`.
df = pd.DataFrame(participant_mst, copy=True)

Create new column `rel_to_nominal` recording how `charge_amount` relates to `nominal_amount` (less, the same, or more)

In [62]:
# Label every row by how the amount actually charged compares with the
# nominal amount. The three comparisons are mutually exclusive.
conditions = [
    (df['charge_amount'] < df['nominal_amount']),
    (df['charge_amount'] == df['nominal_amount']),
    (df['charge_amount'] > df['nominal_amount'])
]
values = ['paid_less', 'paid_the_same', 'paid_more']
# Rows matching none of the conditions (e.g. a missing amount, for which every
# comparison is False) get an explicit string label. Without `default`,
# np.select falls back to the integer 0, which has no common dtype with the
# string labels: NumPy 2 raises TypeError and older releases emit a stray '0'.
df['rel_to_nominal'] = np.select(conditions, values, default='unknown')

Counting the relations to nominal

In [63]:
# Number of rows for each rel_to_nominal label.
count(df, _.rel_to_nominal)

Unnamed: 0,rel_to_nominal,n
0,paid_less,202
1,paid_the_same,32671


Counting relations to nominal by charge type

In [64]:
# Number of rows for each (charge_type, rel_to_nominal) combination.
count(df, _.charge_type, _.rel_to_nominal)

Unnamed: 0,charge_type,rel_to_nominal,n
0,complete_variable_fare,paid_less,202
1,complete_variable_fare,paid_the_same,27854
2,flat_fare,paid_the_same,4817


Counting relation to nominal by charge amount and nominal amount

In [65]:
# Number of rows for each (rel_to_nominal, charge_amount, nominal_amount)
# combination.
count(df, _.rel_to_nominal, _.charge_amount, _.nominal_amount)

Unnamed: 0,rel_to_nominal,charge_amount,nominal_amount,n
0,paid_less,0.0,1.5,34
1,paid_less,0.0,2.5,40
2,paid_less,0.0,3.5,2
3,paid_less,0.0,10.0,12
4,paid_less,0.5,1.5,6
5,paid_less,0.5,2.5,2
6,paid_less,1.0,1.5,16
7,paid_less,1.5,2.5,18
8,paid_less,1.5,3.5,10
9,paid_less,2.0,2.5,10


How many paid more than the nominal amount? (0)

In [75]:
# The label written into rel_to_nominal for charges above the nominal amount
# is 'paid_more'. Filtering on any other string matches no rows and reports 0
# regardless of the data. The output column keeps its name, paid_more_than.
(df
    >> filter(_.rel_to_nominal == "paid_more")
    >> summarize(paid_more_than = _.rel_to_nominal.count()
                )
)

Unnamed: 0,paid_more_than
0,0


How many paid less than the nominal amount?

In [80]:
# Count the rows labelled 'paid_less'.
summarize(
    filter(df, _.rel_to_nominal == "paid_less"),
    n_fares=_.rel_to_nominal.count(),
)

Unnamed: 0,n_fares
0,202


How many paid the same as the nominal amount?

In [81]:
# Count the rows labelled 'paid_the_same'.
summarize(
    filter(df, _.rel_to_nominal == "paid_the_same"),
    n_fares=_.rel_to_nominal.count(),
)

Unnamed: 0,n_fares
0,32671


How many had a flat fare but paid less than the nominal amount?

In [82]:
# Count rows that are both labelled 'paid_less' and have charge_type
# 'flat_fare'.
summarize(
    filter(df, _.rel_to_nominal == "paid_less", _.charge_type == "flat_fare"),
    n_fares=_.charge_type.count(),
)

Unnamed: 0,n_fares
0,0


How many paid $0?

In [83]:
# Count the rows whose charge_amount is exactly 0.
summarize(
    filter(df, _.charge_amount == 0.0),
    n_fares=_.charge_amount.count(),
)

Unnamed: 0,n_fares
0,252


How many paid less than the nominal amount, and paid $0?

In [71]:
# Count rows labelled 'paid_less' whose charge_amount is exactly 0.
summarize(
    filter(df, _.rel_to_nominal == "paid_less", _.charge_amount == 0.0),
    n_fares=_.charge_amount.count(),
)

Unnamed: 0,n_fares
0,88


How many paid the same as the nominal amount, and paid $0?

In [84]:
# Count rows labelled 'paid_the_same' where both the nominal amount and the
# charged amount are exactly 0.
summarize(
    filter(
        df,
        _.rel_to_nominal == "paid_the_same",
        _.nominal_amount == 0.0,
        _.charge_amount == 0.0,
    ),
    n_fares=_.charge_amount.count(),
)

Unnamed: 0,n_fares
0,164


How many paid $0, by charge_type and rel_to_nominal?

In [73]:
# Restrict to rows with a charge_amount of exactly 0, then count them for each
# (charge_amount, charge_type, rel_to_nominal) combination.
count(
    filter(df, _.charge_amount == 0.0),
    _.charge_amount,
    _.charge_type,
    _.rel_to_nominal,
)

Unnamed: 0,charge_amount,charge_type,rel_to_nominal,n
0,0.0,complete_variable_fare,paid_less,88
1,0.0,complete_variable_fare,paid_the_same,164


How many paid less than the nominal amount, but paid more than $0?

In [85]:
# Count rows labelled 'paid_less' whose charge_amount is not 0.
summarize(
    filter(df, _.rel_to_nominal == "paid_less", _.charge_amount != 0.0),
    n_fares=_.rel_to_nominal.count(),
)

Unnamed: 0,n_fares
0,114


Fare Tables - Route Type

| Route Type | Regular Fare | Discount Fare |
| ----------- | ----------- | --- |
| Local | 1.50 | 0.75 |
| Primary | 2.50 | 1.25 |
| Regional | 3.50 | 1.75 |
| Commuter | 12.00 | 6.00 |

Fare Tables - Distance

| Distance | Regular Contactless Fare | 
| ----------- | ----------- | 
| Up to 2.7 miles | 1.50 | 
| Between 2.7 and 14 miles | 2.50 |
| 14 – 50 miles | 3.50 | 
| 50 – 77 miles | 7.00 |
| 77 miles or more | 10.00 |
