Data location: https://kingcounty.gov/en/dept/metro/rider-tools/mobile-and-web-apps#toc-developer-resources

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mpldates
import seaborn as sns
import plotly as py
import dill
import os

from mypy.checker import and_conditional_maps

import GTFS_Functions as my

from numpy.lib.recfunctions import drop_fields

In [None]:
# Fix fuzzy-looking text in inline figures: switch the inline Matplotlib backend to the
# higher-DPI 'retina' figure format.
%config InlineBackend.figure_format='retina'

In [None]:
# Load every DataFrame pickle in ./pickles into a global variable named after its file
# (e.g. ./pickles/trips.pkl becomes the variable `trips`).
# NOTE(review): unpickling can execute arbitrary code - only keep trusted files in ./pickles.
pickle_in = os.listdir("./pickles")

for file_name in pickle_in:
    var_name, extension = os.path.splitext(file_name)
    # Skip entries that are not pickles (hidden files, checkpoint folders, ...) and names
    # that could not be used as a Python variable anyway.
    if extension != ".pkl" or not var_name.isidentifier():
        continue
    # Bind through globals() instead of exec() so a file name is never run as code.
    globals()[var_name] = pd.read_pickle(os.path.join("./pickles", file_name))

# Show what was found in the folder.
pickle_in

In [None]:
# Print the column names, dtypes and non-null counts of the stop-frequency table.
stop_freq.info()

### Treatment
#### convert numbers to datetime

### Combine both Calendar dataframes

In [None]:
# Heatmap of the weekly calendar: one row per service_id, one column per remaining field.
sns.set_context('paper')

# Every column except the last two is plotted.
# NOTE(review): presumably the last two columns are not weekdays (e.g. date bounds) - confirm.
weekday_service = calendar_week.set_index("service_id").iloc[:, :-2]

ax = sns.heatmap(
    weekday_service,
    yticklabels=True,  # label every service_id row
    cbar=False,
    cmap=sns.color_palette('blend:#cfd8d6,#3e6e64', as_cmap=True),
    linewidth=0.5,
)
ax.set(
    xlabel="Weekday",
    ylabel="Service ID",
    title="Heatmap: Likelihood of Service",
)
plt.show()

In [None]:
# Order service ids by how many trips they are assigned to, in descending order.
# Service ids with no trips are appended to the end.
cal_full_alt = calendar_full.set_index("service_id").copy()

# value_counts() sorts by count (descending), so its index is already in the wanted order.
service_ids_by_trip_count = trips.value_counts("service_id").index
# Compare against the service ids that actually occur in trips' service_id column;
# the row index of trips is not a list of service ids.
service_ids_without_trips = cal_full_alt.index.difference(service_ids_by_trip_count)
service_order = service_ids_by_trip_count.append(service_ids_without_trips)
cal_full_alt_graph = cal_full_alt.reindex(service_order)

# change timestamps in column axis to strings for visualization
cal_full_alt_graph.columns = [y.strftime("%b-%d") for y in cal_full_alt.columns]

# Only the first 31 date columns are drawn.
days_shown = cal_full_alt_graph.iloc[:, 0:31]

sns.set_context('paper')
ax = sns.heatmap(
    days_shown,
    yticklabels=True,  # label every service_id row
    cbar=False,
    cmap=sns.color_palette('blend:#cfd8d6,#3e6e64', as_cmap=True),
    linewidth=0.5,
)
ax.set(
    xlabel="Date",
    ylabel="Service ID",
    title="Service heatmap",
)

# Label every 7th column. Heatmap cells are centred at column position + 0.5; setting the
# tick positions explicitly keeps ticks and labels the same length even when seaborn has
# thinned its automatic ticks.
tick_step = 7
ax.set_xticks([position + 0.5 for position in range(0, days_shown.shape[1], tick_step)])
ax.set_xticklabels(days_shown.columns[::tick_step])
ax.tick_params(axis='x', rotation=0)
plt.show()

## Create Stop Frequency Chart tool

In [None]:
# Show the routes row(s) whose short name is "8".
routes.loc[routes['route_short_name'] == "8", :]

In [None]:

def flatten_stop_freq(df):
    """
    Reshape the wide stop-frequency table into long form: one group of rows per weekday.

    For every weekday, the rows flagged with 1 in that weekday's column are kept, that
    day's `time_f_<day>` column becomes `time_f`, and a `day` column holding the weekday
    name is added. The weekday flag columns and the other `time_f_<day>` columns are dropped.

    :param df: dataframe with one flag column per weekday ('monday' ... 'sunday') and one
               `time_f_<day>` column per weekday; it is not modified
    :return: new dataframe with the per-day rows stacked in monday-to-sunday order;
             a row flagged for several days appears once per day, keeping its index label
    """
    days_of_week = ['monday','tuesday','wednesday','thursday','friday','saturday','sunday']

    # Build one frame per weekday and concatenate once, instead of growing a frame inside
    # the loop (which also avoids concatenating onto an empty, column-less DataFrame).
    per_day = [
        df.loc[df[day] == 1, :]
          .rename(columns = {f"time_f_{day}": 'time_f'})
          .drop(columns = days_of_week)
          .assign(day = day)
        for day in days_of_week
    ]
    output = pd.concat(per_day)

    # Each day's frame still carries the other days' time_f_<day> columns; remove them all.
    return output.drop(columns = [f'time_f_{x}' for x in days_of_week])

# Long-form version of stop_freq: one row per flagged weekday, with `day` and `time_f` columns.
stop_freq_flat = flatten_stop_freq(stop_freq)


In [None]:
# Row selections for the frequency graph.
# Each entry is [stop_id, route_id, direction_id, day].
filters = [
    [8402, 100275, 0, 'monday'],
    [8402, 100275, 0, 'saturday'],
    [8402, 100275, 0, 'sunday'],
]

def frequency_line_graph(df, filters, title = "time between buses - Route 8 - dir 0, stop 8402"):
    """
    Draw a line chart of the time between buses across the day, one line per `day` value.

    :param df: stop_freq_flat. Must contain the columns 'arrival_time', 'time_f', 'stop_id',
               'route_id', 'direction_id' and 'day'; the values of 'arrival_time' and
               'time_f' need a `.seconds` attribute (e.g. pandas Timedelta)
    :param filters: list of list in the form of [[stop_id, route_id, direction_id, day]];
                    each inner list selects the rows for one part of the chart
    :param title: chart title. The default is the title this function has always used, so
                  pass your own whenever `filters` selects another route, direction or stop
    :return: draws the graph of the requested items, and returns the dataframe used to
             generate that graph (the selected columns plus 'delta_val' and 'time_val')
    """
    # Columns are the x and y sources, plus the values that can differentiate each line
    # on the chart (route_id, stop_id, direction_id, day).
    columns = ['arrival_time', 'time_f', 'stop_id', 'route_id', 'direction_id', 'day']

    # One filtered frame per requested selection, concatenated once afterwards.
    selections = []
    for selection in filters:
        stop_id, route_id, direction_id, day = selection[0], selection[1], selection[2], selection[3]
        matches = ((df['stop_id'] == stop_id) &
                   (df['route_id'] == route_id) &
                   (df['direction_id'] == direction_id) &
                   (df['day'] == day))
        selections.append(df.loc[matches, columns])

    if selections:
        output = pd.concat(selections)
    else:
        # No filters given: keep the expected columns so the steps below still work.
        output = df.loc[:, columns].iloc[0:0].copy()

    # Convert the time values into plain numbers (for seaborn to process them easily).
    # NOTE(review): if these are pandas Timedeltas, `.seconds` ignores whole days, so
    # arrival times of 24:00:00 or later wrap around to the start of the axis.
    output["delta_val"] = output["time_f"].map(lambda x: x.seconds/60)          # minutes
    output["time_val"] = output["arrival_time"].map(lambda x: x.seconds/3600)   # hours

    # Build the chart
    # TODO: add a way to specify what dimension to use as hue etc
    sns.set_style("darkgrid")
    ax = sns.lineplot(data = output,
                      x = 'time_val',
                      y = 'delta_val',
                      hue = 'day'
                     )

    ax.set(xlabel = "Hour of the Day",
           ylabel = "minutes",
           title = title
           )

    # Fixed ticks and limits: 0-65 minutes on the y axis, a 0-24 hour day on the x axis.
    ax.set_yticks([0,5,10,15,30,45,60])
    ax.set_ylim(0,65)
    ax.set_xticks([0,5,7,9,12,16,18,20,21,22,24])
    ax.set_xlim(0,24)

    return output

# Plot the selections listed in `filters`; the returned frame is the data behind the chart.
frequency_line_graph(stop_freq_flat, filters)


In [None]:
# I have a map of the average service_ids for any given weekday (or specific day).
# For any given route and day, find all the trips.
# Next, let's get the start time of each of those trips.

In [None]:

# Attach routes to trips on route_id to get the route names.

# Combine stop_times with trips on trip_id, to pull the first arrival_time of each trip.


In [None]:
def trips_merged(trips, stop_times, routes, calendar_week):
    """
    Combine trips with their first arrival time, their route names and the weekly calendar.

    trips: dataframe with (at least) the columns trip_id, route_id and service_id

    stop_times: dataframe with (at least) the columns trip_id and arrival_time.
                It is only read, never modified.

    routes: dataframe with (at least) the columns route_id, route_short_name, route_desc

    calendar_week: dataframe with a service_id column; all of its columns are carried over

    Returns a new dataframe: trips plus arrival_time (the earliest one of the trip),
    route_short_name, route_desc and the calendar_week columns. All joins are inner joins,
    so a trip without stop times, without a route or without a calendar entry is dropped.
    """
    # Earliest arrival_time of each trip: sort by time and keep the first row per trip.
    # Missing times sort last, so they are only picked when a trip has no time at all.
    # NOTE(review): if arrival_time holds "HH:MM:SS" strings, this ordering is only
    # chronological when the hours are zero-padded - confirm against the data.
    trip_times = (
        stop_times.loc[:, ["trip_id", "arrival_time"]]
        .sort_values("arrival_time")
        .drop_duplicates("trip_id", keep = "first")
    )

    # pull route names
    trip_routes = routes.loc[:, ["route_id", "route_short_name", "route_desc"]]

    trips_alt = pd.merge(left = trips, right = trip_times, how = "inner", on = "trip_id")
    trips_alt = pd.merge(left = trips_alt, right = trip_routes, how = "inner", on = "route_id")
    # the weekday service columns of calendar_week are dropped in as they are
    trips_alt = pd.merge(left = trips_alt, right = calendar_week, how = "inner", on = "service_id")

    return trips_alt
    
def routes_on_a_day(trips, weekday, route, direction):
    """
    Select the trips of one route and direction that run on the given weekday.

    trips: dataframe with route_short_name, direction_id, arrival_time and one column
           per weekday
    weekday: name of the weekday column; rows where it equals 1 are kept
    route: value to match in route_short_name
    direction: value to match in direction_id

    Returns the matching rows sorted by arrival_time.
    """
    on_route = trips["route_short_name"] == route
    runs_that_day = trips[weekday] == 1
    in_direction = trips["direction_id"] == direction

    selected = trips.loc[on_route & runs_that_day & in_direction, :]
    return selected.sort_values("arrival_time")
    
def stop_time_to_datetime(value):
    """
    Convert a stop time such as "08:15:30" into a pandas Timestamp on a dummy date.

    The time is counted from 1970-01-01 00:00:00, so hours of 24 and above (used for
    service that runs past midnight) roll over onto 1970-01-02 or later, and the minutes
    and seconds are kept exactly. Hours without a leading zero ("5:07:09") are accepted.

    value: time of day as an "H:MM:SS" / "HH:MM:SS" string

    Returns a pandas Timestamp.
    """
    # Reading the value as an elapsed time handles any hour count, with or without
    # zero padding, without slicing the string by position.
    return pd.Timestamp("1970-01-01") + pd.to_timedelta(value)

def create_time_delta(df, time_col, delta_col):
    """
    outputs the same df, with a new column, delta_col

    the timedelta for each point is the time elapsed BEFORE the point.
    delta = Tn - T(n-1)

    the first point has no previous point in the dataframe, so its delta wraps around
    the day: it is measured from the last point, moved back by one day.
    delta_0 = T0 - (T_last - 1 day)

    df: dataframe with the datetime to build from
        dataframe the delta time will be added to (it is modified in place)
        rows are used in their current order, so sort by time_col beforehand

    time_col: name of the column with datetime information.
              must be in timestamp format

    delta_col: the name of the new column

    returns df itself; an empty df just gets an empty delta_col
    """
    # Create a column of the differences; the first row is NaT at this point.
    df[delta_col] = df[time_col].diff()

    # With no rows there is no first value to wrap around.
    if len(df) == 0:
        return df

    # create a timedelta object equal to one day
    one_day = pd.to_timedelta(1, unit='d')

    # calculate the time differential between the last and the first value.
    wrap_around_delta = df[time_col].iloc[0] - (df[time_col].iloc[-1] - one_day)

    # Write by position so only the first row changes, even if index labels repeat.
    df.iloc[0, df.columns.get_loc(delta_col)] = wrap_around_delta

    return df

In [None]:
# GTFS_Functions is already imported as `my` in the imports cell at the top of the notebook.

# Choices for the headway chart: which route, weekday and direction to analyse.
route_choice = "D Line"
weekday_choice = "friday"
direction_choice = 1

# merge data desired
trips_alt = trips_merged(trips, stop_times, routes, calendar_week)
# filter to the desired route, weekday and direction (sorted by arrival time)
route_day = routes_on_a_day(trips_alt, weekday_choice, route_choice, direction_choice)
# filter to desired columns; .copy() makes an independent frame so the column
# assignments below never write into a slice of trips_alt
route_day = route_day.loc[:, ["service_id", "arrival_time", "route_short_name"]].copy()
# Convert 'arrival time' to datetime
route_day['arrival_time'] = route_day['arrival_time'].map(stop_time_to_datetime)
# create time_delta: difference between arrival times
route_day = my.create_time_delta(route_day, "arrival_time", "time_delta")
route_day.head()

In [None]:
# For each service_id, count the distinct values found in every other column of route_day.
route_day.groupby(['service_id']).nunique()

In [None]:
# Convert timestamp and timedelta objects to plain numbers; the chart is drawn from these.
route_day["delta_val"] = route_day["time_delta"].map(lambda x: x.seconds/60)             # minutes
route_day["time_val"] = route_day["arrival_time"].map(lambda x: x.hour + x.minute/60)    # decimal hours

sns.set_style("darkgrid")
ax = sns.lineplot(data = route_day,
                  x = 'time_val',
                  y = 'delta_val'
                 )

ax.set(xlabel = "Hour of the Day",
       ylabel = "minutes",
       # Built from the selection variables so the title cannot drift from the data shown.
       title = f"time between buses - {route_choice} - {weekday_choice} - dir {direction_choice}"
      )

# Fixed ticks and limits: 0-65 minutes on the y axis, a 0-24 hour day on the x axis.
ax.set_yticks([0,5,10,15,30,45,60])
ax.set_ylim(0,65)
ax.set_xticks([0,5,7,9,12,16,18,20,21,22,24])
ax.set_xlim(0,24)

plt.show()