## Visualization

This Notebook was created with the intention to visualize individual possible trip table. 

In [11]:
# Import modules
import pandas as pd
import folium
from shapely.geometry import LineString
from geopandas import GeoDataFrame
import os
import random

### Create a dataframe containing individual possible trips' ID and all the stops coordinates within a trip

In order to visualize individual possible trip(s), the first step is to get the coordinates information of all the stops within a trip. To do so, we merged "trip", "stop_time", and "stops" tables and then sliced out the needed columns.

In [2]:
# Create paths to the files that are going to be merged
trip = os.path.join("data/3-6", "trips.txt")
stop_time = os.path.join("data/3-6", "stop_times.txt")
stop = os.path.join("data/3-6", "stops.txt")

# Read the files into dataframe
df1 = pd.read_csv(trip, low_memory = False)
df2 = pd.read_csv(stop_time, low_memory = False)
df3 = pd.read_csv(stop, low_memory = False)

# Merge the first two dataframes
merge_1 = pd.merge(df1, df2, on ="trip_id", how = "left" )

# Merge the third dataframe
merge_2 = pd.merge(merge_1, df3, on = "stop_id", how = "left")

# Slice out the desired columns
slice1 = merge_2[["trip_id", "stop_lat", "stop_lon"]]
# Check the result
slice1.head(1)

Unnamed: 0,trip_id,stop_lat,stop_lon
0,8847474-DEC15-MVS-BUS-Weekday-01,44.932478,-93.343189


### Create lists to store needed information

Next, we read the individual possible trip table. There are several needed information we need to derive from this table, which are:
> 1. Individual Possible Trip ID
2. Individual Original Place coordinates
3. Individual Geton Stop coordinates and names
4. Individual Getoff Stop coordinates and names
5. Individual Boarding Place coordinates

In [3]:
# Read the individual possible trip table
df6 = pd.read_csv("data/individual_10.csv", low_memory = False)
df6.head(1)

Unnamed: 0.1,Unnamed: 0,indivi_id,route_id,route_short_name,stop_lat_x,stop_lon_x,stop_lat_y,stop_lon_y,stop_id_x,stop_id_y,stop_name_x,stop_name_y,DATE,BOARDING_LON,BOARDING_LAT,ORIGIN_LAT_100M,ORIGIN_LON_100M,trip_id,arrival_time
0,0,1,6-81,6,44.979544,-93.272558,44.948272,-93.298208,17947,1322,Hennepin Ave & 5th St S,Hennepin Ave & Lake St W,2016-04-11 00:00:00,-93.273144,44.980003,44.948,-93.299,"9181751-MAR16-MVS-BUS-Weekday-01,9181753-MAR16...","12:29:00,12:49:00,12:19:00,12:39:00,12:59:00"


We created several list to store these information

In [4]:
# Create a list to store trip id of individual possible trip
trip_lst = [[df6.at[index, "trip_id"]] for index, row in df6.iterrows()]
# Create a list to store coordinates of geton stops
geton_stop_lst = [(df6.at[index,"stop_lat_y"], df6.at[index,"stop_lon_y"])for index, row in df6.iterrows()]
# Create a list to store coordinates of getoff stops
getoff_stop_lst = [(df6.at[index,"stop_lat_x"], df6.at[index,"stop_lon_x"])for index, row in df6.iterrows()]
# Create a list to store coordinates of individual original place
origin_lst = [(df6.at[index,"ORIGIN_LAT_100M"], df6.at[index,"ORIGIN_LON_100M"])for index, row in df6.iterrows()]
# Create a list to store coordinates of individual boarding place
board_lst = [(df6.at[index,"BOARDING_LAT"], df6.at[index,"BOARDING_LON"])for index, row in df6.iterrows()]
# Create a list to store names of geton stops
geton_stop_name = [df6.at[index, "stop_name_y"] for index, row in df6.iterrows()]
# Create a list to store names of geoff stops
getoff_stop_name = [df6.at[index, "stop_name_x"] for index, row in df6.iterrows()]

### Slice out stops' latitute and longitude based on individual trip id

Since an individual might take multiple trips, we decided to create GeoJSON object of each possible trip, enabling user to toggle between different possible trips. To do so, we first extract all the stops' coordinates within a possible trip, then using "LineString" function to create a line geometry in Json format, and finally created a dataframe to store this line object. 

- We define a function to automatically extract individual possible trips' ID out from the original table and then based on trip ID to find all the stops' coordinates within the trip. 

In [5]:
# Return a dataframe containing individual possible trips and the corresponding stops' coordinates of each trip
def individual_trips(i):
    global trip_lst, slice1
    
    # Split out each trip_id using delimiter ","
    possbile_trips = trip_lst[i][0].split(",")
    # Slice out the rows based on trip_id
    lst = [slice1.loc[slice1["trip_id"] == trip] for trip in possbile_trips] 
    # Concatenat series 
    df = pd.concat(lst).reset_index(drop = True)
    
    return df 

Next, we defined a function to automatically create a dataframe containing the line geometry representing all the stops in a trip.

In [6]:
# Create a line dataframe containing the line geometry representing trip
def make_lines(i, t):
    global trip_lst
    
    # Put the set of stops' coordinates of a trip into a list
    lst = list(zip(list(individual_trips(i).loc[individual_trips(i).trip_id == trip_lst[i][0].split(",")[t]].stop_lon), 
                   list(individual_trips(i).loc[individual_trips(i).trip_id == trip_lst[i][0].split(",")[t]].stop_lat)))
    
    # Create a linestring object
    line = LineString(lst)
    
    # Create a dataframe containing line geometry 
    data = {"id":i+1, "geometry":[line]}
    df_line = pd.DataFrame(data, columns = ["id", "geometry"])
    df_line["id"] = df_line["id"].astype("int") # Convert data type of "id" to integer
    
    return df_line

Then, we defined a function to automatically count how many possible trips the individual has. Based on the previous defined function, a set of dataframes are created and then are appended to a list. Each element in this list is a dataframe representing a possible trip.

In [7]:
# Create a list to store a set of line dataframes
def line_df(i):
    df_lst = []
    for t in range(len(trip_lst[i][0].split(","))):
        df = make_lines(i, t)
        df_lst.append(df)
    return df_lst

Since we also want to visualize individual trip route, we defined a function to return a list containing the coordinates of individual original place, boarding place, and stops between geton and getoff stops.

In [8]:
# Create a list to store coordinates of individual original place, boarding place, and stops between geton and getoff stops
def make_individual_lines(i):
    global trip_lst, geton_stop_lst, getoff_stop_lst, origin_lst, board_lst
    
    # Put the set of stops' coordinates of a trip into a list
    lst = list(zip(list(individual_trips(i).loc[individual_trips(i).trip_id == trip_lst[i][0].split(",")[0]].stop_lat), 
                   list(individual_trips(i).loc[individual_trips(i).trip_id == trip_lst[i][0].split(",")[0]].stop_lon)))
    
    # Execlude the stops prior geton stop
    for e in range(len(lst)):
        if lst[e][0] == geton_stop_lst[i][0] or lst[e][1] == geton_stop_lst[i][1]:
            new = lst[e:]
    
    # Execlude the stops after getoff stop
    for f in range(len(new)):
        if new[f][0] == getoff_stop_lst[i][0] or new[f][1] == getoff_stop_lst[i][1]:
            new1 = new[:f+1]
    
    # Insert the origin and append the board       
    new1.insert(0, origin_lst[i])
    new1.append(board_lst[i])
    
    return new1

Finally, we defined a function to create to visualize information we are interested in. This function take the list containing dataframe representing possible trip and converted it to a geodataframe, enabling folium "GeoJson" function to read it directly. It also visualize individual trip route by using folium "Polyline" function. The individual original place, geton stop, getoff stop, and boarding place are also visualized as the circle markers on the map. This function also allows user to input the specific individual they want to look at. 

In [9]:
# Visualizing the individual possible trips and individual trip route
def visualization():
    # Users can determine which individual they want to see
    input1 = input("Which individual would you like to see?(1-10) ")
    # Make sure user input the correct value
    try:
        i = int(input1)
    except:
        print("Please enter valid number!!")
    
    # Define global variables
    global geton_stop_name, getoff_stop_name, geton_stop_lst, getoff_stop_lst, origin_lst, board_lst
    
    # Create a basemap
    m = folium.Map([44.943722, -93.094276], 
                    zoom_start = 12, tiles='Cartodb dark_matter')
    crs = {"init": "epsg:4326"} # Define coordinate system to WGS84
    
    # Add individual original place, geton stop, getoff stop, and boarding place to the basemap
    folium.CircleMarker(geton_stop_lst[i-1], radius = 5, fill = True, fill_color = "#1dff3b", 
                        fill_opacity = 0.75, weight = 0.3, 
                        tooltip = "Geton Stop, Name: {0}".format(geton_stop_name[i-1])).add_to(m)
    folium.CircleMarker(getoff_stop_lst[i-1], radius = 5, fill = True, fill_color = "#ff251d", 
                        fill_opacity = 0.75, weight = 0.3, 
                        tooltip = "Getoff Stop, Name: {0}".format(getoff_stop_name[i-1])).add_to(m)
    folium.CircleMarker(origin_lst[i-1], radius = 5, fill = True, fill_color = "#ff961d", 
                        fill_opacity = 0.75, weight = 0.3, 
                        tooltip = "Original Place").add_to(m)
    folium.CircleMarker(board_lst[i-1], radius = 5, fill = True, fill_color = "#301dff", 
                        fill_opacity = 0.75, weight = 0.3, 
                        tooltip = "Boarding Place").add_to(m)
    
    # Create a line representing individual trip route and add to the map
    folium.PolyLine(make_individual_lines(i-1),color = "#FF0000", weight = 2,
                    tooltip = "Individual {0} Trip Route".format(i)).add_to(m)
    
    # Create GeoJson object of each individual possible trip and add to the map
    for df in range(len(line_df(i-1))):
        line_gdf = GeoDataFrame(line_df(i-1)[df], crs = crs)
        colors = ["#F50057","#FFD700","#008000","#0000FF","#FF1493","#800000", "#2F4F4F","#4B0082", "#00FFFF", "#FFA500"]
        color = random.choice(colors)
        folium.GeoJson(line_gdf,name = "Possible Trip: {0}".format(df + 1),
                       style_function = lambda feature,color = color: {'color': color},
                       highlight_function = lambda x, color = color: {'weight':5,'color':color,'fillOpacity':1},
                       tooltip=folium.features.GeoJsonTooltip(fields=['id'], aliases=["Possible Trip:"])).add_to(m)
    
    # Add legend to the map through the help of HTML and add to the map                     
    legend_html = '''
                <div style="position: fixed; color: white;
                            bottom: 50px; left: 50px; width: 150px; height: 120px; 
                            border:2px solid white; z-index:9999; font-size:14px;font: "Times New Roman";
                            "><br>
                              &nbsp; Original Place &nbsp; <i class="fa fa-circle" style="color:#ff961d"></i><br>
                              &nbsp; Geton Stop &nbsp; <i class="fa fa-circle" style="color:#1dff3b"></i><br>
                              &nbsp; Getoff Stop &nbsp; <i class="fa fa-circle" style="color:#ff251d"></i><br>
                              &nbsp; Boarding Place &nbsp; <i class="fa fa-circle" style="color:#301dff"></i>
                </div>
                ''' 
    m.get_root().html.add_child(folium.Element(legend_html))
    
    # Enable layer control
    folium.LayerControl().add_to(m)
    return m

In [10]:
visualization()

Which individual would you like to see?(1-10) 2


RuntimeError: b'no arguments in initialization list'

This map shows an individual possible trip and individual trip route. The locations of the individual original place, geton stop, getoff stop, and boarding place. 