In [1]:
import numpy as np
import pandas as pd
import folium
import webbrowser
import os

In [2]:
# Load the pickup-time records and the location table; both frames are
# indexed by their "location_id" column.
df, df_loc = (
    pd.read_csv(csv_name, index_col="location_id")
    for csv_name in ("pickup_times.csv", "locations.csv")
)

# Function used to color the markers on the map
def pickup_color(pickup_time, green, yellow, orange):
    """Return the marker color name for a pickup time.

    The thresholds are checked in the order green, yellow, orange; the first
    one that ``pickup_time`` is strictly below decides the color.

    Args:
        pickup_time: value to classify.
        green: exclusive upper bound for 'green'.
        yellow: exclusive upper bound for 'yellow'.
        orange: exclusive upper bound for 'orange'.

    Returns:
        'green', 'yellow' or 'orange' for the first matching threshold,
        otherwise 'red'.
    """
    thresholds = ((green, 'green'), (yellow, 'yellow'), (orange, 'orange'))
    for upper_bound, color_name in thresholds:
        if pickup_time < upper_bound:
            return color_name
    return 'red'

In [3]:
# Keep only the necessary data: derive 'date' and 'hour' columns from the
# ISO 8601 timestamp and then discard the raw timestamp column.
#
# The guard makes this cell safe to re-run: once 'iso_8601_timestamp' has been
# dropped, splitting it again would raise a KeyError.
if 'iso_8601_timestamp' in df.columns:
    # Split "<date>T<time>" once, at the date/time separator
    stamp_parts = df['iso_8601_timestamp'].str.split("T", n=1, expand=True)

    df = df.assign(
        date=stamp_parts[0],
        # Keep only the hours (discard mins and seconds)
        hour=stamp_parts[1].str[0:2].astype(int),
    ).drop(columns=['iso_8601_timestamp'])

In [4]:
# Ask for the day and the hour window [low_time, high_time).
# Inputs are not validated: a non-numeric hour makes int() raise ValueError.
inp_date = input("Enter a date yyyy-mm-dd :")
low_time = int(input("Enter the low time: "))
high_time = int(input("Enter the high time: "))

# Filter first and sort afterwards (sorting only the remaining rows is
# cheaper), then move the location_id index back into a regular column.
on_date = df['date'] == inp_date
in_window = (df['hour'] >= low_time) & (df['hour'] < high_time)
df_ans = df[on_date & in_window].sort_index().reset_index()

Enter a date yyyy-mm-dd :2019-01-11
Enter the low time: 16
Enter the high time: 18


In [5]:
# Aggregate the filtered pickups per location.
#
# The result is two dictionaries keyed by location_id (dictionary lookups are
# the fastest way to use them in the next step, when the map is created):
#   medians_dict[location_id]       -> median of pickup_time for that location
#   total_pickups_dict[location_id] -> number of pickups from that location
#
# groupby treats every location_id value the same way (including 0), which a
# manual scan over the sorted rows with a "previous id = 0" sentinel does not.

medians_dict = {}
total_pickups_dict = {}

if len(df_ans):  # if there are results
    pickups_by_location = df_ans.groupby('location_id')['pickup_time']
    medians_dict = pickups_by_location.median().to_dict()
    # size() counts rows, so pickups with a missing pickup_time are included
    total_pickups_dict = pickups_by_location.size().to_dict()

In [6]:
# Build the folium map for the selected date/timeframe and save it as HTML.
# Every location that has pickups gets a circle colored by its median pickup
# time; extra markers flag the locations with the lowest/highest median and
# the fewest/most pickups.
if len(df_ans):  # if there are results
    fg_name = "Date: " + inp_date + ", timeframe: " + str(low_time) + " - " + str(high_time)

    # Thresholds (in minutes) handed to pickup_color:
    # below 20 -> green, below 25 -> yellow, below 30 -> orange, otherwise red
    color_limits = (20, 25, 30)
    # folium.Icon only accepts a fixed palette that has no 'yellow', so the
    # pin markers fall back to the closest available shade.
    icon_color_fallback = {'yellow': 'beige'}

    # Center the map on the mean coordinate of all known locations.
    # I could also center the map based on the lowest median (problem if there are more than one lowest values: use average?)
    center_lat = df_loc['latitude'].mean()
    center_lon = df_loc['longitude'].mean()
    # NOTE: the name `map` shadows the builtin, but the next cell displays
    # the map through this name, so it is kept.
    map = folium.Map(location=[center_lat, center_lon], tiles="OpenStreetMap", zoom_start=13)

    min_median = min(medians_dict.values())
    max_median = max(medians_dict.values())
    min_pickups = min(total_pickups_dict.values())
    max_pickups = max(total_pickups_dict.values())

    fg = folium.FeatureGroup(name=fg_name)
    fg_stats = folium.FeatureGroup(name="Min/Max values")

    for index, row in df_loc.iterrows():
        # The index of df_loc is the location_id; locations without pickups
        # in the selected timeframe are not in the dictionaries and are skipped.
        med = medians_dict.get(index)
        if med is None:
            continue
        total = total_pickups_dict.get(index)

        lat = row['latitude']
        lon = row['longitude']
        marker_color = pickup_color(med, *color_limits)
        icon_color = icon_color_fallback.get(marker_color, marker_color)

        msg = "ID:" + str(index) + ", median: " + str(med) + " minutes (" + str(total) + " pickups)"
        fg.add_child(folium.CircleMarker(location=[lat, lon], radius=6, tooltip=msg,
                                         fill_color=marker_color, weight=1, fill=True,
                                         color='black', fill_opacity=0.7))

        if med == min_median:
            fg_stats.add_child(folium.Marker(location=[lat, lon],
                                             popup=folium.Popup("Lowest median: " + str(med)),
                                             icon=folium.Icon(color=icon_color, icon_color='black')))
        if med == max_median:
            fg_stats.add_child(folium.Marker(location=[lat, lon],
                                             popup=folium.Popup("Highest median: " + str(med)),
                                             icon=folium.Icon(color=icon_color, icon_color='black')))
        if total == min_pickups:
            fg_stats.add_child(folium.Marker(location=[lat, lon],
                                             popup=folium.Popup("Min pickups: " + str(min_pickups)),
                                             icon=folium.Icon(color='red', icon='bar-chart', prefix='fa')))
        if total == max_pickups:
            fg_stats.add_child(folium.Marker(location=[lat, lon],
                                             popup=folium.Popup("Max pickups: " + str(max_pickups)),
                                             icon=folium.Icon(color='blue', icon='bar-chart', prefix='fa')))

    map.add_child(fg)
    map.add_child(fg_stats)
    map.add_child(folium.LayerControl())

    filename = "medians_" + inp_date + "_" + str(low_time) + "-" + str(high_time) + ".html"
    map_folder = "map_data"  # just to save all of them in a different folder

    # Saving fails if the target folder is missing, so create it on demand
    os.makedirs(map_folder, exist_ok=True)
    rel_path = os.path.join(map_folder, filename)
    map.save(rel_path)

    # uncomment the following line if you need to open the map on the browser
    #webbrowser.open(rel_path, new=2)

    print("Map with results was created!")
else:
    print("No results for that day/hour(s) combination! Try again")

Map with results was created!


In [7]:
# Shows the map in the notebook: the bare expression on the last line is what
# the cell displays.
# NOTE(review): `map` is only assigned by the previous cell when there are
# results; otherwise this name presumably resolves to Python's builtin `map`
# (or to a map left over from an earlier run) -- confirm before relying on it.
map