# <u>DATA VISUALIZATION ON INTERACTIVE MAPS USING FOLIUM</u>

## <u>PROBLEM STATEMENT</u>  :-

### <u>UBER DATASET VISUALIZATION</u>

 **The challenge is to use Python libraries and data analysis techniques
 to visualize the Uber dataset. The dataset provides the location and the date and time of each trip.**

### <u>PROBLEM DESCRIPTION</u> :
**Develop an interesting visualization to
interactively explore this dataset.**

**DATASET LINK:** <https://drive.google.com/drive/folders/1mx7gbeVsni8QvPqnNXvFs9CpUn5yu0Kt>


**Stage 1 – To explore the whole dataset, we look for innovative ideas and
applications which allow a user to explore the whole dataset.**

**Stage 2 – The application should have the capability to flag the relevant
parts of the dataset and show those in the form of an interactive viz.**

# **STAGE 1**

In [1]:
%pylab inline
import pandas as pd
import numpy as np

Populating the interactive namespace from numpy and matplotlib


## Loading csv file

In [2]:
## Location of the April trip export on the author's machine; adjust this path when running elsewhere
csv_path = r"C:\Users\sbwad\Desktop\data science\internship project\Uber-dataset\Uber-dataset\apr\aa.csv"
data = pd.read_csv(csv_path, engine='python')

In [3]:
data.head()    ## preview the first five rows of the loaded trips

Unnamed: 0,Date/Time,Lat,Lon,Base
0,4/1/2014 0:11:00,40.769,-73.9549,B02512
1,4/1/2014 0:17:00,40.7267,-74.0345,B02512
2,4/1/2014 0:21:00,40.7316,-73.9873,B02512
3,4/1/2014 0:28:00,40.7588,-73.9776,B02512
4,4/1/2014 0:33:00,40.7594,-73.9722,B02512


## Converting Date/Time format into simpler format
#### The Date/Time column is converted from text into pandas datetime values so that *Pandas* can easily understand and operate on it


In [46]:
## Parse the 'Date/Time' strings into pandas datetimes.
## The raw values look like '4/1/2014 0:11:00' (month/day/year), so the format is given
## explicitly: this removes any day/month ambiguity and skips per-value format guessing.
data['Date/Time'] = pd.to_datetime(data['Date/Time'], format='%m/%d/%Y %H:%M:%S')

In [47]:
data.head()    ## check that 'Date/Time' now shows parsed timestamps

Unnamed: 0,Date/Time,Lat,Lon,Base
0,2014-04-01 00:11:00,40.769,-73.9549,B02512
1,2014-04-01 00:17:00,40.7267,-74.0345,B02512
2,2014-04-01 00:21:00,40.7316,-73.9873,B02512
3,2014-04-01 00:28:00,40.7588,-73.9776,B02512
4,2014-04-01 00:33:00,40.7594,-73.9722,B02512


In [48]:
## Weekday name of the first trip; day_name() is available because the value is a pandas Timestamp
first_trip_time = data.at[0, 'Date/Time']
first_trip_time.day_name()

'Tuesday'

## Adding more informative columns to the dataset

In [9]:
## The '.dt' accessor applies day_name() to every timestamp in the column at once
day_names = data['Date/Time'].dt.day_name()
data['DoW'] = day_names

In [10]:
data.head()    ## preview the rows together with the new 'DoW' (day of week) column

Unnamed: 0,Date/Time,Lat,Lon,Base,DoW
0,2014-04-01 00:11:00,40.769,-73.9549,B02512,Tuesday
1,2014-04-01 00:17:00,40.7267,-74.0345,B02512,Tuesday
2,2014-04-01 00:21:00,40.7316,-73.9873,B02512,Tuesday
3,2014-04-01 00:28:00,40.7588,-73.9776,B02512,Tuesday
4,2014-04-01 00:33:00,40.7594,-73.9722,B02512,Tuesday


In [11]:
## Use the 'Date/Time' column as the index so rows can be selected by date.
## Reassigning (instead of inplace=True) together with the column check keeps this cell
## safe to re-run: once the column has become the index there is nothing left to move.
if 'Date/Time' in data.columns:
    data = data.set_index('Date/Time')

In [12]:
data.loc['2014-04-01':'2014-04-07']    ## label-based slice on the datetime index; the end date is included

Unnamed: 0_level_0,Lat,Lon,Base,DoW
Date/Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-04-01 00:11:00,40.7690,-73.9549,B02512,Tuesday
2014-04-01 00:17:00,40.7267,-74.0345,B02512,Tuesday
2014-04-01 00:21:00,40.7316,-73.9873,B02512,Tuesday
2014-04-01 00:28:00,40.7588,-73.9776,B02512,Tuesday
2014-04-01 00:33:00,40.7594,-73.9722,B02512,Tuesday
...,...,...,...,...
2014-04-07 22:55:00,40.7744,-73.8727,B02764,Monday
2014-04-07 23:06:00,40.7449,-73.9890,B02764,Monday
2014-04-07 23:17:00,40.7421,-74.0039,B02764,Monday
2014-04-07 23:25:00,40.7742,-73.9612,B02764,Monday


# **STAGE 2**
## Using Folium for interactive *Data Visualization*

In [13]:
import folium
from folium import plugins

In [14]:
## Base map: folium.Map is given a [lat, lon] centre and an initial zoom level
m = folium.Map(
    zoom_start=6,
    location=[40.7690, -73.9549],
)

In [15]:
m    ## render the base map

In [16]:
## Work with only the first 20 trips so the markers render quickly
df = data.iloc[:20]
df.head()

Unnamed: 0_level_0,Lat,Lon,Base,DoW
Date/Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-04-01 00:11:00,40.769,-73.9549,B02512,Tuesday
2014-04-01 00:17:00,40.7267,-74.0345,B02512,Tuesday
2014-04-01 00:21:00,40.7316,-73.9873,B02512,Tuesday
2014-04-01 00:28:00,40.7588,-73.9776,B02512,Tuesday
2014-04-01 00:33:00,40.7594,-73.9722,B02512,Tuesday


In [17]:
## Drop one blue cab marker on the map for every trip in the sample; the popup shows the weekday
for lat, lon, weekday in zip(df['Lat'], df['Lon'], df['DoW']):
    cab_icon = folium.Icon(color='blue', icon='cab', prefix='fa')    ## prefix 'fa' selects the 'Font Awesome' icon set
    folium.Marker(location=[lat, lon], popup=weekday, icon=cab_icon).add_to(m)

In [18]:
m    ## render the map again, now with the trip markers added

# Filtering data 
### We can filter different weekdays of the month for better comparison

In [19]:
## Boolean mask marking the Sunday trips, then the matching rows kept as their own dataframe
filter_sunday = data['DoW'].eq('Sunday')
sun = data[filter_sunday]

In [20]:
## Keep only the Sunday trips up to and including 2014-04-07 (the first week).
## The selection used to be evaluated without being assigned, so it had no effect.
## A boolean mask on the index is used so the cut-off does not depend on the index being sorted.
sun = sun[sun.index < pd.Timestamp('2014-04-08')]
sun = sun.iloc[:20]                              ## then keep only 20 entries for faster visualization

In [21]:
## Separate map for the Sunday trips, using the same centre as the base map but a closer zoom
m_sun = folium.Map(
    zoom_start=10,
    location=[40.7690, -73.9549],
)

In [22]:
## One red cab marker per Sunday trip; the popup shows the weekday
for lat, lon, weekday in zip(sun['Lat'], sun['Lon'], sun['DoW']):
    cab_icon = folium.Icon(color='red', icon='cab', prefix='fa')
    folium.Marker(location=[lat, lon], popup=weekday, icon=cab_icon).add_to(m_sun)
    
    
    


## Adding the measure_control function

In [23]:
## Measuring tool placed in the top-left corner of the Sunday map, with meters as its primary length unit
measure_control = plugins.MeasureControl(
    primary_length_unit='meters',
    position='topleft',
)
m_sun.add_child(measure_control)

In [24]:
m_sun    ## render the Sunday map

In [25]:
## Boolean mask marking the Monday trips, then the matching rows kept as their own dataframe
filter_monday = data['DoW'].eq('Monday')
mon = data[filter_monday]

In [26]:
## Keep only the Monday trips up to and including 2014-04-07 (the first week).
## The selection used to be evaluated without being assigned, so it had no effect.
## A boolean mask on the index is used so the cut-off does not depend on the index being sorted.
mon = mon[mon.index < pd.Timestamp('2014-04-08')]
mon = mon.iloc[:20]    ## keep only 20 entries for faster visualization
mon.head()

Unnamed: 0_level_0,Lat,Lon,Base,DoW
Date/Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-04-07 00:31:00,40.7205,-73.9939,B02512,Monday
2014-04-07 00:37:00,40.7407,-74.0077,B02512,Monday
2014-04-07 00:50:00,40.7591,-73.9892,B02512,Monday
2014-04-07 00:58:00,40.7419,-74.0034,B02512,Monday
2014-04-07 01:27:00,40.7419,-74.0034,B02512,Monday


In [27]:
## Monday map: one green cab marker per trip (popup shows the weekday), plus a measuring tool
m_mon = folium.Map(zoom_start=10, location=[40.7690, -73.9549])
for lat, lon, weekday in zip(mon['Lat'], mon['Lon'], mon['DoW']):
    cab_icon = folium.Icon(color='green', icon='cab', prefix='fa')
    folium.Marker(location=[lat, lon], popup=weekday, icon=cab_icon).add_to(m_mon)

measure_control = plugins.MeasureControl(primary_length_unit='meters', position='topleft')
m_mon.add_child(measure_control)

In [28]:
m_mon    ## render the Monday map

## Tuesday data

In [29]:
## Select the Tuesday trips
filter_tuesday = data['DoW'] == 'Tuesday'
tue = data.loc[filter_tuesday]

## Keep only the trips up to and including 2014-04-07 (the first week).
## The selection used to be evaluated without being assigned, so it had no effect.
## A boolean mask on the index is used so the cut-off does not depend on the index being sorted.
tue = tue[tue.index < pd.Timestamp('2014-04-08')]
tue = tue.iloc[:20]    ## keep only 20 entries for faster visualization
tue.head()

Unnamed: 0_level_0,Lat,Lon,Base,DoW
Date/Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-04-01 00:11:00,40.769,-73.9549,B02512,Tuesday
2014-04-01 00:17:00,40.7267,-74.0345,B02512,Tuesday
2014-04-01 00:21:00,40.7316,-73.9873,B02512,Tuesday
2014-04-01 00:28:00,40.7588,-73.9776,B02512,Tuesday
2014-04-01 00:33:00,40.7594,-73.9722,B02512,Tuesday


In [30]:
## Tuesday map: one green cab marker per trip (popup shows the weekday), plus a measuring tool
m_tue = folium.Map(zoom_start=10, location=[40.7690, -73.9549])
for lat, lon, weekday in zip(tue['Lat'], tue['Lon'], tue['DoW']):
    cab_icon = folium.Icon(color='green', icon='cab', prefix='fa')
    folium.Marker(location=[lat, lon], popup=weekday, icon=cab_icon).add_to(m_tue)

measure_control = plugins.MeasureControl(primary_length_unit='meters', position='topleft')
m_tue.add_child(measure_control)

## Wednesday data

In [31]:
## Select the Wednesday trips
filter_wed = data['DoW'] == 'Wednesday'
wed = data.loc[filter_wed]

## Keep only the trips up to and including 2014-04-07 (the first week).
## The selection used to be evaluated without being assigned, so it had no effect.
## A boolean mask on the index is used so the cut-off does not depend on the index being sorted.
wed = wed[wed.index < pd.Timestamp('2014-04-08')]
wed = wed.iloc[:20]    ## keep only 20 entries for faster visualization
wed.head()

Unnamed: 0_level_0,Lat,Lon,Base,DoW
Date/Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-04-02 00:00:00,40.7458,-73.9843,B02512,Wednesday
2014-04-02 00:05:00,40.7285,-74.0467,B02512,Wednesday
2014-04-02 00:07:00,40.7732,-73.9546,B02512,Wednesday
2014-04-02 00:11:00,40.655,-73.9786,B02512,Wednesday
2014-04-02 00:21:00,40.7405,-74.004,B02512,Wednesday


In [32]:
## Wednesday map: one blue cab marker per trip (popup shows the weekday), plus a measuring tool
m_wed = folium.Map(zoom_start=10, location=[40.7690, -73.9549])
for lat, lon, weekday in zip(wed['Lat'], wed['Lon'], wed['DoW']):
    cab_icon = folium.Icon(color='blue', icon='cab', prefix='fa')
    folium.Marker(location=[lat, lon], popup=weekday, icon=cab_icon).add_to(m_wed)

measure_control = plugins.MeasureControl(primary_length_unit='meters', position='topleft')
m_wed.add_child(measure_control)

## Thursday data

In [33]:
## Select the Thursday trips
filter_thu = data['DoW'] == 'Thursday'
thu = data.loc[filter_thu]

## Keep only the trips up to and including 2014-04-07 (the first week).
## The selection used to be evaluated without being assigned, so it had no effect.
## A boolean mask on the index is used so the cut-off does not depend on the index being sorted.
thu = thu[thu.index < pd.Timestamp('2014-04-08')]
thu = thu.iloc[:20]    ## keep only 20 entries for faster visualization
thu.head()

Unnamed: 0_level_0,Lat,Lon,Base,DoW
Date/Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-04-03 00:00:00,40.7422,-74.0062,B02512,Thursday
2014-04-03 00:02:00,40.7239,-74.0034,B02512,Thursday
2014-04-03 00:02:00,40.7239,-74.0034,B02512,Thursday
2014-04-03 00:02:00,40.7104,-74.0118,B02512,Thursday
2014-04-03 00:10:00,40.7221,-73.9837,B02512,Thursday


In [34]:
## Thursday map: one blue cab marker per trip (popup shows the weekday), plus a measuring tool
m_thu = folium.Map(zoom_start=10, location=[40.7690, -73.9549])
for lat, lon, weekday in zip(thu['Lat'], thu['Lon'], thu['DoW']):
    cab_icon = folium.Icon(color='blue', icon='cab', prefix='fa')
    folium.Marker(location=[lat, lon], popup=weekday, icon=cab_icon).add_to(m_thu)

measure_control = plugins.MeasureControl(primary_length_unit='meters', position='topleft')
m_thu.add_child(measure_control)

## Friday data

In [35]:
## Select the Friday trips
filter_fri = data['DoW'] == 'Friday'
fri = data.loc[filter_fri]

## Keep only the trips up to and including 2014-04-07 (the first week).
## The selection used to be evaluated without being assigned, so it had no effect.
## A boolean mask on the index is used so the cut-off does not depend on the index being sorted.
fri = fri[fri.index < pd.Timestamp('2014-04-08')]
fri = fri.iloc[:20]    ## keep only 20 entries for faster visualization
fri.head()

Unnamed: 0_level_0,Lat,Lon,Base,DoW
Date/Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-04-04 00:03:00,40.7528,-73.9858,B02512,Friday
2014-04-04 00:05:00,40.7263,-74.0018,B02512,Friday
2014-04-04 00:12:00,40.7263,-73.9917,B02512,Friday
2014-04-04 00:12:00,40.7813,-73.9516,B02512,Friday
2014-04-04 00:14:00,40.717,-73.9987,B02512,Friday


In [36]:
## Friday map: one blue cab marker per trip (popup shows the weekday), plus a measuring tool
m_fri = folium.Map(zoom_start=10, location=[40.7690, -73.9549])
for lat, lon, weekday in zip(fri['Lat'], fri['Lon'], fri['DoW']):
    cab_icon = folium.Icon(color='blue', icon='cab', prefix='fa')
    folium.Marker(location=[lat, lon], popup=weekday, icon=cab_icon).add_to(m_fri)

measure_control = plugins.MeasureControl(primary_length_unit='meters', position='topleft')
m_fri.add_child(measure_control)

## Saturday data

In [37]:
## Select the Saturday trips
filter_sat = data['DoW'] == 'Saturday'
sat = data.loc[filter_sat]

## Keep only the trips up to and including 2014-04-07 (the first week).
## The selection used to be evaluated without being assigned, so it had no effect.
## A boolean mask on the index is used so the cut-off does not depend on the index being sorted.
sat = sat[sat.index < pd.Timestamp('2014-04-08')]
sat = sat.iloc[:20]    ## keep only 20 entries for faster visualization
sat.head()

Unnamed: 0_level_0,Lat,Lon,Base,DoW
Date/Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-04-05 00:00:00,40.769,-73.9825,B02512,Saturday
2014-04-05 00:00:00,40.7594,-73.9641,B02512,Saturday
2014-04-05 00:01:00,40.7113,-74.0173,B02512,Saturday
2014-04-05 00:02:00,40.806,-73.9652,B02512,Saturday
2014-04-05 00:04:00,40.7211,-74.0042,B02512,Saturday


In [38]:
## Saturday map: one red cab marker per trip (popup shows the weekday), plus a measuring tool
m_sat = folium.Map(zoom_start=10, location=[40.7690, -73.9549])
for lat, lon, weekday in zip(sat['Lat'], sat['Lon'], sat['DoW']):
    cab_icon = folium.Icon(color='red', icon='cab', prefix='fa')
    folium.Marker(location=[lat, lon], popup=weekday, icon=cab_icon).add_to(m_sat)

measure_control = plugins.MeasureControl(primary_length_unit='meters', position='topleft')
m_sat.add_child(measure_control)

## Adding all the weekdays' data into a single view using ipywidgets

In [39]:
import ipywidgets

In [40]:
## List box offering the seven weekday names, Monday first
WEEKDAY_OPTIONS = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
select_widget = ipywidgets.Select(options=WEEKDAY_OPTIONS)

# widget function
def select(weekday):
    """Display the folium map that belongs to the chosen weekday.

    weekday -- weekday name such as 'Monday'; any other value displays nothing.
    """
    maps_by_weekday = {
        'Monday': m_mon,
        'Tuesday': m_tue,
        'Wednesday': m_wed,
        'Thursday': m_thu,
        'Friday': m_fri,
        'Saturday': m_sat,
        'Sunday': m_sun,
    }
    chosen_map = maps_by_weekday.get(weekday)
    if chosen_map is not None:
        display(chosen_map)
        
# interaction between widgets and function:
# the Select widget supplies the 'weekday' argument that select() receives
ipywidgets.interact(select, weekday=select_widget)

interactive(children=(Select(description='weekday', options=('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Fr…

<function __main__.select(weekday)>