<h1 align='center'> Citi Bike Interactive Dashboard </h1>

### Current Citi Bike Locations

#### Because the data only includes rides taken through December 2016, this is an approximation of the number of current stations and their locations.

In [13]:
import pandas as pd
# Load the station list (as of December 2016) into a dataframe
current_stationsdf = pd.read_csv(
    "/Users/hildavargas/Desktop/Springboard_Mini_Projects/Capstone_Project/CB_real_metrics/StationsAsofDec2016.csv"
)
# Not the last expression of the cell, so this preview is not rendered
current_stationsdf.head()

import folium

# Dark base map centred on the given latitude/longitude
nyc_map = folium.Map(
    location=[40.756, -73.982],
    zoom_start=11.9,
    min_zoom=10,
    tiles='cartodbdark_matter',
)

# One marker per station row. Fields are read by position (index is position 0):
# position 2 is used as the popup text, positions 3 and 4 as the marker location
# (presumably station name, latitude, longitude -- verify against the CSV header).
for station in current_stationsdf.itertuples():
    marker = folium.RegularPolygonMarker(
        [station[3], station[4]],
        popup=station[2],
        radius=4,
    )
    marker.add_to(nyc_map)

nyc_map

### Number of Citi Bike Trips by Age Range and Gender

In [7]:
import numpy as np
import pandas as pd

# Null values are written as "\N" in the CSV -- read them as NaN
peopledf = pd.read_csv(
    "/Users/hildavargas/Desktop/Springboard_Mini_Projects/Capstone_Project/CB_real_metrics/PeopleGenderAge.csv",
    na_values=[r'\N'],
)

# Replace Gender w/ 'Unknown, Male, Female' vs. '0, 1, 2'
peopledf['Gender'] = peopledf['Gender'].replace({0: 'Unknown', 1: 'Male', 2: 'Female'})

# Replace birth years < 1920 with nulls (see notebook "Identifying Outliers in Age Range" for reference).
# .loc is used because the .ix indexer no longer exists in current pandas releases.
peopledf.loc[peopledf['Birth Year'] < 1920, 'Birth Year'] = np.nan

# Drop rows with nulls (missing birth years and those < 1920)
peopledf = peopledf.dropna()
peopledf.head()

Unnamed: 0,Gender,Birth Year,Year,Month,Number of Trips
0,Female,1944.0,2013,7,100
1,Male,1921.0,2013,7,25
2,Male,1987.0,2013,7,17294
3,Female,1965.0,2013,7,2643
4,Male,1946.0,2013,7,976


In [8]:
# Create bins for age ranges based off the birth year values:
# five-year edges 1915, 1920, ..., 2010
bins = list(range(1915, 2015, 5))

# Add age ranges column to dataframe
peopledf['Age Ranges'] = pd.cut(peopledf['Birth Year'], precision=0, bins=bins)

# Row order is left as loaded: sort_values returns a new frame, so an unassigned
# call has no effect. The plotting functions sort their own selection.
peopledf.head()

Unnamed: 0,Gender,Birth Year,Year,Month,Number of Trips,Age Ranges
0,Female,1944.0,2013,7,100,"(1940, 1945]"
1,Male,1921.0,2013,7,25,"(1920, 1925]"
2,Male,1987.0,2013,7,17294,"(1985, 1990]"
3,Female,1965.0,2013,7,2643,"(1960, 1965]"
4,Male,1946.0,2013,7,976,"(1945, 1950]"


In [9]:
# Change age ranges to intervals that will be easier to understand in a plot,
# e.g. the bin "(1940, 1945]" becomes the label "1941-1945"
unique_ranges = peopledf['Age Ranges'].unique()
from collections import defaultdict
x = defaultdict(str)

# Depending on the pandas version, pd.cut bins are plain "(lower, upper]" strings
# or Interval objects; str() yields the same "(lower, upper]" text for both, so
# the bounds are parsed from that text instead of slicing by character position.
for age_bin in unique_ranges:
    if pd.isnull(age_bin):
        # Birth years outside the bin edges have no bin and get no label
        continue
    lower, upper = str(age_bin).strip('(]').split(', ')
    # The bin excludes its lower edge, so the first birth year covered is lower + 1
    x[age_bin] = '{}-{}'.format(int(float(lower)) + 1, int(float(upper)))

peopledf['Age Range'] = peopledf['Age Ranges'].map(x)
peopledf['Age Range'].unique()

array(['1941-1945', '1921-1925', '1986-1990', '1961-1965', '1946-1950',
       '1951-1955', '1931-1935', '1936-1940', '1981-1985', '1966-1970',
       '1956-1960', '1971-1975', '1926-1930', '1976-1980', '1991-1995',
       '1916-1920', '1996-2000'], dtype=object)

In [10]:
# Translate each bin in 'Age Ranges' through the mapping `x` and store the
# readable label in 'Age Range', then preview the dataframe
age_range_labels = peopledf['Age Ranges'].map(x)
peopledf['Age Range'] = age_range_labels
peopledf.head()

Unnamed: 0,Gender,Birth Year,Year,Month,Number of Trips,Age Ranges,Age Range
0,Female,1944.0,2013,7,100,"(1940, 1945]",1941-1945
1,Male,1921.0,2013,7,25,"(1920, 1925]",1921-1925
2,Male,1987.0,2013,7,17294,"(1985, 1990]",1986-1990
3,Female,1965.0,2013,7,2643,"(1960, 1965]",1961-1965
4,Male,1946.0,2013,7,976,"(1945, 1950]",1946-1950


In [24]:
from ipywidgets import interact, Layout
from ipywidgets.widgets import SelectMultiple
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
import textwrap

# Create dictionary for new column in dataframe with the actual month name
months_name = {
    1: 'January',
    2: 'February',
    3: 'March',
    4: 'April',
    5: 'May',
    6: 'June',
    7: 'July',
    8: 'August',
    9: 'September',
    10: 'October',
    11: 'November',
    12: 'December',
}

df = peopledf.copy()
df['Month Name'] = df["Month"].map(months_name)

# Create lists of unique values for months and years for widget.
# The month options are derived from months_name so every option matches a
# 'Month Name' value exactly; a misspelled option can never select any rows.
months = [months_name[number] for number in sorted(months_name)]
years = sorted(df['Year'].unique().tolist())

# Define function for interactive widget
def make_plot(Months, Years):
    """Plot the total number of trips per age range for the selected months and years.

    Parameters
    ----------
    Months : iterable
        Month names to keep; matched against the 'Month Name' column of `df`.
    Years : iterable
        Years to keep; matched against the 'Year' column of `df`.

    Returns
    -------
    None
        The point plot is shown as a side effect. If the selection matches no
        rows, or plotting raises ValueError, a notice is printed instead.

    Notes
    -----
    `df` is looked up at call time, so a later cell that rebinds `df` changes
    what this widget plots.
    """
    text="This dataset spans from July 2013 (Citi Bike's opening to the public) to December 2016. Please make another selection and try again."
    selected_months = list(Months)
    selected_years = list(Years)
    gooddata = df.loc[
        df['Month Name'].isin(selected_months) & df['Year'].isin(selected_years)
    ].sort_values(by='Age Range')

    # Check for an empty selection explicitly rather than relying on the
    # plotting library to raise for empty data.
    if gooddata.empty:
        print(textwrap.fill(text, 100))
        return

    try:
        sns.set_style("whitegrid")
        plt.rcParams['figure.figsize'] = (10, 10)
        # x/y are passed by keyword: newer seaborn releases reject them positionally
        plot = sns.pointplot(x='Age Range', y='Number of Trips', estimator=sum,
                             data=gooddata, ci=None, color="#af2b30")
        title = plt.title("Number of Trips per Age Range", fontsize=16, fontweight="bold")
        title.set_position([.5, 1.05])
        plt.ylabel('Number of Trips')
        for item in plot.get_xticklabels():
            item.set_rotation(35)
        # tick_params sets the label size without touching Tick.label,
        # which newer matplotlib releases no longer provide
        plot.tick_params(axis='x', labelsize=10)
        plot.tick_params(axis='y', labelsize=12)
        plot.yaxis.label.set_size(14)
        plot.xaxis.label.set_size(14)
        plot.xaxis.labelpad = 20
        plot.yaxis.labelpad = 20
        plt.gca().set_ylim(bottom=0)
        # Show y tick values with thousands separators
        plot.get_yaxis().set_major_formatter(
            matplotlib.ticker.FuncFormatter(lambda value, pos: format(int(value), ','))
        )
        plt.show()

    except ValueError:
        print(textwrap.fill(text, 100))
            
# Render the multi-select widgets for make_plot, preselecting July 2013
interact(
    make_plot,
    Months=SelectMultiple(options=months, value=['July']),
    Years=SelectMultiple(options=years, value=[2013]),
)

<function __main__.make_plot>

In [12]:
from ipywidgets import interact, Layout
from ipywidgets.widgets import SelectMultiple
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
import textwrap

# Create dictionary for new column in dataframe with the actual month name
months_name = {
    1: 'January',
    2: 'February',
    3: 'March',
    4: 'April',
    5: 'May',
    6: 'June',
    7: 'July',
    8: 'August',
    9: 'September',
    10: 'October',
    11: 'November',
    12: 'December',
}

df = peopledf.copy()
df['Month Name'] = df["Month"].map(months_name)

# Create lists of unique values for months and years for widget.
# The month options are derived from months_name so every option matches a
# 'Month Name' value exactly; a misspelled option can never select any rows.
months = [months_name[number] for number in sorted(months_name)]
years = sorted(df['Year'].unique().tolist())

# Define function for interactive widget
def make_plot2(Months, Years):
    """Plot the total number of trips per age range, split by gender.

    Parameters
    ----------
    Months : iterable
        Month names to keep; matched against the 'Month Name' column of `df`.
    Years : iterable
        Years to keep; matched against the 'Year' column of `df`.

    Returns
    -------
    None
        The grouped bar plot is shown as a side effect. If the selection
        matches no rows, or plotting raises ValueError, a notice is printed
        instead.

    Notes
    -----
    `df` is looked up at call time, so a later cell that rebinds `df` changes
    what this widget plots.
    """
    text="This dataset spans from July 2013 (Citi Bike's opening to the public) to December 2016. Please make another selection and try again."
    selected_months = list(Months)
    selected_years = list(Years)
    gooddata = df.loc[
        df['Month Name'].isin(selected_months) & df['Year'].isin(selected_years)
    ].sort_values(by='Age Range')

    # Check for an empty selection explicitly rather than relying on the
    # plotting library to raise for empty data.
    if gooddata.empty:
        print(textwrap.fill(text, 100))
        return

    try:
        sns.set_style("whitegrid")
        plt.rcParams['figure.figsize'] = (10, 8)
        # One fixed colour per gender label
        pal = {'Female': "#f9a65a", 'Male': "#9e66ab", 'Unknown': "#af2b30"}
        # x/y are passed by keyword: newer seaborn releases reject them positionally
        plot = sns.barplot(x='Age Range', y='Number of Trips', hue='Gender', estimator=sum,
                           data=gooddata, ci=None, palette=pal)
        title = plt.title("Number of Trips per Age Range, Grouped by Gender", fontsize=16, fontweight="bold")
        title.set_position([.5, 1.05])
        plt.ylabel('Number of Trips')
        plt.legend(loc=1)
        for item in plot.get_xticklabels():
            item.set_rotation(40)
        # tick_params sets the label size without touching Tick.label,
        # which newer matplotlib releases no longer provide
        plot.tick_params(axis='x', labelsize=12)
        plot.tick_params(axis='y', labelsize=12)
        plot.yaxis.label.set_size(14)
        plot.xaxis.label.set_size(14)
        plot.xaxis.labelpad = 20
        plot.yaxis.labelpad = 20
        # Show y tick values with thousands separators
        plot.get_yaxis().set_major_formatter(
            matplotlib.ticker.FuncFormatter(lambda value, pos: format(int(value), ','))
        )
        plt.show()

    except ValueError:
        print(textwrap.fill(text, 100))
            
# Render the multi-select widgets for make_plot2, preselecting July 2013
interact(
    make_plot2,
    Months=SelectMultiple(options=months, value=['July']),
    Years=SelectMultiple(options=years, value=[2013]),
)

<function __main__.make_plot2>

### Most Popular Times of the Day for Citi Bike Use

In [15]:
import pandas as pd
import numpy as np

# Null values are written as "\N" in the CSV -- read them as NaN.
# The listed columns are read as integers; 'Birth Year' is left to pandas.
timesdf = pd.read_csv(
    "/Users/hildavargas/Desktop/Springboard_Mini_Projects/Capstone_Project/CB_real_metrics/MostPopularTimes.csv",
    na_values=[r'\N'],
    dtype={"Hour of the Day": int, "Gender": int, "Year": int, "Month": int, "Number of Trips":int},
)

# Replace Gender w/ 'Unknown, Male, Female' vs. '0, 1, 2'
timesdf['Gender'] = timesdf['Gender'].replace({0: 'Unknown', 1: 'Male', 2: 'Female'})

# Replace birth years < 1920 with nulls (see notebook "Identifying Outliers in Age Range" for reference).
# .loc is used because the .ix indexer no longer exists in current pandas releases.
timesdf.loc[timesdf['Birth Year'] < 1920, 'Birth Year'] = np.nan

# Drop rows with nulls (missing birth years and those < 1920)
timesdf = timesdf.dropna()
timesdf.head()

Unnamed: 0,Hour of the Day,Gender,Birth Year,Year,Month,Number of Trips
0,19,Female,1986.0,2013,7,742
1,21,Male,1968.0,2013,7,324
2,11,Female,1986.0,2013,7,280
3,14,Male,1976.0,2013,7,803
4,20,Male,1932.0,2013,7,2


In [16]:
# Create bins for age ranges based off the birth year values:
# five-year edges 1915, 1920, ..., 2010
bins = list(range(1915, 2015, 5))

# Add age ranges column to dataframe
timesdf['Age Ranges'] = pd.cut(timesdf['Birth Year'], precision=0, bins=bins)

# Row order is left as loaded: sort_values returns a new frame, so an unassigned
# call has no effect. The plotting functions sort their own selection.
timesdf.head()

Unnamed: 0,Hour of the Day,Gender,Birth Year,Year,Month,Number of Trips,Age Ranges
0,19,Female,1986.0,2013,7,742,"(1985, 1990]"
1,21,Male,1968.0,2013,7,324,"(1965, 1970]"
2,11,Female,1986.0,2013,7,280,"(1985, 1990]"
3,14,Male,1976.0,2013,7,803,"(1975, 1980]"
4,20,Male,1932.0,2013,7,2,"(1930, 1935]"


In [17]:
# Change age ranges to intervals that will be easier to understand in a plot,
# e.g. the bin "(1985, 1990]" becomes the label "1986-1990"
unique_ranges = timesdf['Age Ranges'].unique()
from collections import defaultdict
x = defaultdict(str)

# Depending on the pandas version, pd.cut bins are plain "(lower, upper]" strings
# or Interval objects; str() yields the same "(lower, upper]" text for both, so
# the bounds are parsed from that text instead of slicing by character position.
for age_bin in unique_ranges:
    if pd.isnull(age_bin):
        # Birth years outside the bin edges have no bin and get no label
        continue
    lower, upper = str(age_bin).strip('(]').split(', ')
    # The bin excludes its lower edge, so the first birth year covered is lower + 1
    x[age_bin] = '{}-{}'.format(int(float(lower)) + 1, int(float(upper)))


timesdf['Age Range'] = timesdf['Age Ranges'].map(x)
timesdf.head()

Unnamed: 0,Hour of the Day,Gender,Birth Year,Year,Month,Number of Trips,Age Ranges,Age Range
0,19,Female,1986.0,2013,7,742,"(1985, 1990]",1986-1990
1,21,Male,1968.0,2013,7,324,"(1965, 1970]",1966-1970
2,11,Female,1986.0,2013,7,280,"(1985, 1990]",1986-1990
3,14,Male,1976.0,2013,7,803,"(1975, 1980]",1976-1980
4,20,Male,1932.0,2013,7,2,"(1930, 1935]",1931-1935


In [18]:
# Change hours of the day to time intervals that will be easier to understand in a plot

unique_times = timesdf['Hour of the Day'].unique()


def _clock_label(hour):
    """Return an hour on the 24-hour clock (0-24) as 12-hour text, e.g. '12 AM' or '1 PM'."""
    suffix = 'AM' if hour % 24 < 12 else 'PM'
    # 0, 12 and 24 all read as "12" on a 12-hour clock
    return '{} {}'.format(hour % 12 or 12, suffix)


# Hours 0, 11, 12 and 23 always get a label, whether or not they occur in the data
hours = sorted(set(int(hour) for hour in unique_times) | {0, 11, 12, 23})

# Named hour_labels rather than `dict` so the builtin dict type stays usable
# in every cell that runs after this one.
hour_labels = {
    hour: '{} - {}'.format(_clock_label(hour), _clock_label(hour + 1))
    for hour in hours
}

timesdf['Time Range'] = timesdf['Hour of the Day'].map(hour_labels)

hour_labels.values()

dict_values(['12 AM - 1 AM', '1 AM - 2 AM', '2 AM - 3 AM', '3 AM - 4 AM', '4 AM - 5 AM', '5 AM - 6 AM', '6 AM - 7 AM', '7 AM - 8 AM', '8 AM - 9 AM', '9 AM - 10 AM', '10 AM - 11 AM', '11 AM - 12 PM', '12 PM - 1 PM', '1 PM - 2 PM', '2 PM - 3 PM', '3 PM - 4 PM', '4 PM - 5 PM', '5 PM - 6 PM', '6 PM - 7 PM', '7 PM - 8 PM', '8 PM - 9 PM', '9 PM - 10 PM', '10 PM - 11 PM', '11 PM - 12 AM'])

In [19]:
# Total trips per month / year / hour slot, summed over gender and birth year.
# drop() returns a new frame, so timesdf itself is left untouched.
# axis is passed by keyword: newer pandas releases reject it positionally.
df2 = (
    timesdf
    .drop(['Gender', 'Birth Year', 'Age Ranges', 'Age Range'], axis=1)
    .groupby(['Month', 'Year', 'Time Range', 'Hour of the Day'])
    .sum()
    .reset_index()
)
df2.head()

Unnamed: 0,Month,Year,Time Range,Hour of the Day,Number of Trips
0,1,2014,1 AM - 2 AM,1,1463
1,1,2014,1 PM - 2 PM,13,15625
2,1,2014,10 AM - 11 AM,10,12245
3,1,2014,10 PM - 11 PM,22,6911
4,1,2014,11 AM - 12 PM,11,12384


In [20]:
from ipywidgets import interact, Layout
from ipywidgets.widgets import SelectMultiple
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
import textwrap

# Month number -> month name, used to add a readable 'Month Name' column
months_name = {
    1: 'January',
    2: 'February',
    3: 'March',
    4: 'April',
    5: 'May',
    6: 'June',
    7: 'July',
    8: 'August',
    9: 'September',
    10: 'October',
    11: 'November',
    12: 'December',
}

# Work on a copy so timesdf keeps its original columns
df = timesdf.copy()
df['Month Name'] = df["Month"].map(months_name)

# Option lists for the widget: all twelve month names in calendar order, and
# the years present in the data in ascending order
months = [months_name[number] for number in range(1, 13)]
years = df['Year'].unique().tolist()
years.sort()

# Define function for interactive widget
def make_plot(Months, Years):
    """Plot the total number of trips per hour-of-day slot for the selected months and years.

    Parameters
    ----------
    Months : iterable
        Month names to keep; matched against the 'Month Name' column of `df`.
    Years : iterable
        Years to keep; matched against the 'Year' column of `df`.

    Returns
    -------
    None
        The point plot is shown as a side effect. If the selection matches no
        rows, or plotting raises ValueError, a notice is printed instead.

    Notes
    -----
    `df` is looked up at call time, so a later cell that rebinds `df` changes
    what this widget plots.
    """
    text="This dataset spans from July 2013 (Citi Bike's opening to the public) to December 2016. Please make another selection and try again."
    selected_months = list(Months)
    selected_years = list(Years)
    # Sorting by the numeric hour keeps the time-range labels in clock order
    gooddata = df.loc[
        df['Month Name'].isin(selected_months) & df['Year'].isin(selected_years)
    ].sort_values(by='Hour of the Day')

    # Check for an empty selection explicitly rather than relying on the
    # plotting library to raise for empty data.
    if gooddata.empty:
        print(textwrap.fill(text, 100))
        return

    try:
        sns.set_style("whitegrid")
        plt.rcParams['figure.figsize'] = (8, 8)
        # x/y are passed by keyword: newer seaborn releases reject them positionally
        plot = sns.pointplot(x='Time Range', y='Number of Trips', data=gooddata,
                             ci=None, estimator=sum, color="#af2b30")
        title = plt.title("Number of Trips by Time of the Day, Ungrouped", fontsize=16, fontweight="bold")
        title.set_position([.5, 1.05])
        plt.xlabel('Time')
        plt.ylabel('Number of Trips')
        for item in plot.get_xticklabels():
            item.set_rotation('vertical')
        # tick_params sets the label size without touching Tick.label,
        # which newer matplotlib releases no longer provide
        plot.tick_params(axis='x', labelsize=11)
        plot.tick_params(axis='y', labelsize=12)
        plot.yaxis.label.set_size(14)
        plot.xaxis.label.set_size(14)
        plot.xaxis.labelpad = 20
        plot.yaxis.labelpad = 20
        # Show y tick values with thousands separators
        plot.get_yaxis().set_major_formatter(
            matplotlib.ticker.FuncFormatter(lambda value, pos: format(int(value), ','))
        )
        plt.show()

    except ValueError:
        print(textwrap.fill(text, 100))

# Render the multi-select widgets for make_plot, preselecting July 2013
interact(
    make_plot,
    Months=SelectMultiple(options=months, value=['July']),
    Years=SelectMultiple(options=years, value=[2013]),
)


<function __main__.make_plot>

In [21]:
from ipywidgets import interact, Layout
from ipywidgets.widgets import SelectMultiple
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib
%matplotlib inline
import textwrap

# Month number -> month name, used to add a readable 'Month Name' column
months_name = {
    1: 'January',
    2: 'February',
    3: 'March',
    4: 'April',
    5: 'May',
    6: 'June',
    7: 'July',
    8: 'August',
    9: 'September',
    10: 'October',
    11: 'November',
    12: 'December',
}

# Work on a copy so timesdf keeps its original columns
df = timesdf.copy()
df['Month Name'] = df["Month"].map(months_name)

# Option lists for the widget: all twelve month names in calendar order, and
# the years present in the data in ascending order
months = [months_name[number] for number in range(1, 13)]
years = df['Year'].unique().tolist()
years.sort()

# Define function for interactive widget
def make_plot2(Months, Years):
    """Plot the total number of trips per hour-of-day slot, split by gender.

    Parameters
    ----------
    Months : iterable
        Month names to keep; matched against the 'Month Name' column of `df`.
    Years : iterable
        Years to keep; matched against the 'Year' column of `df`.

    Returns
    -------
    None
        The grouped bar plot is shown as a side effect. If the selection
        matches no rows, or plotting raises ValueError, a notice is printed
        instead.

    Notes
    -----
    `df` is looked up at call time, so a later cell that rebinds `df` changes
    what this widget plots.
    """
    text="This dataset spans from July 2013 (Citi Bike's opening to the public) to December 2016. Please make another selection and try again."
    selected_months = list(Months)
    selected_years = list(Years)
    # Sorting by the numeric hour keeps the time-range labels in clock order
    gooddata = df.loc[
        df['Month Name'].isin(selected_months) & df['Year'].isin(selected_years)
    ].sort_values(by='Hour of the Day')

    # Check for an empty selection explicitly rather than relying on the
    # plotting library to raise for empty data.
    if gooddata.empty:
        print(textwrap.fill(text, 100))
        return

    try:
        sns.set_style("whitegrid")
        plt.rcParams['figure.figsize'] = (10, 8)
        # One fixed colour per gender label
        pal = {'Female': "#f9a65a", 'Male': "#9e66ab", 'Unknown': "#af2b30"}
        # x/y are passed by keyword: newer seaborn releases reject them positionally
        plot = sns.barplot(x='Time Range', y='Number of Trips', hue='Gender', estimator=sum,
                           data=gooddata, ci=None, palette=pal)
        title = plt.title("Number of Trips by Time of the Day, Grouped by Gender", fontsize=16, fontweight="bold")
        title.set_position([.5, 1.05])
        plt.xlabel('Time')
        plt.ylabel('Number of Trips')
        plt.legend(loc=1)
        for item in plot.get_xticklabels():
            item.set_rotation('vertical')
        # tick_params sets the label size without touching Tick.label,
        # which newer matplotlib releases no longer provide
        plot.tick_params(axis='x', labelsize=11)
        plot.tick_params(axis='y', labelsize=12)
        plot.yaxis.label.set_size(14)
        plot.xaxis.label.set_size(14)
        plot.xaxis.labelpad = 20
        plot.yaxis.labelpad = 20
        # Show y tick values with thousands separators
        plot.get_yaxis().set_major_formatter(
            matplotlib.ticker.FuncFormatter(lambda value, pos: format(int(value), ','))
        )
        plt.show()

    except ValueError:
        print(textwrap.fill(text, 100))

# Render the multi-select widgets for make_plot2, preselecting July 2013
interact(
    make_plot2,
    Months=SelectMultiple(options=months, value=['July']),
    Years=SelectMultiple(options=years, value=[2013]),
)


<function __main__.make_plot2>