In [1]:
%matplotlib inline
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

city_data_to_load = "city_data.csv"                             # File to Load 
ride_data_to_load = "ride_data.csv"                             # File to Load

city_df = pd.read_csv(city_data_to_load)                        # Read the City and Ride Data
ride_df = pd.read_csv(ride_data_to_load)

merge_df = pd.merge(ride_df, city_df, on="city", how="outer")   # Combine the data into a single dataset                                    

merged_city_type = merge_df.set_index("type")                   # Set the 'type' to be our index 
new=merged_city_type.reset_index()                              #new merged database with clean index
new.head(1000)                                                      #preview of the dataframe

Unnamed: 0,type,city,date,fare,ride_id,driver_count
0,Urban,Lake Jonathanshire,1/14/2018 10:14,13.83,5.739410e+12,5
1,Urban,Lake Jonathanshire,4/7/2018 20:51,31.25,4.441250e+12,5
2,Urban,Lake Jonathanshire,3/9/2018 23:45,19.89,2.389500e+12,5
3,Urban,Lake Jonathanshire,4/7/2018 18:09,24.28,7.796810e+12,5
4,Urban,Lake Jonathanshire,1/2/2018 14:14,13.89,4.242550e+11,5
5,Urban,Lake Jonathanshire,4/6/2018 11:30,16.84,6.164450e+12,5
6,Urban,Lake Jonathanshire,3/21/2018 0:18,37.95,8.353660e+12,5
7,Urban,Lake Jonathanshire,1/28/2018 0:07,5.67,9.756570e+12,5
8,Urban,Lake Jonathanshire,1/24/2018 12:24,34.65,3.319120e+12,5
9,Urban,Lake Jonathanshire,3/24/2018 16:27,14.94,1.670910e+12,5


In [None]:
#Creating series for rides per city for the three city types
rides_per_city_urban = new.loc[new["type"]== "Urban", :].groupby('city').count().ride_id 
rides_per_city_rural = new.loc[new["type"]== "Rural", :].groupby('city').count().ride_id  
rides_per_city_suburban = new.loc[new["type"]== "Suburban", :].groupby('city').count().ride_id  

#Creating series for average fare per city for the three city types
average_fare_city_urban = new.loc[new["type"]== "Urban", :].groupby('city').mean().fare  
average_fare_city_rural = new.loc[new["type"]== "Rural", :].groupby('city').mean().fare 
average_fare_city_suburban = new.loc[new["type"]== "Suburban", :].groupby('city').mean().fare  

#Creating series for driver count per city for the three city types
drivers_by_city_urban = new.loc[new["type"]== "Urban", :].groupby('city').sum().driver_count  
drivers_by_city_rural = new.loc[new["type"]== "Rural", :].groupby('city').sum().driver_count  
drivers_by_city_suburban = new.loc[new["type"]== "Suburban", :].groupby('city').sum().driver_count  

#new dataframe for city type urban
results_by_city_urban=pd.DataFrame({"Average Fare per City":average_fare_city_urban, 
                "Total Rides per City":rides_per_city_urban,
                 "Driver Count":drivers_by_city_urban})

#new dataframe for city type rural
results_by_city_rural=pd.DataFrame({"Average Fare per City":average_fare_city_rural, 
                "Total Rides per City":rides_per_city_rural,
                 "Driver Count":drivers_by_city_rural})

#new dataframe for suburban
results_by_city_suburban=pd.DataFrame({"Average Fare per City":average_fare_city_suburban, 
                "Total Rides per City":rides_per_city_suburban,
                 "Driver Count":drivers_by_city_suburban})

## Bubble Plot of Ride Sharing Data

In [None]:
#Creating the combined scatter plot for the three city types
ax=results_by_city_rural.plot(kind='scatter', x='Total Rides per City', y='Average Fare per City',
                           color='Yellow', label='Rural',edgecolor="black",alpha=0.5,grid=True,figsize=(8,6),
                           s=results_by_city_rural['Driver Count']);
results_by_city_suburban.plot(kind='scatter', x='Total Rides per City', y='Average Fare per City',
                           color='LightBlue', label='Suburban',edgecolor="black",ax=ax,alpha=0.5,grid=True,figsize=(8,6),
                           s=results_by_city_suburban['Driver Count']);
results_by_city_urban.plot(kind='scatter', x='Total Rides per City', y='Average Fare per City',
                           color='Red', label='Urban',edgecolor="black",ax=ax, alpha=0.5,grid=True,figsize=(8,6),
                           s=results_by_city_urban['Driver Count'],title="Pyber Ride Sharing Data 2016")

# add text annotation
ax.text(43, 40, "Circle size correlates with driver count per City", horizontalalignment='left', size='medium', 
        color='black', weight='semibold')
ax.text(43, 41, "Note:", horizontalalignment='left', size='medium', 
        color='black', weight='semibold')

ax.legend(markerscale=0.4)
plt.savefig("Pyber Ride Sharing Data 2016.png")
plt.show

## Total Rides by City Type

In [None]:
# Calculate Type Percents
percentages_by_type_urban=new.loc[new["type"]== "Urban", :].sum().fare 
percentages_by_type_rural=new.loc[new["type"]== "Rural", :].sum().fare
percentages_by_type_suburban=new.loc[new["type"]== "Suburban", :].sum().fare
total_fares_by_city_type=new['fare'].sum()

pie_urban=percentages_by_type_urban/total_fares_by_city_type
pie_rural=percentages_by_type_rural/total_fares_by_city_type
pie_suburban=percentages_by_type_suburban/total_fares_by_city_type

# Build Pie Chart
labels = ["Urban", "Rural", "Suburban"]
sizes = [pie_urban, pie_rural, pie_suburban]
colors = ["lightcoral", "orange", "lightskyblue"]
explode = (0.1, 0, 0)

plt.title("% of Total Fares by City Type")
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=280)

# Save Figure
plt.savefig("Percentage of Total Fares by City Type.png")

In [None]:
# Calculate Ride Percents
rides_by_type_urban=new.loc[new["type"]== "Urban", :].count().fare 
rides_by_type_rural=new.loc[new["type"]== "Rural", :].count().fare
rides_by_type_suburban=new.loc[new["type"]== "Suburban", :].count().fare
total_rides_by_city_type=new['fare'].count()

pie_urban2=rides_by_type_urban/total_rides_by_city_type
pie_rural2=rides_by_type_rural/total_rides_by_city_type
pie_suburban2=rides_by_type_suburban/total_rides_by_city_type

# Build Pie Chart
labels = ["Urban", "Rural", "Suburban"]
sizes = [pie_urban2, pie_rural2, pie_suburban2]
colors = ["lightcoral", "orange", "lightskyblue"]
explode = (0.1, 0, 0)

plt.title("% of Total Rides by City Type")
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=260)

# Save Figure
plt.savefig("Percentage of Total Rides by City Type.png")

## Total Drivers by City Type

In [None]:
# Calculate drivers Percents
driver_by_type_urban=new.loc[new["type"]== "Urban", :].sum().driver_count 
driver_by_type_rural=new.loc[new["type"]== "Rural", :].sum().driver_count
driver_by_type_suburban=new.loc[new["type"]== "Suburban", :].sum().driver_count
total_drivers_by_city_type=new['driver_count'].sum()

pie_urban3=driver_by_type_urban/total_drivers_by_city_type
pie_rural3=driver_by_type_rural/total_drivers_by_city_type
pie_suburban3=driver_by_type_suburban/total_drivers_by_city_type

# Build Pie Chart
labels = ["Urban", "Rural", "Suburban"]
sizes = [pie_urban3, pie_rural3, pie_suburban3]
colors = ["lightcoral", "orange", "lightskyblue"]
explode = (0.1, 0, 0)

plt.title("% of Total Drivers by City Type")
plt.pie(sizes, explode=explode, labels=labels, colors=colors,
        autopct="%1.1f%%", shadow=True, startangle=220)

# Save Figure
plt.savefig("Percentage of Total Drivers by City Type.png")

In [2]:
driver_by_type_urban=city_df.loc[city_df["type"]== "Urban", :].driver_count.sum()
percentages_by_type_urban=city_df.loc[city_df["type"]== "Urban", :].sum().fare 
driver_by_type_urban

AttributeError: 'Series' object has no attribute 'fare'

In [None]:
test=new['driver_count']*1.0
test.head(1000)

In [None]:
# Show Figure
plt.show()