Observable Trends in the Data: 
1) While 81% of all ride-share drivers are in urban areas, urban drivers receive only 62% of the total fares.  

2) Since suburban and rural areas are less dense, drive times are likely longer, resulting in higher driver costs and higher revenue per ride.  This is also reflected in the scatter plot: the average fare increases as the geography shifts from Urban to Suburban and Rural.  

3) There is very little ride-share activity in rural areas. 

In [None]:
%matplotlib inline
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Input CSV locations (adjust these paths if the data lives elsewhere)
city_data_to_load = "Resources/city_data.csv"
ride_data_to_load = "Resources/ride_data.csv"

# Read the City and Ride Data
city_data = pd.read_csv(city_data_to_load)
ride_data = pd.read_csv(ride_data_to_load)

# Combine the data into a single dataset.
# "city" is the one column the two tables are joined on, so it is named
# once as the join key. pd.merge's default join type is left unchanged.
ride_data_complete = pd.merge(ride_data, city_data, on="city")


In [None]:
# Preview the first rows of the raw ride records.
ride_data.head()

In [None]:
# Preview the first rows of the raw city records.
city_data.head()

In [None]:
# Preview the first rows of the merged ride + city table.
ride_data_complete.head()

In [None]:
# Group the merged ride records by city and city type so that each
# (city, type) pair can be aggregated separately.
ride_group = ride_data_complete.groupby(['city', 'type'])

# Count of non-null 'city' entries per group, i.e. ride rows per city.
rides = ride_group['city'].count()
# Mean fare per group.
rides_avg = ride_group['fare'].mean()
# Largest driver_count value per group
# (presumably constant within a city since it comes from the city table -- TODO confirm).
driver_count = ride_group['driver_count'].max()

# Display the per-group driver counts.
driver_count

## Bubble Plot of Ride Sharing Data

In [None]:
# Assemble the per-city aggregates into one table. The three Series come
# from the same (city, type) grouping, so they share one index.
ride_df = pd.DataFrame(
    {
        "Total Rides": rides,
        "Average Fare per City": rides_avg,
        "Total Drivers": driver_count,
    }
)

# Show the first rows as a quick sanity check.
ride_df.head()

In [None]:
# Bubble chart: one marker per city, rides on x, average fare on y.
ride_chart = ride_df.plot(
    x="Total Rides",
    y="Average Fare per City",
    kind="scatter",
    s=driver_count * 10,  # marker size is ten times the city's driver count
    marker="o",
    color="#ff9999",
    edgecolors="black",
    figsize=(7, 5),
    grid=True,
)

# Note explaining the marker sizes; the position is given in data coordinates.
ride_chart.text(
    15 + 10.2,
    20.5,
    "Note: Circle Size correlates to driver count per city",
    color="black",
    size="medium",
    horizontalalignment="left",
)

# Axis labels and chart title.
ride_chart.set_xlabel("Total Number of Rides (per City)")
ride_chart.set_ylabel("Average Fare ($)")
ride_chart.set_title("Pyber Ride Sharing Data 2016")

In [None]:
# Save the bubble chart through the figure that owns `ride_chart` instead of
# pyplot's "current figure". With the inline notebook backend the figure
# drawn in an earlier cell is already closed by the time this cell runs, so
# plt.tight_layout()/plt.savefig() would act on a new, empty figure.
bubble_fig = ride_chart.get_figure()
bubble_fig.tight_layout()
bubble_fig.savefig("Images/bubble.jpg")
plt.show()

In [None]:
# Total rides per city type. `ride_df` is indexed by (city, type), so
# grouping on "type" groups by that index level.
city_type = ride_df.groupby("type")
city_type_data = city_type['Total Rides'].sum()
all_rides = sum(city_type_data)
# Scale to 0-100 so the values are true percentages, matching the name of
# this variable and the "% of All Rides" label it is displayed under.
percent_of_all_rides = city_type_data / all_rides * 100

In [None]:
# One-column table holding each city type's share of all rides.
type_df = pd.DataFrame({"% of All Rides": percent_of_all_rides})
# Column labels of that table, kept for selecting the plotted column.
type_list = type_df.columns

type_df.head()

In [None]:
# Wedge offsets for the three-slice pies: only the third wedge is pulled out.
explode = (0, 0, 0.1)
# `type_list` is the Index of column labels; pass its single label so that
# `y` selects one column by name rather than receiving the whole Index.
type_pie = type_df.plot(kind='pie', y=type_list[0], title="% of Total Rides by City Type",
                        startangle=150, shadow=True, explode=explode, autopct='%.1f%%', radius=3)
plt.axis("equal")
plt.tight_layout()
# This is the rides chart, so write it to the rides image file.
plt.savefig("Images/total_rides_by_city_type.png")
# Call show() (with parentheses) only after the file has been written.
plt.show()

In [None]:
# Total fare per city type, computed from the merged ride records.
type_fare = ride_data_complete.groupby("type")

type_fare_sum = type_fare["fare"].sum()
type_fare_all = sum(type_fare_sum)

# Scale to 0-100 so the values are true percentages, matching the
# "% of Total Fare" column label below.
type_fare_percent = type_fare_sum / type_fare_all * 100

# One-column table of the fare shares, plus its column labels for plotting.
type_fare_sum_df = pd.DataFrame({"% of Total Fare": type_fare_percent})
type_fare_list = type_fare_sum_df.keys()

type_fare_sum_df.head()

In [None]:
# Pie chart of each city type's share of total fares.
# `type_fare_list` is the Index of column labels; pass its single label so
# that `y` selects one column by name rather than receiving the whole Index.
fare_pie = type_fare_sum_df.plot(kind='pie', y=type_fare_list[0], title="% of Total Fares by City Type",
                                startangle=150, shadow=True, explode=explode, autopct='%.1f%%', radius=3)

plt.axis("equal")
# Lay out and save BEFORE show(): once show() has run, the current figure is
# finished and savefig() would write an empty image.
plt.tight_layout()
# This is the fares chart, so write it to the fares image file.
plt.savefig("Images/fares_by_city_type.png")
plt.show()

In [None]:
# Total drivers per city type, taken from the city table.
drivers = city_data.groupby("type")
driver_c = drivers["driver_count"].sum()

driver_all = sum(driver_c)
# Scale to 0-100 so the values are true percentages, matching the
# "% of Total Drivers" column label below.
driver_percent = driver_c / driver_all * 100

# One-column table of the driver shares, plus its column labels for plotting.
driver_df = pd.DataFrame({"% of Total Drivers": driver_percent})
driver_list = driver_df.keys()

driver_df.head()

In [None]:
# Pie chart of each city type's share of total drivers.
# `driver_list` is the Index of column labels; pass its single label so that
# `y` selects one column by name rather than receiving the whole Index.
driver_pie = driver_df.plot(kind='pie', y=driver_list[0], title="% of Total Drivers by City Type",
                           startangle=150, shadow=True, explode=explode, autopct='%.1f%%', radius=3)

plt.axis("equal")
# Lay out and save BEFORE show(): once show() has run, the current figure is
# finished and savefig() would write an empty image.
plt.tight_layout()
plt.savefig("Images/total_drivers_by_city_type.png")
plt.show()