In [None]:
%matplotlib inline
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Input CSV locations, relative to the notebook's working directory
# (change these if the data lives elsewhere)
city_data_to_load = "data/city_data.csv"
ride_data_to_load = "data/ride_data.csv"

# Read the city and ride data
citydata = pd.read_csv(city_data_to_load)
ridedata = pd.read_csv(ride_data_to_load)

# Combine the data into a single dataset keyed on the shared "city" column.
# how="left" keeps every row of ridedata, even when its city has no match in
# citydata. The key is named once; listing it twice adds nothing to the join.
data = pd.merge(ridedata, citydata, how="left", on="city")

# Display the data table for preview
data.head()

## Bubble Plot of Ride Sharing Data

In [None]:
# Aggregate the merged ride data per (city type, city) pair. The resulting
# per-city figures supply the x, y and marker-size inputs of the bubble plot.
combine_data = data.groupby(["type", "city"])

# Mean fare within each city
average_type = combine_data["fare"].mean()

# Number of ride rows within each city
total_type = combine_data["city"].count()

# Sum of driver_count over each city's ride rows.
# NOTE(review): driver_count is added once per ride row, so if it is a per-city
# value repeated on every ride this total scales with the ride count -- confirm
# that this is the intended bubble-size measure.
total_drivers = combine_data["driver_count"].sum()

# One row per (type, city) with the three aggregates side by side
combined_df = pd.DataFrame(
    {
        "avg_fare": average_type,
        "total_rides": total_type,
        "total_drivers": total_drivers,
    }
)

# Expose both index levels as ordinary columns so rows can be filtered by type
for level_name in ("type", "city"):
    combined_df[level_name] = combined_df.index.get_level_values(level_name)

# Split the per-city summary into one frame per city type
rural, suburban, urban = (
    combined_df.loc[combined_df["type"] == kind]
    for kind in ("Rural", "Suburban", "Urban")
)

In [None]:
# Build the scatter plot: one series per city type


def _bubble_inputs(cities):
    """Return (x, y, marker size) for one city-type frame.

    x is the per-city ride count, y the per-city average fare, and the
    marker size is the total_drivers column divided by 5.
    """
    return cities["total_rides"], cities["avg_fare"], cities["total_drivers"] / 5


rural_x, rural_y, rural_size = _bubble_inputs(rural)
suburban_x, suburban_y, suburban_size = _bubble_inputs(suburban)
urban_x, urban_y, urban_size = _bubble_inputs(urban)

# Styling common to all three series; only colour, size and label vary
shared_style = {"marker": "o", "edgecolors": "black", "alpha": 0.8}

rural_handle = plt.scatter(
    rural_x, rural_y, s=rural_size, facecolors="gold", label="Rural", **shared_style
)
suburban_handle = plt.scatter(
    suburban_x, suburban_y, s=suburban_size, facecolors="purple", label="Suburban", **shared_style
)
urban_handle = plt.scatter(
    urban_x, urban_y, s=urban_size, facecolors="coral", label="Urban", **shared_style
)

# Legend with one entry per city type
plt.legend(handles=[rural_handle, suburban_handle, urban_handle], loc="best")

# Titles and axis labels; axis limits are left to matplotlib's autoscaling
plt.title("Pyber Ride Sharing Data (2016)")
plt.xlabel("Total Number of Rides (Per City)")
plt.ylabel("Average Fare ($)")
plt.grid()

# Write the figure to disk before displaying it
plt.savefig("results/Fig1.png")

plt.show()


## Total Fares by City Type

In [None]:
# Group the merged ride data by city type (this grouping is reused by later cells)
group_by_type = data.groupby("type")

# Grand total of all fares
total_fares = data["fare"].sum()

# Total fares per city type, then each type's share of the grand total.
# The numerator has to be the per-type Series: dividing the grand total by
# itself gives the scalar 100.0, which cannot be drawn as a pie chart.
total_fares_by_city = group_by_type["fare"].sum()
percent_total = total_fares_by_city / total_fares * 100



In [None]:
# Pie chart of each city type's share of total fares
explode = (0, 0, 0.08)  # offsets the third wedge from the centre
colors = ["gold", "lightskyblue", "lightcoral"]

# Keyword options for the pie, gathered in one place
pie_options = {
    "explode": explode,
    "colors": colors,
    "autopct": "%1.1f%%",
    "shadow": True,
    "startangle": 140,
    "figsize": (5, 5),
}
percent_total.plot(kind="pie", **pie_options)

plt.title("% of Total Fares by City Type")

# Write the figure to disk, then display it
plt.savefig("results/Fig2.png")
plt.show()

## Total Rides by City Type

In [None]:
# Ride share by city type
# Overall number of rows in the merged ride data
ride_rows = len(data)

# Ride count per city type (uses the group_by_type grouping built earlier),
# then each type's count expressed as a percentage of all rides
total_rides_per_city = group_by_type["city"].count()
total_rides = total_rides_per_city.div(ride_rows).mul(100)


In [None]:
# Build Pie Chart
explode = (0, 0, 0.08)  # offsets the third wedge from the centre
colors = ["gold", "lightskyblue", "lightcoral"]

# The pie is drawn from the per-type ride counts; autopct prints each wedge's share
total_rides_per_city.plot(kind="pie", explode=explode, colors=colors, autopct="%1.1f%%", shadow=True, startangle=140, figsize=(5, 5))

plt.title("% of Total Rides by City Type")

# Save Figure -- this has to come before plt.show(): with the inline backend the
# figure is finished once it has been shown, and a later savefig would write a
# new, empty figure instead of the chart.
plt.savefig("results/Fig3.png")

# Show Figure
plt.show()

## Total Drivers by City Type

In [None]:
# Calculate Driver Percents
# The merged `data` has one row per ride, so summing driver_count over it adds
# a city's drivers once for every ride taken there. Collapse to a single row
# per (type, city) first so each city contributes its driver count only once.
# NOTE(review): assumes driver_count is a per-city value that is identical on
# every ride row of that city -- confirm against the city CSV.
city_level = data.drop_duplicates(subset=["type", "city"])

total_drivers = city_level["driver_count"].sum()

# Total drivers per city type, then each type's share as a percentage
total_drivers_byCity = city_level.groupby("type")["driver_count"].sum()
total_percent = total_drivers_byCity / total_drivers * 100



In [None]:
# Build Pie Chart
explode = (0, 0, 0.14)  # offsets the third wedge from the centre
colors = ["gold", "lightskyblue", "lightcoral"]

total_percent.plot(kind="pie", explode=explode, colors=colors, autopct="%1.1f%%", shadow=True, startangle=140, figsize=(5, 5))

plt.title("% of Total Drivers by City Type")

# Save Figure -- this has to come before plt.show(): with the inline backend the
# figure is finished once it has been shown, and a later savefig would write a
# new, empty figure instead of the chart.
plt.savefig("results/Fig4.png")

# Show Figure
plt.show()