In [None]:
# Your objective is to build a Bubble Plot that showcases the relationship between four key variables:
# Average Fare ($) Per City
# Total Number of Rides Per City
# Total Number of Drivers Per City
# City Type (Urban, Suburban, Rural)

In [None]:
# libraries
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns

In [None]:
# Load City CSV file
# Read the City Data (path is relative to the notebook's working directory)
city_data = pd.read_csv("Resources/city_data.csv")

# Display the data table for preview
city_data.head()

In [None]:
# Load Ride CSV file
# Read the Ride Data (path is relative to the notebook's working directory)
ride_data = pd.read_csv("Resources/ride_data.csv")

# Display the first rows of the ride table for preview
ride_data.head()

In [None]:
# Join the city table with the ride table on the shared "city" column.
# No `how` is given, so pandas' default inner join applies.
# NOTE(review): `data` is not referenced by the later cells visible here,
# which use `pyber_data` instead -- confirm whether this preview is still needed.
data = city_data.merge(ride_data, on="city")

# Preview the combined table
data.head()

In [None]:
# Attach each ride's city attributes with a left join on "city": every row of
# ride_data is kept, and the matching city_data columns are appended to it.
pyber_data = ride_data.merge(city_data, how="left", on="city")

In [None]:
    # Bubble Plot of Ride Sharing Data

In [None]:
# Bubble plot of the ride-sharing data: one bubble per city, positioned by
# total rides (x) and average fare (y), sized by driver count and coloured by
# city type.

# Split the merged ride records by city type
urban_city_type = pyber_data[pyber_data["type"] == "Urban"]
rural_city_type = pyber_data[pyber_data["type"] == "Rural"]
suburban_city_type = pyber_data[pyber_data["type"] == "Suburban"]

# Aggregate per *city* (not per type) so that every city gets its own bubble.
# The three Series built from one groupby share the same city index, so they
# line up element-wise when handed to scatter().
# driver_count is a city-level column repeated on every ride row by the merge;
# the mean collapses those copies back to a single value per city
# (assumes one driver_count per city name -- TODO confirm against city_data).

# Number of rides, average fare, and number of drivers for each Urban city
urban_by_city = urban_city_type.groupby("city")
x_urban = urban_by_city["ride_id"].count()
y_urban = urban_by_city["fare"].mean()
s_urban = urban_by_city["driver_count"].mean()

# Number of rides, average fare, and number of drivers for each Rural city
rural_by_city = rural_city_type.groupby("city")
x_rural = rural_by_city["ride_id"].count()
y_rural = rural_by_city["fare"].mean()
s_rural = rural_by_city["driver_count"].mean()

# Number of rides, average fare, and number of drivers for each Suburban city
suburban_by_city = suburban_city_type.groupby("city")
x_suburban = suburban_by_city["ride_id"].count()
y_suburban = suburban_by_city["fare"].mean()
s_suburban = suburban_by_city["driver_count"].mean()

# Build the scatter plots for each city type on an explicit figure, so the
# plot never lands on top of a figure left over from earlier code.
# Marker area is driver count scaled by 10 to keep small cities visible.
fig, ax = plt.subplots()
ax.scatter(x_urban, y_urban, label="Urban", s=s_urban * 10, color="coral", edgecolor="black", alpha=0.75, marker="o")
ax.scatter(x_rural, y_rural, label="Rural", s=s_rural * 10, color="gold", edgecolor="black", alpha=0.75, marker="o")
ax.scatter(x_suburban, y_suburban, label="Suburban", s=s_suburban * 10, color="lightskyblue", edgecolor="black", alpha=0.75, marker="o")
ax.grid()

# Incorporate the other graph properties
ax.set_xlabel("Total Number of Rides (Per City)")
ax.set_ylabel("Average Fare ($)")
ax.set_title("Pyber Ride Sharing Data")

# Create a legend
legend = ax.legend(fontsize=12, title="City Types", loc="best")

# Incorporate a text label regarding circle size. It is positioned in axes
# coordinates (just right of the plot area) so it does not depend on the
# data range.
ax.text(1.02, 0.5, "Note:\nCircle size correlates with driver count per city.",
        transform=ax.transAxes, fontsize=12, va="center")

# Save Figure
# Saved in the same cell that draws it: with Jupyter's default inline backend
# the figure is closed once the cell finishes, so saving from a later cell
# would write an empty image. bbox_inches="tight" keeps the side note in frame.
fig.savefig("bubble.png", bbox_inches="tight")

In [None]:
# In addition, you will be expected to produce the following three pie charts:
#  % of Total Fares by City Type
#  % of Total Rides by City Type
#  % of Total Drivers by City Type

In [None]:
     # Total Fares by City Type

In [None]:
# Total fares per city type. The pie chart normalises these sums, and autopct
# prints each wedge's share as a percentage.
type_groupped = pyber_data.groupby(['type'])
fare_sum = type_groupped['fare'].sum()

# Build Pie Chart
# Labels, colours and wedge offsets are derived from the grouped result's own
# index, so they always match the wedges instead of relying on the groupby
# output happening to be ordered Rural / Suburban / Urban.
type_colors = {"Rural": "gold", "Suburban": "lightskyblue", "Urban": "lightcoral"}
labels = list(fare_sum.index)
colors = [type_colors.get(city_type, "lightgrey") for city_type in labels]
# Pull the Urban wedge out slightly to emphasise it
explode = tuple(0.1 if city_type == "Urban" else 0 for city_type in labels)

# Draw on a fresh figure so the pie never lands on top of an earlier plot
fig, ax = plt.subplots()
ax.pie(fare_sum, explode=explode, labels=labels, colors=colors, autopct="%1.1f%%", shadow=True, startangle=150)
ax.set_title("% of Total Fares by City Type")

# Save Figure
# Saved in the same cell that draws it: with Jupyter's default inline backend
# the figure is closed once the cell finishes, so saving from a later cell
# would write an empty image.
fig.savefig("typepie.png")

In [None]:
    # Total Rides by City Type

In [None]:
# Number of rides per city type, reusing the `type_groupped` groupby built in
# the fares cell. The pie chart normalises the counts, and autopct prints each
# wedge's share as a percentage.
rides_count = type_groupped['ride_id'].count()

# Build Pie Chart
# Labels, colours and wedge offsets are derived from the grouped result's own
# index, so they always match the wedges instead of relying on the groupby
# output happening to be ordered Rural / Suburban / Urban.
type_colors = {"Rural": "gold", "Suburban": "lightskyblue", "Urban": "lightcoral"}
labels = list(rides_count.index)
colors = [type_colors.get(city_type, "lightgrey") for city_type in labels]
# Pull the Urban wedge out slightly to emphasise it
explode = tuple(0.1 if city_type == "Urban" else 0 for city_type in labels)

# Draw on a fresh figure so the pie never lands on top of an earlier plot
fig, ax = plt.subplots()
ax.pie(rides_count, explode=explode, labels=labels, colors=colors, autopct="%1.1f%%", shadow=True, startangle=150)
ax.set_title("% of Total Rides by City Type")

# Save Figure
# Saved in the same cell that draws it: with Jupyter's default inline backend
# the figure is closed once the cell finishes, so saving from a later cell
# would write an empty image.
fig.savefig("citypie.png")

In [None]:
    # Total Drivers by City Type

In [None]:
# Total drivers per city type. This is summed from city_data rather than the
# merged per-ride table, where each city's driver_count is repeated on every
# one of its ride rows (presumably city_data holds one row per city -- verify).
type_groupped_drivers = city_data.groupby(['type'])
drivers_sum = type_groupped_drivers['driver_count'].sum()

# Build Pie Chart
# Labels, colours and wedge offsets are derived from the grouped result's own
# index, so they always match the wedges instead of relying on the groupby
# output happening to be ordered Rural / Suburban / Urban.
type_colors = {"Rural": "gold", "Suburban": "lightskyblue", "Urban": "lightcoral"}
labels = list(drivers_sum.index)
colors = [type_colors.get(city_type, "lightgrey") for city_type in labels]
# Pull the Urban wedge out slightly to emphasise it
explode = tuple(0.1 if city_type == "Urban" else 0 for city_type in labels)

# Draw on a fresh figure so the pie never lands on top of an earlier plot
fig, ax = plt.subplots()
ax.pie(drivers_sum, explode=explode, labels=labels, colors=colors, autopct="%1.1f%%", shadow=True, startangle=150)
ax.set_title("% of Total Drivers by City Type")

# Save Figure
# Saved in the same cell that draws it: with Jupyter's default inline backend
# the figure is closed once the cell finishes, so saving from a later cell
# would write an empty image.
fig.savefig("driverpie.png")