In [1]:
# Add Matplotlib inline magic command
%matplotlib inline
# Library dependencies and setup
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Relative paths of the two CSV inputs (ride records and city records)
ride_data_to_load = 'Resources/ride_data.csv'
city_data_to_load = 'Resources/city_data.csv'

In [3]:
# Load the city CSV into a DataFrame, then preview its first ten rows
city_data_df = pd.read_csv(filepath_or_buffer=city_data_to_load)
city_data_df.head(n=10)

Unnamed: 0,city,driver_count,type
0,Richardfort,38,Urban
1,Williamsstad,59,Urban
2,Port Angela,67,Urban
3,Rodneyfort,34,Urban
4,West Robert,39,Urban
5,West Anthony,70,Urban
6,West Angela,48,Urban
7,Martinezhaven,25,Urban
8,Karenberg,22,Urban
9,Barajasview,26,Urban


In [4]:
# Load the ride CSV into a DataFrame, then preview its first ten rows
ride_data_df = pd.read_csv(filepath_or_buffer=ride_data_to_load)
ride_data_df.head(n=10)

Unnamed: 0,city,date,fare,ride_id
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344
5,South Latoya,2019-03-11 12:26:48,9.52,1994999424437
6,New Paulville,2019-02-27 11:17:56,43.25,793208410091
7,Simpsonburgh,2019-04-26 00:43:24,35.98,111953927754
8,South Karenland,2019-01-08 03:28:48,35.09,7995623208694
9,North Jasmine,2019-03-09 06:26:29,42.81,5327642267789


In [5]:
# Non-null count per column of the city data.
# A cell only echoes its LAST expression, so this intermediate result must be
# printed explicitly or it is computed and silently discarded.
print(city_data_df.count())
# Cross-check: number of null values per column
city_data_df.isnull().sum()

city            0
driver_count    0
type            0
dtype: int64

In [6]:
# Inspect the dtype pandas inferred for each column of the city data
city_data_df.dtypes

city            object
driver_count     int64
type            object
dtype: object

In [7]:
# Count how many cities fall under each city type (value_counts returns
# the number of rows per distinct value, not just the distinct values)
city_data_df['type'].value_counts()

Urban       66
Suburban    36
Rural       18
Name: type, dtype: int64

In [8]:
# Number of cities classified as Urban.
# The boolean mask is reduced with the vectorized Series.sum() instead of
# Python's built-in sum(), which walks the Series element by element.
# (The same figure also appears in the value_counts() result for 'type'.)
(city_data_df['type'] == 'Urban').sum()

66

In [9]:
# Non-null count per column of the ride data.
# A cell only echoes its LAST expression, so this intermediate result must be
# printed explicitly or it is computed and silently discarded.
print(ride_data_df.count())
# Cross-check: number of null values per column
ride_data_df.isnull().sum()

city       0
date       0
fare       0
ride_id    0
dtype: int64

In [10]:
# Inspect the dtype pandas inferred for each column of the ride data
ride_data_df.dtypes

city        object
date        object
fare       float64
ride_id      int64
dtype: object

In [11]:
# Attach each city's driver_count and type to every ride; the left join keeps
# every row of ride_data_df.
# validate='many_to_one' makes pandas raise MergeError if a city name occurs
# more than once in city_data_df, which would otherwise silently duplicate
# rides and inflate every downstream count and total.
pyber_data_df = pd.merge(
    ride_data_df,
    city_data_df,
    how='left',
    on='city',
    validate='many_to_one',
)

# Display the DataFrame
pyber_data_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873,5,Urban
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577,72,Urban
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003,57,Urban
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178,34,Urban
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344,46,Urban


In [12]:
# Keep only the rides whose city type is Urban
urban_cities_df = pyber_data_df.loc[pyber_data_df['type'].eq('Urban')]
urban_cities_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873,5,Urban
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577,72,Urban
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003,57,Urban
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178,34,Urban
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344,46,Urban


In [13]:
# Keep only the rides whose city type is Suburban
suburban_cities_df = pyber_data_df.loc[pyber_data_df['type'].eq('Suburban')]
suburban_cities_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
1625,Barronchester,2019-01-27 03:08:01,27.79,6653622887913,11,Suburban
1626,East Kentstad,2019-04-07 19:44:19,18.75,6575961095852,20,Suburban
1627,Lake Omar,2019-01-17 21:33:35,21.71,966911700371,22,Suburban
1628,Myersshire,2019-02-27 17:38:39,17.1,5706770909868,19,Suburban
1629,West Hannah,2019-04-19 01:06:59,37.78,2273047151891,12,Suburban


In [14]:
# Keep only the rides whose city type is Rural
rural_cities_df = pyber_data_df.loc[pyber_data_df['type'].eq('Rural')]
rural_cities_df

Unnamed: 0,city,date,fare,ride_id,driver_count,type
2250,Randallchester,2019-04-13 11:13:31,43.22,1076079536213,9,Rural
2251,North Holly,2019-02-02 14:54:00,12.42,1985256326182,8,Rural
2252,Michaelberg,2019-03-27 18:27:34,54.85,4421836952718,6,Rural
2253,Lake Latoyabury,2019-02-23 21:12:24,47.90,3269652929887,2,Rural
2254,Lake Latoyabury,2019-05-06 08:57:56,51.80,4018025271936,2,Rural
...,...,...,...,...,...,...
2370,Michaelberg,2019-04-29 17:04:39,13.38,8550365057598,6,Rural
2371,Lake Latoyabury,2019-01-30 00:05:47,20.76,9018727594352,2,Rural
2372,North Jaime,2019-02-10 21:03:50,11.11,2781339863778,1,Rural
2373,West Heather,2019-05-07 19:22:15,44.94,4256853490277,4,Rural


In [20]:
# Rides per urban city: count the ride_id entries within each city group
urban_ride_count = urban_cities_df.groupby('city')['ride_id'].count()
urban_ride_count.head()

city
Amandaburgh        18
Barajasview        22
Carriemouth        27
Christopherfurt    27
Deanville          19
Name: ride_id, dtype: int64

In [23]:
# Rides per suburban city: count the ride_id entries within each city group
suburban_ride_count = suburban_cities_df.groupby('city')['ride_id'].count()
suburban_ride_count.head()

city
Barronchester    16
Bethanyland      18
Brandonfort      19
Colemanland      22
Davidfurt        17
Name: ride_id, dtype: int64

In [24]:
# Rides per rural city: count the ride_id entries within each city group
rural_ride_count = rural_cities_df.groupby('city')['ride_id'].count()
rural_ride_count.head(n=20)

city
Bradshawfurt         10
Garzaport             3
Harringtonfort        6
Jessicaport           6
Lake Jamie            6
Lake Latoyabury      11
Michaelberg          12
New Ryantown          6
Newtonview            4
North Holly           9
North Jaime           8
Penaborough           5
Randallchester        5
South Jennifer        7
South Marychester     8
South Saramouth       4
Taylorhaven           6
West Heather          9
Name: ride_id, dtype: int64

In [None]:
# Average fare for each urban city.
# The 'fare' column is selected BEFORE aggregating: calling .mean() on the
# whole grouped frame also tries to average the text columns ('date', 'type')
# and raises TypeError on pandas >= 2.0, where numeric_only defaults to False.
urban_avg_fare = urban_cities_df.groupby('city')['fare'].mean()
urban_avg_fare.head()

In [None]:
# Average fare for each suburban city.
# The 'fare' column is selected BEFORE aggregating: calling .mean() on the
# whole grouped frame also tries to average the text columns ('date', 'type')
# and raises TypeError on pandas >= 2.0, where numeric_only defaults to False.
suburban_avg_fare = suburban_cities_df.groupby('city')['fare'].mean()
suburban_avg_fare.head()

In [None]:
# Average fare for each rural city.
# The 'fare' column is selected BEFORE aggregating: calling .mean() on the
# whole grouped frame also tries to average the text columns ('date', 'type')
# and raises TypeError on pandas >= 2.0, where numeric_only defaults to False.
rural_avg_fare = rural_cities_df.groupby('city')['fare'].mean()
rural_avg_fare.head()

In [None]:
# Average driver_count for each urban city (one value per city).
# The column is selected BEFORE aggregating: calling .mean() on the whole
# grouped frame also tries to average the text columns ('date', 'type') and
# raises TypeError on pandas >= 2.0, where numeric_only defaults to False.
urban_drivers = urban_cities_df.groupby('city')['driver_count'].mean()
urban_drivers.head()

In [None]:
# Average driver_count for each suburban city (one value per city).
# The column is selected BEFORE aggregating: calling .mean() on the whole
# grouped frame also tries to average the text columns ('date', 'type') and
# raises TypeError on pandas >= 2.0, where numeric_only defaults to False.
suburban_drivers = suburban_cities_df.groupby('city')['driver_count'].mean()
suburban_drivers.head()

In [None]:
# Average driver_count for each rural city (one value per city).
# The column is selected BEFORE aggregating: calling .mean() on the whole
# grouped frame also tries to average the text columns ('date', 'type') and
# raises TypeError on pandas >= 2.0, where numeric_only defaults to False.
rural_drivers = rural_cities_df.groupby('city')['driver_count'].mean()
rural_drivers.head()

In [None]:
# Bubble chart for urban cities: x = rides per city, y = average fare,
# marker size = 10 x the city's driver count
plt.scatter(
    urban_ride_count,
    urban_avg_fare,
    s=urban_drivers * 10,
    label='Urban',
    color='coral',
    edgecolor='black',
    linewidths=1,
    alpha=0.8,
)
plt.xlabel("Total Number of Rides (Per City)")
plt.ylabel("Average Fare ($)")
plt.title("PyBer Ride-Sharing Data (2019)")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Bubble chart for suburban cities: x = rides per city, y = average fare,
# marker size = 10 x the city's driver count
plt.scatter(
    suburban_ride_count,
    suburban_avg_fare,
    s=suburban_drivers * 10,
    label='Suburban',
    color='skyblue',
    edgecolor='black',
    linewidths=1,
    alpha=0.8,
)
plt.xlabel("Total Number of Rides (Per City)")
plt.ylabel("Average Fare ($)")
plt.title("PyBer Ride-Sharing Data (2019)")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Bubble chart for rural cities: x = rides per city, y = average fare,
# marker size = 10 x the city's driver count
plt.scatter(
    rural_ride_count,
    rural_avg_fare,
    s=rural_drivers * 10,
    label='Rural',
    color='gold',
    edgecolor='black',
    linewidths=1,
    alpha=0.8,
)
plt.xlabel("Total Number of Rides (Per City)")
plt.ylabel("Average Fare ($)")
plt.title("PyBer Ride-Sharing Data (2019)")
plt.legend()
plt.grid(True)
plt.show()

In [None]:
# Draw all three city types on one bubble chart
# (x = rides per city, y = average fare, marker size = 10 x driver count)
plt.subplots(figsize=(10,6))
plt.scatter(urban_ride_count, urban_avg_fare, s= 10*urban_drivers, color='coral', edgecolor='black', linewidths=1, alpha=0.8, label='Urban')
plt.scatter(suburban_ride_count, suburban_avg_fare, s= 10*suburban_drivers, color='skyblue', edgecolor='black', linewidths=1, alpha=0.8, label='Suburban')
plt.scatter(rural_ride_count, rural_avg_fare, s= 10*rural_drivers, color='gold', edgecolor='black', linewidths=1, alpha=0.8, label='Rural')

# Add the necessary titles and labels to the scatter chart
plt.title("PyBer Ride-Sharing Data (2019)", fontsize=20)
plt.ylabel("Average Fare ($)", fontsize=12)
plt.xlabel("Total Number of Rides (Per City)", fontsize=12)
plt.grid(True)

# Create a legend.
# The former mode='Expanded' argument is dropped: matplotlib only recognises
# "expand" (or None), so that value was silently ignored.
lgnd = plt.legend(fontsize=12, scatterpoints=1, loc='best', title='City Types')
# Give every legend marker the same fixed size instead of a data-driven one.
# Legend.legendHandles was renamed to legend_handles in matplotlib 3.7 and the
# old name was later removed, so use whichever attribute this version offers.
legend_markers = getattr(lgnd, 'legend_handles', None)
if legend_markers is None:
    legend_markers = lgnd.legendHandles
for legend_marker in legend_markers:
    # Public setter instead of writing to the private _sizes attribute
    legend_marker.set_sizes([75])
lgnd.get_title().set_fontsize(12)

# Incorporate a text label about the circle size
plt.text(42,35, "Note: Circle size correlates with driver count per city.", fontsize=12)

# Save the scatter plot figure.
# The note above is anchored at x=42 in data coordinates, which likely places it
# to the right of the axes; bbox_inches='tight' grows the saved canvas to include
# it rather than cropping at the default figure edge.
plt.savefig("analysis/Fig1.png", bbox_inches='tight')

# Show the plot
plt.show()



In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the urban rides frame
urban_cities_df.describe()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the suburban rides frame
suburban_cities_df.describe()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the rural rides frame
rural_cities_df.describe()

In [None]:
# Summary statistics of the per-city ride counts for urban cities
urban_ride_count.describe()

In [None]:
# Summary statistics of the per-city ride counts for suburban cities
suburban_ride_count.describe()

In [None]:
# Summary statistics of the per-city ride counts for rural cities
rural_ride_count.describe()

In [None]:
# Mean rides per city for (urban, suburban, rural), each rounded to 2 decimals
tuple(
    round(ride_counts.mean(), 2)
    for ride_counts in (urban_ride_count, suburban_ride_count, rural_ride_count)
)

In [None]:
# Median rides per city for (urban, suburban, rural), each rounded to 2 decimals
tuple(
    round(ride_counts.median(), 2)
    for ride_counts in (urban_ride_count, suburban_ride_count, rural_ride_count)
)

In [None]:
# Most frequent per-city ride count(s) among urban cities
urban_ride_count.mode()


In [None]:
# Most frequent per-city ride count(s) among suburban cities
suburban_ride_count.mode()


In [None]:
# Most frequent per-city ride count(s) among rural cities
rural_ride_count.mode()

In [None]:
# Import NumPy and the stats module from SciPy.
import numpy as np
import scipy.stats as sts

In [None]:
# Mean, median and mode of the per-city ride counts for urban cities
mean_urban_ride_count = np.mean(urban_ride_count)
median_urban_ride_count = np.median(urban_ride_count)
mode_urban_ride_count = sts.mode(urban_ride_count)

# Report the three measures in the same order they were computed
print(f"The mean for the ride counts for urban trips is {mean_urban_ride_count:.2f}.")
print(f"The median for the ride counts for urban trips is {median_urban_ride_count}.")
print(f"The mode for the ride counts for urban trips is {mode_urban_ride_count}.")

In [None]:
# Mean, median and mode of the per-city ride counts for suburban cities
mean_suburban_ride_count = np.mean(suburban_ride_count)
median_suburban_ride_count = np.median(suburban_ride_count)
mode_suburban_ride_count = sts.mode(suburban_ride_count)

# Report the three measures in the same order they were computed
print(f"The mean for the ride counts for suburban trips is {mean_suburban_ride_count:.2f}.")
print(f"The median for the ride counts for suburban trips is {median_suburban_ride_count}.")
print(f"The mode for the ride counts for suburban trips is {mode_suburban_ride_count}.")

In [None]:
# Mean, median and mode of the per-city ride counts for rural cities
mean_rural_ride_count = np.mean(rural_ride_count)
median_rural_ride_count = np.median(rural_ride_count)
mode_rural_ride_count = sts.mode(rural_ride_count)

# Report the three measures in the same order they were computed
print(f"The mean for the ride counts for rural trips is {mean_rural_ride_count:.2f}.")
print(f"The median for the ride counts for rural trips is {median_rural_ride_count}.")
print(f"The mode for the ride counts for rural trips is {mode_rural_ride_count}.")

In [None]:
# Pull the fare of every individual urban ride into its own Series
urban_fares = urban_cities_df.loc[:, "fare"]
urban_fares.head()

In [None]:
# Mean, median and mode of the individual ride fares in urban cities
mean_urban_fares = np.mean(urban_fares)
median_urban_fares = np.median(urban_fares)
mode_urban_fares = sts.mode(urban_fares)

# Report the three measures in the same order they were computed
print(f"The mean fare price for urban trips is ${mean_urban_fares:.2f}.")
print(f"The median fare price for urban trips is ${median_urban_fares:.2f}.")
print(f"The mode fare price for urban trips is {mode_urban_fares}.")

In [None]:
# Pull the fare of every individual suburban ride into its own Series
suburban_fares = suburban_cities_df.loc[:, "fare"]
suburban_fares.head()

In [None]:
# Mean, median and mode of the individual ride fares in suburban cities
mean_suburban_fares = np.mean(suburban_fares)
median_suburban_fares = np.median(suburban_fares)
mode_suburban_fares = sts.mode(suburban_fares)

# Report the three measures in the same order they were computed
print(f"The mean fare price for suburban trips is ${mean_suburban_fares:.2f}.")
print(f"The median fare price for suburban trips is ${median_suburban_fares:.2f}.")
print(f"The mode fare price for suburban trips is {mode_suburban_fares}.")

In [None]:
# Pull the fare of every individual rural ride into its own Series
rural_fares = rural_cities_df.loc[:, 'fare']
rural_fares.head()

In [None]:
# Mean, median and mode of the individual ride fares in rural cities
mean_rural_fares = np.mean(rural_fares)
median_rural_fares = np.median(rural_fares)
mode_rural_fares = sts.mode(rural_fares)

# Report the three measures in the same order they were computed
print(f"The mean fare price for rural trips is ${mean_rural_fares:.2f}.")
print(f"The median fare price for rural trips is ${median_rural_fares:.2f}.")
print(f"The mode fare price for rural trips is {mode_rural_fares}.")

In [None]:
# Pull the driver_count value attached to every urban ride row
urban_driver_count = urban_cities_df.loc[:, 'driver_count']
urban_driver_count.head()

In [None]:
# Mean, median and mode of driver_count across the urban ride rows
mode_urban_driver_count = sts.mode(urban_driver_count)
median_urban_driver_count = np.median(urban_driver_count)
mean_urban_driver_count = np.mean(urban_driver_count)

# One-line report; the message is split across adjacent literals for readability only
print(
    f'For urban city driver count, the mean is {mean_urban_driver_count:.2f}, '
    f'the median is {median_urban_driver_count}, '
    f'and the mode is {mode_urban_driver_count}.'
)

In [None]:
# Pull the driver_count value attached to every suburban ride row
suburban_driver_count = suburban_cities_df.loc[:, 'driver_count']
suburban_driver_count.head()

In [None]:
# Mean, median and mode of driver_count across the suburban ride rows
mode_suburban_driver_count = sts.mode(suburban_driver_count)
median_suburban_driver_count = np.median(suburban_driver_count)
mean_suburban_driver_count = np.mean(suburban_driver_count)

# One-line report; the message is split across adjacent literals for readability only
print(
    f'For suburban city driver count, the mean is {mean_suburban_driver_count:.2f}, '
    f'the median is {median_suburban_driver_count}, '
    f'and the mode is {mode_suburban_driver_count}.'
)

In [None]:
# Pull the driver_count value attached to every rural ride row
rural_driver_count = rural_cities_df.loc[:, 'driver_count']
rural_driver_count.head()

In [None]:
# Mean, median and mode of driver_count across the rural ride rows
mode_rural_driver_count = sts.mode(rural_driver_count)
median_rural_driver_count = np.median(rural_driver_count)
mean_rural_driver_count = np.mean(rural_driver_count)

# One-line report; the message is split across adjacent literals for readability only
print(
    f'For rural city driver count, the mean is {mean_rural_driver_count:.2f}, '
    f'the median is {median_rural_driver_count}, '
    f'and the mode is {mode_rural_driver_count}.'
)

In [None]:
# Create a box-and-whisker plot for the urban cities ride count.
x_labels = ["Urban"]
fig, ax = plt.subplots()
ax.boxplot(urban_ride_count)
# Label the box through the axis instead of boxplot(labels=...): that keyword
# was renamed to tick_labels in matplotlib 3.9 and the old spelling is
# deprecated, while set_xticklabels works on old and new versions alike.
ax.set_xticklabels(x_labels)
# Add the title, y-axis label and grid.
ax.set_title('Ride Count Data (2019)')
ax.set_ylabel('Number of Rides')
ax.set_yticks(np.arange(10, 41, step=2.0))
ax.grid()
plt.show()

In [None]:
# Summary statistics of the per-city ride counts for urban cities
urban_ride_count.describe()

In [None]:
# Summary statistics of the per-city ride counts for suburban cities
suburban_ride_count.describe()

In [None]:
# Create a box-and-whisker plot for the suburban cities ride count.
x_labels = ['Suburban']
fig, ax = plt.subplots()
ax.boxplot(suburban_ride_count)
# Label the box through the axis instead of boxplot(labels=...): that keyword
# was renamed to tick_labels in matplotlib 3.9 and the old spelling is
# deprecated, while set_xticklabels works on old and new versions alike.
ax.set_xticklabels(x_labels)
# Add the title, y-axis label and grid
ax.set_title('Ride Count Data (2019)')
ax.set_ylabel('Number of Rides')
ax.set_yticks(np.arange(8, 29, step=2.0))
ax.grid()
plt.show()

In [None]:
# Summary statistics of the per-city ride counts for rural cities
rural_ride_count.describe()

In [None]:
# Create a box-and-whisker plot for the rural cities ride count.
x_labels = ['Rural']
fig, ax = plt.subplots()
ax.boxplot(rural_ride_count)
# Label the box through the axis instead of boxplot(labels=...): that keyword
# was renamed to tick_labels in matplotlib 3.9 and the old spelling is
# deprecated, while set_xticklabels works on old and new versions alike.
ax.set_xticklabels(x_labels)
# Add the title, y-axis label and grid
ax.set_title('Ride Count Data (2019)')
ax.set_ylabel('Number of Rides')
ax.set_yticks(np.arange(1, 14, step=1.0))
ax.grid()
plt.show()

In [None]:
# Add all ride count box-and-whisker plots to the same graph.
x_labels = ["Urban", "Suburban","Rural"]
ride_count_data = [urban_ride_count, suburban_ride_count, rural_ride_count]
fig, ax = plt.subplots(figsize=(10, 6))
ax.set_title('Ride Count Data (2019)',fontsize=20)
ax.set_ylabel('Number of Rides',fontsize=14)
ax.set_xlabel("City Types",fontsize=14)
ax.boxplot(ride_count_data)
# Label the boxes through the axis instead of boxplot(labels=...): that keyword
# was renamed to tick_labels in matplotlib 3.9 and the old spelling is
# deprecated, while set_xticklabels works on old and new versions alike.
# The labels are applied in the same order as the series in ride_count_data.
ax.set_xticklabels(x_labels)
ax.set_yticks(np.arange(0, 45, step=3.0))
ax.grid()
# Save the figure.
plt.savefig("analysis/Fig2.png")
plt.show()

In [None]:
# Find the urban city with the most rides.
# idxmax() returns the index label (the city name) of the largest value, so the
# lookup no longer depends on a hard-coded ride count of 39 and keeps working if
# the data changes. On ties it returns the first match, as index[0] did.
urban_city_outlier = urban_ride_count.idxmax()
print(f"{urban_city_outlier} has the highest rider count.")

In [None]:
# Create a box-and-whisker plot for the urban fare data.
x_labels = ["Urban"]
fig, ax = plt.subplots()
ax.boxplot(urban_fares)
# Label the box through the axis instead of boxplot(labels=...): that keyword
# was renamed to tick_labels in matplotlib 3.9 and the old spelling is
# deprecated, while set_xticklabels works on old and new versions alike.
ax.set_xticklabels(x_labels)
# Add the title, y-axis label and grid.
ax.set_title('Ride Fare Data (2019)')
ax.set_ylabel('Fare($USD)')
ax.set_yticks(np.arange(0, 51, step=5.0))
ax.grid()
plt.show()
# The describe() result is the cell's last expression, so it is echoed below the heading
print("Summary Statistics")
urban_fares.describe()

In [None]:
# Create a box-and-whisker plot for the suburban fare data.
x_labels = ["Suburban"]
fig, ax = plt.subplots()
ax.boxplot(suburban_fares)
# Label the box through the axis instead of boxplot(labels=...): that keyword
# was renamed to tick_labels in matplotlib 3.9 and the old spelling is
# deprecated, while set_xticklabels works on old and new versions alike.
ax.set_xticklabels(x_labels)
# Add the title, y-axis label and grid.
ax.set_title('Ride Fare Data (2019)')
ax.set_ylabel('Fare($USD)')
ax.set_yticks(np.arange(5, 60, step=5.0))
ax.grid()
plt.show()
# The describe() result is the cell's last expression, so it is echoed below the heading
print("Summary Statistics")
suburban_fares.describe()

In [None]:
# Create a box-and-whisker plot for the rural fare data.
x_labels = ["Rural"]
fig, ax = plt.subplots()
ax.boxplot(rural_fares)
# Label the box through the axis instead of boxplot(labels=...): that keyword
# was renamed to tick_labels in matplotlib 3.9 and the old spelling is
# deprecated, while set_xticklabels works on old and new versions alike.
ax.set_xticklabels(x_labels)
# Add the title, y-axis label and grid.
ax.set_title('Ride Fare Data (2019)', fontsize=20)
ax.set_ylabel('Fare($USD)', fontsize=12)
ax.set_yticks(np.arange(5, 70, step=10.0))
ax.grid()
plt.show()
# The describe() result is the cell's last expression, so it is echoed below the heading
print("Summary Statistics")
rural_fares.describe()

In [None]:
# Create a box-and-whisker plot for all city fare data
x_labels = ['Urban', 'Suburban', 'Rural']
city_fare_data = [urban_fares, suburban_fares, rural_fares]
fig, ax = plt.subplots(figsize=(10,6))
ax.boxplot(city_fare_data)
# Label the boxes through the axis instead of boxplot(labels=...): that keyword
# was renamed to tick_labels in matplotlib 3.9 and the old spelling is
# deprecated, while set_xticklabels works on old and new versions alike.
# The labels are applied in the same order as the series in city_fare_data.
ax.set_xticklabels(x_labels)
# Add the title, y-axis label and grid
ax.set_title('Ride Fare Data (2019)', fontsize=20)
ax.set_ylabel('Fare ($USD)', fontsize=14)
ax.set_xlabel('City types', fontsize=14)
ax.set_yticks(np.arange(0, 60, step=5.0))
ax.grid()
# Save the figure before showing it
plt.savefig('analysis/Fig3.png')
plt.show()



In [None]:
# Create the box-and-whisker plot for the urban driver count data
# (urban_drivers holds one driver_count value per city).
x_labels = ["Urban"]
fig, ax = plt.subplots()
ax.boxplot(urban_drivers)
# Label the box through the axis instead of boxplot(labels=...): that keyword
# was renamed to tick_labels in matplotlib 3.9 and the old spelling is
# deprecated, while set_xticklabels works on old and new versions alike.
ax.set_xticklabels(x_labels)
# Add the title, y-axis label and grid.
ax.set_title('Driver Count Data (2019)')
# Stray closing parenthesis removed from the axis label
ax.set_ylabel('Number of Drivers')
ax.set_yticks(np.arange(0, 90, step=5.0))
ax.grid()
plt.show()
# The describe() result is the cell's last expression, so it is echoed below the heading
print("Summary Statistics")
urban_drivers.describe()

In [None]:
# Create the box-and-whisker plot for the suburban driver count data
# (suburban_drivers holds one driver_count value per city).
x_labels = ["Suburban"]
fig, ax = plt.subplots()
ax.boxplot(suburban_drivers)
# Label the box through the axis instead of boxplot(labels=...): that keyword
# was renamed to tick_labels in matplotlib 3.9 and the old spelling is
# deprecated, while set_xticklabels works on old and new versions alike.
ax.set_xticklabels(x_labels)
# Add the title, y-axis label and grid.
ax.set_title('Driver Count Data (2019)')
# Stray closing parenthesis removed from the axis label
ax.set_ylabel('Number of Drivers')
ax.set_yticks(np.arange(0, 35, step=5.0))
ax.grid()
plt.show()
# The describe() result is the cell's last expression, so it is echoed below the heading
print("Summary Statistics")
suburban_drivers.describe()

In [None]:
# Create the box-and-whisker plot for the rural driver count data
# (rural_drivers holds one driver_count value per city).
x_labels = ["Rural"]
fig, ax = plt.subplots()
ax.boxplot(rural_drivers)
# Label the box through the axis instead of boxplot(labels=...): that keyword
# was renamed to tick_labels in matplotlib 3.9 and the old spelling is
# deprecated, while set_xticklabels works on old and new versions alike.
ax.set_xticklabels(x_labels)
# Add the title, y-axis label and grid.
ax.set_title('Driver Count Data (2019)')
# Stray closing parenthesis removed from the axis label
ax.set_ylabel('Number of Drivers')
ax.set_yticks(np.arange(0, 12, step=1.0))
ax.grid()
plt.show()
# The describe() result is the cell's last expression, so it is echoed below the heading
print("Summary Statistics")
rural_drivers.describe()

In [None]:
# Create the box-and-whisker plot for the driver count data of all city types.
x_labels = ["Urban", "Suburban", "Rural"]
city_driver_count = [urban_drivers, suburban_drivers, rural_drivers]
fig, ax = plt.subplots(figsize=(10,6))
ax.boxplot(city_driver_count)
# Label the boxes through the axis instead of boxplot(labels=...): that keyword
# was renamed to tick_labels in matplotlib 3.9 and the old spelling is
# deprecated, while set_xticklabels works on old and new versions alike.
# The labels are applied in the same order as the series in city_driver_count.
ax.set_xticklabels(x_labels)
# Add the title, axis labels and grid.
ax.set_title('Driver Count Data (2019)', fontsize=20)
ax.set_xlabel('City Types', fontsize=14)
# Stray closing parenthesis removed from the axis label
ax.set_ylabel('Number of Drivers', fontsize=14)
ax.set_yticks(np.arange(0, 80, step=5.0))
ax.grid()
# Save the figure before showing it
plt.savefig('analysis/Fig4.png')
plt.show()

In [None]:
# Share of all fare revenue contributed by each city type, in percent
type_percents = (
    pyber_data_df.groupby("type")["fare"].sum()
    .mul(100)
    .div(pyber_data_df["fare"].sum())
)
type_percents

In [None]:
# Import mpl to change the plot configurations using rcParams.
import matplotlib as mpl
# Change the default font size from 10 to 14 BEFORE drawing: rcParams only
# apply to text created afterwards, so setting it after plt.pie() left this
# chart's wedge labels at the old size.
mpl.rcParams['font.size'] = 14
# Build the percentage of fares by city type pie chart.
plt.subplots(figsize=(10, 6))
plt.pie(type_percents,
        # Labels come from the data so each wedge is always named after its own group.
        labels = list(type_percents.index),
        # colors/explode are positional and assume the groupby's sorted order
        # (Rural, Suburban, Urban); the last wedge is pulled out.
        colors = ['gold','lightskyblue','lightcoral'],
        explode = [0,0,0.1],
        autopct = '%1.1f%%',
        shadow = True,
        startangle = 150)
# The percentages are grouped by city type, so the title says so
plt.title('% of Total Fares by City Type', fontsize=20)
# Save Figure
plt.savefig("analysis/Fig5.png")
plt.show()

In [None]:
# Share of all rides that took place in each city type, in percent
ride_percents = (
    pyber_data_df.groupby('type')['ride_id'].count()
    .mul(100)
    .div(pyber_data_df['ride_id'].count())
)
ride_percents

In [None]:
# Build the percentage of rides by city type pie chart.
# Set the default font size to 14 before drawing: rcParams only apply to text
# created afterwards, so this keeps the cell correct when run on its own.
mpl.rcParams['font.size'] = 14
plt.subplots(figsize=(10,6))
plt.pie(ride_percents,
        # Labels come from the data so each wedge is always named after its own group.
        labels = list(ride_percents.index),
        # colors/explode are positional and assume the groupby's sorted order
        # (Rural, Suburban, Urban); the last wedge is pulled out.
        colors = ['gold','lightskyblue','lightcoral'],
        explode = [0,0,0.1],
        autopct = '%1.1f%%',
        shadow = True,
        startangle = 150)
plt.title('% of Total Rides by City Type', fontsize=20)
# Save Figure
plt.savefig('analysis/Fig6.png')
plt.show()

In [None]:
# Share of all drivers that belong to each city type, in percent
# (computed from the city table, where each row carries a city's driver_count)
driver_percents = (
    city_data_df.groupby('type')['driver_count'].sum()
    .mul(100)
    .div(city_data_df['driver_count'].sum())
)
driver_percents

In [None]:
# Build the percentage of drivers by city type pie chart.
# Set the default font size to 14 before drawing: rcParams only apply to text
# created afterwards, so this keeps the cell correct when run on its own.
mpl.rcParams['font.size'] = 14
plt.subplots(figsize=(10,6))
plt.pie(driver_percents,
        # Labels come from the data so each wedge is always named after its own group.
        labels = list(driver_percents.index),
        # colors/explode are positional and assume the groupby's sorted order
        # (Rural, Suburban, Urban); the last wedge is pulled out.
        colors = ['gold','lightskyblue','lightcoral'],
        explode = [0,0,0.1],
        autopct = '%1.1f%%',
        shadow = True,
        startangle = 150)
# fontsize=20 added so the title matches the other two pie charts
plt.title('% of Total Drivers by City Type', fontsize=20)
# Save Figure
plt.savefig('analysis/Fig7.png')
plt.show()