In [89]:
# Add Matplotlib inline command
%matplotlib inline

# import dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import os

In [90]:
# Load files and Read Data

# Locate the two input CSVs inside the Resources folder
city_data_to_load, ride_data_to_load = (
    os.path.join("Resources", csv_name)
    for csv_name in ("city_data.csv", "ride_data.csv")
)

# Read each CSV into its own DataFrame (city data first, then ride data)
city_data_df = pd.read_csv(filepath_or_buffer=city_data_to_load)
ride_data_df = pd.read_csv(filepath_or_buffer=ride_data_to_load)

In [91]:
# Merge the DataFrames

# Left-join the city attributes onto every ride, matching rows on the shared
# "city" column. The key only needs to be named once.
pyber_data_df = pd.merge(ride_data_df, city_data_df, how="left", on="city")
pyber_data_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873,5,Urban
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577,72,Urban
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003,57,Urban
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178,34,Urban
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344,46,Urban


In [92]:
# List the distinct city types, in order of first appearance

pd.unique(city_data_df["type"])

array(['Urban', 'Suburban', 'Rural'], dtype=object)

In [93]:
# Create separate DataFrames for Urban, Suburban, and Rural data

# Each filter keeps only the merged rows whose "type" equals the given label.
# The labels must match the values in the data exactly
# ('Urban', 'Suburban', 'Rural'); a misspelled label silently yields an
# empty DataFrame rather than an error.
urban_cities_df = pyber_data_df[pyber_data_df["type"] == "Urban"]
suburban_cities_df = pyber_data_df[pyber_data_df["type"] == "Suburban"]
rural_cities_df = pyber_data_df[pyber_data_df["type"] == "Rural"]

In [94]:
# Count the rides recorded for each city type

# Pick the ride_id column first, then count its non-null entries per group
total_rides = pyber_data_df.groupby("type")["ride_id"].count()
total_rides

type
Rural        125
Suburban     625
Urban       1625
Name: ride_id, dtype: int64

In [95]:
# Find total drivers per city type

# Use city_data_df (not the merged ride table) and select driver_count before
# aggregating, so only that numeric column is summed rather than every column
# in the frame.
total_drivers = city_data_df.groupby("type")["driver_count"].sum()
total_drivers

type
Rural         78
Suburban     490
Urban       2405
Name: driver_count, dtype: int64

In [96]:
# Calculate total fares by city type

# Select the fare column before aggregating, so only that numeric column is
# summed rather than every column in the merged frame (including the string
# columns).
total_fares = pyber_data_df.groupby("type")["fare"].sum()
total_fares

type
Rural        4327.93
Suburban    19356.33
Urban       39854.38
Name: fare, dtype: float64

In [97]:
# Calculate average fare per ride by city type

# Select the fare column before averaging. Calling mean() on the whole grouped
# frame would also try to average the string columns (city, date), which
# recent pandas versions reject with a TypeError.
average_fare_per_ride = pyber_data_df.groupby("type")["fare"].mean()
average_fare_per_ride

type
Rural       34.623440
Suburban    30.970128
Urban       24.525772
Name: fare, dtype: float64

In [98]:
# Calculate average fare per driver by city type

# Element-wise division; the two Series are aligned on their shared "type" index
average_fare_per_driver = total_fares.div(total_drivers)
average_fare_per_driver

type
Rural       55.486282
Suburban    39.502714
Urban       16.571468
dtype: float64

In [99]:
# Create the summary DataFrame

# Column label -> per-city-type Series; the dict order sets the column order
summary_columns = {
    "Total Rides": total_rides,
    "Total Drivers": total_drivers,
    "Total Fares": total_fares,
    "Average Fare Per Ride": average_fare_per_ride,
    "Average Fare Per Driver": average_fare_per_driver,
}
summary_df = pd.DataFrame(summary_columns)

summary_df

Unnamed: 0_level_0,Total Rides,Total Drivers,Total Fares,Average Fare Per Ride,Average Fare Per Driver
type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Rural,125,78,4327.93,34.62344,55.486282
Suburban,625,490,19356.33,30.970128,39.502714
Urban,1625,2405,39854.38,24.525772,16.571468


In [100]:
# Format Summary DataFrame
# NOTE: this replaces the numeric columns of summary_df with display strings.

# Clear the index name so no label is shown above the row names
summary_df.index.name = None

# Column -> format template: thousands separators for the counts,
# dollar sign with thousands separators and two decimals for the money columns
column_formats = {
    "Total Rides": "{:,}",
    "Total Drivers": "{:,}",
    "Total Fares": "${:,.2f}",
    "Average Fare Per Ride": "${:,.2f}",
    "Average Fare Per Driver": "${:,.2f}",
}

# Apply each template to its column, value by value
for column_name, template in column_formats.items():
    summary_df[column_name] = summary_df[column_name].map(template.format)

In [101]:
# Display the summary table after the formatting applied in the previous cell
summary_df

Unnamed: 0,Total Rides,Total Drivers,Total Fares,Average Fare Per Ride,Average Fare Per Driver
Rural,125,78,"$4,327.93",$34.62,$55.49
Suburban,625,490,"$19,356.33",$30.97,$39.50
Urban,1625,2405,"$39,854.38",$24.53,$16.57
