In [1]:
# Add Matplotlib inline magic command
%matplotlib inline

# Dependencies and setup
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
import numpy as np
import scipy.stats as sts

In [2]:
# Relative locations of the two input CSV files (city data, then ride data)
city_data_to_load, ride_data_to_load = (
    "Resources/city_data.csv",
    "Resources/ride_data.csv",
)

In [3]:
# Load each CSV into its own Pandas DataFrame
ride_data_df = pd.read_csv(ride_data_to_load)
city_data_df = pd.read_csv(city_data_to_load)

# Show the ride data as this cell's output
ride_data_df

Unnamed: 0,city,date,fare,ride_id
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344
...,...,...,...,...
2370,Michaelberg,2019-04-29 17:04:39,13.38,8550365057598
2371,Lake Latoyabury,2019-01-30 00:05:47,20.76,9018727594352
2372,North Jaime,2019-02-10 21:03:50,11.11,2781339863778
2373,West Heather,2019-05-07 19:22:15,44.94,4256853490277


In [4]:
# # INSPECT THE CITY DATA
# # (exploratory checks, currently disabled; uncomment a print line to run it)
# # 1. Count the non-null values in each column
# print(city_data_df.count())

# # 2. Count the null values in each column
# print(city_data_df.isnull().sum())

# # 3. Get the data types of each column
# print(city_data_df.dtypes)

# # 4. Get the unique values of the type of city
# print(city_data_df["type"].unique())

# # 5. Get the number of data points (rows) for each city type:
# #    Urban, Suburban, and Rural
# print(sum(city_data_df["type"]=="Urban"))
# print(sum(city_data_df["type"]=="Suburban"))
# print(sum(city_data_df["type"]=="Rural"))

In [5]:
# # INSPECT THE RIDE DATA
# # (exploratory checks, currently disabled; uncomment a print line to run it)
# # 1. Count the non-null values in each column
# print(ride_data_df.count())

# # 2. Count the null values in each column
# print(ride_data_df.isnull().sum())

# # 3. Get the data types of each column
# print(ride_data_df.dtypes)

In [6]:
# Combine the data into a single dataset: a left merge keeps every ride row
# and attaches the matching city's columns to it.
pyber_data_df = pd.merge(
    ride_data_df,
    city_data_df,
    how="left",
    on="city",  # single join key; listing "city" twice was redundant
    # Fail loudly if a city appears more than once in city_data_df, since
    # duplicate city rows would silently multiply the matching ride rows.
    validate="many_to_one",
)
pyber_data_df

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,2019-01-14 10:14:22,13.83,5739410935873,5,Urban
1,South Michelleport,2019-03-04 18:24:09,30.24,2343912425577,72,Urban
2,Port Samanthamouth,2019-02-24 04:29:00,33.44,2005065760003,57,Urban
3,Rodneyfort,2019-02-10 23:22:03,23.44,5149245426178,34,Urban
4,South Jack,2019-03-06 04:28:35,34.58,3908451377344,46,Urban
...,...,...,...,...,...,...
2370,Michaelberg,2019-04-29 17:04:39,13.38,8550365057598,6,Rural
2371,Lake Latoyabury,2019-01-30 00:05:47,20.76,9018727594352,2,Rural
2372,North Jaime,2019-02-10 21:03:50,11.11,2781339863778,1,Rural
2373,West Heather,2019-05-07 19:22:15,44.94,4256853490277,4,Rural


# Module 5 Challenge Pseudocode

## Deliverable 1 - A Summary DataFrame
1. Calculate total rides per city-type
2. Calculate total drivers per city-type
3. Calculate total fares per city-type
4. Calculate average fare per ride per city-type
5. Calculate average fare per driver per city-type
6. Create new DataFrame
7. Format new DataFrame

In [7]:
# 1. Total rides per city-type
# Select "ride_id" before aggregating so only that column is counted,
# instead of counting every column and discarding all but one.
total_rides = pyber_data_df.groupby("type")["ride_id"].count()
# total_rides

In [8]:
# 2. Total drivers per city-type
# Select "driver_count" before summing: summing the whole frame would also
# try to "sum" the non-numeric columns (e.g. the city names), which is wasted
# work and is handled differently across pandas versions.
total_drivers = city_data_df.groupby("type")["driver_count"].sum()
# total_drivers

In [9]:
# 3. Total fares per city-type
# Select "fare" before summing: summing the whole frame would also try to
# "sum" the non-numeric columns (city, date), which is wasted work and is
# handled differently across pandas versions.
total_fares = pyber_data_df.groupby("type")["fare"].sum()
# total_fares

In [10]:
# 4. Average fare per ride per city-type
# (each city type's total fares divided by its number of rides)
average_fare_ride = total_fares.div(total_rides)
# average_fare_ride

In [11]:
# 5. Average fare per driver per city-type
# (each city type's total fares divided by its number of drivers)
average_fare_driver = total_fares.div(total_drivers)
# average_fare_driver

In [12]:
# 6. Creation of summary DataFrame
# Map each output column label to the per-city-type Series that fills it,
# then let pandas align them on their shared index.
summary_columns = {
    "Total Rides": total_rides,
    "Total Drivers": total_drivers,
    "Total Fares": total_fares,
    "Average Fare per Ride": average_fare_ride,
    "Average Fare per Driver": average_fare_driver,
}
challenge_summary_df = pd.DataFrame(summary_columns)
# challenge_summary_df

In [13]:
# 7. Format DataFrame
# Display format for each summary column: counts get thousands separators
# and no decimals; monetary columns get a dollar sign and two decimals.
summary_column_formats = {
    "Total Rides": "{:,.0f}",
    "Total Drivers": "{:,.0f}",
    "Total Fares": "${:,.2f}",
    "Average Fare per Ride": "${:,.2f}",
    "Average Fare per Driver": "${:,.2f}",
}

# Remove the index name so no extra label is shown above the row labels.
challenge_summary_df.index.name = None

for column_name, number_format in summary_column_formats.items():
    # Formatting replaces the numbers with strings, and the float format
    # codes raise ValueError on strings. Skipping columns that are no longer
    # numeric makes this cell safe to run more than once.
    if pd.api.types.is_numeric_dtype(challenge_summary_df[column_name]):
        challenge_summary_df[column_name] = challenge_summary_df[column_name].map(number_format.format)

In [14]:
# Show the summary table as this cell's output
challenge_summary_df

Unnamed: 0,Total Rides,Total Drivers,Total Fares,Average Fare per Ride,Average Fare per Driver
Rural,125,78,"$4,327.93",$34.62,$55.49
Suburban,625,490,"$19,356.33",$30.97,$39.50
Urban,1625,2405,"$39,854.38",$24.53,$16.57


## Deliverable 2 - Multi-Line Plot for the Sum of the Fares for Each City Type
1. 