In [1]:
# Add Matplotlib inline magic command
%matplotlib inline
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os

In [2]:
# Files to load
city_data_to_load = "C:/Users/esobieski/Documents/Berkeley/PyBer_Analysis/Resources/city_data.csv"
ride_data_to_load = "C:/Users/esobieski/Documents/Berkeley/PyBer_Analysis/Resources/ride_data.csv"

In [3]:
# Read the city data file and store it in a pandas DataFrame.
city_data_df = pd.read_csv(city_data_to_load)

In [4]:
# Read the ride data file and store it in a pandas DataFrame.
ride_data_df = pd.read_csv(ride_data_to_load)

In [5]:
# Combine the data into a single dataset
pyber_data_df = pd.merge(ride_data_df, city_data_df, how="outer", on=["city", "city"])

# Display the DataFrame
pyber_data_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,1/14/2019 10:14,13.83,5739410000000.0,5,Urban
1,Lake Jonathanshire,4/7/2019 20:51,31.25,4441250000000.0,5,Urban
2,Lake Jonathanshire,3/9/2019 23:45,19.89,2389500000000.0,5,Urban
3,Lake Jonathanshire,4/7/2019 18:09,24.28,7796810000000.0,5,Urban
4,Lake Jonathanshire,1/2/2019 14:14,13.89,424255000000.0,5,Urban


In [6]:
# Get the total number of rides for urban, suburban and rural city types.
total_ride_count = pyber_data_df.groupby(["type"]).count()["ride_id"]
total_ride_count

type
Rural        125
Suburban     625
Urban       1625
Name: ride_id, dtype: int64

In [7]:
# Get the total number of drivers for urban, suburban and rural city types.
driver_total = city_data_df.groupby(["type"]).sum()["driver_count"]
driver_total

type
Rural         78
Suburban     490
Urban       2405
Name: driver_count, dtype: int64

In [8]:
# Get the total number of fares for urban, suburban and rural city types.
fare_total = pyber_data_df.groupby(["type"]).sum()["fare"]
fare_total

type
Rural        4327.93
Suburban    19356.33
Urban       39854.38
Name: fare, dtype: float64

In [9]:
# Get Average fare per ride for urban, suburban and rural city types.
avg_fare_per_ride = fare_total/total_ride_count
avg_fare_per_ride

type
Rural       34.623440
Suburban    30.970128
Urban       24.525772
dtype: float64

In [10]:
# Get Average fare per driver for urban, suburban and rural city types.
avg_fare_per_driver = fare_total/driver_total
avg_fare_per_driver

type
Rural       55.486282
Suburban    39.502714
Urban       16.571468
dtype: float64

In [11]:
#create PyBer_Summary Dataframe, with formatting
PyBer_summary_df=pd.DataFrame([{'Total Rides': total_ride_count,
                  'Total Drivers': driver_total,
                  'Total Fares': fare_total,
                  'Avg Fare per Ride': avg_fare_per_ride,
                  'Avg Fare per Driver': avg_fare_per_driver}])
PyBer_summary_df.groupby(['type'])
PyBer_summary_df

Unnamed: 0,Total Rides,Total Drivers,Total Fares,Avg Fare per Ride,Avg Fare per Driver
0,type Rural 125 Suburban 625 Urban ...,type Rural 78 Suburban 490 Urban ...,type Rural 4327.93 Suburban 19356.33...,type Rural 34.623440 Suburban 30.9701...,type Rural 55.486282 Suburban 39.5027...


In [13]:
# Format the "Total Students" to have the comma for a thousands separator.
PyBer_summary_df["Total Rides"] = PyBer_summary_df["Total Rides"].map("{:,}".format)
PyBer_summary_df["Total Drivers"] = PyBer_summary_df["Total Drivers"].map("{:,}".format)
PyBer_summary_df["Total Fares"] = PyBer_summary_df["Total Fares"].map("${:,.2f}".format)
PyBer_summary_df["Avg Fare per Ride"] = PyBer_summary_df["Avg Fare per Ride"].map("${:,.2f}".format)
PyBer_summary_df["Avg Fare per Driver"] = PyBer_summary_df["Avg Fare per Driver"].map("${:,.2f}".format)
PyBer_summary_df

TypeError: unsupported format string passed to Series.__format__

In [None]:
# 1. Rename columns in pyber_data_df

In [None]:
# 2. Set index to the Date column

In [None]:
# 3. Create new dataframe for fares and include only the Date, City Type, 

In [None]:
# 4. Drop the extra date column

In [None]:
# 5. Set the index to the datetime data type

In [None]:
# 6. Check the Dataframe using info() method to make sure the index is a datetime data type

In [None]:
# 7. Calculate the sum() of fares by the type of city and date

In [None]:
# 8. Reset the index, which is needed for step 10

In [None]:
# 9. Create a pivot table DataFrame with the Date as the index and Columns="City Type" with the Fare for each Date in each row.  
# Note: there will be NaNs in some forws, which will be taken care of when you sum based on Date.


In [None]:
# 10. Create a new DataFrame from the pivot table DataFrame on the given dates '2019-01-10':'2019-04-28' using loc

In [None]:
# 11. Create a new Dataframe by setting the Dataframe you created in Step 11 with resample() in weekly bins, 
# and calculate the sum of the fares each week

In [None]:
# 12. Using Object-Oriented (ax) interface method, plot the dataframe you created in Step 11 with resample() using the df.plot() function.
# Import the style from matplotlib
import matplotlib.pyplot as plt
# Use the graph style fivethirtyeight
plt.style.use('fivethirtyeight')
# Make sure the figure is not too small

# Create a stacked line chart using fivethirtyeight plot for total fare by city type.
fig, ax = plt.subplots()
df.plot(x='Date', y='Fare', ylim=[0,2500], figsize=[12,12], ax=ax)
# Add the title, y-axis label and y.
ax.set_title('Total Fare by City Type', fontsize=20)
ax.set_xlabel('2019',fontsize=14)
ax.set_ylabel('Fare ($USD)', fontsize=14)
ax.set_yticks(np.arange(0, 2500, step=500))
# Add the legend.
ax.legend()
# Save the figure to the "analysis" folder
plt.savefig("C:/Users/esobieski/Documents/Berkeley/PyBer_Analysis/Analysis/Fig10.png")
plt.show()