# Pyber Challenge

### 4.3 Loading and Reading CSV files

In [None]:
# Add Matplotlib inline magic command
%matplotlib inline

# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Relative paths of the two CSV inputs (adjust these if the Resources folder moves)
city_data_to_load = "Resources/city_data.csv"
ride_data_to_load = "Resources/ride_data.csv"

# Load each file into its own DataFrame: city data first, then ride data
city_data_df, ride_data_df = (
    pd.read_csv(csv_path) for csv_path in (city_data_to_load, ride_data_to_load)
)


### Merge the DataFrames

In [None]:

# Combine the data into a single dataset: a left join keeps every ride row and
# attaches the columns of the matching city row.
# "city" is the only shared key, so it is passed once. validate= makes the
# merge fail loudly if a city appears more than once in city_data_df, which
# would otherwise silently duplicate rides and inflate the totals computed later.
pyber_data_df = pd.merge(
    ride_data_df,
    city_data_df,
    how="left",
    on="city",
    validate="many_to_one",
)

# Display the data table for preview
pyber_data_df



## Deliverable 1: Get a Summary DataFrame 

In [None]:
#  1. Get the total rides for each city type
# Count the ride IDs inside each city-type group and label the resulting Series.
rides_by_city = (
    pyber_data_df.groupby("type")["ride_id"]
    .count()
    .rename("Total Rides")
)
rides_by_city



In [None]:
# 2. Get the total drivers for each city type
# Uses the city table rather than the merged ride table, so each city's
# driver_count is added exactly once.
drivers_by_city = (
    city_data_df.groupby("type")["driver_count"]
    .sum()
    .rename("Total Drivers")
)
drivers_by_city

In [None]:
#  3. Get the total amount of fares for each city type
# Add up the fare of every ride inside each city-type group.
fares_by_city = (
    pyber_data_df.groupby("type")["fare"]
    .sum()
    .rename("Total Fares")
)
fares_by_city

In [None]:
#  4. Get the average fare per ride for each city type. 
# Select the "fare" column *before* aggregating: taking the mean of the whole
# grouped frame would also try to average the text columns (e.g. "city",
# "date"), which raises a TypeError on pandas >= 2.0.
average_fare_by_city_type = (
    pyber_data_df.groupby("type")["fare"]
    .mean()
    .rename("Average Fare per Ride")
)
average_fare_by_city_type

In [None]:
# 5. Get the average fare per driver for each city type. 
# Divide total fares by total drivers; the two Series align on their shared
# city-type index, so each type is divided by its own driver count.
average_fare_per_driver_by_city_type = (
    fares_by_city.div(drivers_by_city)
    .rename("Average Fare per Driver")
)
average_fare_per_driver_by_city_type

In [None]:
#  6. Create a PyBer summary DataFrame. 
# Display option: every float pandas renders from here on is shown as dollars
# with thousands separators and two decimals (the stored values stay numeric).
pd.set_option("display.float_format", '${:,.2f}'.format)

# Place the five per-type Series side by side; each Series name becomes a column header.
summary_columns = [
    rides_by_city,
    drivers_by_city,
    fares_by_city,
    average_fare_by_city_type,
    average_fare_per_driver_by_city_type,
]
pyber_summary_df = pd.concat(summary_columns, axis=1)

pyber_summary_df


In [None]:
#  7. Cleaning up the DataFrame. Delete the index name
# rename_axis(None) returns a new frame whose index carries no name, instead of
# mutating the existing Index object in place (that object may also be
# referenced by the Series the summary was built from).
pyber_summary_df = pyber_summary_df.rename_axis(None)

pyber_summary_df

## Deliverable 2. Create a multiple-line plot that shows the total weekly fares for each type of city.

In [None]:
# 1. Read the merged DataFrame
# (re-display the ride/city merge built earlier in the notebook; it is the input for the steps below)
pyber_data_df


In [None]:
# 2. Using groupby() to create a new DataFrame showing the sum of the fares 
#  for each date where the indices are the city type and date.
# sum() rather than mean(): the later weekly figures are meant to be fare
# totals, so fares sharing the same date and city type must be added up.
fare_by_date_df = pyber_data_df.groupby(['date', 'type'])[['fare']].sum()
fare_by_date_df

In [None]:
# 3. Reset the index on the DataFrame you created in Step 2. This is needed to use the 'pivot()' function.
# reset_index() turns the ('date', 'type') group keys back into ordinary columns.
# Note: the result is bound to the same name, so running this cell a second
# time without re-running Step 2 adds an extra 'index' column.
fare_by_date_df = fare_by_date_df.reset_index()
fare_by_date_df

In [None]:
# 4. Create a pivot table with the 'date' as the index, the columns ='type', and values='fare' 
# to get the total fares for each type of city by the date. 
# values is passed as a plain string so the columns are just the city types
# (a list would add a redundant 'fare' level on top of them), and aggfunc='sum'
# keeps the cells as totals instead of the default mean. Date/type
# combinations with no rides are left as NaN.
fare_pivot_table = fare_by_date_df.pivot_table(
    index='date',
    columns='type',
    values='fare',
    aggfunc='sum',
)
fare_pivot_table

In [None]:
# 5. Create a new DataFrame from the pivot table DataFrame using loc on the given dates, '2019-01-01':'2019-04-29'.
# Name the two bounds first, then hand them to a label-based row slice
# (loc slices include both end labels).
first_date, last_date = '2019-01-01', '2019-04-29'
date_range_df = fare_pivot_table.loc[first_date:last_date]
date_range_df

In [None]:
# 6. Set the "date" index to datetime datatype. This is necessary to use the resample() method in Step 8.
# Parse the index labels once, then attach the parsed labels as the row axis.
parsed_dates = pd.to_datetime(date_range_df.index)
date_range_df = date_range_df.set_axis(parsed_dates, axis=0)
date_range_df


In [None]:
# 7. Check that the datatype for the index is datetime using df.info()
# The summary printed by info() reports the index type along with the
# per-column non-null counts and dtypes.
date_range_df.info()

In [None]:
# 8. Create a new DataFrame using the "resample()" function by week 'W' and get the sum of the fares for each week.
# 'W' is the weekly frequency anchored on Sunday, so each row is labelled with
# the Sunday that closes its week; sum() skips the NaN cells left by the pivot.
weekly_fare_df = date_range_df.resample('W').sum()

weekly_fare_df

In [None]:
# 9. Using the object-oriented interface method, plot the resample DataFrame using the df.plot() function. 

# Use the graph style fivethirtyeight. It is set before the figure is created
# so the style applies to the new axes; plt.style is the matplotlib.style module.
plt.style.use('fivethirtyeight')

# Draw onto an explicit Figure/Axes pair. Only the first three columns are
# plotted, selected by position, against the datetime index; the source frame
# is left untouched (no helper x-axis column is added to it).
fig, ax = plt.subplots()
weekly_fare_df.iloc[:, :3].plot(kind='line', ax=ax)

ax.set_title("Total Fare by City Type")
ax.set_ylabel("Fare ($USD)")
ax.set_xlabel("")
# The labels are hard-coded, which assumes the three plotted columns are ordered
# Rural, Suburban, Urban - TODO confirm against the weekly table shown earlier.
ax.legend(["Rural", "Suburban", "Urban"], title="type", loc="center")

# tight_layout has to be called (with parentheses) for the adjustment to run.
fig.tight_layout()

# No file extension is given, so matplotlib falls back to its default save format.
fig.savefig("Analysis/PyBer_fare_summary", bbox_inches="tight")
plt.show()