# Pyber Challenge

### 4.3 Loading and Reading CSV files

In [1]:
# Add Matplotlib inline magic command
%matplotlib inline
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd

# File to Load 
city_data_to_load = "Resources/city_data.csv"
ride_data_to_load = "Resources/ride_data.csv"

# Read the City and Ride Data
city_data_df = pd.read_csv(city_data_to_load)
ride_data_df = pd.read_csv(ride_data_to_load)

### Merge the DataFrames

In [13]:
# Combine the data into a single dataset
pyber_data_df = pd.merge(ride_data_df, city_data_df, how="left", on=["city", "city"])

# Display the data table for preview
pyber_data_df.tail(100)

Unnamed: 0,city,date,fare,ride_id,driver_count,type
2275,North Jaime,2019-03-06 09:09:23,44.17,1152195873170,1,Rural
2276,Penaborough,2019-02-24 00:44:00,21.89,2069309881916,6,Rural
2277,North Jaime,2019-03-12 13:05:56,23.21,5987447089759,1,Rural
2278,Lake Latoyabury,2019-04-16 21:12:48,24.10,4896030744313,2,Rural
2279,South Jennifer,2019-03-07 19:10:13,48.79,9730627367446,7,Rural
...,...,...,...,...,...,...
2370,Michaelberg,2019-04-29 17:04:39,13.38,8550365057598,6,Rural
2371,Lake Latoyabury,2019-01-30 00:05:47,20.76,9018727594352,2,Rural
2372,North Jaime,2019-02-10 21:03:50,11.11,2781339863778,1,Rural
2373,West Heather,2019-05-07 19:22:15,44.94,4256853490277,4,Rural


In [4]:
#Check to see if there is any null data. If there is it needs to be cleaned
pyber_data_df.isnull().sum() 

city            0
date            0
fare            0
ride_id         0
driver_count    0
type            0
dtype: int64

In [5]:
#check to see dataframe types

pyber_data_df.dtypes

city             object
date             object
fare            float64
ride_id           int64
driver_count      int64
type             object
dtype: object

In [10]:
#check the unique values in the city and type objects to understand data better

# print(f'City: {pyber_data_df['city'].unique()}')

cities = pyber_data_df['city'].unique()
print(f'Cities: {cities}')

types = pyber_data_df['type'].unique()
print(f'Types: {types}')


Cities: ['Lake Jonathanshire' 'South Michelleport' 'Port Samanthamouth'
 'Rodneyfort' 'South Jack' 'South Latoya' 'New Paulville' 'Simpsonburgh'
 'South Karenland' 'North Jasmine' 'New Kimberlyborough' 'West Angela'
 'Roberthaven' 'North Jason' 'Williamsview' 'Leahton' 'West Anthony'
 'New Paulton' 'West Patrickchester' 'Deanville' 'West Josephberg'
 'West Samuelburgh' 'West Heidi' 'Loganberg' 'Huntermouth' 'Grahamburgh'
 'Port Frank' 'East Kaylahaven' 'West Robert' 'North Markport'
 'Richardfort' 'Reynoldsfurt' 'Justinberg' 'Martinezhaven' 'Jerryton'
 'Lake Scottton' 'South Evanton' 'Rogerston' 'West Ericstad' 'Hurleymouth'
 'Joneschester' 'Port Angela' 'Barajasview' 'West Christopherberg'
 'New Jacobville' 'Port David' 'Valentineton' 'Carriemouth' 'Williamsstad'
 'Royland' 'Karenberg' 'Christopherfurt' 'Liumouth' 'Johnton' 'Erikaland'
 'Raymondhaven' 'Karenside' 'Lake Danielberg' 'South Phillip'
 'North Madeline' 'Port Johnbury' 'Robertport' 'Pattyland' 'Amandaburgh'
 'North Barbara'

## Deliverable 1: Get a Summary DataFrame 

In [3]:
#  1. Get the total rides for each city type

# a. create city group 
city_group = pyber_data_df.groupby("city")

#b. count number of rides by groups created for city 
count_ride_city_group = city_group["city"].count()

count_ride_city_group

city
Amandaburgh         18
Barajasview         22
Barronchester       16
Bethanyland         18
Bradshawfurt        10
                    ..
West Robert         31
West Samuelburgh    25
Williamsonville     14
Williamsstad        23
Williamsview        20
Name: city, Length: 120, dtype: int64

In [4]:
# 2. Get the total drivers for each city type


In [5]:
#  3. Get the total amount of fares for each city type


In [6]:
#  4. Get the average fare per ride for each city type. 


In [7]:
# 5. Get the average fare per driver for each city type. 


In [8]:
#  6. Create a PyBer summary DataFrame. 


In [9]:
#  7. Cleaning up the DataFrame. Delete the index name
pyber_summary_df.index.name = None

In [10]:
#  8. Format the columns.


## Deliverable 2.  Create a multiple line plot that shows the total weekly of the fares for each type of city.

In [11]:
# 1. Read the merged DataFrame


In [12]:
# 2. Using groupby() to create a new DataFrame showing the sum of the fares 
#  for each date where the indices are the city type and date.


In [13]:
# 3. Reset the index on the DataFrame you created in #1. This is needed to use the 'pivot()' function.
# df = df.reset_index()


In [14]:
# 4. Create a pivot table with the 'date' as the index, the columns ='type', and values='fare' 
# to get the total fares for each type of city by the date. 


In [15]:
# 5. Create a new DataFrame from the pivot table DataFrame using loc on the given dates, '2019-01-01':'2019-04-29'.



In [16]:
# 6. Set the "date" index to datetime datatype. This is necessary to use the resample() method in Step 8.
# df.index = pd.to_datetime(df.index)

In [17]:
# 7. Check that the datatype for the index is datetime using df.info()


In [18]:
# 8. Create a new DataFrame using the "resample()" function by week 'W' and get the sum of the fares for each week.


In [19]:
# 8. Using the object-oriented interface method, plot the resample DataFrame using the df.plot() function. 

# Import the style from Matplotlib.
from matplotlib import style
# Use the graph style fivethirtyeight.
style.use('fivethirtyeight')

