
## Load the CSV Files


#####  Import the Pandas and Matplotlib libraries with the Pyplot module

In [1]:
# Add Matplotlib inline magic command

%matplotlib inline
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Declare variables that connect to the CSV files in the Resources folder
# Files to load

city_data_to_load = "Resources/city_data.csv"
ride_data_to_load = "Resources/ride_data.csv"



## Read each CSV file in Pandas.

In [3]:
# Read the city data file and store it in a pandas DataFrame.
city_data_df = pd.read_csv(city_data_to_load)
city_data_df.head(10)

Unnamed: 0,city,driver_count,type
0,Richardfort,38,Urban
1,Williamsstad,59,Urban
2,Port Angela,67,Urban
3,Rodneyfort,34,Urban
4,West Robert,39,Urban
5,West Anthony,70,Urban
6,West Angela,48,Urban
7,Martinezhaven,25,Urban
8,Karenberg,22,Urban
9,Barajasview,26,Urban


In [4]:
# Read the ride data file and store it in a pandas DataFrame.
ride_data_df = pd.read_csv(ride_data_to_load)
ride_data_df.head(10)

Unnamed: 0,city,date,fare,ride_id
0,Lake Jonathanshire,1/14/2018 10:14,13.83,5739410000000.0
1,South Michelleport,3/4/2018 18:24,30.24,2343910000000.0
2,Port Samanthamouth,2/24/2018 4:29,33.44,2005070000000.0
3,Rodneyfort,2/10/2018 23:22,23.44,5149250000000.0
4,South Jack,3/6/2018 4:28,34.58,3908450000000.0
5,South Latoya,3/11/2018 12:26,9.52,1995000000000.0
6,New Paulville,2/27/2018 11:17,43.25,793208000000.0
7,Simpsonburgh,4/26/2018 0:43,35.98,111954000000.0
8,South Karenland,1/8/2018 3:28,35.09,7995620000000.0
9,North Jasmine,3/9/2018 6:26,42.81,5327640000000.0




##  Explore the Data in Pandas before Merge



### Inspect the City Data DataFrame

In [5]:
# Get the columns and the rows that are not null.
# To get the name of each column and the number of rows that are not null, we can use the df.count() method.

city_data_df.count()

city            120
driver_count    120
type            120
dtype: int64

In [6]:
# to make sure there are no null values: Get the columns and the rows that are null.
city_data_df.isnull().sum()

city            0
driver_count    0
type            0
dtype: int64

In [7]:
# Get the data types of each column.
# To get the data types of each column, we use the dtypes on the DataFrame.

city_data_df.dtypes

city            object
driver_count     int64
type            object
dtype: object

In [8]:
# How many data points are there for each type of city. Use the sum() method on the city_data_df 
# Use the unique() method on a specific column,to return an array, or list, of all the unique values of that column

# Get the unique values of the column type of city.
city_data_df["type"].unique()

array(['Urban', 'Suburban', 'Rural'], dtype=object)

In [14]:
# To get the total_number of data points for the cities,eg. Urban:
# Use the sum() method on the city_data_df for the type column where the condition equals either Urban, Suburban, or Rural.

# Get the number of data points from the Urban cities.
sum(city_data_df["type"]=="Urban")
sum(city_data_df["type"]=="Suburban")
sum(city_data_df["type"]=="Rural")

18

### Inspect the Ride Data DataFrame

In [15]:
# Get the columns and the rows that are not null.
ride_data_df.count()

city       2375
date       2375
fare       2375
ride_id    2375
dtype: int64

In [16]:
# Get the columns and the rows that are not null.
ride_data_df.isnull().sum()

city       0
date       0
fare       0
ride_id    0
dtype: int64

In [17]:
# Get the data types of each column.
ride_data_df.dtypes

city        object
date        object
fare       float64
ride_id    float64
dtype: object



## Merge DataFrames

In [18]:
# Merge on a column with the same data, and the same column name, in both DataFrames.
# Merge Syntax: new_df = pd.merge(leftdf, rightdf, on=["column_leftdf", "column_rightdf"])

# Combine the data into a single dataset
pyber_data_df = pd.merge(ride_data_df, city_data_df, how="left", on=["city", "city"])

# Display the DataFrame
pyber_data_df.head()

Unnamed: 0,city,date,fare,ride_id,driver_count,type
0,Lake Jonathanshire,1/14/2018 10:14,13.83,5739410000000.0,5,Urban
1,South Michelleport,3/4/2018 18:24,30.24,2343910000000.0,72,Urban
2,Port Samanthamouth,2/24/2018 4:29,33.44,2005070000000.0,57,Urban
3,Rodneyfort,2/10/2018 23:22,23.44,5149250000000.0,34,Urban
4,South Jack,3/6/2018 4:28,34.58,3908450000000.0,46,Urban
