# Import all important libraries needed

In [None]:
import pandas as pd                 # For data manipulation
import numpy as np                  # For numerical operations
import matplotlib.pyplot as plt     # For plotting
import seaborn as sns               # For advanced visualizations
import datetime                     # For date/time manipulation

# Preparing our data for analysis

In [None]:
# Read the Q1 2019 and Q1 2020 trip CSVs into separate DataFrames.
# Both paths are relative to the current working directory.
trips_2019_csv = 'datasets/Divvy_Trips_2019_Q1.csv'
trips_2020_csv = 'datasets/Divvy_Trips_2020_Q1.csv'

q1_2019 = pd.read_csv(trips_2019_csv)
q1_2020 = pd.read_csv(trips_2020_csv)

In [None]:
# Preview the first five rows of q1_2019 (column names and sample values)
q1_2019.head(n=5)

In [None]:
# Preview the first five rows of q1_2020 (column names and sample values)
q1_2020.head(n=5)

In [None]:
# Rename the q1_2019 columns to the names used by q1_2020.
# Keys are the 2019 headers; values are the 2020-style names.
# NOTE(review): 'bikeid' is mapped onto 'rideable_type' - confirm the two
# columns really describe the same thing before analysing rideable_type.
columns_2019_to_2020 = {
    'trip_id': 'ride_id',
    'bikeid': 'rideable_type',
    'start_time': 'started_at',
    'end_time': 'ended_at',
    'from_station_name': 'start_station_name',
    'from_station_id': 'start_station_id',
    'to_station_name': 'end_station_name',
    'to_station_id': 'end_station_id',
    'usertype': 'member_casual',
}
q1_2019.rename(columns=columns_2019_to_2020, inplace=True)

In [None]:
# Print q1_2019's column names, non-null counts and dtypes so they can be
# compared with q1_2020 after the rename
q1_2019.info()

In [None]:
# Print q1_2020's column names, non-null counts and dtypes so they can be
# compared with q1_2019
q1_2020.info()

In [None]:
# Cast ride_id and rideable_type to str in a single astype call.
# NOTE(review): presumably done so these columns match q1_2020's dtypes
# before the two tables are combined - confirm against the info() output.
q1_2019 = q1_2019.astype({'ride_id': str, 'rideable_type': str})

In [None]:
# Stack the 2019 rows on top of the 2020 rows into one table, all_trips.
# ignore_index=True replaces both original indexes with a fresh 0..n-1 index.
all_trips = pd.concat(objs=[q1_2019, q1_2020], axis=0, ignore_index=True)

In [None]:
# Remove the columns that are not relevant to the analysis.
# errors='ignore' skips any listed column that is absent from all_trips
# instead of raising a KeyError.
columns_to_drop = [
    'start_lat',
    'start_lng',
    'end_lat',
    'end_lng',
    'birthyear',
    'gender',
    'tripduration',
]
all_trips = all_trips.drop(columns=columns_to_drop, errors='ignore')

In [None]:
# Preview the first five rows of the combined all_trips table
all_trips.head(n=5)

In [None]:
# Print all_trips' remaining columns, their non-null counts and dtypes
# after the concat and column drop
all_trips.info()

# Processing our data for analysis

In [None]:
# Discard, in place, every row that has a missing value in at least one column
all_trips.dropna(axis=0, how='any', inplace=True)

In [None]:
# Count the missing values left in each column; every count should now be 0
all_trips.isna().sum()

In [None]:
# Parse both timestamp columns into datetime values so that date/time
# accessors and subtraction work on them.
for timestamp_col in ('started_at', 'ended_at'):
    all_trips[timestamp_col] = pd.to_datetime(all_trips[timestamp_col])

# Show the resulting column dtypes to confirm the conversion
print(all_trips.dtypes)

In [None]:
# Add a day_of_week column holding the weekday name of each ride's start time
ride_starts = all_trips['started_at']
all_trips['day_of_week'] = ride_starts.dt.day_name()

In [None]:
# Add a ride_length column: the elapsed time from started_at to ended_at,
# expressed in seconds
ride_duration = all_trips['ended_at'] - all_trips['started_at']
all_trips['ride_length'] = ride_duration.dt.total_seconds()

In [None]:
# Preview the first five rows of all_trips, including the new columns
all_trips.head(n=5)

In [None]:
# Preview the last five rows of all_trips
all_trips.tail(n=5)

In [None]:
# Relabel the rider types: 'Subscriber' becomes 'member' and 'Customer'
# becomes 'casual'. Any other value is left unchanged.
rider_type_labels = {'Subscriber': 'member', 'Customer': 'casual'}
all_trips['member_casual'] = all_trips['member_casual'].replace(rider_type_labels)

In [None]:
# Count the rows per member_casual label to confirm that only the relabelled
# values ('member' / 'casual') remain
all_trips['member_casual'].value_counts()

In [None]:
# Collect the rides whose ride_length is below zero and display them
is_negative = all_trips['ride_length'].lt(0)
negative_values = all_trips.loc[is_negative]
negative_values

In [None]:
# Build all_trips_v2, the table used for further analysis, by removing rows
# whose start station is 'HQ QR' or whose ride_length is negative.
# .copy() makes all_trips_v2 an independent DataFrame, so adding or modifying
# columns on it later does not raise pandas' SettingWithCopyWarning.
rows_to_remove = (all_trips['start_station_name'] == 'HQ QR') | (all_trips['ride_length'] < 0)
all_trips_v2 = all_trips[~rows_to_remove].copy()

In [None]:
# Preview the first five rows of the cleaned all_trips_v2 table
all_trips_v2.head(n=5)

In [None]:
# Preview the last five rows of the cleaned all_trips_v2 table
all_trips_v2.tail(n=5)

# Conduct Descriptive Analysis 

In [None]:
# Summarise ride_length (in seconds) over all rides in all_trips_v2
ride_lengths = all_trips_v2['ride_length']

mean_ride_length = ride_lengths.mean()
median_ride_length = ride_lengths.median()
# mode() returns a Series because several values can tie for most frequent
mode_ride_length = ride_lengths.mode()
max_ride_length = ride_lengths.max()
min_ride_length = ride_lengths.min()

# Report each statistic on its own line
print(f"Mean: {mean_ride_length}")
print(f"Median: {median_ride_length}")
print(f"Mode: {mode_ride_length.values}")
print(f"Max: {max_ride_length}")
print(f"Min: {min_ride_length}")

In [None]:
# Compare mean, median, mode, max and min of ride_length between member types.
# The grouping is built once and reused for every statistic.
ride_length_by_type = all_trips_v2.groupby('member_casual')['ride_length']

print(ride_length_by_type.mean())
print(ride_length_by_type.median())
# Mode per member type. A group can have several equally frequent values,
# so the result is indexed by (member type, position within that group's modes).
print(ride_length_by_type.apply(pd.Series.mode))
print(ride_length_by_type.max())
print(ride_length_by_type.min())
