The number of bike-sharing trips is a classic example of a time series, as it reflects the
demand levels at various stations over time. Leveraging historical trip data allows us to
train models and forecast future demand.
# Question 1: Load the trip data from 2013 to 2023

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import os

### The functions below for processing data from 2013-2019

In [2]:
def process_citibike_data(df):
    # Standardize the column names to handle variations
    df.columns = df.columns.str.strip()  # Remove leading/trailing spaces

    # Rename columns with variations to a consistent name
    if 'Start Time' in df.columns or 'start Time' in df.columns:
        df.rename(columns={'Start Time': 'starttime', 'start Time': 'starttime'}, inplace=True)
    
    if 'Gender' in df.columns:
        df.rename(columns={'Gender': 'gender'}, inplace=True)
    
    if 'User Type' in df.columns:
        df.rename(columns={'User Type': 'usertype'}, inplace=True)

    # Rename other columns with variations
    if 'Bike ID' in df.columns:
        df.rename(columns={'Bike ID': 'bikeid'}, inplace=True)
    if 'End Station ID' in df.columns:
        df.rename(columns={'End Station ID': 'end station id'}, inplace=True)
    if 'End Station Name' in df.columns:
        df.rename(columns={'End Station Name': 'end station name'}, inplace=True)
    if 'Start Station Name' in df.columns:
        df.rename(columns={'Start Station Name': 'start station id'}, inplace=True)
    if 'Trip Duration' in df.columns:
        df.rename(columns={'Trip Duration': 'tripduration'}, inplace=True)

    # Convert 'starttime' column to datetime
    df['starttime'] = pd.to_datetime(df['starttime'])

    # One-hot encode 'gender' and 'usertype' columns
    df = pd.get_dummies(df, columns=['gender', 'usertype'], prefix=['gender', 'usertype'])

    # Set starttime as index and resample daily
    df.set_index('starttime', inplace=True)

    # Resample daily and aggregate
    daily_data = df.resample('D').agg({
        'tripduration': 'mean',                       # Average trip duration per day
        'bikeid': 'count',                            # Total trips per day
        'start station id': 'nunique',                # Unique start stations per day
        'end station id': 'nunique',                  # Unique end stations per day
        'end station name': 'nunique',                # Unique end station names per day
        'gender_0': 'sum',                            # Total count of gender 0
        'gender_1': 'sum',                            # Total count of gender 1
        'gender_2': 'sum',                            # Total count of gender 2
        'usertype_Customer': 'sum',                   # Total count of Customer
        'usertype_Subscriber': 'sum',                 # Total count of Subscriber
    }).rename(columns={
        'bikeid': 'total_trips', 
        'tripduration': 'avg_tripduration', 
        'start station id': 'unique_start_stations', 
        'end station id': 'unique_end_stations',
        'end station name': 'unique_end_station_names'
    })

    return daily_data

def process_citibike_trip_data_for_year_old(year):
    # Initialize an empty list to store the processed daily data for the year
    all_daily_data = []

    # List of months to process (from 01 to 12)
    months = [f"{month:02d}" for month in range(1, 13)]
    
    # Process each month for the given year
    for month in months:
        # Loop through each month's files (in case there are multiple for each month)
        for i in range(1, 4):  # Adjust the range based on the number of files per month (e.g., 1 to 3)
            try:
                # Construct the file name for each file
                file_path = f"Dataset/{year}{month}-citibike-tripdata_{i}.csv"
                
                # Read the CSV file
                df = pd.read_csv(file_path)
                
                # Process the data
                daily_data = process_citibike_data(df)  # No need to pass year, it's not needed here
                
                # Append the result to the list
                all_daily_data.append(daily_data)
            except FileNotFoundError:
                print(f"File not found: {file_path}")
            except KeyError as e:
                print(f"KeyError: {e} in file {file_path}")
            except Exception as e:
                print(f"Error: {e} in file {file_path}")

    # Concatenate all the daily data DataFrames into one
    final_daily_data = pd.concat(all_daily_data, axis=0)
    
    # Resample daily and aggregate by the date for the given year
    final_daily_data = final_daily_data.groupby(final_daily_data.index.date).agg({
        'avg_tripduration': 'mean',                       # Average trip duration per day
        'total_trips': 'sum',                              # Total count of trips per day (sum)
        'unique_start_stations': 'sum',                    # Sum of unique start stations per day
        'unique_end_stations': 'sum',                      # Sum of unique end stations per day
        'usertype_Customer': 'sum',                        # Total count of Customer per day
        'usertype_Subscriber': 'sum',                      # Total count of Subscriber per day
    })

    return final_daily_data


### Load the trip data from 2013 to 2023 (from 2013 to 2019) by passing them in the function above and concatinating them

In [10]:
final_daily_data_2013 = process_citibike_trip_data_for_year_old(2013)
final_daily_data_2014 = process_citibike_trip_data_for_year_old(2014)
final_daily_data_2015 = process_citibike_trip_data_for_year_old(2015)
final_daily_data_2016 = process_citibike_trip_data_for_year_old(2016)
final_daily_data_2017 = process_citibike_trip_data_for_year_old(2017)
final_daily_data_2018 = process_citibike_trip_data_for_year_old(2018)
final_daily_data_2019 = process_citibike_trip_data_for_year_old(2019)

# Combine all the yearly data into a single DataFrame
final_daily_data_all_years = pd.concat([
    final_daily_data_2013,
    final_daily_data_2014,
    final_daily_data_2015,
    final_daily_data_2016,
    final_daily_data_2017,
    final_daily_data_2018,
    final_daily_data_2019
], axis=0)

File not found: Dataset/201301-citibike-tripdata_1.csv
File not found: Dataset/201301-citibike-tripdata_2.csv
File not found: Dataset/201301-citibike-tripdata_3.csv
File not found: Dataset/201302-citibike-tripdata_1.csv
File not found: Dataset/201302-citibike-tripdata_2.csv
File not found: Dataset/201302-citibike-tripdata_3.csv
File not found: Dataset/201303-citibike-tripdata_1.csv
File not found: Dataset/201303-citibike-tripdata_2.csv
File not found: Dataset/201303-citibike-tripdata_3.csv
File not found: Dataset/201304-citibike-tripdata_1.csv
File not found: Dataset/201304-citibike-tripdata_2.csv
File not found: Dataset/201304-citibike-tripdata_3.csv
File not found: Dataset/201305-citibike-tripdata_1.csv
File not found: Dataset/201305-citibike-tripdata_2.csv
File not found: Dataset/201305-citibike-tripdata_3.csv
File not found: Dataset/201306-citibike-tripdata_2.csv
File not found: Dataset/201306-citibike-tripdata_3.csv
File not found: Dataset/201307-citibike-tripdata_2.csv
File not f

In [11]:
final_daily_data_all_years

Unnamed: 0,avg_tripduration,total_trips,unique_start_stations,unique_end_stations,usertype_Customer,usertype_Subscriber
2013-06-01,2507.860124,8722,328,327,3583,5139
2013-06-02,2066.265919,15971,332,332,10736,5235
2013-06-03,1938.590945,7598,325,324,2985,4613
2013-06-04,3116.571981,15782,329,330,6954,8828
2013-06-05,2468.716953,15690,331,331,7071,8619
...,...,...,...,...,...,...
2019-12-27,984.353244,31808,870,863,4576,27232
2019-12-28,1045.719074,28520,861,863,6528,21992
2019-12-29,1120.374777,17968,847,848,3469,14499
2019-12-30,716.906327,12138,808,810,750,11388


### This function is to preprocess for data from 2020 to 2022 as they have different table structures

In [4]:
# Different funtion was created as data from 2020 to 2022 had a different dictionary

def process_citibike_trip_data_new(df, start_date):
    # Ensure 'started_at' is in datetime format
    df['started_at'] = pd.to_datetime(df['started_at'], errors='coerce')

    # Filter the DataFrame for entries after the specified start date
    df = df[df['started_at'] > pd.to_datetime(start_date)]

    # Rename columns if they exist
    if 'ride_id' in df.columns:
        df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
    if 'member_casual' in df.columns:
        df.rename(columns={'member_casual': 'usertype'}, inplace=True)
    if 'usertype' in df.columns:
        df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
    if 'start_station_id' in df.columns:
        df.rename(columns={'start_station_id': 'start station id'}, inplace=True)
    if 'end_station_id' in df.columns:
        df.rename(columns={'end_station_id': 'end station id'}, inplace=True)
    if 'started_at' in df.columns:
        df.rename(columns={'started_at': 'starttime'}, inplace=True)

    # Convert 'starttime' and 'ended_at' to datetime if they exist
    if 'starttime' in df.columns:
        df['starttime'] = pd.to_datetime(df['starttime'], errors='coerce')
    if 'ended_at' in df.columns:
        df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')

    # Calculate 'tripduration' if both 'starttime' and 'ended_at' exist
    if 'starttime' in df.columns and 'ended_at' in df.columns:
        df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()

    # One-hot encode 'usertype' if it exists
    if 'usertype' in df.columns:
        df = pd.get_dummies(df, columns=['usertype'], prefix=['usertype'])

    # Set 'starttime' as index
    df.set_index('starttime', inplace=True)

    # Resample daily and aggregate
    daily_data = df.resample('D').agg({
        'tripduration': 'mean',                       # Average trip duration per day
        'bikeid': 'count',                            # Total trips per day
        'start station id': 'nunique',                # Unique start stations per day
        'end station id': 'nunique',                  # Unique end stations per day
        'usertype_Customer': 'sum',                   # Total count of Customer
        'usertype_Subscriber': 'sum',                 # Total count of Subscriber
    }).rename(columns={
        'bikeid': 'total_trips', 
        'tripduration': 'avg_tripduration', 
        'start station id': 'unique_start_stations', 
        'end station id': 'unique_end_stations',
    })

    return daily_data

# Define a function to process all files for a given year
def process_citibike_trip_data_for_year(year):
    # List of months to process (from 01 to 12)
    months = [f"{month:02d}" for month in range(1, 13)]
    
    # Initialize an empty list to store the processed daily data
    all_daily_data = []

    for month in months:
        # Loop through each month's files (in case there are multiple for each month)
        for i in range(1, 4):  # Adjust the range based on the number of files per month (e.g., 1 to 3)
            try:
                # Construct the file name for each file
                file_path = f"Datasets/{year}{month}-citibike-tripdata_{i}.csv"
                
                # Read the CSV file
                df = pd.read_csv(file_path)
                
                # Process the data
                daily_data = process_citibike_trip_data_new(df, f"{year}-{month}-01")
                
                # Append the result to the list
                all_daily_data.append(daily_data)
            except FileNotFoundError:
                print(f"File not found: {file_path}")
            except KeyError as e:
                print(f"KeyError: {e} in file {file_path}")
            except Exception as e:
                print(f"Error: {e} in file {file_path}")

    # Concatenate all the daily data DataFrames into one
    final_daily_data = pd.concat(all_daily_data)
    
    # Resample daily and aggregate by the date
    final_daily_data = final_daily_data.groupby(final_daily_data.index.date).agg({
        'avg_tripduration': 'mean',                       # Average trip duration per day
        'total_trips': 'sum',                              # Total count of trips per day (sum)
        'unique_start_stations': 'sum',                    # Sum of unique start stations per day
        'unique_end_stations': 'sum',                      # Sum of unique end stations per day
        'usertype_Customer': 'sum',                   # Total count of Customer per day
        'usertype_Subscriber': 'sum',                 # Total count of Subscriber per day
    })

    return final_daily_data

### Load the trip data from 2020 to 2023 (from 2020 to 2022) by passing them in the function above

In [6]:
final_daily_data_2020 = process_citibike_trip_data_for_year(2020)
final_daily_data_2021 = process_citibike_trip_data_for_year(2021)
final_daily_data_2022 = process_citibike_trip_data_for_year(2022)

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202001-citibike-tripdata_3.csv
File not found: Datasets/202002-citibike-tripdata_1.csv
File not found: Datasets/202002-citibike-tripdata_2.csv
File not found: Datasets/202002-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202003-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202004-citibike-tripdata_2.csv
File not found: Datasets/202004-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202005-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202006-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202011-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202012-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202101-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202102-citibike-tripdata_2.csv
File not found: Datasets/202102-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202103-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202109-citibike-tripdata_1.csv
File not found: Datasets/202109-citibike-tripdata_2.csv
File not found: Datasets/202109-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202112-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202201-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202202-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202203-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['starttime'] = pd.to_datetime(df['starttime'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds(

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

File not found: Datasets/202212-citibike-tripdata_3.csv


### Concatinating data from 2020 to 2023

In [7]:
# Combine all the yearly data into a single DataFrame
final_daily_data_all_years_new = pd.concat([
    final_daily_data_2020,
    final_daily_data_2021,
    final_daily_data_2022,
], axis=0)

final_daily_data_all_years_new


Unnamed: 0,avg_tripduration,total_trips,unique_start_stations,unique_end_stations,usertype_Customer,usertype_Subscriber
2020-01-01,1064.574958,18170,2594,1222,3169,15001
2020-01-02,844.852008,41625,2916,1278,4332,37293
2020-01-03,792.627478,31927,2855,1259,2512,29415
2020-01-04,847.713359,29495,2883,1276,3393,26102
2020-01-05,866.963623,27647,2845,1262,2881,24766
...,...,...,...,...,...,...
2022-12-27,786.805191,33044,4857,1788,4232,28812
2022-12-28,909.072184,44027,5211,1835,7032,36995
2022-12-29,945.735399,51674,5342,1843,9581,42093
2022-12-30,1052.715608,62298,5494,1873,15021,47277


### Finally concating the two final datasets one from 2013 to 2019 and 2020 to 2022 to form a single dataset

In [None]:
final_daily_data_all_years = pd.read_csv('')

In [12]:
df = pd.concat([
    final_daily_data_all_years,
    final_daily_data_all_years_new], axis=0)
df

Unnamed: 0,avg_tripduration,total_trips,unique_start_stations,unique_end_stations,usertype_Customer,usertype_Subscriber
2013-06-01,2507.860124,8722,328,327,3583,5139
2013-06-02,2066.265919,15971,332,332,10736,5235
2013-06-03,1938.590945,7598,325,324,2985,4613
2013-06-04,3116.571981,15782,329,330,6954,8828
2013-06-05,2468.716953,15690,331,331,7071,8619
...,...,...,...,...,...,...
2022-12-27,786.805191,33044,4857,1788,4232,28812
2022-12-28,909.072184,44027,5211,1835,7032,36995
2022-12-29,945.735399,51674,5342,1843,9581,42093
2022-12-30,1052.715608,62298,5494,1873,15021,47277


### Exporting the final dataset

In [13]:
df.to_csv("final_dataset.csv")

## Preprocessing the 2023 data set so that it can be used as the test dataset

In [14]:
final_daily_data_2023 = process_citibike_trip_data_for_year(2023)
final_daily_data_2023

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202301-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202302-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['starttime'] = pd.to_datetime(df['starttime'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds(

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

Unnamed: 0,avg_tripduration,total_trips,unique_start_stations,unique_end_stations,usertype_Customer,usertype_Subscriber
2023-01-01,1328.998497,50642,5639,1999,15842,34800
2023-01-02,995.238437,58240,5751,2047,13691,44549
2023-01-03,818.404087,51789,5592,2018,5978,45811
2023-01-04,884.513871,74453,5988,2078,10636,63817
2023-01-05,780.155063,71440,5910,2066,8897,62543
...,...,...,...,...,...,...
2023-12-27,734.784758,42288,4829,2478,6288,36000
2023-12-28,781.788595,46808,4914,2507,7493,39315
2023-12-29,874.132830,69036,5221,2579,15058,53978
2023-12-30,838.007221,56099,5023,2546,12301,43798


In [15]:
final_daily_data_2023.to_csv("2023.csv")