# Question 4
Examine the daily demand change pattern using the total number of trips recorded at
each hour of the day (Task 1, Question 3). You may observe different patterns across
stations.

In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
import os

###  The functions below for processing data from 2013-2019

In [1]:
def process_citibike_data(df):
    # Standardize the column names to handle variations
    df.columns = df.columns.str.strip()  # Remove leading/trailing spaces

    # Rename columns with variations to a consistent name
    if 'Start Time' in df.columns or 'start Time' in df.columns:
        df.rename(columns={'Start Time': 'starttime', 'start Time': 'starttime'}, inplace=True)
    
    if 'Gender' in df.columns:
        df.rename(columns={'Gender': 'gender'}, inplace=True)
    
    if 'User Type' in df.columns:
        df.rename(columns={'User Type': 'usertype'}, inplace=True)

    # Rename other columns with variations
    if 'Bike ID' in df.columns:
        df.rename(columns={'Bike ID': 'bikeid'}, inplace=True)
    if 'End Station ID' in df.columns:
        df.rename(columns={'End Station ID': 'end station id'}, inplace=True)
    if 'End Station Name' in df.columns:
        df.rename(columns={'End Station Name': 'end station name'}, inplace=True)
    if 'Start Station Name' in df.columns:
        df.rename(columns={'Start Station Name': 'start station id'}, inplace=True)
    if 'Trip Duration' in df.columns:
        df.rename(columns={'Trip Duration': 'tripduration'}, inplace=True)

    # Convert 'starttime' column to datetime
    df['starttime'] = pd.to_datetime(df['starttime'])

    # One-hot encode 'gender' and 'usertype' columns
    df = pd.get_dummies(df, columns=['gender', 'usertype'], prefix=['gender', 'usertype'])

    # Set starttime as index
    df.set_index('starttime', inplace=True)

    # Resample hourly and aggregate
    hourly_data = df.resample('H').agg({
        'tripduration': 'mean',                       # Average trip duration per hour
        'bikeid': 'count',                            # Total trips per hour
        'start station id': 'nunique',                # Unique start stations per hour
        'end station id': 'nunique',                  # Unique end stations per hour
        'end station name': 'nunique',                # Unique end station names per hour
        'gender_0': 'sum',                            # Total count of gender 0
        'gender_1': 'sum',                            # Total count of gender 1
        'gender_2': 'sum',                            # Total count of gender 2
        'usertype_Customer': 'sum',                   # Total count of Customer
        'usertype_Subscriber': 'sum',                 # Total count of Subscriber
    }).rename(columns={
        'bikeid': 'total_trips', 
        'tripduration': 'avg_tripduration', 
        'start station id': 'unique_start_stations', 
        'end station id': 'unique_end_stations',
        'end station name': 'unique_end_station_names'
    })

    return hourly_data

def process_citibike_trip_data_for_year_old(year):
    # Initialize an empty list to store the processed hourly data for the year
    all_hourly_data = []

    # List of months to process (from 01 to 12)
    months = [f"{month:02d}" for month in range(1, 13)]
    
    # Process each month for the given year
    for month in months:
        # Loop through each month's files (in case there are multiple for each month)
        for i in range(1, 4):  # Adjust the range based on the number of files per month (e.g., 1 to 3)
            try:
                # Construct the file name for each file
                file_path = f"../../Dataset/{year}{month}-citibike-tripdata_{i}.csv"
                
                # Read the CSV file
                df = pd.read_csv(file_path)
                
                # Process the data
                hourly_data = process_citibike_data(df)  # No need to pass year, it's not needed here
                
                # Append the result to the list
                all_hourly_data.append(hourly_data)
            except FileNotFoundError:
                print(f"File not found: {file_path}")
            except KeyError as e:
                print(f"KeyError: {e} in file {file_path}")
            except Exception as e:
                print(f"Error: {e} in file {file_path}")

    # Concatenate all the hourly data DataFrames into one
    final_hourly_data = pd.concat(all_hourly_data, axis=0)
    
    # Resample hourly and aggregate by the hour for the given year
    final_hourly_data = final_hourly_data.groupby(final_hourly_data.index).agg({
        'avg_tripduration': 'mean',                       # Average trip duration per hour
        'total_trips': 'sum',                              # Total count of trips per hour (sum)
        'unique_start_stations': 'sum',                    # Sum of unique start stations per hour
        'unique_end_stations': 'sum',                      # Sum of unique end stations per hour
        'usertype_Customer': 'sum',                        # Total count of Customer per hour
        'usertype_Subscriber': 'sum',                      # Total count of Subscriber per hour
    })

    return final_hourly_data


### Load the trip data from 2013 to 2023 (from 2013 to 2019) by passing them in the function above and concatinating them

In [8]:
final_daily_data_2013 = process_citibike_trip_data_for_year_old(2013)
final_daily_data_2014 = process_citibike_trip_data_for_year_old(2014)
final_daily_data_2015 = process_citibike_trip_data_for_year_old(2015)
final_daily_data_2016 = process_citibike_trip_data_for_year_old(2016)
final_daily_data_2017 = process_citibike_trip_data_for_year_old(2017)
final_daily_data_2018 = process_citibike_trip_data_for_year_old(2018)
final_daily_data_2019 = process_citibike_trip_data_for_year_old(2019)

# Combine all the yearly data into a single DataFrame
final_daily_data_all_years = pd.concat([
    final_daily_data_2013,
    final_daily_data_2014,
    final_daily_data_2015,
    final_daily_data_2016,
    final_daily_data_2017,
    final_daily_data_2018,
    final_daily_data_2019
], axis=0)

File not found: ../../Dataset/201301-citibike-tripdata_1.csv
File not found: ../../Dataset/201301-citibike-tripdata_2.csv
File not found: ../../Dataset/201301-citibike-tripdata_3.csv
File not found: ../../Dataset/201302-citibike-tripdata_1.csv
File not found: ../../Dataset/201302-citibike-tripdata_2.csv
File not found: ../../Dataset/201302-citibike-tripdata_3.csv
File not found: ../../Dataset/201303-citibike-tripdata_1.csv
File not found: ../../Dataset/201303-citibike-tripdata_2.csv
File not found: ../../Dataset/201303-citibike-tripdata_3.csv
File not found: ../../Dataset/201304-citibike-tripdata_1.csv
File not found: ../../Dataset/201304-citibike-tripdata_2.csv
File not found: ../../Dataset/201304-citibike-tripdata_3.csv
File not found: ../../Dataset/201305-citibike-tripdata_1.csv
File not found: ../../Dataset/201305-citibike-tripdata_2.csv
File not found: ../../Dataset/201305-citibike-tripdata_3.csv
File not found: ../../Dataset/201306-citibike-tripdata_1.csv
File not found: ../../Da

ValueError: No objects to concatenate

In [5]:
final_daily_data_all_years

Unnamed: 0_level_0,avg_tripduration,total_trips,unique_start_stations,unique_end_stations,usertype_Customer,usertype_Subscriber
starttime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-06-01 00:00:00,3363.506579,152,101,105,36,116
2013-06-01 01:00:00,1591.509804,102,74,71,29,73
2013-06-01 02:00:00,2167.194030,67,52,55,27,40
2013-06-01 03:00:00,2669.121951,41,32,31,19,22
2013-06-01 04:00:00,1396.687500,16,15,13,3,13
...,...,...,...,...,...,...
2019-12-31 19:00:00,1101.596072,1171,481,487,155,1016
2019-12-31 20:00:00,973.432545,719,365,382,98,621
2019-12-31 21:00:00,3862.616216,555,325,311,86,469
2019-12-31 22:00:00,885.276786,560,302,296,126,434


### This function is to preprocess for data from 2020 to 2022 as they have different table structures

In [27]:
def process_citibike_trip_data_new(df, start_date):
    # Ensure 'started_at' is in datetime format
    df['started_at'] = pd.to_datetime(df['started_at'], errors='coerce')

    # Filter the DataFrame for entries after the specified start date
    df = df[df['started_at'] > pd.to_datetime(start_date)]

    # Rename columns if they exist
    if 'ride_id' in df.columns:
        df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
    if 'member_casual' in df.columns:
        df.rename(columns={'member_casual': 'usertype'}, inplace=True)
    if 'usertype' in df.columns:
        df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
    if 'start_station_id' in df.columns:
        df.rename(columns={'start_station_id': 'start station id'}, inplace=True)
    if 'end_station_id' in df.columns:
        df.rename(columns={'end_station_id': 'end station id'}, inplace=True)
    if 'started_at' in df.columns:
        df.rename(columns={'started_at': 'starttime'}, inplace=True)

    # Convert 'starttime' and 'ended_at' to datetime if they exist
    if 'starttime' in df.columns:
        df['starttime'] = pd.to_datetime(df['starttime'], errors='coerce')
    if 'ended_at' in df.columns:
        df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')

    # Calculate 'tripduration' if both 'starttime' and 'ended_at' exist
    if 'starttime' in df.columns and 'ended_at' in df.columns:
        df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()

    # One-hot encode 'usertype' if it exists
    if 'usertype' in df.columns:
        df = pd.get_dummies(df, columns=['usertype'], prefix=['usertype'])

    # Set 'starttime' as index
    df.set_index('starttime', inplace=True)

    # Resample hourly and aggregate
    hourly_data = df.resample('H').agg({
        'bikeid': 'count',                            # Total trips per hour
    }).rename(columns={
        'bikeid': 'total_trips',    
    })

    return hourly_data

def process_citibike_trip_data_for_year(year):
    # List of months to process (from 01 to 12)
    months = [f"{month:02d}" for month in range(1, 13)]
    
    # Initialize an empty list to store the processed hourly data
    all_hourly_data = []

    for month in months:
        # Loop through each month's files (in case there are multiple for each month)
        for i in range(1, 4):  # Adjust the range based on the number of files per month (e.g., 1 to 3)
            try:
                # Construct the file name for each file
                file_path = f"Datasets/{year}{month}-citibike-tripdata_{i}.csv"
                
                # Read the CSV file
                df = pd.read_csv(file_path)
                
                # Process the data
                hourly_data = process_citibike_trip_data_new(df, f"{year}-{month}-01")
                
                # Append the result to the list
                all_hourly_data.append(hourly_data)
            except FileNotFoundError:
                print(f"File not found: {file_path}")
            except KeyError as e:
                print(f"KeyError: {e} in file {file_path}")
            except Exception as e:
                print(f"Error: {e} in file {file_path}")

    # Concatenate all the hourly data DataFrames into one
    final_hourly_data = pd.concat(all_hourly_data)
    
    return final_hourly_data


### Load the trip data from 2020 to 2023 (from 2020 to 2022) by passing them in the function above and concatinating them

In [31]:
final_daily_data_2020 = process_citibike_trip_data_for_year(2020)
final_daily_data_2021 = process_citibike_trip_data_for_year(2021)
final_daily_data_2022 = process_citibike_trip_data_for_year(2022)
final_daily_data_2023 = process_citibike_trip_data_for_year(2023)

# Combine all the yearly data into a single DataFrame
final_daily_data_all_years_new = pd.concat([
    final_daily_data_2020,
    final_daily_data_2021,
    final_daily_data_2022,
    final_daily_data_2023,
], axis=0)

final_daily_data_all_years_new


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202001-citibike-tripdata_3.csv
File not found: Datasets/202002-citibike-tripdata_1.csv
File not found: Datasets/202002-citibike-tripdata_2.csv
File not found: Datasets/202002-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202003-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202004-citibike-tripdata_2.csv
File not found: Datasets/202004-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202005-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202006-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202011-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202012-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202101-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202102-citibike-tripdata_2.csv
File not found: Datasets/202102-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202103-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,co

File not found: Datasets/202109-citibike-tripdata_1.csv
File not found: Datasets/202109-citibike-tripdata_2.csv
File not found: Datasets/202109-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202112-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202201-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202202-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202203-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['starttime'] = pd.to_datetime(df['starttime'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds(

File not found: Datasets/202212-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202301-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

File not found: Datasets/202302-citibike-tripdata_3.csv


  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'member_casual': 'usertype'}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['usertype'] = df['usertype'].map({'member': 'Subscriber', 'casual': 'Customer'})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in t

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['ended_at'] = pd.to_datetime(df['ended_at'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['tripduration'] = (df['ended_at'] - df['starttime']).dt.total_seconds()
  df = pd.read_csv(file_path)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.rename(columns={'ride_id': 'bikeid'}, inplace=True)
A value is trying to be set on a copy

Unnamed: 0_level_0,total_trips
starttime,Unnamed: 1_level_1
2020-01-01 00:00:00,413
2020-01-01 01:00:00,496
2020-01-01 02:00:00,380
2020-01-01 03:00:00,195
2020-01-01 04:00:00,102
...,...
2023-12-31 19:00:00,277
2023-12-31 20:00:00,225
2023-12-31 21:00:00,215
2023-12-31 22:00:00,200


### Combining the two dataset to get final datset

In [49]:
import pandas as pd

# Concatenate the 'total_trips' columns, preserving the index
combined_total_trips = pd.concat(
    [final_daily_data_all_years['total_trips'], final_daily_data_all_years_new['total_trips']],
    axis=0
)

# View the combined Series
combined_total_trips

starttime
2013-06-01 00:00:00    152
2013-06-01 01:00:00    102
2013-06-01 02:00:00     67
2013-06-01 03:00:00     41
2013-06-01 04:00:00     16
                      ... 
2023-12-31 19:00:00    277
2023-12-31 20:00:00    225
2023-12-31 21:00:00    215
2023-12-31 22:00:00    200
2023-12-31 23:00:00    188
Name: total_trips, Length: 144912, dtype: int64

In [50]:
# Reset index
combined_total_trips = combined_total_trips.reset_index()

# Group by 'starttime' and sum
combined_total_trips = combined_total_trips.groupby('starttime').sum()

# Reset index again
combined_total_trips = combined_total_trips.reset_index()

# # Display the DataFrame
combined_total_trips = combined_total_trips[['starttime','total_trips']]

In [51]:
combined_total_trips

Unnamed: 0,starttime,total_trips
0,2013-06-01 00:00:00,152
1,2013-06-01 01:00:00,102
2,2013-06-01 02:00:00,67
3,2013-06-01 03:00:00,41
4,2013-06-01 04:00:00,16
...,...,...
91363,2023-12-31 19:00:00,2749
91364,2023-12-31 20:00:00,2579
91365,2023-12-31 21:00:00,2152
91366,2023-12-31 22:00:00,1970


### Exporting

In [52]:
combined_total_trips.to_csv('final_clustering.csv')