In [1]:
import pandas as pd
import os
from datetime import datetime

In [2]:
directory = 'Resources/'

# Initialize an empty list to store DataFrames
dfs = []

# Iterate through the CSV files in the directory
for filename in os.listdir(directory):
    if filename.endswith(".csv"):
        file_path = os.path.join(directory, filename)
        df = pd.read_csv(file_path)
        dfs.append(df)

# Combine the DataFrames in the list into one DataFrame
citibike22_df = pd.concat(dfs, ignore_index=True)

In [3]:
citibike22_df.head()

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual
0,3255D3E3F33CDC45,classic_bike,2022-03-18 15:38:17,2022-03-18 15:45:34,Mama Johnson Field - 4 St & Jackson St,HB404,South Waterfront Walkway - Sinatra Dr & 1 St,HB103,40.74314,-74.040041,40.736982,-74.027781,casual
1,17FA5604A37338F9,electric_bike,2022-03-04 16:44:48,2022-03-04 16:50:45,Baldwin at Montgomery,JC020,Grove St PATH,JC005,40.723659,-74.064194,40.719586,-74.043117,member
2,7DEC9ADDB8D6BBE1,electric_bike,2022-03-13 17:44:32,2022-03-13 17:54:44,Baldwin at Montgomery,JC020,Grove St PATH,JC005,40.723659,-74.064194,40.719586,-74.043117,member
3,9D69F74EEF231A2E,classic_bike,2022-03-13 15:33:47,2022-03-13 15:41:22,Baldwin at Montgomery,JC020,Grove St PATH,JC005,40.723659,-74.064194,40.719586,-74.043117,member
4,C84AE4A9D78A6347,classic_bike,2022-03-11 12:21:18,2022-03-11 12:33:24,Baldwin at Montgomery,JC020,Grove St PATH,JC005,40.723659,-74.064194,40.719586,-74.043117,member


In [4]:
column_rename_dict = {
    'ride_id': 'Ride ID',
    'rideable_type': 'Bike Type',
    'started_at': 'Started Ride',
    'ended_at': 'Ended Ride',
    'start_station_name': 'Start Station Name',
    'start_station_id': 'Start Station ID',
    'end_station_name': 'End Station Name',
    'end_station_id': 'End Station ID',
    'start_lat': 'Starting Latitude',
    'start_lng': 'Starting Longitude',
    'end_lat': 'Ending Latitude',
    'end_lng': 'Ending Longitude',
    'member_casual': 'User Type'
    
}


citibike22_df = citibike22_df.rename(columns=column_rename_dict)
citibike22_df.tail()

Unnamed: 0,Ride ID,Bike Type,Started Ride,Ended Ride,Start Station Name,Start Station ID,End Station Name,End Station ID,Starting Latitude,Starting Longitude,Ending Latitude,Ending Longitude,User Type
895480,D438F1622839AC50,classic_bike,2022-12-06 15:43:38,2022-12-06 15:53:57,Dey St,JC065,Riverview Park,JC057,40.737828,-74.067083,40.744319,-74.043991,member
895481,747A63A8E782D171,electric_bike,2022-12-08 08:17:51,2022-12-08 08:23:33,9 St HBLR - Jackson St & 8 St,HB305,City Hall - Washington St & 1 St,HB105,40.747907,-74.038412,40.73736,-74.03097,casual
895482,AE090858CFDE6E82,electric_bike,2022-12-23 14:10:07,2022-12-23 14:14:18,Mama Johnson Field - 4 St & Jackson St,HB404,City Hall - Washington St & 1 St,HB105,40.74314,-74.040041,40.73736,-74.03097,member
895483,B3CC8E70AF4E259C,classic_bike,2022-12-02 04:43:25,2022-12-02 04:46:55,Mama Johnson Field - 4 St & Jackson St,HB404,City Hall - Washington St & 1 St,HB105,40.743135,-74.04008,40.73736,-74.03097,member
895484,176B601F21327350,classic_bike,2022-12-30 14:50:17,2022-12-30 14:55:37,14 St Ferry - 14 St & Shipyard Ln,HB202,City Hall - Washington St & 1 St,HB105,40.752747,-74.024035,40.73736,-74.03097,member


In [5]:
citibike22_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 895485 entries, 0 to 895484
Data columns (total 13 columns):
 #   Column              Non-Null Count   Dtype  
---  ------              --------------   -----  
 0   Ride ID             895485 non-null  object 
 1   Bike Type           895485 non-null  object 
 2   Started Ride        895485 non-null  object 
 3   Ended Ride          895485 non-null  object 
 4   Start Station Name  895475 non-null  object 
 5   Start Station ID    895475 non-null  object 
 6   End Station Name    892281 non-null  object 
 7   End Station ID      892281 non-null  object 
 8   Starting Latitude   895485 non-null  float64
 9   Starting Longitude  895485 non-null  float64
 10  Ending Latitude     893515 non-null  float64
 11  Ending Longitude    893515 non-null  float64
 12  User Type           895485 non-null  object 
dtypes: float64(4), object(9)
memory usage: 88.8+ MB


In [6]:
citibike22_df.dropna()

citibike22_df = citibike22_df.drop_duplicates()

citibike22_df = citibike22_df.reset_index(drop=True)

In [7]:
citibike22_df['Started Ride'] = pd.to_datetime(citibike22_df['Started Ride'])
citibike22_df['Month'] = citibike22_df['Started Ride'].dt.month
citibike22_df = citibike22_df.sort_values(by='Month')
citibike22_df.head()

Unnamed: 0,Ride ID,Bike Type,Started Ride,Ended Ride,Start Station Name,Start Station ID,End Station Name,End Station ID,Starting Latitude,Starting Longitude,Ending Latitude,Ending Longitude,User Type,Month
58672,79E5141AA29B983B,classic_bike,2022-01-18 16:35:19,2022-01-18 16:42:58,7 St & Monroe St,HB304,Hoboken Terminal - Hudson St & Hudson Pl,HB101,40.746413,-74.037977,40.735938,-74.030305,member,1
69519,D98524150BC64CCD,classic_bike,2022-01-19 18:44:11,2022-01-19 18:48:41,8 St & Washington St,HB603,9 St HBLR - Jackson St & 8 St,HB305,40.745984,-74.028199,40.747907,-74.038412,member,1
69518,6F36E358655212DE,classic_bike,2022-01-08 17:35:44,2022-01-08 18:12:35,8 St & Washington St,HB603,Washington St,JC098,40.745984,-74.028199,40.724294,-74.035483,member,1
69517,3CD1733725FEFE63,classic_bike,2022-01-02 19:56:50,2022-01-02 20:13:44,Hamilton Park,JC009,Jersey & 6th St,JC027,40.727596,-74.044247,40.725289,-74.045572,member,1
69516,6023A999BA25C364,classic_bike,2022-01-06 23:07:36,2022-01-06 23:09:15,Hamilton Park,JC009,Jersey & 6th St,JC027,40.727596,-74.044247,40.725289,-74.045572,member,1


In [8]:
# Define a dictionary to map month numbers to month names
month_mapping = {
    1: 'January',
    2: 'February',
    3: 'March',
    4: 'April',
    5: 'May',
    6: 'June',
    7: 'July',
    8: 'August',
    9: 'September',
    10: 'October',
    11: 'November',
    12: 'December'
}

# Use the .replace() method to rename the 'Start Month' values
citibike22_df['Month'] = citibike22_df['Month'].replace(month_mapping)

In [15]:
citibike22_df['Started Ride'] = pd.to_datetime(citibike22_df['Started Ride'])
citibike22_df['Day of Week'] = citibike22_df['Started Ride'].dt.day_name()

In [16]:
citibike22_df['Started Ride'] = pd.to_datetime(citibike22_df['Started Ride'])
citibike22_df['Time of Day'] = citibike22_df['Started Ride'].dt.hour
# Define a function to convert 24-hour time to AM/PM format
def convert_to_ampm(hour):
    if hour < 12:
        return f"{hour} AM"
    elif hour == 12:
        return "12 PM"
    else:
        return f"{hour - 12} PM"

# Apply the function to the 'Time of Day' column
citibike22_df['Time of Day'] = citibike22_df['Time of Day'].apply(convert_to_ampm)
citibike22_df.head()

Unnamed: 0,Ride ID,Bike Type,Started Ride,Ended Ride,Start Station Name,Start Station ID,End Station Name,End Station ID,Starting Latitude,Starting Longitude,Ending Latitude,Ending Longitude,User Type,Month,Time of Day,Day of Week
58672,79E5141AA29B983B,classic_bike,2022-01-18 16:35:19,2022-01-18 16:42:58,7 St & Monroe St,HB304,Hoboken Terminal - Hudson St & Hudson Pl,HB101,40.746413,-74.037977,40.735938,-74.030305,member,January,4 PM,Tuesday
69519,D98524150BC64CCD,classic_bike,2022-01-19 18:44:11,2022-01-19 18:48:41,8 St & Washington St,HB603,9 St HBLR - Jackson St & 8 St,HB305,40.745984,-74.028199,40.747907,-74.038412,member,January,6 PM,Wednesday
69518,6F36E358655212DE,classic_bike,2022-01-08 17:35:44,2022-01-08 18:12:35,8 St & Washington St,HB603,Washington St,JC098,40.745984,-74.028199,40.724294,-74.035483,member,January,5 PM,Saturday
69517,3CD1733725FEFE63,classic_bike,2022-01-02 19:56:50,2022-01-02 20:13:44,Hamilton Park,JC009,Jersey & 6th St,JC027,40.727596,-74.044247,40.725289,-74.045572,member,January,7 PM,Sunday
69516,6023A999BA25C364,classic_bike,2022-01-06 23:07:36,2022-01-06 23:09:15,Hamilton Park,JC009,Jersey & 6th St,JC027,40.727596,-74.044247,40.725289,-74.045572,member,January,11 PM,Thursday


In [17]:
# Specify the name for the CSV file
csv_file_name = 'citibike22_data.csv'

# Save the DataFrame as a CSV file
citibike22_df.to_csv(csv_file_name, index=False)
