In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os

In [2]:
# Folder holding the cleaned per-file CSV exports, relative to the working directory.
cleaned_dir = "cleaned_data"

# os.listdir returns entries in arbitrary order; sorting makes the row order of
# the combined frame reproducible from run to run.
cleaned_files = sorted(os.listdir(cleaned_dir))

# combining data into single frame
# os.path.join builds the path with the platform's separator instead of a
# hard-coded backslash, so the notebook also runs outside Windows.
dataframes = [pd.read_csv(os.path.join(cleaned_dir, file)) for file in cleaned_files]

# ignore_index=True gives the combined frame one unique 0..n-1 row index;
# otherwise every file contributes its own 0-based labels and they repeat.
data = pd.concat(dataframes, ignore_index=True)

In [None]:
# Preview the last five rows of the combined frame.
data.tail(n=5)

In [None]:
# Two stacked panels: the 30 most-used starting stations for each rider type.
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(12, 20))

# (axis, rider type, bar colour, title) for each panel, top to bottom.
station_panels = [
    (ax1, "member", 'b', "Number of Rides by Members' Starting Station"),
    (ax2, "casual", 'g', "Number of Rides by Casuals' Starting Station"),
]

for panel_ax, rider_type, bar_color, panel_title in station_panels:
    # Ride counts per starting station, restricted to this rider type.
    is_rider_type = data["member_casual"] == rider_type
    station_counts = data.loc[is_rider_type, "start_station_name"].value_counts()
    station_counts.nlargest(30).plot(kind='bar', ax=panel_ax, color=bar_color)

    panel_ax.set_title(panel_title)
    panel_ax.set_ylabel('Number of Rides')
    panel_ax.set_xlabel('Starting Station')
    # Re-apply the current tick labels rotated and right-aligned so the
    # station names stay readable.
    panel_ax.set_xticklabels(panel_ax.get_xticklabels(), rotation=45, ha='right')

plt.tight_layout()
plt.show()


In [None]:
# Columns included in the pairwise correlation matrix.
correlation_columns = [
    'start_lat',
    'start_lng',
    'end_lat',
    'end_lng',
    'day_of_week',
    'ride_distance_km',
    'ride_length_seconds',
]
numeric_data = data[correlation_columns]

# Annotated heatmap of the correlations between the selected columns.
correlation_matrix = numeric_data.corr()
sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm");

In [None]:
# Split the rides by rider type.
is_member = data["member_casual"] == "member"
is_casual = data["member_casual"] == "casual"
member_data = data[is_member]
casual_data = data[is_casual]

# One row, two side-by-side panels: members on the left, casuals on the right.
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(12, 6))

rideable_panels = zip(
    axes,
    (member_data, casual_data),
    ('b', 'g'),
    ('Members', 'Casuals'),
)
for panel_ax, rides, bar_color, panel_title in rideable_panels:
    # Bar chart of how often each rideable type occurs for this rider type.
    type_counts = rides["rideable_type"].value_counts()
    type_counts.plot(kind='bar', ax=panel_ax, color=bar_color)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel('Rideable Type')
    panel_ax.set_ylabel('Count')

# Tighten the layout, then render the figure.
plt.tight_layout()
plt.show()

In [None]:
# Ride counts per day of week, one panel per rider type, on a shared y-axis
# so the two groups can be compared directly.
fig, axs = plt.subplots(1, 2, figsize=(12, 6), sharey=True)

# The counts are bound to new names rather than back onto member_data /
# casual_data: overwriting those DataFrames with Series made this cell fail on
# a second run and changed what the names mean for every cell after it.

# Plot for 'member'
member_day_counts = member_data['day_of_week'].value_counts()
axs[0].bar(member_day_counts.index, member_day_counts.values, color='b')
axs[0].set_title('Member')
axs[0].set_xlabel('Day of the Week')
axs[0].set_ylabel('Count')

# Plot for 'casual' (no y-label: the y-axis is shared with the left panel)
casual_day_counts = casual_data['day_of_week'].value_counts()
axs[1].bar(casual_day_counts.index, casual_day_counts.values, color='g')
axs[1].set_title('Casual')
axs[1].set_xlabel('Day of the Week')

# Adjust layout
plt.tight_layout()
plt.show()


In [None]:
# Parse the ride start timestamps (a no-op if the column is already datetime).
data['started_at'] = pd.to_datetime(data['started_at'])

# Number of rides per calendar day; groupby returns the days in sorted order.
counts = data.groupby(data['started_at'].dt.date).size()

plt.figure(figsize=(10, 6))
ax = counts.plot(kind='bar', color='skyblue')

# There is one bar per day, so label only the first bar of each calendar month.
# A fixed step (every 12th bar) repeated the same year-month label several
# times and did not line up with month boundaries.
month_of_bar = [(day.year, day.month) for day in counts.index]
tick_positions = [
    pos for pos, month in enumerate(month_of_bar)
    if pos == 0 or month != month_of_bar[pos - 1]
]
tick_labels = [
    f"{counts.index[pos].year}-{counts.index[pos].month}" for pos in tick_positions
]
plt.xticks(ticks=tick_positions, labels=tick_labels, rotation=45)

plt.title('Rides by Month')
plt.xlabel('Month')
plt.ylabel('Number of Rides')
plt.tight_layout()
plt.show()