## Dependencies

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, mean_absolute_percentage_error
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Load the Uber FOIL records (base number, date, active vehicles, trips).
data = pd.read_csv('Uber-Jan-Feb-FOIL.csv')
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called as a statement; wrapping it in print() emitted a stray "None".
data.info()
print('\n\n', data.head())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 354 entries, 0 to 353
Data columns (total 4 columns):
 #   Column                   Non-Null Count  Dtype 
---  ------                   --------------  ----- 
 0   dispatching_base_number  354 non-null    object
 1   date                     354 non-null    object
 2   active_vehicles          354 non-null    int64 
 3   trips                    354 non-null    int64 
dtypes: int64(2), object(2)
memory usage: 11.2+ KB
None 

   dispatching_base_number      date  active_vehicles  trips
0                  B02512  1/1/2015              190   1132
1                  B02765  1/1/2015              225   1765
2                  B02764  1/1/2015             3427  29421
3                  B02682  1/1/2015              945   7679
4                  B02617  1/1/2015             1228   9537


## Exploratory Data Analysis (EDA)

In [3]:
# Number of rows recorded for each dispatching base.
data.loc[:, 'dispatching_base_number'].value_counts()

dispatching_base_number
B02512    59
B02765    59
B02764    59
B02682    59
B02617    59
B02598    59
Name: count, dtype: int64

In [4]:
# Parse the date strings into datetimes; values that cannot be parsed become NaT.
data['date'] = pd.to_datetime(data['date'], errors='coerce')

# Derive calendar features from the parsed dates (new column -> .dt attribute).
for feature_name, dt_attribute in {'day': 'day', 'month': 'month', 'day_of_week': 'dayofweek'}.items():
    data[feature_name] = getattr(data['date'].dt, dt_attribute)

In [5]:
# Sum trips and active vehicles over all bases for each date, then derive the
# daily utilization rate (trips per active vehicle).
daily_trips = (
    data.groupby('date')[['trips', 'active_vehicles']]
    .sum()
    .reset_index()
)
daily_trips['utilization_rate'] = daily_trips['trips'].div(daily_trips['active_vehicles'])


def _daily_line_figure(frame, y_column, series_label, colour, chart_title):
    """Build a lines+markers figure of ``frame[y_column]`` against ``frame['date']``.

    ``series_label`` is used both as the trace name and as the y-axis title.
    """
    trace = go.Scatter(
        x=frame['date'],
        y=frame[y_column],
        mode='lines+markers',
        name=series_label,
        line=dict(color=colour),
    )
    figure = go.Figure(data=[trace])
    figure.update_layout(
        title=chart_title,
        xaxis_title='Date',
        yaxis_title=series_label,
        hovermode='x',
    )
    return figure


# --- Daily Uber Trip Trends (Total Trips) ---
fig1 = _daily_line_figure(daily_trips, 'trips', 'Total Trips', 'blue', 'Daily Uber Trip Trends')
fig1.show()

# --- Daily Utilization Rate ---
fig8 = _daily_line_figure(daily_trips, 'utilization_rate', 'Utilization Rate', 'red',
                          'Daily Uber Utilization Rate')
fig8.show()



# --- Per Base Utilization Rate (Trips per Active Vehicle) as Pie Chart ---
# Each slice is the mean of that base's per-row trips / active_vehicles ratio.
# NOTE(review): mean rates are not parts of a whole, so slice sizes only show
# relative magnitude — confirm a pie chart is the intended presentation.
data['utilization_rate'] = data['trips'].div(data['active_vehicles'])
base_utilization = (
    data.groupby('dispatching_base_number')['utilization_rate']
    .mean()
    .reset_index()
)
fig2 = px.pie(
    base_utilization,
    values='utilization_rate',
    names='dispatching_base_number',
    title='Per Base Utilization Rate',
)
fig2.show()

# --- Weekly Trip Trends by Base ---
# Tag every row with its ISO week number, then total the trips per (week, base).
data['week'] = data['date'].dt.isocalendar()['week']
weekly_trends = (
    data.groupby(['week', 'dispatching_base_number'])['trips']
    .sum()
    .reset_index()
)
fig3 = px.line(
    weekly_trends,
    x='week',
    y='trips',
    color='dispatching_base_number',
    labels={'week': 'Week Number', 'trips': 'Total Trips'},
    title='Weekly Uber Trip Trends by Base',
)
fig3.update_traces(mode='lines+markers')
fig3.show()

# --- 7-Day Rolling Average of Trips by Base ---
# rolling(window=7) is positional (7 rows, not 7 calendar days), so each base's
# rows must be in chronological order for the window to cover the preceding
# week. Sort by date explicitly instead of relying on the CSV's row order; the
# stable sort leaves already-ordered data (and same-date ties) untouched.
# transform() keeps the original row labels, so the assignment aligns back onto
# `data` without changing its row order.
data['rolling_avg'] = (
    data.sort_values('date', kind='stable')
    .groupby('dispatching_base_number')['trips']
    .transform(lambda x: x.rolling(window=7, min_periods=1).mean())
)
# px.line connects points in row order, so plot from the date-sorted frame.
fig4 = px.line(data.sort_values('date', kind='stable'), x='date', y='rolling_avg',
               color='dispatching_base_number',
               title='7-Day Rolling Average of Uber Trips by Base',
               labels={'rolling_avg': 'Rolling Average Trips'})
fig4.update_traces(mode='lines+markers')
fig4.show()


# --- Trips by Day of the Week, stacked by Base ---
# Aggregate before plotting: passing the raw daily rows to px.bar stacks one
# sliver per row, so hover values show a single day's trips under a
# "Total Trips" label. Summing first gives one segment per (weekday, base)
# whose value really is the total; overall bar heights are unchanged.
# sort=False keeps the bases in order of first appearance in `data`, which is
# the order Plotly Express uses to assign colours.
trips_by_weekday = data.groupby(
    ['day_of_week', 'dispatching_base_number'], sort=False, as_index=False
)['trips'].sum()
fig5 = px.bar(trips_by_weekday, x='day_of_week', y='trips', color='dispatching_base_number',
              title='Uber Trips by Day of the Week (Colored by Base)',
              labels={'day_of_week': 'Day of the Week (0=Monday, 6=Sunday)', 'trips': 'Total Trips'},
              hover_data={'trips': True})
fig5.show()

# Total trips handled by each dispatching base, shown as shares of the whole.
base_trips = (
    data.groupby('dispatching_base_number')['trips']
    .sum()
    .reset_index()
)
fig6 = px.pie(
    base_trips,
    values='trips',
    names='dispatching_base_number',
    hover_data=['trips'],
    title='Uber Trips by Base',
)
fig6.show()


# One point per row of `data`: active vehicles against trips, coloured by base.
scatter_options = dict(
    x='active_vehicles',
    y='trips',
    color='dispatching_base_number',
    hover_data=['dispatching_base_number'],
    labels={'active_vehicles': 'Active Vehicles', 'trips': 'Completed Trips'},
    title='Active Vehicles vs Completed Trips per Base',
)
fig7 = px.scatter(data, **scatter_options)
fig7.show()

In [6]:
import pandas as pd
import plotly.express as px

def _min_max_scale(values):
    """Rescale a numeric Series to the [0, 1] range (min-max scaling).

    A constant series has zero range, and the plain formula would divide
    0 by 0 and turn every point into NaN; such a series is mapped to 0.0
    instead. Missing values stay missing.
    """
    lowest = values.min()
    span = values.max() - lowest
    if span == 0:
        return values * 0.0
    return (values - lowest) / span


# Ensure 'week' column is properly created
data['week'] = data['date'].dt.isocalendar().week

# --- 1. Normalize Weekly Trip Trends by Base ---
weekly_trends = data.groupby(['week', 'dispatching_base_number'])['trips'].sum().reset_index()

# Normalize trips per base (Min-Max Scaling) so bases of very different sizes
# can be compared on one axis.
weekly_trends['normalized_trips'] = (
    weekly_trends.groupby('dispatching_base_number')['trips'].transform(_min_max_scale)
)

# Plot normalized weekly trends
fig1 = px.line(weekly_trends, x='week', y='normalized_trips', color='dispatching_base_number',
               title='Normalized Weekly Uber Trip Trends by Base',
               labels={'week': 'Week Number', 'normalized_trips': 'Normalized Trips'})
fig1.update_traces(mode='lines+markers')
fig1.show()

# --- 2. Normalize 7-Day Rolling Average of Trips by Base ---
# The rolling window is positional (7 rows), so sort by date first rather than
# relying on the CSV's row order. The stable sort leaves already-ordered data
# untouched, and transform() keeps the original row labels so the assignment
# aligns back onto `data`.
data['rolling_avg'] = (
    data.sort_values('date', kind='stable')
    .groupby('dispatching_base_number')['trips']
    .transform(lambda x: x.rolling(window=7, min_periods=1).mean())
)

# Normalize rolling averages per base
data['normalized_rolling_avg'] = (
    data.groupby('dispatching_base_number')['rolling_avg'].transform(_min_max_scale)
)

# Plot normalized rolling average trends (px.line connects points in row order,
# so plot from the date-sorted frame).
fig2 = px.line(data.sort_values('date', kind='stable'), x='date', y='normalized_rolling_avg',
               color='dispatching_base_number',
               title='Normalized 7-Day Rolling Average of Uber Trips by Base',
               labels={'normalized_rolling_avg': 'Normalized Rolling Avg Trips'})
fig2.update_traces(mode='lines+markers')
fig2.show()


## Data cleaning and preprocessing