# Step 1: Save Your Model as a .pkl File (in a different notebook)

```

# Train the pickup-count model and save it for the Gradio app.
# NOTE(review): this snippet assumes `time_lagged_features`, `np`, `boxcox`,
# `RandomForestRegressor`, `make_scorer`, `mean_absolute_error` and
# `cross_val_score` are already defined in the training notebook — confirm.
import joblib

# Define the target and features
target = time_lagged_features['pickup_date_count']
features = time_lagged_features[['scheduled_date_count',
                                  'pickup_date_count_lag_7',
                                  'scheduled_date_count_7',
                                  'pickup_date_count_lag_14',
                                  'scheduled_date_count_14',
                                  'pickup_date_count_lag_21',
                                  'scheduled_date_count_21']]

# Ensure target is positive before Box-Cox transformation
target_positive = target + 1  # Adding 1 to ensure all values are positive
target_transformed, fitted_lambda = boxcox(target_positive)

# Define the model
model_rf = RandomForestRegressor(max_depth=20, max_features='sqrt', min_samples_split=2,
                                 n_estimators=200, random_state=42)

# Define a custom scoring function
mae_scorer = make_scorer(mean_absolute_error)

# Perform cross-validation on transformed target
# NOTE(review): the features are lagged values of a dated series; an integer
# `cv` gives contiguous, unshuffled folds, so later data can end up in the
# training folds of earlier data. Consider TimeSeriesSplit — confirm intent.
cv_scores = cross_val_score(model_rf, features, target_transformed, cv=5, scoring=mae_scorer)

# Output cross-validation scores and their mean
print("Cross-Validation MAE scores on transformed target:", cv_scores)
print("Mean Cross-Validation MAE on transformed target:", np.mean(cv_scores))

# Train the model on the entire training set
final_model = model_rf.fit(features, target_transformed)

# Save the fitted model together with the Box-Cox lambda. The app cells unpack
# exactly this (model, lambda) tuple from "trained_model_and_lambda.pkl" and
# need the lambda to invert the transformation on predictions.
joblib.dump((final_model, fitted_lambda), "trained_model_and_lambda.pkl")

```



# Step 2: Create Your Gradio App Script

In [None]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.6.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.5-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.3 (from gradio)
  Downloading gradio_client-1.4.3-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart==0.0.12 (from gradio)
  Downloading python_multipart-0.0.12-py3-none-any.whl.metadata (1.9 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.7.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metad

## ML Deployment: Using Gradio and Huggingface

In [None]:
import gradio as gr
from datetime import datetime

# Function to handle predictions
def predict(year, month, day, scheduled_date_count):
    """Predict the pickup count for one calendar date and format it for display.

    Args:
        year: Year as a string or int (the dropdown supplies strings such as "2025").
        month: Month number as a string or int; converted with ``int``.
        day: Day of the month as a string or int; converted with ``int``.
        scheduled_date_count: Scheduled count, passed through to the feature builder.

    Returns:
        str: The prediction sentence, an ``"Error: ..."`` message for rejected
        input, or ``"An unexpected error occurred: ..."`` for any other failure.
        This function does not raise.

    NOTE(review): ``calculate_lagged_features``, ``historical_data`` and
    ``model_xgb`` are not defined in this cell and must exist at call time;
    otherwise every in-range request ends in the "unexpected error" branch.
    Confirm which helper and model this draft is meant to use.
    """
    import math  # local import: this cell's import block only brings in gradio and datetime

    try:
        # Convert month and day to integers
        month = int(month)
        day = int(day)

        # Construct the date from the inputs
        input_date = f"{year}-{month:02d}-{day:02d}"
        try:
            target_date = datetime.strptime(input_date, "%Y-%m-%d")
        except ValueError:
            # The day dropdown offers 1-31 for every month, so e.g. Feb 31 is reachable.
            return f"Error: {input_date} is not a valid calendar date."

        # Check if the date is within the allowed range
        if target_date < datetime(2024, 8, 29) or target_date > datetime(2025, 8, 30):
            return "Error: Date must be between 2024-08-29 and 2025-08-30."

        # Generate lagged features using historical data
        features = calculate_lagged_features(historical_data, target_date, scheduled_date_count)

        if features.empty:
            return "Error: Insufficient historical data to compute lagged features."

        # The model receives the first feature row as a (1, n_features) array
        features = features.iloc[0].values.reshape(1, -1)

        # Make prediction using the pre-trained model
        prediction = model_xgb.predict(features)

        # Round half up. floor(x + 0.5) also rounds negative values correctly
        # (int() truncates toward zero), and the result is clamped at zero
        # because a pickup count cannot be negative.
        predicted_value = float(prediction[0])
        rounded_prediction = max(0, math.floor(predicted_value + 0.5))

        return f"Predicted Pickup Date Count for {input_date}: {rounded_prediction}"
    except Exception as e:
        # Keep the UI responsive: report the failure as text instead of raising.
        return f"An unexpected error occurred: {e}"

# Input widgets: three dropdowns pick the date (month/day as zero-padded strings),
# a number box takes the scheduled count, and a text box shows the result.
year_dropdown = gr.Dropdown(label="Year", value="2025", choices=list(map(str, range(2024, 2026))))
month_dropdown = gr.Dropdown(label="Month", value="04", choices=[f"{m:02d}" for m in range(1, 13)])
day_dropdown = gr.Dropdown(label="Day", value="11", choices=[f"{d:02d}" for d in range(1, 32)])
scheduled_count_input = gr.Number(value=30, label="Enter Scheduled Date Count")
output = gr.Textbox(label="Predicted Pickup Date Count")

# Wire the widgets to predict(); the inputs are passed positionally in this order.
app = gr.Interface(
    title="Pickup Date Count Predictor",
    description="Select a date and enter the scheduled date count to predict the pickup date count.",
    fn=predict,
    inputs=[year_dropdown, month_dropdown, day_dropdown, scheduled_count_input],
    outputs=output,
)

# Start the server only when this code runs as the main module.
if __name__ == "__main__":
    app.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://5cc41d83ce0681ca3e.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


In [20]:
# This cell sits above the notebook's first pandas import, so import it here to
# keep the cell runnable on a fresh kernel.
import pandas as pd

historical_data = pd.read_csv('historical_data.csv')
historical_data.tail()

Unnamed: 0,date,scheduled_date_count,pickup_date_count_lag_7,scheduled_date_count_7,pickup_date_count_lag_14,scheduled_date_count_14,pickup_date_count_lag_21,scheduled_date_count_21
255,2024-08-24,19,0.0,15.0,0.0,7.0,0.0,100.0
256,2024-08-25,31,0.0,23.0,0.0,19.0,0.0,70.0
257,2024-08-26,43,52.0,45.0,47.0,33.0,0.0,65.0
258,2024-08-27,91,40.0,54.0,27.0,12.0,80.0,50.0
259,2024-08-28,11,28.0,19.0,29.0,25.0,41.0,19.0


In [24]:
historical_data

Unnamed: 0_level_0,scheduled_date_count,pickup_date_count_lag_7,scheduled_date_count_7,pickup_date_count_lag_14,scheduled_date_count_14,pickup_date_count_lag_21,scheduled_date_count_21
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2023-12-13,20,33.0,43.0,31.0,32.0,25.0,25.0
2023-12-14,59,32.0,41.0,41.0,44.0,39.0,38.0
2023-12-15,18,0.0,1.0,0.0,7.0,0.0,2.0
2023-12-16,13,0.0,13.0,0.0,1.0,0.0,3.0
2023-12-17,0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...
2024-08-24,19,0.0,15.0,0.0,7.0,0.0,100.0
2024-08-25,31,0.0,23.0,0.0,19.0,0.0,70.0
2024-08-26,43,52.0,45.0,47.0,33.0,0.0,65.0
2024-08-27,91,40.0,54.0,27.0,12.0,80.0,50.0


In [41]:
import pandas as pd
import joblib
import gradio as gr
import numpy as np
from datetime import datetime
from scipy.stats import boxcox
from scipy.special import inv_boxcox

# Load the model and the Box-Cox lambda that were saved together as one tuple.
# NOTE(review): joblib.load unpickles arbitrary objects — only load an artifact
# that comes from a trusted source.
final_model, fitted_lambda = joblib.load("trained_model_and_lambda.pkl")

# contains lagged features
historical_data = pd.read_csv('historical_data.csv')

# The CSV carries its old positional index as an "Unnamed: 0" column; drop it if
# present so it is never treated as a model feature.
historical_data = historical_data.drop(columns=['Unnamed: 0'], errors='ignore')

# Store plain datetime.date values so lookups can use `datetime.date` keys
historical_data['date'] = pd.to_datetime(historical_data['date']).dt.date

# Set the date column as index (new frame instead of inplace, so a re-run is safe)
historical_data = historical_data.set_index('date')


# Function to calculate lagged features (same logic as training)
def create_lagged_features(historical_data, input_date):
    """Build the feature dict for ``input_date`` from the latest historical row.

    Every value is copied from the row with the most recent date in
    ``historical_data``; ``input_date`` is only used to check that the request
    lies after that date, so all accepted dates yield the same features.

    Args:
        historical_data: DataFrame indexed by ``datetime.date`` with the columns
            ``scheduled_date_count``, ``pickup_date_count_lag_{7,14,21}`` and
            ``scheduled_date_count_{7,14,21}``.
        input_date: ``datetime`` for the day to predict.

    Returns:
        dict: feature name -> value, inserted as ``scheduled_date_count`` first,
        then the pickup-lag / scheduled pair for lags 7, 14 and 21.

    Raises:
        ValueError: if there is no historical data, or ``input_date`` is not
            after the most recent historical date. (Previously an error string
            was returned, which the caller then tried to use as a dict.)
    """
    if len(historical_data.index) == 0:
        raise ValueError("No historical data available.")

    # Use the latest date rather than the last row, so an unsorted frame still works
    last_available_date = max(historical_data.index)

    # Convert input_date to datetime.date to match the index values
    input_date = input_date.date()

    # Only dates after the known history can be predicted
    if input_date <= last_available_date:
        raise ValueError("Input date must be after the most recent historical data date.")

    # Generate lagged features based on the most recent data
    lagged_features = {}

    # Placeholder from history; the caller overwrites it with the user's value
    lagged_features['scheduled_date_count'] = historical_data.loc[last_available_date, 'scheduled_date_count']

    for lag in [7, 14, 21]:
        for column in (f'pickup_date_count_lag_{lag}', f'scheduled_date_count_{lag}'):
            lagged_features[column] = historical_data.loc[last_available_date, column]

    return lagged_features


# Function to make predictions based on user input
def predict(year, month, day, scheduled_date_count):
    """Predict the pickup count for the chosen date and format it for display.

    Reads the module-level ``historical_data``, ``final_model`` and
    ``fitted_lambda``.

    Args:
        year, month, day: Date parts as strings or ints; converted with ``int``.
        scheduled_date_count: Scheduled count for that day; converted with ``int``.

    Returns:
        str: The prediction sentence, or an error message. This function does
        not raise; any failure is reported as text.
    """
    try:
        # Ensure the inputs are integers where necessary
        year = int(year)
        month = int(month)
        day = int(day)
        scheduled_date_count = int(scheduled_date_count)

        # Construct the input date as a datetime object
        input_date = datetime(year, month, day)

        # Generate lagged features for the input date
        features = create_lagged_features(historical_data, input_date)

        # A rejected date may come back as a message string rather than a dict;
        # show it as-is instead of failing on the item assignment below.
        if isinstance(features, str):
            return features

        # Add the scheduled_date_count as a feature
        features['scheduled_date_count'] = scheduled_date_count

        # Match features to the model by column name when the model recorded
        # its training columns, so the result does not depend on dict order.
        feature_row = pd.DataFrame([features])
        trained_names = getattr(final_model, 'feature_names_in_', None)
        if trained_names is not None:
            model_input = feature_row[list(trained_names)]
        else:
            # Model was fitted without column names: keep the positional order
            model_input = feature_row.to_numpy()

        # Make the prediction using the pre-trained model
        prediction = final_model.predict(model_input)

        # Reverse the Box-Cox transformation and undo the +1 shift from training
        predicted_value = inv_boxcox(prediction[0], fitted_lambda) - 1

        # Round the prediction
        rounded_prediction = round(predicted_value)

        return f"Predicted Pickup Count for {input_date.strftime('%Y-%m-%d')} is {rounded_prediction} hampers"

    except Exception as e:
        return f"An unexpected error occurred: {e}"

# Input widgets: the date comes from three dropdowns (month and day as
# zero-padded strings), the scheduled count from a number box.
year_dropdown = gr.Dropdown(
    label="Year",
    value="2025",
    choices=list(map(str, range(2024, 2026))),
)
month_dropdown = gr.Dropdown(
    label="Month",
    value="04",
    choices=[f"{m:02d}" for m in range(1, 13)],
)
day_dropdown = gr.Dropdown(
    label="Day",
    value="11",
    choices=[f"{d:02d}" for d in range(1, 32)],
)
scheduled_count_input = gr.Number(
    value=30,
    label="Enter Scheduled Date Count",
)

# Text box that shows the string returned by predict()
output = gr.Textbox(label="Predicted Pickup Date Count")

# Wire the widgets to predict(); the inputs are passed positionally in this order.
app = gr.Interface(
    title="Pickup Date Count Predictor",
    description="Select a date and enter the scheduled date count to predict the pickup date count.",
    fn=predict,
    inputs=[year_dropdown, month_dropdown, day_dropdown, scheduled_count_input],
    outputs=output,
)

# Start the server only when this code runs as the main module.
if __name__ == "__main__":
    app.launch()



Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://2cc762fc8689de8ba7.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


In [33]:
# Insert final app below to export into a py app
# Writes the text inside the triple-quoted string to app.py in the working
# directory, replacing any existing file. The string currently holds only a
# placeholder comment; the final app source still has to be pasted in.
with open('app.py', 'w') as f:
    f.write(""" # Insert final app below to export into a py app


    """)

SyntaxError: incomplete input (<ipython-input-33-690f44e98eb3>, line 1)

In [34]:
# Snapshot every package installed in this environment into requirements.txt.
# NOTE(review): the "Step 3" cell further down rewrites requirements.txt with a
# short pinned list, so this snapshot is overwritten when the notebook runs in order.
!pip freeze > requirements.txt

In [None]:
import numpy as np
from scipy.stats import boxcox
from scipy.special import inv_boxcox
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import mean_absolute_error, make_scorer

In [35]:
# Print the installed package versions — presumably the source for the pins
# chosen for requirements.txt; verify.
!pip freeze

absl-py==1.4.0
accelerate==1.1.1
aiofiles==23.2.1
aiohappyeyeballs==2.4.3
aiohttp==3.11.2
aiosignal==1.3.1
alabaster==1.0.0
albucore==0.0.19
albumentations==1.4.20
altair==4.2.2
annotated-types==0.7.0
anyio==3.7.1
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
array_record==0.5.1
arviz==0.20.0
astropy==6.1.6
astropy-iers-data==0.2024.11.18.0.35.2
astunparse==1.6.3
async-timeout==4.0.3
atpublic==4.1.0
attrs==24.2.0
audioread==3.0.1
autograd==1.7.0
babel==2.16.0
backcall==0.2.0
beautifulsoup4==4.12.3
bigframes==1.27.0
bigquery-magics==0.4.0
bleach==6.2.0
blinker==1.9.0
blis==0.7.11
blosc2==2.7.1
bokeh==3.6.1
Bottleneck==1.4.2
bqplot==0.12.43
branca==0.8.0
CacheControl==0.14.1
cachetools==5.5.0
catalogue==2.0.10
certifi==2024.8.30
cffi==1.17.1
chardet==5.2.0
charset-normalizer==3.4.0
chex==0.1.87
clarabel==0.9.0
click==8.1.7
cloudpathlib==0.20.0
cloudpickle==3.1.0
cmake==3.30.5
cmdstanpy==1.2.4
colorcet==3.1.0
colorlover==0.3.0
colour==0.1.5
community==1.0.0b1
confection==0.1.5
cons==0.

# Step 3: Create requirements.txt file

Pick only the packages the app actually needs, and pin each one to the exact version shown by `pip freeze`.

In [1]:
# Write the requirements for the Hugging Face Space: only the libraries the app
# needs. pandas and joblib are listed because the app code imports them
# directly; they are left unpinned here.
# TODO: pin pandas and joblib to the versions reported by `pip freeze`.
with open('requirements.txt', 'w') as f:
    f.write("""
numpy==1.26.4
scikit-learn==1.5.2
scipy==1.13.1
pandas
joblib
""")

# Step 4: Set Up Hugging Face Spaces
# Step 5: Upload Files to Hugging Face

https://huggingface.co/spaces/grethasaur/hamper_pickup_predictor



---

The initial app version always calculates the same lagged features because it bases them on the same historical data, since we don't have current/live data. That app would work well if the historical data given to it were recent and updated daily.

The version below should dynamically calculate the lags based on the input.

---

Speculation: the model might not have learned any seasonality and may need features that indicate seasonality.

The model needs reworking. The current one does provide predictions, but they are mostly driven by the scheduled pickup count.

In [2]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.6.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.5-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.3 (from gradio)
  Downloading gradio_client-1.4.3-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart==0.0.12 (from gradio)
  Downloading python_multipart-0.0.12-py3-none-any.whl.metadata (1.9 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.8.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metad

In [10]:
import pandas as pd
import joblib
import gradio as gr
import numpy as np
from datetime import datetime
from scipy.stats import boxcox
from scipy.special import inv_boxcox

# Load the model and the Box-Cox lambda that were saved together as one tuple.
# NOTE(review): joblib.load unpickles arbitrary objects — only load an artifact
# that comes from a trusted source.
final_model, fitted_lambda = joblib.load("trained_model_and_lambda.pkl")

# contains lagged features
historical_data = pd.read_csv('historical_data.csv')

# The CSV carries its old positional index as an "Unnamed: 0" column; drop it if
# present. The feature builder below reads every column of the requested row,
# so a stray column would show up as a missing value for the new date.
historical_data = historical_data.drop(columns=['Unnamed: 0'], errors='ignore')

# Store plain datetime.date values so lookups can use `datetime.date` keys
historical_data['date'] = pd.to_datetime(historical_data['date']).dt.date

# Set the date column as index (new frame instead of inplace, so a re-run is safe)
historical_data = historical_data.set_index('date')


# Function to calculate lagged features (same logic as training)
def create_lagged_features(historical_data, input_date, scheduled_date_count):
    """Build the model's feature dict for ``input_date``.

    A row for ``input_date`` holding ``scheduled_date_count`` is added to a copy
    of the history, the frame is sorted by date, and the 7/14/21 lag columns are
    recomputed by shifting ``scheduled_date_count``.

    Args:
        historical_data: DataFrame indexed by ``datetime.date`` with at least a
            ``scheduled_date_count`` column. It is not modified.
        input_date: ``datetime`` for the day to predict.
        scheduled_date_count: Scheduled count entered for that day.

    Returns:
        dict: exactly the seven training features, in training order:
        ``scheduled_date_count`` followed by the pickup-lag / scheduled pair for
        lags 7, 14 and 21. Other columns in the history are ignored.

    Raises:
        ValueError: if there are not enough earlier rows to fill every lag.

    NOTE(review): the lags are counted in rows, not calendar days, so a gap
    between the history and ``input_date`` still uses the last rows of the
    history. The pickup lags are also filled from ``scheduled_date_count``,
    because the history has no raw pickup column. Confirm both are intended.
    """
    lags = (7, 14, 21)
    feature_order = ['scheduled_date_count']
    for lag in lags:
        feature_order += [f'pickup_date_count_lag_{lag}', f'scheduled_date_count_{lag}']

    input_date = input_date.date()

    # Replace any existing row for this date: a duplicated index label would
    # make the `.loc` lookup below return several rows instead of one.
    data = historical_data.loc[historical_data.index != input_date].copy()

    new_row = pd.DataFrame({'scheduled_date_count': [scheduled_date_count]},
                           index=[input_date])
    data = pd.concat([data, new_row]).sort_index()

    for lag in lags:
        shifted = data['scheduled_date_count'].shift(lag)
        data[f'pickup_date_count_lag_{lag}'] = shifted
        data[f'scheduled_date_count_{lag}'] = shifted

    # Check for missing lagged data, looking only at the model's own features
    lagged_features = data.loc[input_date, feature_order]
    if lagged_features.isnull().any():
        raise ValueError("Insufficient historical data to calculate lagged features.")

    return lagged_features.to_dict()


# Function to make predictions based on user input
def predict(year, month, day, scheduled_date_count):
    """Predict the pickup count for the chosen date and format it for display.

    Reads the module-level ``historical_data``, ``final_model`` and
    ``fitted_lambda``.

    Args:
        year, month, day: Date parts as strings or ints; converted with ``int``.
        scheduled_date_count: Scheduled count for that day; converted with ``int``.

    Returns:
        str: The prediction sentence, or ``"An unexpected error occurred: ..."``.
        This function does not raise; any failure is reported as text.
    """
    try:
        # Ensure the inputs are integers where necessary
        year = int(year)
        month = int(month)
        day = int(day)
        scheduled_date_count = int(scheduled_date_count)

        # Construct the input date as a datetime object
        input_date = datetime(year, month, day)

        # Generate lagged features for the input date
        features = create_lagged_features(historical_data, input_date, scheduled_date_count)

        # Match features to the model by column name when the model recorded
        # its training columns, so the result does not depend on dict order
        # or on extra columns in the history.
        feature_row = pd.DataFrame([features])
        trained_names = getattr(final_model, 'feature_names_in_', None)
        if trained_names is not None:
            model_input = feature_row[list(trained_names)]
        else:
            # Model was fitted without column names: keep the positional order
            model_input = feature_row.to_numpy()

        # Make the prediction using the pre-trained model
        prediction = final_model.predict(model_input)

        # Reverse the Box-Cox transformation and undo the +1 shift from training
        predicted_value = inv_boxcox(prediction[0], fitted_lambda) - 1

        # Round the prediction
        rounded_prediction = round(predicted_value)

        return f"Predicted Pickup Count for {input_date.strftime('%Y-%m-%d')} is {rounded_prediction} hampers"

    except Exception as e:
        return f"An unexpected error occurred: {e}"

# Input widgets: the date comes from three dropdowns (month and day as
# zero-padded strings), the scheduled count from a number box.
year_dropdown = gr.Dropdown(
    label="Year",
    value="2025",
    choices=list(map(str, range(2024, 2026))),
)
month_dropdown = gr.Dropdown(
    label="Month",
    value="04",
    choices=[f"{m:02d}" for m in range(1, 13)],
)
day_dropdown = gr.Dropdown(
    label="Day",
    value="11",
    choices=[f"{d:02d}" for d in range(1, 32)],
)
scheduled_count_input = gr.Number(
    value=30,
    label="Enter Scheduled Date Count",
)

# Text box that shows the string returned by predict()
output = gr.Textbox(label="Predicted Pickup Date Count")

# Wire the widgets to predict(); the inputs are passed positionally in this order.
app = gr.Interface(
    title="Pickup Date Count Predictor",
    description="Select a date and enter the scheduled date count to predict the pickup date count.",
    fn=predict,
    inputs=[year_dropdown, month_dropdown, day_dropdown, scheduled_count_input],
    outputs=output,
)

# Start the server only when this code runs as the main module.
if __name__ == "__main__":
    app.launch()



Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://886111e8e25a83e7b1.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
