In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
import joblib

In [5]:
# Read the shop records from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer="dholakpur_shops_data.csv")

In [6]:
# Preview the loaded data
df

Unnamed: 0,date,day_of_week,weather,festival,pyaare_shop_open,tuntun_shop_open,pyaare_free_offer,total_visitors_dholakpur,visitors_pyaare_shop,visitors_tuntun_shop,sales_tuntun_laddoos,sales_pyaare_shakes,kidnap_rumor
0,2025-01-01,Wednesday,Cloudy,No,Yes,Yes,Yes,833,300,159,685,1138,No
1,2025-01-02,Thursday,Cloudy,Yes,Yes,Yes,No,314,150,140,631,1759,No
2,2025-01-03,Friday,Cloudy,No,Yes,Yes,No,805,76,356,1241,1662,No
3,2025-01-04,Saturday,Windy,No,Yes,Yes,No,818,421,163,201,1451,No
4,2025-01-05,Sunday,Sunny,Yes,Yes,Yes,No,491,435,157,572,1557,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,2026-01-31,Saturday,Rainy,Yes,Yes,Yes,No,675,493,333,1072,1907,No
396,2026-02-01,Sunday,Windy,Yes,No,Yes,No,840,213,381,317,1683,No
397,2026-02-02,Monday,Windy,Yes,No,Yes,No,463,398,2,380,1156,No
398,2026-02-03,Tuesday,Cloudy,Yes,Yes,Yes,Yes,926,7,265,1471,1884,No


In [7]:
# Split the frame into the prediction target (laddoo sales) and the model inputs.
# The raw "date" column is left out of the inputs along with the target itself.
y = df["sales_tuntun_laddoos"]
X = df.drop(["sales_tuntun_laddoos", "date"], axis=1)

In [8]:
# Inspect the feature matrix
X

Unnamed: 0,day_of_week,weather,festival,pyaare_shop_open,tuntun_shop_open,pyaare_free_offer,total_visitors_dholakpur,visitors_pyaare_shop,visitors_tuntun_shop,sales_pyaare_shakes,kidnap_rumor
0,Wednesday,Cloudy,No,Yes,Yes,Yes,833,300,159,1138,No
1,Thursday,Cloudy,Yes,Yes,Yes,No,314,150,140,1759,No
2,Friday,Cloudy,No,Yes,Yes,No,805,76,356,1662,No
3,Saturday,Windy,No,Yes,Yes,No,818,421,163,1451,No
4,Sunday,Sunny,Yes,Yes,Yes,No,491,435,157,1557,No
...,...,...,...,...,...,...,...,...,...,...,...
395,Saturday,Rainy,Yes,Yes,Yes,No,675,493,333,1907,No
396,Sunday,Windy,Yes,No,Yes,No,840,213,381,1683,No
397,Monday,Windy,Yes,No,Yes,No,463,398,2,1156,No
398,Tuesday,Cloudy,Yes,Yes,Yes,Yes,926,7,265,1884,No


In [9]:
# Inspect the target series
y

Unnamed: 0,sales_tuntun_laddoos
0,685
1,631
2,1241
3,201
4,572
...,...
395,1072
396,317
397,380
398,1471


In [10]:
# Columns holding text labels; they are one-hot encoded before modelling
categorical_features = [
    'day_of_week',
    'weather',
    'festival',
    'pyaare_shop_open',
    'tuntun_shop_open',
    'pyaare_free_offer',
    'kidnap_rumor',
]
# Every remaining column of X, in its original order, is treated as numerical
numerical_features = X.columns[~X.columns.isin(categorical_features)].tolist()

In [11]:
# Show the categorical column names
categorical_features

['day_of_week',
 'weather',
 'festival',
 'pyaare_shop_open',
 'tuntun_shop_open',
 'pyaare_free_offer',
 'kidnap_rumor']

In [12]:
# Show the columns left over as numerical features
numerical_features

['total_visitors_dholakpur',
 'visitors_pyaare_shop',
 'visitors_tuntun_shop',
 'sales_pyaare_shakes']

In [13]:
# List the feature columns in their original order
X.columns

Index(['day_of_week', 'weather', 'festival', 'pyaare_shop_open',
       'tuntun_shop_open', 'pyaare_free_offer', 'total_visitors_dholakpur',
       'visitors_pyaare_shop', 'visitors_tuntun_shop', 'sales_pyaare_shakes',
       'kidnap_rumor'],
      dtype='object')

In [14]:
# Preprocessing and pipeline
#
# Categorical columns are one-hot encoded. With handle_unknown="ignore" a
# category that was not present during fit is encoded as all zeros instead of
# raising an error.
#
# Numerical columns are passed through unchanged and are selected *by name*
# (numerical_features) rather than via remainder="passthrough". That keeps the
# set of model inputs explicit, drops any unexpected extra column instead of
# silently feeding it to the regressor, and avoids the scikit-learn
# FutureWarning about how remainder columns are stored.
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
        ("num", "passthrough", numerical_features),
    ],
    remainder="drop",
)

# Full model: preprocessing followed by a random forest with a fixed seed so
# that repeated fits on the same data are reproducible.
model_pipeline = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(n_estimators=100, random_state=42))
])

In [15]:
# Display the column transformer
preprocessor

In [16]:
# Display the full pipeline (preprocessor + regressor)
model_pipeline

In [17]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [18]:
# Inspect the training features
X_train

Unnamed: 0,day_of_week,weather,festival,pyaare_shop_open,tuntun_shop_open,pyaare_free_offer,total_visitors_dholakpur,visitors_pyaare_shop,visitors_tuntun_shop,sales_pyaare_shakes,kidnap_rumor
3,Saturday,Windy,No,Yes,Yes,No,818,421,163,1451,No
18,Sunday,Windy,Yes,Yes,Yes,Yes,354,482,365,579,No
202,Tuesday,Windy,No,Yes,Yes,No,706,211,158,1905,No
250,Monday,Cloudy,No,Yes,Yes,No,338,467,360,123,No
274,Thursday,Sunny,No,Yes,Yes,No,856,381,233,1465,No
...,...,...,...,...,...,...,...,...,...,...,...
71,Thursday,Rainy,No,Yes,Yes,No,869,292,145,1661,No
106,Thursday,Cloudy,Yes,Yes,Yes,Yes,962,291,121,218,No
270,Sunday,Rainy,Yes,Yes,Yes,No,931,295,212,78,No
348,Monday,Sunny,Yes,Yes,Yes,No,971,420,46,1771,No


In [19]:
# Inspect the held-out test features
X_test

Unnamed: 0,day_of_week,weather,festival,pyaare_shop_open,tuntun_shop_open,pyaare_free_offer,total_visitors_dholakpur,visitors_pyaare_shop,visitors_tuntun_shop,sales_pyaare_shakes,kidnap_rumor
209,Tuesday,Rainy,No,Yes,Yes,Yes,957,436,205,767,No
280,Wednesday,Sunny,No,Yes,Yes,No,382,4,329,1160,No
33,Monday,Windy,Yes,Yes,Yes,No,720,65,396,1892,No
210,Wednesday,Sunny,No,Yes,Yes,No,544,50,383,700,No
93,Friday,Windy,No,Yes,Yes,No,703,425,227,146,No
...,...,...,...,...,...,...,...,...,...,...,...
246,Thursday,Sunny,No,Yes,Yes,Yes,931,404,107,1015,No
227,Saturday,Cloudy,No,Yes,Yes,Yes,801,81,18,1536,No
369,Monday,Cloudy,Yes,Yes,Yes,No,830,131,181,187,No
176,Thursday,Rainy,No,Yes,Yes,No,909,98,166,1866,No


In [20]:
# Fit the preprocessing + regressor pipeline on the training split
model_pipeline.fit(X=X_train, y=y_train)


The format of the columns of the 'remainder' transformer in ColumnTransformer.transformers_ will change in version 1.7 to match the format of the other transformers.
At the moment the remainder columns are stored as indices (of type int). With the same ColumnTransformer configuration, in the future they will be stored as column names (of type str).



In [21]:
# Score the held-out rows and report the mean squared error
y_pred = model_pipeline.predict(X=X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("Mean Squared Error:", mse)


Mean Squared Error: 133919.84401125


In [22]:
# Persist the fitted pipeline to disk
joblib.dump(value=model_pipeline, filename="tuntun_sales_predictor.pkl")

['tuntun_sales_predictor.pkl']

In [23]:
import pandas as pd
import joblib

# Restore the fitted pipeline from the file written by joblib.dump
model = joblib.load(filename="tuntun_sales_predictor.pkl")

In [24]:
# Collect user input, validate it, and predict laddoo sales
def _ask_text(prompt, allowed=None):
    """Prompt until the user gives a usable categorical answer.

    The answer is stripped and capitalised ("  rainy " -> "Rainy") so that it
    matches the capitalised labels visible in the dataset preview. This matters
    because the encoder ignores unknown categories: a mis-cased answer would
    otherwise be silently encoded as "no category at all".

    Args:
        prompt: Text shown to the user.
        allowed: Optional collection of accepted (capitalised) answers. When
            given, any other answer is rejected and the question is repeated.

    Returns:
        The normalised, non-empty answer.
    """
    while True:
        answer = input(prompt).strip().capitalize()
        if answer and (allowed is None or answer in allowed):
            return answer
        if allowed is None:
            print("Please enter a value.")
        else:
            print("Please answer with one of: " + ", ".join(allowed))


def _ask_int(prompt, minimum=0):
    """Prompt until the user enters a whole number that is >= ``minimum``."""
    while True:
        raw = input(prompt)
        try:
            number = int(raw)
        except ValueError:
            # Re-ask instead of crashing and losing the earlier answers.
            print(f"'{raw}' is not a whole number, please try again.")
            continue
        if number < minimum:
            print(f"Please enter a number of at least {minimum}.")
            continue
        return number


def get_user_input(predictor=None):
    """Ask for one day's conditions on stdin and print the predicted laddoo sales.

    Args:
        predictor: Optional fitted estimator exposing ``predict(DataFrame)``.
            Defaults to the notebook-level ``model`` when omitted.

    Returns:
        The predicted sales rounded to the nearest integer (also printed).
    """
    yes_no = ("Yes", "No")
    # Questions are asked in the same order as the feature columns of the
    # training frame; the dict keys are those column names.
    user_input = {
        "day_of_week": _ask_text("Enter day of the week (e.g., Monday): "),
        "weather": _ask_text("Enter weather (e.g., Sunny, Rainy): "),
        "festival": _ask_text("Festival today? (Yes/No): ", yes_no),
        "pyaare_shop_open": _ask_text("Is Pyaare's shop open? (Yes/No): ", yes_no),
        "tuntun_shop_open": _ask_text("Is Tuntun aunty's shop open? (Yes/No): ", yes_no),
        "pyaare_free_offer": _ask_text("Is Pyaare giving free offer? (Yes/No): ", yes_no),
        "total_visitors_dholakpur": _ask_int("Total visitors in Dholakpur: "),
        "visitors_pyaare_shop": _ask_int("Visitors at Pyaare's shop: "),
        "visitors_tuntun_shop": _ask_int("Visitors at Tuntun aunty's shop: "),
        "sales_pyaare_shakes": _ask_int("Shakes sold at Pyaare's shop: "),
        "kidnap_rumor": _ask_text("Is there a kidnap rumor? (Yes/No): ", yes_no),
    }

    # Single-row frame whose columns are the feature names used for training
    new_data = pd.DataFrame([user_input])

    # Predict laddoo sales with the supplied estimator, or the global model
    active_model = predictor if predictor is not None else model
    prediction = active_model.predict(new_data)
    predicted_sales = round(prediction[0])

    # Output the result
    print("\n🎯 Predicted laddoo sales for Tuntun Aunty:", predicted_sales)
    return predicted_sales

# Run the interactive prompt; this blocks on stdin until every question is answered
get_user_input()


KeyboardInterrupt: Interrupted by user

In [25]:
# prompt: create UI to input and output also

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
import joblib
import ipywidgets as widgets
from IPython.display import display

# Load dataset (assuming dholakpur_shops_data.csv is in the current directory)
try:
    df = pd.read_csv("dholakpur_shops_data.csv")
except FileNotFoundError:
    print("Error: dholakpur_shops_data.csv not found. Please upload the file.")
    # Re-raise rather than calling exit(): inside a notebook exit() shuts down
    # the kernel session (losing all state), whereas re-raising stops only this
    # cell and keeps the traceback visible.
    raise

# Preprocessing and model training are done in the earlier cells; this cell reuses the saved pipeline.


# Restore the fitted pipeline from its joblib file
model = joblib.load(filename="tuntun_sales_predictor.pkl")

# UI elements
def _column_dropdown(column, label):
    """Build a dropdown listing the distinct values found in ``df[column]``."""
    return widgets.Dropdown(options=df[column].unique().tolist(), description=label)


def _yes_no_dropdown(label):
    """Build a dropdown offering the two choices 'Yes' and 'No'."""
    return widgets.Dropdown(options=['Yes', 'No'], description=label)


def _column_slider(column, label):
    """Build an integer slider spanning the min..max of ``df[column]``."""
    return widgets.IntSlider(min=df[column].min(), max=df[column].max(), description=label)


# Categorical inputs
day_of_week_dropdown = _column_dropdown('day_of_week', 'Day:')
weather_dropdown = _column_dropdown('weather', 'Weather:')
festival_dropdown = _yes_no_dropdown('Festival:')
pyaare_shop_open_dropdown = _yes_no_dropdown("Pyaare's Shop:")
tuntun_shop_open_dropdown = _yes_no_dropdown("Tuntun's Shop:")
pyaare_free_offer_dropdown = _yes_no_dropdown('Pyaare Offer:')
kidnap_rumor_dropdown = _yes_no_dropdown('Kidnap Rumor:')

# Numerical inputs, each limited to the range observed in the dataset
total_visitors_dholakpur_slider = _column_slider('total_visitors_dholakpur', 'Total Visitors:')
visitors_pyaare_shop_slider = _column_slider('visitors_pyaare_shop', 'Pyaare Visitors:')
visitors_tuntun_shop_slider = _column_slider('visitors_tuntun_shop', 'Tuntun Visitors:')
sales_pyaare_shakes_slider = _column_slider('sales_pyaare_shakes', 'Pyaare Shakes:')

# Area where the prediction text is rendered
output_text = widgets.Output()

def predict_sales(change):
    """Recompute and show the laddoo-sales prediction from the current widget values.

    Used as a widget observer callback. The ``change`` payload is not read:
    every call re-reads all input widgets and replaces the previous output.
    """
    with output_text:
        # Drop the previous prediction so only the latest one is visible.
        output_text.clear_output()
        # Feature name -> widget supplying its value, in training-column order.
        controls_by_feature = {
            "day_of_week": day_of_week_dropdown,
            "weather": weather_dropdown,
            "festival": festival_dropdown,
            "pyaare_shop_open": pyaare_shop_open_dropdown,
            "tuntun_shop_open": tuntun_shop_open_dropdown,
            "pyaare_free_offer": pyaare_free_offer_dropdown,
            "total_visitors_dholakpur": total_visitors_dholakpur_slider,
            "visitors_pyaare_shop": visitors_pyaare_shop_slider,
            "visitors_tuntun_shop": visitors_tuntun_shop_slider,
            "sales_pyaare_shakes": sales_pyaare_shakes_slider,
            "kidnap_rumor": kidnap_rumor_dropdown,
        }
        row = {feature: control.value for feature, control in controls_by_feature.items()}
        prediction = model.predict(pd.DataFrame([row]))
        print("\n🎯 Predicted laddoo sales for Tuntun Aunty:", round(prediction[0]))

# Re-run the prediction whenever the value of any input widget changes
for widget in (
    day_of_week_dropdown,
    weather_dropdown,
    festival_dropdown,
    pyaare_shop_open_dropdown,
    tuntun_shop_open_dropdown,
    pyaare_free_offer_dropdown,
    total_visitors_dholakpur_slider,
    visitors_pyaare_shop_slider,
    visitors_tuntun_shop_slider,
    sales_pyaare_shakes_slider,
    kidnap_rumor_dropdown,
):
    widget.observe(predict_sales, names='value')

# Display UI: categorical dropdowns first, then the sliders, then the output area
display(
    day_of_week_dropdown,
    weather_dropdown,
    festival_dropdown,
    pyaare_shop_open_dropdown,
    tuntun_shop_open_dropdown,
    pyaare_free_offer_dropdown,
    kidnap_rumor_dropdown,
    total_visitors_dholakpur_slider,
    visitors_pyaare_shop_slider,
    visitors_tuntun_shop_slider,
    sales_pyaare_shakes_slider,
    output_text,
)

# Show a prediction for the default widget values right away. The observers only
# fire on a value change, so without this call the output area stays empty until
# the user touches an input. predict_sales does not read its argument.
predict_sales(None)



Dropdown(description='Day:', options=('Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday', 'Monday', 'Tues…

Dropdown(description='Weather:', options=('Cloudy', 'Windy', 'Sunny', 'Rainy'), value='Cloudy')

Dropdown(description='Festival:', options=('Yes', 'No'), value='Yes')

Dropdown(description="Pyaare's Shop:", options=('Yes', 'No'), value='Yes')

Dropdown(description="Tuntun's Shop:", options=('Yes', 'No'), value='Yes')

Dropdown(description='Pyaare Offer:', options=('Yes', 'No'), value='Yes')

Dropdown(description='Kidnap Rumor:', options=('Yes', 'No'), value='Yes')

IntSlider(value=300, description='Total Visitors:', max=1000, min=300)

IntSlider(value=2, description='Pyaare Visitors:', max=499, min=2)

IntSlider(value=2, description='Tuntun Visitors:', max=399, min=2)

IntSlider(value=5, description='Pyaare Shakes:', max=2000, min=5)

Output()