<a href="https://colab.research.google.com/github/akhil0203/prediction-and-optimisation/blob/main/Prediction_and_optimisation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
!pip install gurobipy



In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error


import gurobipy as gp
from gurobipy import GRB

# Load historical sales data from Historical_Data.csv
data = pd.read_csv('/content/Historical_Data.csv')

# Prepare the data (convert date to datetime objects and set date as index)
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)

# Feature engineering
data['Month'] = data.index.month
data['Day_of_week'] = data.index.dayofweek

#features and target
X = data[['Month', 'Day_of_week']]
y = data['Sold_Units']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [13]:
# Gradient Boosting Regressor model
gbr = GradientBoostingRegressor(random_state=42)

#parameter grid
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7]
}

# Grid Search
grid_search = GridSearchCV(estimator=gbr, param_grid=param_grid, cv=3, n_jobs=-1, scoring='neg_mean_squared_error')
grid_search.fit(X_train, y_train)

#best model found
best_gbr = grid_search.best_estimator_

# Predict value on the test set
y_pred = best_gbr.predict(X_test)

#best hyperparameters found
print("Best Hyperparameters found:")
print(grid_search.best_params_)

# model evaluation
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')


Best Hyperparameters found:
{'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 50}
Mean Squared Error: 2.779616686267473


In [15]:
#Random Forest Regressor model
rfr = RandomForestRegressor(random_state=42)

#parameter grid for Random Forest
param_grid_rfr = {
    'n_estimators': [50, 100, 200],
    'max_depth': [10, 20, 30]
}

#Grid Search for Random Forest
grid_search_rfr = GridSearchCV(estimator=rfr, param_grid=param_grid_rfr, cv=3, n_jobs=-1, scoring='neg_mean_squared_error')
grid_search_rfr.fit(X_train, y_train)

#best Random Forest model found
best_rfr = grid_search_rfr.best_estimator_
# Prediction on the test set
y_pred = best_rfr.predict(X_test)

#best hyperparameters found
print("Best Hyperparameters found:")
print(grid_search.best_params_)

# model evaluation
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')

Best Hyperparameters found:
{'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 50}
Mean Squared Error: 2.779672259824237


In [39]:
# prediction on test set using both the models
y_pred_gbr = best_gbr.predict(X_test)
y_pred_rfr = best_rfr.predict(X_test)

# Average the predictions to create the ensemble prediction
y_pred_ensemble = (y_pred_gbr + y_pred_rfr) / 2

# ensemble model evaluation
mse_ensemble = mean_squared_error(y_test, y_pred_ensemble)
print(f'Mean Squared Error (Ensemble): {mse_ensemble}')


pred_ensemble:[1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025
 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025
 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025
 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025
 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025
 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025
 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025
 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025
 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025
 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025
 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025
 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025
 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025
 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.9707025 1.

In [21]:
# Predicting future demand using for next 30 days
future_dates = pd.date_range(start='2024-06-01', periods=30)
future_data = pd.DataFrame({
    'Month': future_dates.month,
    'Day_of_week': future_dates.dayofweek
}, index=future_dates)

predicted_demand_gbr = best_gbr.predict(future_data)
predicted_demand_rfr = best_rfr.predict(future_data)
predicted_demand_ensemble = (predicted_demand_gbr + predicted_demand_rfr) / 2

print("Predicted Demand for the next 30 days:")
for date, demand in zip(future_dates, predicted_demand_ensemble):
    print(f"Date: {date.strftime('%Y-%m-%d')}, Predicted Demand: {demand}")


Predicted Demand for the next 30 days:
Date: 2024-06-01, Predicted Demand: 1.9705295179169888
Date: 2024-06-02, Predicted Demand: 1.9705295179169888
Date: 2024-06-03, Predicted Demand: 1.9705295179169888
Date: 2024-06-04, Predicted Demand: 1.9705295179169888
Date: 2024-06-05, Predicted Demand: 1.9705295179169888
Date: 2024-06-06, Predicted Demand: 1.9705295179169888
Date: 2024-06-07, Predicted Demand: 1.9705295179169888
Date: 2024-06-08, Predicted Demand: 1.9705295179169888
Date: 2024-06-09, Predicted Demand: 1.9705295179169888
Date: 2024-06-10, Predicted Demand: 1.9705295179169888
Date: 2024-06-11, Predicted Demand: 1.9705295179169888
Date: 2024-06-12, Predicted Demand: 1.9705295179169888
Date: 2024-06-13, Predicted Demand: 1.9705295179169888
Date: 2024-06-14, Predicted Demand: 1.9705295179169888
Date: 2024-06-15, Predicted Demand: 1.9705295179169888
Date: 2024-06-16, Predicted Demand: 1.9705295179169888
Date: 2024-06-17, Predicted Demand: 1.9705295179169888
Date: 2024-06-18, Predicte

In [37]:
#optimization problem
# objective is to minimise the production cost for period of 30 days
model = gp.Model("ProductionScheduling")

# Parameters
num_days = len(predicted_demand_ensemble) #numer of days
production_cost = np.random.uniform(100, 200, size=num_days)  # Random production cost per day

#printing the random costs generated
for day in range(num_days):
    print(f"Day {day + 1}: Production Cost = {production_cost[day]}")

max_production = 100  # Maximum production capacity per day


# Decision variables
production = model.addVars(num_days, vtype=GRB.CONTINUOUS, name="production")

# Objective: To minimize total production cost
model.setObjective(gp.quicksum(production_cost[i] * production[i] for i in range(num_days)), GRB.MINIMIZE) #objective funtion

# Constraints
for i in range(num_days):
    model.addConstr(production[i] >= predicted_demand_ensemble[i], f"Demand_Constraint_{i}")
    model.addConstr(production[i] <= max_production, f"Capacity_Constraint_{i}")

# model optimization
model.optimize()

# results
for i in range(num_days):
    print(f"Day {i+1}: Produce {production[i].X} units")

Day 1: Production Cost = 110.85597022780992
Day 2: Production Cost = 137.27317175972806
Day 3: Production Cost = 102.6234707878758
Day 4: Production Cost = 165.0016299447326
Day 5: Production Cost = 114.19628384996653
Day 6: Production Cost = 185.85796732430012
Day 7: Production Cost = 168.07178202918726
Day 8: Production Cost = 153.72019701208905
Day 9: Production Cost = 175.2057442448547
Day 10: Production Cost = 102.25282404108252
Day 11: Production Cost = 131.6194459573745
Day 12: Production Cost = 135.4089642477159
Day 13: Production Cost = 192.18575759853567
Day 14: Production Cost = 165.7822919723481
Day 15: Production Cost = 172.43615917070557
Day 16: Production Cost = 158.9095362450998
Day 17: Production Cost = 153.58045129087498
Day 18: Production Cost = 137.42739007450427
Day 19: Production Cost = 188.50606078182204
Day 20: Production Cost = 187.20285907553625
Day 21: Production Cost = 125.7498826185302
Day 22: Production Cost = 136.28960719032355
Day 23: Production Cost = 1

In [36]:
#for different production costs generated randomly
for iteration in range(1, 6):
  model = gp.Model("ProductionScheduling")

  # Parameters
  num_days = len(predicted_demand_ensemble) #numer of days
  production_cost = np.random.uniform(100, 200, size=num_days)  # Random production cost per day

  #printing the random costs generated
  print(f"Iteration {iteration}:")
  for day in range(num_days):
    print(f"Day {day + 1}: Production Cost = {production_cost[day]}")

  max_production = 100  # Maximum production capacity per day


  # Decision variables
  production = model.addVars(num_days, vtype=GRB.CONTINUOUS, name="production")

  # Objective: To minimize total production cost
  model.setObjective(gp.quicksum(production_cost[i] * production[i] for i in range(num_days)), GRB.MINIMIZE) #objective funtion

  # Constraints
  for i in range(num_days):
    model.addConstr(production[i] >= predicted_demand_ensemble[i], f"Demand_Constraint_{i}")
    model.addConstr(production[i] <= max_production, f"Capacity_Constraint_{i}")

  # model optimization
  model.optimize()
  print(f"Iteration {iteration}: Objective = {model.objVal}")

  # results
  for i in range(num_days):
    print(f"Day {i+1}: Produce {production[i].X} units \n" )


Iteration 1:
Day 1: Production Cost = 188.93928196622107
Day 2: Production Cost = 142.93003928386312
Day 3: Production Cost = 193.02581550438322
Day 4: Production Cost = 171.5024014978114
Day 5: Production Cost = 168.00625333857025
Day 6: Production Cost = 110.00247402443514
Day 7: Production Cost = 153.7202512094148
Day 8: Production Cost = 147.15201037583628
Day 9: Production Cost = 167.85429338630718
Day 10: Production Cost = 136.44405734558575
Day 11: Production Cost = 103.92774843631273
Day 12: Production Cost = 177.21178752734397
Day 13: Production Cost = 122.67219135559901
Day 14: Production Cost = 157.31493551354058
Day 15: Production Cost = 187.06869427827652
Day 16: Production Cost = 101.61776256966245
Day 17: Production Cost = 126.78511552830666
Day 18: Production Cost = 175.43875725778338
Day 19: Production Cost = 166.26839114390856
Day 20: Production Cost = 118.42297511551243
Day 21: Production Cost = 158.86853853933837
Day 22: Production Cost = 128.6073455842362
Day 23: P