### Import Libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV
import numpy as np
import joblib

### Read CSV file

In [2]:
# Read the test set from the CSV in the working directory into a DataFrame.
test_data = pd.read_csv(filepath_or_buffer='test_data.csv')

### Load the model

In [3]:
# Restore the persisted model object that the prediction cells call.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load a .pkl that comes from a trusted source.
best_gb_model = joblib.load(filename='best_gb_model.pkl')

### Predict for a single instance

In [4]:
# Score one hand-entered observation with the loaded model.
#
# The categorical fields are parsed as integers instead of being kept as the
# raw strings returned by input(): the recorded run for this cell enters
# integer codes for all of them, and test_data stores its identifiers
# label-encoded, so the model is fed numeric codes. Parsing up front keeps
# every column of the one-row frame numeric and makes a non-numeric entry
# fail at the prompt rather than inside predict().
# NOTE(review): assumes every categorical feature was integer-encoded at
# training time - confirm against the training notebook.

# Get user inputs
new_item_identifier = int(input("Enter Item Identifier: "))
item_weight = float(input("Enter Item Weight: "))
item_fat_content = int(input("Enter Item Fat Content: "))
item_visibility = float(input("Enter Item Visibility: "))
item_type = int(input("Enter Item Type: "))
item_mrp = float(input("Enter Item MRP: "))
outlet_identifier = int(input("Enter Outlet Identifier: "))
outlet_establishment_year = int(input("Enter Outlet Establishment Year: "))
outlet_size = int(input("Enter Outlet Size: "))
outlet_location_type = int(input("Enter Outlet Location Type: "))
outlet_type = int(input("Enter Outlet Type: "))

# Prepare input data: a single-row frame, one column per feature, in the same
# order as before.
new_data = pd.DataFrame({
    'Item_Identifier': [new_item_identifier],
    'Item_Weight': [item_weight],
    'Item_Fat_Content': [item_fat_content],
    'Item_Visibility': [item_visibility],
    'Item_Type': [item_type],
    'Item_MRP': [item_mrp],
    'Outlet_Identifier': [outlet_identifier],
    'Outlet_Establishment_Year': [outlet_establishment_year],
    'Outlet_Size': [outlet_size],
    'Outlet_Location_Type': [outlet_location_type],
    'Outlet_Type': [outlet_type]
})

# Make predictions (the result holds one value, for the single row above)
predictions = best_gb_model.predict(new_data)

# Display the predictions
print("Predictions:", predictions)

Enter Item Identifier: 156
Enter Item Weight: 9.3
Enter Item Fat Content: 1
Enter Item Visibility: 0.016047301
Enter Item Type: 4
Enter Item MRP: 249.8092
Enter Outlet Identifier: 9
Enter Outlet Establishment Year: 1999
Enter Outlet Size: 2
Enter Outlet Location Type: 1
Enter Outlet Type: 1
Predictions: [4227.56546044]


### Check the difference

In [5]:
# Reference value and the model's output for the same instance. The predicted
# figure is typed in by hand and matches the prediction printed by the
# single-instance cell.
actual_value, predicted_value = 3735.138, 4227.56546044

# Size of the miss, ignoring its direction.
abs_diff = abs(actual_value - predicted_value)

# The same miss expressed as a percentage of the reference value.
percentage_error = (abs_diff / actual_value) * 100

print(f"Absolute Difference: {abs_diff}")
print(f"Percentage Error: {percentage_error}")

Absolute Difference: 492.4274604399998
Percentage Error: 13.183648380327575


### Make predictions on the test data

In [6]:
# Score every row of test_data with the loaded model.
# This rebinds `predictions`, which the single-instance cell also assigns, so
# from here on it holds the batch result.
# NOTE(review): the next cell overwrites the identifier columns of test_data in
# place with decoded values; presumably the model needs the encoded form, so
# this cell has to run before that one - confirm.
predictions = best_gb_model.predict(test_data)

### Load the saved label encoders and decode the identifier columns

In [7]:
# Restore the two persisted encoders (joblib.load unpickles, so the files must
# come from a trusted source).
label_encoder_item = joblib.load('label_encoder_item.joblib')
label_encoder_outlet = joblib.load('label_encoder_outlet.joblib')

# Replace each encoded identifier column with the encoder's inverse mapping.
# The columns are overwritten in place, so running this cell a second time
# would pass already-decoded values back into inverse_transform.
for column_name, fitted_encoder in (
    ('Item_Identifier', label_encoder_item),
    ('Outlet_Identifier', label_encoder_outlet),
):
    test_data[column_name] = fitted_encoder.inverse_transform(test_data[column_name])

### Add the predictions as a new column in your test_data DataFrame

In [8]:
# Store the batch predictions on the frame under 'Item_Outlet_Sales'.
test_data['Item_Outlet_Sales'] = predictions

# Write the frame, prediction column included, to CSV without the row index.
test_data.to_csv(path_or_buf='test_file_with_predictions.csv', index=False)