In [95]:
import joblib 
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

In [96]:
# Load the promotions dataset; the relative path is resolved against the
# notebook's current working directory.
df = pd.read_csv('dataset.csv')

In [97]:
# Preview the first rows to sanity-check the parsed columns.
df.head()

Unnamed: 0,Sale_ID,Date,Event_Type,Event_Name,Promotional_Type,Promotional_Budget,Sales_Revenue,ROI,City
0,SALE_00001,1/8/2021,Islamic Event,Eid Ul Adha,Social Media Campaigns,371581,14469414.16,37.94,Lahore
1,SALE_00002,1/15/2021,Islamic Event,Eid Ul Adha,Charity Promotions,249464,810458.56,2.25,Islamabad
2,SALE_00003,1/22/2021,Islamic Event,Eid Ul Adha,Celebrity Endorsements,735053,2016751.77,1.74,Faisalabad
3,SALE_00004,1/29/2021,Islamic Event,Kashmir Day,Discount offers,685627,1137531.68,0.66,Peshawar
4,SALE_00005,2/5/2021,Islamic Event,Kashmir Day,TV and Radio Campaigns,388376,889536.81,1.29,Sukkur


In [98]:
# Count missing values per column before any modelling.
df.isna().sum()

Sale_ID               0
Date                  0
Event_Type            0
Event_Name            0
Promotional_Type      0
Promotional_Budget    0
Sales_Revenue         0
ROI                   0
City                  0
dtype: int64

In [99]:
# Row count per promotional type, most frequent first.
df['Promotional_Type'].value_counts()

Promotional_Type
Social Media Campaigns                 973
Discount offers                        971
Celebrity Endorsements                 417
Charity Promotions                     417
TV and Radio Campaigns                 277
Retail Partnerships                    277
Sponsorship of Local Events            139
Educational Campaigns                  139
In-Store Displays                      139
Wedding Packages                       139
Recipe Contests                        139
Promotions For Kashmir Relief          139
Collaborations with NGOs               139
Special Packaging                      139
Branding with Religious themes         139
Collaborations with National Events    139
Advertisements                         139
Partnership with Nutritionists         139
Name: count, dtype: int64

In [100]:
# Row count per city, to check how evenly the cities are represented.
df['City'].value_counts()

City
Lahore            417
Islamabad         417
Faisalabad        417
Peshawar          417
Sukkur            417
Multan            417
Hyderabad         417
Larkana           417
Nawabshah         416
Bahawalpur        416
Rahim Yar Khan    416
Karachi           416
Name: count, dtype: int64

In [101]:
# Summary statistics for the numeric columns (spread, quartiles, extremes).
df.describe()

Unnamed: 0,Promotional_Budget,Sales_Revenue,ROI
count,5000.0,5000.0,5000.0
mean,548085.9552,1541907.0,1.83876
std,259079.929845,1359662.0,2.924792
min,100364.0,160171.4,0.5
25%,324440.5,820567.5,1.13
50%,551797.5,1382538.0,1.77
75%,771930.5,2102531.0,2.38
max,999926.0,44836070.0,158.64


In [102]:
# Column dtypes, non-null counts and memory footprint.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5000 entries, 0 to 4999
Data columns (total 9 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Sale_ID             5000 non-null   object 
 1   Date                5000 non-null   object 
 2   Event_Type          5000 non-null   object 
 3   Event_Name          5000 non-null   object 
 4   Promotional_Type    5000 non-null   object 
 5   Promotional_Budget  5000 non-null   int64  
 6   Sales_Revenue       5000 non-null   float64
 7   ROI                 5000 non-null   float64
 8   City                5000 non-null   object 
dtypes: float64(2), int64(1), object(6)
memory usage: 351.7+ KB


In [103]:
# Row count per event name.
df['Event_Name'].value_counts()

Event_Name
Ramadan                695
Eid Ul Fitr            556
Wedding Season         556
National Health Day    556
Eid Ul Adha            555
Kashmir Day            417
Muharram               417
Labour Day             417
National Food Day      417
Independence Day       414
Name: count, dtype: int64

In [104]:
# Row count per promotional type (same query as an earlier cell in this notebook).
df['Promotional_Type'].value_counts()

Promotional_Type
Social Media Campaigns                 973
Discount offers                        971
Celebrity Endorsements                 417
Charity Promotions                     417
TV and Radio Campaigns                 277
Retail Partnerships                    277
Sponsorship of Local Events            139
Educational Campaigns                  139
In-Store Displays                      139
Wedding Packages                       139
Recipe Contests                        139
Promotions For Kashmir Relief          139
Collaborations with NGOs               139
Special Packaging                      139
Branding with Religious themes         139
Collaborations with National Events    139
Advertisements                         139
Partnership with Nutritionists         139
Name: count, dtype: int64

In [105]:
# (rows, columns) of the loaded frame.
df.shape

(5000, 9)

In [106]:
# One independent encoder per categorical column, so each one holds its own
# label-to-integer mapping.
le_event, le_type, le_city = (LabelEncoder() for _ in range(3))

In [107]:
# Replace each categorical column with the integer codes learned by its encoder.
# NOTE: this overwrites `df` in place, so the cell is not idempotent — running
# it a second time refits every encoder on the integer codes instead of the
# original labels. Reload the dataset before re-running.
for column, encoder in (
    ("Event_Name", le_event),
    ("Promotional_Type", le_type),
    ("City", le_city),
):
    df[column] = encoder.fit_transform(df[column])

In [108]:
# Confirm that Event_Name, Promotional_Type and City now hold integer codes.
df.head()

Unnamed: 0,Sale_ID,Date,Event_Type,Event_Name,Promotional_Type,Promotional_Budget,Sales_Revenue,ROI,City
0,SALE_00001,1/8/2021,Islamic Event,0,13,371581,14469414.16,37.94,5
1,SALE_00002,1/15/2021,Islamic Event,0,3,249464,810458.56,2.25,3
2,SALE_00003,1/22/2021,Islamic Event,0,2,735053,2016751.77,1.74,1
3,SALE_00004,1/29/2021,Islamic Event,3,6,685627,1137531.68,0.66,9
4,SALE_00005,2/5/2021,Islamic Event,3,16,388376,889536.81,1.29,11


In [109]:
# Predictors and target for the ROI regression.
# NOTE(review): in the previewed rows ROI equals
# Sales_Revenue / Promotional_Budget - 1, and both of those columns are used as
# predictors here — confirm this is intended, since a linear model cannot
# represent that ratio exactly.
roi_features = ['Event_Name', 'Promotional_Budget', 'Promotional_Type', 'City', 'Sales_Revenue']
X = df[roi_features]
y = df['ROI']

In [110]:
# (rows, features) of the predictor matrix.
X.shape

(5000, 5)

In [111]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Chain the scaler and the regressor into a single estimator so the min-max
# scaling learned from the training rows is really applied: once when fitting,
# and again on every `model.predict(...)` call. Callers therefore keep passing
# raw, unscaled feature frames, and the scaler never sees the test rows.
# `scaler` stays bound so the scaling step can still be inspected separately.
scaler = MinMaxScaler()
model = make_pipeline(scaler, LinearRegression())
model.fit(X_train, y_train)



In [112]:
# joblib.dump(model, 'roi_model.pkl')
# print("Model saved successfully!")

In [113]:
# Score the fitted ROI model on the held-out rows.
y_pred = model.predict(X_test)

# The three names are unpacked in the same order as the metric callables.
mse, mae, r2 = (
    score_fn(y_test, y_pred)
    for score_fn in (mean_squared_error, mean_absolute_error, r2_score)
)

# One metric per line, rounded to two decimals.
print(
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"Mean Absolute Error (MAE): {mae:.2f}",
    f"R-squared (R²): {r2:.2f}",
    sep="\n",
)

Mean Squared Error (MSE): 0.31
Mean Absolute Error (MAE): 0.36
R-squared (R²): 0.64


In [114]:
# Predictors and target for the promotional-budget regression.
# NOTE(review): in the previewed rows ROI equals
# Sales_Revenue / Promotional_Budget - 1, so the target looks recoverable from
# the ROI and Sales_Revenue predictors — confirm that both will be known at
# prediction time.
budget_features = ['Event_Name', 'ROI', 'Promotional_Type', 'City', 'Sales_Revenue']
X = df[budget_features]
y = df['Promotional_Budget']

In [115]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Chain the scaler and the regressor into a single estimator so the min-max
# scaling learned from the training rows is really applied: once when fitting,
# and again on every `model.predict(...)` call. Callers therefore keep passing
# raw, unscaled feature frames, and the scaler never sees the test rows.
# `scaler` stays bound so the scaling step can still be inspected separately.
scaler = MinMaxScaler()
model = make_pipeline(scaler, LinearRegression())
model.fit(X_train, y_train)


In [116]:
# joblib.dump(model, 'budget_model.pkl')
# print("Model saved successfully!")

In [117]:
# Score the fitted budget model on the held-out rows.
y_pred = model.predict(X_test)

# The three names are unpacked in the same order as the metric callables.
mse, mae, r2 = (
    score_fn(y_test, y_pred)
    for score_fn in (mean_squared_error, mean_absolute_error, r2_score)
)

# One metric per line, rounded to two decimals.
print(
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"Mean Absolute Error (MAE): {mae:.2f}",
    f"R-squared (R²): {r2:.2f}",
    sep="\n",
)

Mean Squared Error (MSE): 9728175821.78
Mean Absolute Error (MAE): 77639.16
R-squared (R²): 0.85


In [118]:
# joblib.dump(le_event, "event_encoder.pkl")
# joblib.dump(le_type, "type_encoder.pkl")
# joblib.dump(le_city, "city_encoder.pkl")