In [3]:
import pandas as pd
import statistics
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

In [4]:
# Load the dataset
data = pd.read_csv('event_sponsorship_roi_dataset.csv')

# Display the first few rows of the dataset to understand its structure
data.head()


Unnamed: 0,Event Type,Sponsor Types,Sponsor Costs,Expected Footfall,Budget,Total Revenue,ROI
0,Festivals and fairs,Financial|Merchandise|In Kind,98964.44|20812.52|80396.99,10852,170896.91,208114.73,21.78
1,Sports events,Merchandise|In Kind|Food Stalls|Influencer|Inf...,30582.89|30125.97|27896.11|64051.25|54598.67,53523,144000.68,166587.41,15.69
2,Entertainment and media events,Merchandise|Influencer|Media Sponsorship|Phila...,37590.81|86547.63|15244.14|75434.66|15662.31,21050,184722.98,254145.28,37.58
3,Community and Charity events,Philanthropy|In Kind,8374.48|34478.24,1555,31870.87,52517.53,64.78
4,Conferences and seminars,In Kind|Philanthropy|Philanthropy,64147.12|77142.88|82455.06,1400,176779.21,272626.79,54.22


In [5]:
# Split 'Sponsor Costs' and convert to numeric
data['Sponsor Costs'] = data['Sponsor Costs'].apply(lambda x: statistics.mean(map(float, x.split('|'))))


In [6]:
data.head(2)

Unnamed: 0,Event Type,Sponsor Types,Sponsor Costs,Expected Footfall,Budget,Total Revenue,ROI
0,Festivals and fairs,Financial|Merchandise|In Kind,66724.65,10852,170896.91,208114.73,21.78
1,Sports events,Merchandise|In Kind|Food Stalls|Influencer|Inf...,41450.978,53523,144000.68,166587.41,15.69


In [7]:
# Split 'Sponsor Types' and create dummy variables
sponsor_types = data['Sponsor Types'].str.get_dummies(sep='|')
data = pd.concat([data.drop('Sponsor Types', axis=1), sponsor_types], axis=1)


In [8]:
data.head(2)

Unnamed: 0,Event Type,Sponsor Costs,Expected Footfall,Budget,Total Revenue,ROI,Financial,Food Stalls,In Kind,Influencer,Media Sponsorship,Merchandise,Philanthropy
0,Festivals and fairs,66724.65,10852,170896.91,208114.73,21.78,1,0,1,0,0,1,0
1,Sports events,41450.978,53523,144000.68,166587.41,15.69,0,1,1,1,0,1,0


In [9]:
data = pd.get_dummies(data, columns=['Event Type'], dtype=int)

In [10]:
data.head()

Unnamed: 0,Sponsor Costs,Expected Footfall,Budget,Total Revenue,ROI,Financial,Food Stalls,In Kind,Influencer,Media Sponsorship,Merchandise,Philanthropy,Event Type_Community and Charity events,Event Type_Conferences and seminars,Event Type_Entertainment and media events,Event Type_Festivals and fairs,Event Type_Sports events,Event Type_Virtual event
0,66724.65,10852,170896.91,208114.73,21.78,1,0,1,0,0,1,0,0,0,0,1,0,0
1,41450.978,53523,144000.68,166587.41,15.69,0,1,1,1,0,1,0,0,0,0,0,1,0
2,46095.91,21050,184722.98,254145.28,37.58,0,0,1,1,1,1,1,0,0,1,0,0,0
3,21426.36,1555,31870.87,52517.53,64.78,0,0,1,0,0,0,1,1,0,0,0,0,0
4,74581.686667,1400,176779.21,272626.79,54.22,0,0,1,0,0,0,1,0,1,0,0,0,0


In [11]:
# Separate features and target
X = data.drop(['Total Revenue', 'ROI'], axis=1)
y = data['Total Revenue']

In [12]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [19]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

import pickle
scaler_filename = 'scaler.pkl'
with open(scaler_filename, 'wb') as file:
    pickle.dump(scaler, file)

In [14]:
r1=LinearRegression()
r2=Lasso()
r3=Ridge()
r4=ElasticNet()
r5=SVR()
r6=DecisionTreeRegressor()
r7=RandomForestRegressor()
reg=[r1,r2,r3,r4,r5,r6,r7]
names=['LR','LASSO','RIDGE','EL','SVR','DTR','RFR']
mse={}
r2s={}
t={}
import time
from sklearn.metrics import mean_squared_error,r2_score

################################################################################

for model,name in zip(reg,names):
  t1=time.time()
  model.fit(X_train_scaled,y_train)
  y_pred=model.predict(X_test_scaled)
  t2=time.time()
  mse[name]=mean_squared_error(y_test,y_pred)
  r2s[name]=r2_score(y_test,y_pred)
  t[name]=t2-t1

In [15]:
for i,j in r2s.items():
  print(i,':',j*100)

LR : 92.03141235916932
LASSO : 92.0313341301474
RIDGE : 92.0076191531405
EL : 29.8515781246934
SVR : -2.9409384692581364
DTR : 83.7578820447371
RFR : 91.39937214093355


In [18]:
import pickle
# Initialize and train the Lasso model
lasso = Lasso()

t1 = time.time()
lasso.fit(X_train_scaled, y_train)
y_pred = lasso.predict(X_test_scaled)
t2 = time.time()

# Evaluate the performance
mse_lasso = mean_squared_error(y_test, y_pred)
r2_lasso = r2_score(y_test, y_pred)
training_time = t2 - t1

print('LASSO R^2:', r2_lasso * 100)
print('LASSO MSE:', mse_lasso)
print('LASSO Training Time:', training_time)

# Save the trained Lasso model using pickle
model_filename = 'lasso_model.pkl'
with open(model_filename, 'wb') as file:
    pickle.dump(lasso, file)

print(f'Trained Lasso model saved as {model_filename}')


LASSO R^2: 92.0313341301474
LASSO MSE: 1232849665.1988306
LASSO Training Time: 0.014969348907470703
Trained Lasso model saved as lasso_model.pkl


In [16]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=[X_train_scaled.shape[1]]),
    tf.keras.layers.Dense(128, activation='relu', input_shape=[X_train_scaled.shape[1]]),
    tf.keras.layers.Dense(256, activation='relu', input_shape=[X_train_scaled.shape[1]]),
    tf.keras.layers.Dense(256, activation='relu', input_shape=[X_train_scaled.shape[1]]),
    tf.keras.layers.Dense(128, activation='relu', input_shape=[X_train_scaled.shape[1]]),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error')

# Train the model
t1 = time.time()
model.fit(X_train_scaled, y_train, epochs=200, verbose=0)
t2 = time.time()

# Predict and evaluate
y_pred_ann = model.predict(X_test_scaled)
mse['ANN'] = mean_squared_error(y_test, y_pred_ann)
r2s['ANN'] = r2_score(y_test, y_pred_ann)
t['ANN'] = t2 - t1

for i, j in r2s.items():
  print(i, '=:=', j)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
LR =:= 0.9203141235916932
LASSO =:= 0.920313341301474
RIDGE =:= 0.920076191531405
EL =:= 0.298515781246934
SVR =:= -0.029409384692581364
DTR =:= 0.8375788204473711
RFR =:= 0.9139937214093355
ANN =:= 0.9095639358996173
