In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns

## Importing CSV as DataFrame

In [4]:
# Importing train and test data
# NOTE(review): this is an absolute, machine-specific directory. TODO: make it
# relative to the repository (or read it from configuration) so the notebook
# can be re-run on another machine.
DATA_DIR = 'C:/Users/S580381/Documents/GitHub/marketing_camp_rev_pred/data'


def _read_split(file_name):
    """Read one split CSV from DATA_DIR and drop a stray saved-index column.

    The X_train preview in this notebook shows an 'Unnamed: 0' column holding
    0, 1, 2, ... -- the row index that was written out with the CSV. Left in
    place it is passed to the models as if it were a feature, so it is removed
    here. Files that do not contain that column are returned unchanged
    (errors='ignore').

    Parameters
    ----------
    file_name : str
        Name of the CSV file inside DATA_DIR, e.g. 'X_train.csv'.

    Returns
    -------
    pandas.DataFrame
        The file contents without the 'Unnamed: 0' column.
    """
    frame = pd.read_csv(f'{DATA_DIR}/{file_name}')
    return frame.drop(columns=['Unnamed: 0'], errors='ignore')


X_train = _read_split('X_train.csv')
# NOTE(review): the y files are presumably saved the same way as X -- confirm
# that they hold exactly one target column once the index column is dropped.
y_train = _read_split('y_train.csv')
X_test = _read_split('X_test.csv')
y_test = _read_split('y_test.csv')

In [5]:
# Preview the first five rows to sanity-check the loaded feature columns.
X_train.head(n=5)

Unnamed: 0,Impressions,Conversion_Rate,Spent,Clicks,Cpa,Age
0,5117,0.0,0.0,0,0.0,32
1,478480,0.04,135.750001,75,45.25,47
2,76355,0.222222,14.63,9,7.315,37
3,475184,0.045455,127.320003,88,31.830001,37
4,208572,0.055556,60.76,36,30.38,42


## Our data is encoded and standardized, the relevant features are selected, and the dataset is split into train and test sets. Now we'll start building our models.

In [7]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from math import sqrt

Define an `evaluate_model` function that computes all the evaluation metrics (MAE, MSE, RMSE and R²) after model training.

In [8]:
def evaluate_model(true, predicted):
    """Compute regression error metrics for a set of predictions.

    Parameters
    ----------
    true
        Observed target values, passed straight to the metric functions.
    predicted
        Model predictions to compare against ``true``.

    Returns
    -------
    tuple
        ``(mae, mse, rmse, r2)`` where ``rmse`` is the square root of ``mse``.
    """
    abs_error = mean_absolute_error(true, predicted)
    squared_error = mean_squared_error(true, predicted)
    # RMSE is derived from the MSE rather than recomputed from the data.
    return abs_error, squared_error, sqrt(squared_error), r2_score(true, predicted)

In [None]:
# Candidate regressors, keyed by the display name used in the printed report.
models = {
    'Decision Tree': DecisionTreeRegressor(random_state=42),
    'Random Forest': RandomForestRegressor(random_state=42),
    'Extra Trees': ExtraTreesRegressor(random_state=42),
    'Gradient Boosting': GradientBoostingRegressor(random_state=42),
    'XGBoost': XGBRegressor(random_state=42, eval_metric='mae'),
    'LightGBM': LGBMRegressor(random_state=42)
}

# Parallel lists: model_list[k] is the name whose test-set R2 is r2_list[k].
model_list = []
r2_list = []

# y_train comes from read_csv, so it is a DataFrame. scikit-learn estimators
# warn when handed a column vector where a 1-D target is expected.
# squeeze("columns") turns a single-column frame into a Series and leaves a
# multi-column frame untouched, so this is safe either way.
y_train_fit = y_train.squeeze("columns")

# Iterate name/estimator pairs directly instead of rebuilding key and value
# lists on every pass.
for model_name, model in models.items():
    # Fit the model
    model.fit(X_train, y_train_fit)

    # Predictions
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    # Evaluate train and test sets
    mae_train, mse_train, rmse_train, r2_train = evaluate_model(y_train, y_train_pred)
    mae_test, mse_test, rmse_test, r2_test = evaluate_model(y_test, y_test_pred)

    # Report header; the matching test R2 is recorded after the report
    print(model_name)
    model_list.append(model_name)

    print("Model performance on Train set:")
    print(f"MAE: {mae_train:.4f}")
    print(f"MSE: {mse_train:.4f}")
    print(f"RMSE: {rmse_train:.4f}")
    print(f"R2: {r2_train:.4f}")

    print('-------------------------------------')

    print("Model performance on Test set:")
    print(f"MAE: {mae_test:.4f}")
    print(f"MSE: {mse_test:.4f}")
    print(f"RMSE: {rmse_test:.4f}")
    print(f"R2: {r2_test:.4f}")

    # Only the test-set R2 is kept for comparing models afterwards.
    r2_list.append(r2_test)

    print('='*40)
    print('\n')