In [2]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, accuracy_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor

In [3]:
models = {
    'Linear Regression': LinearRegression(),
    'Ridge': Ridge(),
    'Lasso': Lasso(),
    'KNN Regressor': KNeighborsRegressor(),
    'Support Vector Regressor': SVR(),
    'Decission Tree': DecisionTreeRegressor(),
    'Random Forest': RandomForestRegressor(),
    'AdaBoost Regressor': AdaBoostRegressor(),
    'Gradient Boosting Regressor': GradientBoostingRegressor(),
    'XGB Regressor': XGBRegressor()
}

In [4]:
# Load Data Set
df = pd.read_csv("data/Video Games Sales(Clean).csv")
df.head()

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,Others,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,Others,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


In [103]:
df.columns

Index(['Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher', 'NA_Sales',
       'EU_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales'],
      dtype='object')

In [104]:
# Seperate Target Variable
X = df.drop(columns=['Name', 'Global_Sales'], axis=1)
y = df['Global_Sales']

In [105]:
# Identify Numerical and Categorical Columns
categorical_columns = [var for var in X.columns if X[var].dtype == 'O']

In [106]:
# Encode Categorical Features
le = LabelEncoder()


for i in categorical_columns:
    X[i] = le.fit_transform(X[i])

In [109]:
columns = X.columns.to_list()

In [110]:
numeric_pipeline = Pipeline(
    [
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler())
    ]
)


transform = ColumnTransformer(
    transformers = [
        ("numeric", numeric_pipeline, columns)
    ]
)

In [111]:
X = transform.fit_transform(X)

In [118]:
# Tarin Test Split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [130]:
evalueate_model = []

In [120]:
def style(txt):
    print(f'{"-"*50}\n{txt}\n{"-"*50}')

In [None]:
def evaluate_model(model, X_train, y_train, X_test, y_test):
    
    # Test on Train Data
    y_pred_train = model.predict(X_train)
    
    # Test on Test Data
    y_pred = model.predict(X_test)
    

In [None]:
for key, value in models.items():
    # Train Mmodel
    
    model = value
    model.fit(X_train, y_train)    
    
    
    # Test the model
    y_pred = model.predict(X_test)
        
    # Calculate metrics
    style(f"Model: {key}")
    print(f"R2 Score: {round(r2_score(y_test, y_pred), 3)}")
    print(f"Mean Absolute Error: {round(mean_absolute_error(y_test, y_pred), 3)}")
    print(f"Mean Squared Error: {round(mean_squared_error(y_test, y_pred), 3)}")

[{'Model': 'Linear Regression', 'R2': 1.0},
 {'Model': 'Ridge', 'R2': 1.0},
 {'Model': 'Lasso', 'R2': 0.26},
 {'Model': 'KNN', 'R2': 0.77},
 {'Model': 'SVM', 'R2': 0.37},
 {'Model': 'Tree', 'R2': 0.84},
 {'Model': 'Forest', 'R2': 0.84},
 {'Model': 'AdaBoost', 'R2': 0.84},
 {'Model': 'Gradient Boost', 'R2': 0.87},
 {'Model': 'XGBoost', 'R2': 0.8},
 {'Model': 'Linear Regression', 'MAE': 0.0},
 {'Model': 'Ridge', 'MAE': 0.0},
 {'Model': 'Lasso', 'MAE': 0.53},
 {'Model': 'KNN', 'MAE': 0.07},
 {'Model': 'SVM', 'MAE': 0.12},
 {'Model': 'Tree', 'MAE': 0.02},
 {'Model': 'Forest', 'MAE': 0.02},
 {'Model': 'AdaBoost', 'MAE': 0.25},
 {'Model': 'Gradient Boost', 'MAE': 0.02},
 {'Model': 'XGBoost', 'MAE': 0.04}]