In [8]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf

from tensorflow_addons.metrics import RSquare

In [9]:
data = pd.read_csv('dataset.csv')

In [10]:
data

Unnamed: 0,Name,Publisher,Year,Genre,Merketing_Cost,Critic_Score,Rating,Global_Sale
0,Monster Hunter Rise,Capcom,2022,Card Game,,88,M,11200000
1,The Anacrusis,Stray Bombay Company,2022,Shooter,,51,T,200000
2,God of War,PlayStation PC,2022,Action,,94,M,60000128
3,Rainbow Six Extraction,Ubisoft,2022,Action,,71,M,9000000
4,Windjammers 2,DotEmu,2022,Sports,,78,E,40688
...,...,...,...,...,...,...,...,...
70,Crossfire: Legion,Prime Matter,2022,Real-Time,,64,T,10073
71,Dragon Quest Treasures,Square Enix,2022,Role-Playing,,73,E,5511110
72,High on Life,Squanch Games,2022,Shooter,,67,M,25000999
73,Crisis Core: Final Fantasy VII Reunion,Square Enix,2022,Action,,83,T,3963266


In [11]:
columns_to_drop = ['Name','Merketing_Cost']

data.drop(columns_to_drop, axis=1, inplace=True)

In [12]:
data

Unnamed: 0,Publisher,Year,Genre,Critic_Score,Rating,Global_Sale
0,Capcom,2022,Card Game,88,M,11200000
1,Stray Bombay Company,2022,Shooter,51,T,200000
2,PlayStation PC,2022,Action,94,M,60000128
3,Ubisoft,2022,Action,71,M,9000000
4,DotEmu,2022,Sports,78,E,40688
...,...,...,...,...,...,...
70,Prime Matter,2022,Real-Time,64,T,10073
71,Square Enix,2022,Role-Playing,73,E,5511110
72,Squanch Games,2022,Shooter,67,M,25000999
73,Square Enix,2022,Action,83,T,3963266


In [13]:
data.isnull().sum()

Publisher       0
Year            0
Genre           0
Critic_Score    0
Rating          0
Global_Sale     0
dtype: int64

# Encoading

In [14]:
data

Unnamed: 0,Publisher,Year,Genre,Critic_Score,Rating,Global_Sale
0,Capcom,2022,Card Game,88,M,11200000
1,Stray Bombay Company,2022,Shooter,51,T,200000
2,PlayStation PC,2022,Action,94,M,60000128
3,Ubisoft,2022,Action,71,M,9000000
4,DotEmu,2022,Sports,78,E,40688
...,...,...,...,...,...,...
70,Prime Matter,2022,Real-Time,64,T,10073
71,Square Enix,2022,Role-Playing,73,E,5511110
72,Squanch Games,2022,Shooter,67,M,25000999
73,Square Enix,2022,Action,83,T,3963266


In [15]:
onehot_columns = ['Publisher', 'Genre', 'Rating']

In [16]:
def onehot_encode(data, columns):
    for column in columns:
        dummies = pd.get_dummies(data[column])
        data = pd.concat([data, dummies], axis=1)
        data.drop(column, axis=1, inplace=True)
    return data

In [17]:
data = onehot_encode(data, onehot_columns)

In [18]:
data

Unnamed: 0,Year,Critic_Score,Global_Sale,2K Games,2K GamesTake-Two Interactive,ATLUS,Amazon Game Studios,Animal Uprising,Atari,Bandai Namco Games,...,Racing,Real-Time,Role-Playing,Shooter,Simulation,Sports,Strategy,E,M,T
0,2022,88,11200000,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,2022,51,200000,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1
2,2022,94,60000128,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,2022,71,9000000,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,2022,78,40688,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,2022,64,10073,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,1
71,2022,73,5511110,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,1,0,0
72,2022,67,25000999,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,1,0
73,2022,83,3963266,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


# Scaling

In [19]:
y = data['Global_Sale']
X = data.drop('Global_Sale', axis=1)

In [20]:
X

Unnamed: 0,Year,Critic_Score,2K Games,2K GamesTake-Two Interactive,ATLUS,Amazon Game Studios,Animal Uprising,Atari,Bandai Namco Games,Bay 12 GamesKitfox Games,...,Racing,Real-Time,Role-Playing,Shooter,Simulation,Sports,Strategy,E,M,T
0,2022,88,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,2022,51,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1
2,2022,94,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,2022,71,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,2022,78,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70,2022,64,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,1
71,2022,73,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,1,0,0
72,2022,67,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,1,0
73,2022,83,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [21]:
scaler = StandardScaler()

X = scaler.fit_transform(X)

In [22]:
X.shape

(75, 79)

# Training

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)

In [31]:
from sklearn.linear_model import LinearRegression

In [32]:
regressor = LinearRegression()

In [33]:
regressor.fit(X_train, y_train)

LinearRegression()

In [35]:
pred = regressor.predict(X_test)

In [36]:
pred

array([ 4.04192000e+06, -5.76772267e+18, -2.38010581e+18,  4.25664000e+05,
        4.75721600e+07, -1.40688017e+18, -3.35313699e+18,  2.98464000e+06,
       -1.03208206e+18, -2.38010581e+18, -3.35313699e+18, -1.40707463e+18,
        4.59616000e+06, -3.11767030e+18,  7.37564491e+17])

In [38]:
regressor.score(X_test,y_test)

-9.35390036589403e+22