In [43]:
import pandas as pd
import tensorflow as tf
import sklearn

In [44]:
from tensorflow.keras.layers import Dense, Dropout, Activation, Input
from tensorflow.keras.models import Model
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

In [45]:
# Load vgsales.csv from the working directory into a DataFrame and display it.
base = pd.read_csv("vgsales.csv")
base

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.00
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.00,31.37
...,...,...,...,...,...,...,...,...,...,...,...
16593,16596,Woody Woodpecker in Crazy Castle 5,GBA,2002.0,Platform,Kemco,0.01,0.00,0.00,0.00,0.01
16594,16597,Men in Black II: Alien Escape,GC,2003.0,Shooter,Infogrames,0.01,0.00,0.00,0.00,0.01
16595,16598,SCORE International Baja 1000: The Official Game,PS2,2008.0,Racing,Activision,0.00,0.00,0.00,0.00,0.01
16596,16599,Know How 2,DS,2010.0,Puzzle,7G//AMES,0.00,0.01,0.00,0.00,0.01


In [46]:
# Discard the per-region sales columns and the Name column; Global_Sales is
# the only sales figure kept.
# NOTE: this rebinds `base`, so re-running the cell raises a KeyError because
# the columns are already gone.
base = base.drop(
    columns=['Other_Sales', 'NA_Sales', 'EU_Sales', 'JP_Sales', 'Name']
)

In [47]:
# (rows, columns) of the frame after the column drop.
base.shape

(16598, 6)

In [48]:
# Number of missing values in each column.
base.isna().sum()

Rank              0
Platform          0
Year            271
Genre             0
Publisher        58
Global_Sales      0
dtype: int64

In [49]:
# Show the remaining column labels and their order.
base.columns

Index(['Rank', 'Platform', 'Year', 'Genre', 'Publisher', 'Global_Sales'], dtype='object')

In [50]:
# Feature matrix: Platform, Year, Genre and Publisher.
# Columns are selected by label (the same four columns as positions 1-4 of
# `base`) so the selection cannot silently shift if the column order changes.
features = base.loc[:, ['Platform', 'Year', 'Genre', 'Publisher']].copy()

# Year reaches the network as a raw number, and the null-count summary reports
# missing years. A single NaN input turns the loss, and then every weight, into
# NaN, so the gaps are filled with the median year. Rows are imputed rather
# than dropped so X stays row-aligned with the target taken from `base`.
features['Year'] = features['Year'].fillna(features['Year'].median())

# NOTE(review): Publisher also has missing entries; they are left untouched
# here. Confirm how the one-hot encoder is expected to treat them.
X = features.to_numpy()
X

array([['Wii', 2006.0, 'Sports', 'Nintendo'],
       ['NES', 1985.0, 'Platform', 'Nintendo'],
       ['Wii', 2008.0, 'Racing', 'Nintendo'],
       ...,
       ['PS2', 2008.0, 'Racing', 'Activision'],
       ['DS', 2010.0, 'Puzzle', '7G//AMES'],
       ['GBA', 2003.0, 'Platform', 'Wanadoo']], dtype=object)

In [51]:
# Regression target: the Global_Sales column as a NumPy array.
global_sales = base['Global_Sales'].to_numpy()

In [52]:
# One-hot encode columns 0, 2 and 3 of X and pass the remaining column through
# unchanged, then convert the transformer's output to a dense array.
# NOTE: this cell overwrites X with its encoded form, so it must run exactly
# once after X has been built from `base`.
onehotencoder = ColumnTransformer(
    transformers=[("OneHot", OneHotEncoder(), [0, 2, 3])],
    remainder='passthrough',
)
X = (
    onehotencoder
    .fit_transform(X)
    .toarray()
)

In [53]:
# (rows, columns) of the encoded matrix; the column count is the width the
# model's input layer must accept.
X.shape

(16598, 623)

In [58]:
# Feed-forward regressor graph: two 313-unit ReLU hidden layers followed by a
# single linear output unit.
# The input width is read from X instead of being hard-coded, so the network
# still matches the data if the encoder produces a different number of columns.
input_layer = Input(shape=(X.shape[1],))

# ReLU is applied by the separate Activation layer, mirroring the second hidden
# block; setting it on the Dense layer as well would apply it twice.
hidden_layer1 = Dense(units = 313)(input_layer)
activation1 = Activation('relu')(hidden_layer1)

hidden_layer2 = Dense(units = 313)(activation1)
activation2 = Activation('relu')(hidden_layer2)

# One unit with an identity ('linear') activation: an unbounded real-valued
# prediction.
output_layer = Dense(units = 1)(activation2)
activation_output = Activation('linear')(output_layer)

In [59]:
# Build the model through the final linear activation so that every layer
# defined for the network is actually part of the graph (the activation is the
# identity, so predictions are unchanged). The output stays a one-element list,
# as does the target passed to fit().
regressor = Model(inputs = input_layer, outputs = [activation_output])

In [60]:
# Configure training: mean squared error loss, minimised with Adam.
regressor.compile(loss = 'mse', optimizer = 'adam')

In [61]:
# Train on every row of X: 500 epochs, 100 samples per gradient step.
# The returned History object is the cell's displayed value.
regressor.fit(x = X, y = [global_sales], batch_size = 100, epochs = 500)

Epoch 1/500
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 421.8847
Epoch 2/500
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 3.1343
Epoch 3/500
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 2.5046
Epoch 4/500
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 2.6733
Epoch 5/500
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 2.7576
Epoch 6/500
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 2.1222
Epoch 7/500
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 2.1896
Epoch 8/500
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 2.7379
Epoch 9/500
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 2.4234
Epoch 10/500
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5

<keras.src.callbacks.history.History at 0x22d2ce161e0>

In [62]:
# Predict global sales for the same rows the model was fitted on; no held-out
# set is used in the visible cells, so these are in-sample predictions.
prev_global_sales = regressor.predict(X)

[1m519/519[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step


In [63]:
# Mean of the predicted values (presumably a quick sanity check on their scale).
prev_global_sales.mean()

0.5392665

In [64]:
from sklearn.metrics import mean_absolute_error

# Mean absolute error between the actual and predicted global sales. The
# predictions were made on the training rows, so this is an in-sample figure.
mean_absolute_error(y_true = global_sales, y_pred = prev_global_sales)

0.5917095505494586