In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt



from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split 

import tensorflow as tf
from tensorflow_addons.metrics import RSquare

In [None]:
# Load the video game sales table, using the 'Rank' column as the row index.
data = pd.read_csv(
    '../input/videogamesales/vgsales.csv',
    index_col='Rank',
)
data

# Preprocessing

In [None]:
# Remove the game title and the per-region sales figures; only the remaining
# columns are kept for the rest of the notebook.
data = data.drop(
    ['Name', 'NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales'],
    axis='columns',
)

In [None]:
data  # inspect the frame after the unused columns were dropped

In [None]:
# Count the missing values in each column.
data.isna().sum()

In [None]:
# Fill missing 'Year' entries with the mean of the known years.
data = data.fillna({'Year': data['Year'].mean()})

In [None]:
# Drop every row (not column) that still contains a missing value.
data = data.dropna(axis='index', how='any')

In [None]:
# Re-check the per-column missing-value counts after imputing and dropping.
data.isna().sum()

In [None]:
data  # inspect the frame after missing values were filled / dropped

In [None]:
data['Platform'].unique() # List the distinct values of the 'Platform' column

In [None]:
data['Genre'].unique() # List the distinct values of the 'Genre' column

In [None]:
# Collapse rarely occurring publishers into one 'Small Publisher' bucket so the
# later one-hot encoding does not create a column for every tiny publisher.
MIN_PUBLISHER_TITLES = 50  # publishers with fewer rows than this are merged

counts = data['Publisher'].value_counts()

# Vectorized: map each row to its publisher's frequency, then keep the name
# only where that frequency reaches the threshold.
publisher_frequency = data['Publisher'].map(counts)
data['Publisher'] = data['Publisher'].where(
    publisher_frequency >= MIN_PUBLISHER_TITLES,
    'Small Publisher',
)

In [None]:
data  # inspect the frame after small publishers were merged

In [None]:
onehot_columns = ['Platform', 'Genre', 'Publisher'] # Names of the columns passed to onehot_encode below

In [None]:
def onehot_encode(data, columns, prefix=False):
    """Replace the given columns with one-hot indicator columns.

    Every column named in ``columns`` is removed from the result and its
    indicator (dummy) columns are appended on the right, in the order the
    columns are listed. The input frame is not modified.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the columns to encode.
    columns : iterable of str
        Names of the columns to one-hot encode.
    prefix : bool, default False
        When True, each indicator column is named ``<column>_<value>`` so that
        identical values in different columns cannot produce duplicate column
        names. The default keeps the bare value as the column name.

    Returns
    -------
    pandas.DataFrame
        A new frame with the untouched columns first, followed by the
        indicator columns.

    Raises
    ------
    KeyError
        If a name in ``columns`` is not a column of ``data``.
    """
    columns = list(columns)
    untouched = data.drop(columns=columns)
    # Always read the source column from the original frame: reading it from a
    # frame that already holds dummy columns breaks when a dummy column happens
    # to share its name with a column that is encoded later.
    dummy_frames = [
        pd.get_dummies(data[column], prefix=column if prefix else None)
        for column in columns
    ]
    return pd.concat([untouched, *dummy_frames], axis=1)

In [None]:
# Swap the categorical columns for their one-hot indicator columns.
data = onehot_encode(data, columns=onehot_columns)

In [None]:
data  # inspect the frame after one-hot encoding

In [None]:
# Separate the feature columns from the regression target 'Global_Sales'.
X = data.drop(columns=['Global_Sales'])
y = data['Global_Sales']

In [None]:
y  # target values ('Global_Sales')

In [None]:
X  # feature columns (everything except 'Global_Sales')

In [None]:
# Standardize the feature matrix with scikit-learn's StandardScaler.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# further down, so statistics of the test rows influence the scaling — consider
# fitting on the training rows only.
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)


In [None]:
X.shape  # (rows, feature columns); the second value is the width the model's Input layer must accept

# Training 

In [None]:
# Hold out 20% of the rows for testing. A fixed random_state makes the split,
# and therefore the test metrics reported below, repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42
)


In [None]:
# Build and train a small fully connected regression network (Keras functional API).

# Take the input width from the training matrix instead of hard-coding it, so
# the model still fits when the one-hot encoding yields a different number of
# feature columns.
n_features = X_train.shape[1]

inputs = tf.keras.Input(shape=(n_features,))
x = tf.keras.layers.Dense(128, activation='relu')(inputs)  # first hidden layer
x = tf.keras.layers.Dense(128, activation='relu')(x)  # second hidden layer
outputs = tf.keras.layers.Dense(1)(x)  # single output unit, no activation

# Tie the input and output tensors together into a trainable model.
model = tf.keras.Model(inputs=inputs, outputs=outputs)

optimizer = tf.keras.optimizers.RMSprop(0.001)  # RMSprop, learning rate 0.001

# Mean squared error is the training objective.
model.compile(
    optimizer=optimizer,
    loss='mse'
)

batch_size = 64  # samples per gradient update
epochs = 300  # passes over the training data

# 20% of the training rows are set aside for validation; the returned history
# holds the per-epoch 'loss' and 'val_loss' used by the plots below.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=batch_size,
    epochs=epochs,
    verbose=0
)

# Results

In [None]:
# Plot the training and validation loss for every epoch.
train_loss = history.history['loss']
val_loss = history.history['val_loss']
epochs_range = range(1, epochs + 1)  # 1-based epoch numbers for the x axis

plt.figure(figsize=(14, 10))

for curve, label in ((train_loss, "Training Loss"), (val_loss, "Validation Loss")):
    plt.plot(epochs_range, curve, label=label)

plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend()

plt.show()

In [None]:
np.argmin(val_loss)  # zero-based position of the lowest validation loss

In [None]:
# Predict on the held-out rows and drop the singleton dimensions from the output.
y_pred = model.predict(X_test).squeeze()

# Accumulate the R^2 metric over the test targets and predictions.
result = RSquare()
result.update_state(y_test, y_pred)

print("R^2 Score:", result.result())

In [None]:
model.evaluate(X_test, y_test)  # loss on the held-out test rows (the model was compiled with loss='mse')

In [None]:
history.history['val_loss']  # raw validation-loss values recorded during training (same series as plotted above)