In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import tensorflow as tf
import keras_tuner as kt

In [2]:
#import csv
game_df = pd.read_csv('data/videogames_final.csv')
#drop extra columns
game_df.drop(columns = ['Unnamed: 0','NA_Sales','EU_Sales','JP_Sales','Other_Sales'],inplace = True)
game_df

Unnamed: 0,Game_Title,Platform,Year,Genre,Publisher,Global_Sales
0,Wii Sports,Wii,2006.0,Sports,Nintendo,82.74
1,Super Mario Bros.,NES,1985.0,Platform,Nintendo,40.24
2,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,35.82
3,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,33.00
4,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,31.37
...,...,...,...,...,...,...
21171,Strawberry Nauts,PSV,2016.0,Adventure,Unknown,0.01
21172,Aiyoku no Eustia,PSV,2014.0,Misc,dramatic create,0.01
21173,Samurai Warriors: Sanada Maru,PS3,2016.0,Action,Tecmo Koei,0.01
21174,Haitaka no Psychedelica,PSV,2016.0,Adventure,Idea Factory,0.01


In [3]:
game_df['Success_sales'] = np.where(game_df['Global_Sales']>=4, 'good','bad')

In [4]:
game_df.head()

Unnamed: 0,Game_Title,Platform,Year,Genre,Publisher,Global_Sales,Success_sales
0,Wii Sports,Wii,2006.0,Sports,Nintendo,82.74,good
1,Super Mario Bros.,NES,1985.0,Platform,Nintendo,40.24,good
2,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,35.82,good
3,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,33.0,good
4,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,31.37,good


In [5]:
#convert categorical into numerical
game_dummy = pd.get_dummies(game_df)
game_dummy

Unnamed: 0,Year,Global_Sales,Game_Title_ Beyblade Burst,Game_Title_ Fire Emblem Fates,Game_Title_ Frozen: Olaf's Quest,Game_Title_ Haikyu!! Cross Team Match!,Game_Title_ Tales of Xillia 2,Game_Title_'98 Koshien,Game_Title_.hack//G.U. Vol.1//Rebirth,Game_Title_.hack//G.U. Vol.2//Reminisce,...,Publisher_dramatic create,Publisher_fonfun,Publisher_iWin,Publisher_id Software,Publisher_imageepoch Inc.,Publisher_inXile Entertainment,"Publisher_mixi, Inc",Publisher_responDESIGN,Success_sales_bad,Success_sales_good
0,2006.0,82.74,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
1,1985.0,40.24,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,2008.0,35.82,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,2009.0,33.00,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4,1996.0,31.37,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21171,2016.0,0.01,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
21172,2014.0,0.01,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,1,0
21173,2016.0,0.01,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
21174,2016.0,0.01,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [6]:
# Split data into our features and target arrays
y = game_dummy['Success_sales_good']
X = game_dummy.drop(columns = 'Success_sales_good').values
# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state= 70)

In [7]:
#StandardScaler
game_scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = game_scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [8]:
#create function to determine the best machine learning model
def create_model(hp): 
    number_input = len(X_train_scaled[0])
    game_model = tf.keras.models.Sequential()
    # Allow kerastuner to decide which activation function to use in hidden layers
    activation = hp.Choice('activation',['relu','tanh','sigmoid'])
    
    # Allow kerastuner to decide number of neurons in first layer
    game_model.add(tf.keras.layers.Dense(units=hp.Int('first_units',
        min_value=1,
        max_value=100,
        step=2), activation = activation, input_dim = number_input))
    # Allow kerastuner to decide number of hidden layers and neurons in hidden layers
    for i in range(hp.Int('num_layers', 1, 5)):
        game_model.add(tf.keras.layers.Dense(units=hp.Int('units_' + str(i),
            min_value=1,
            max_value=50,
            step=2),
            activation=activation))
    
    game_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))
    # Compile the model
    game_model.compile(loss="binary_crossentropy", optimizer='adam', metrics=["accuracy"])
    
    return game_model

In [9]:
#hyperband() to find the best hyperparameters
game_tuner = kt.Hyperband(
    create_model,
    objective='val_accuracy',
    max_epochs=25,
    hyperband_iterations=4)

In [10]:
# Kerastuner search for best hyperparameters
game_tuner.search(X_train_scaled,y_train,epochs=25,validation_data=(X_test_scaled,y_test))

Trial 120 Complete [00h 03m 00s]
val_accuracy: 0.9931998252868652

Best val_accuracy So Far: 0.9990555047988892
Total elapsed time: 01h 39m 20s
INFO:tensorflow:Oracle triggered exit


In [11]:
best_params = game_tuner.get_best_hyperparameters()[0]
best_params.values

{'activation': 'sigmoid',
 'first_units': 63,
 'num_layers': 4,
 'units_0': 1,
 'units_1': 21,
 'units_2': 29,
 'units_3': 3,
 'units_4': 25,
 'tuner/epochs': 25,
 'tuner/initial_epoch': 0,
 'tuner/bracket': 0,
 'tuner/round': 0}

In [12]:
best_model = game_tuner.get_best_models(1)[0]

In [13]:
model_loss, model_accuracy = best_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

166/166 - 1s - loss: 0.0210 - accuracy: 0.9991 - 581ms/epoch - 4ms/step
Loss: 0.02104298397898674, Accuracy: 0.9990555047988892


In [14]:
best_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 63)                760032    
                                                                 
 dense_1 (Dense)             (None, 1)                 64        
                                                                 
 dense_2 (Dense)             (None, 21)                42        
                                                                 
 dense_3 (Dense)             (None, 29)                638       
                                                                 
 dense_4 (Dense)             (None, 3)                 90        
                                                                 
 dense_5 (Dense)             (None, 1)                 4         
                                                                 
Total params: 760,870
Trainable params: 760,870
Non-trai