In [1]:
# Load libraries
import os
import numpy as np
import pandas as pd
from keras import models
from keras import layers
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV, cross_val_score, KFold
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler
from sklearn import preprocessing

# Set random seed
np.random.seed(0)

Using TensorFlow backend.


### Get Data

In [2]:
i_path = 'D:\\Data\\Box-Office-Forecasting'
m = pd.read_csv(os.path.join(i_path, 'movie-master-final.csv'), header=0, sep=';', engine='python', encoding= 'utf8')

#target_variable = 'revenue_range'; problem_type = 'MULTICLASS'
target_variable = 'is_profitable'; problem_type = 'BINARY'

all_features = ['mpaa', 'budget', 'seasonality', 'is_sequel', 'screen_count', 'runtime']
yc_features = ['like_ratio', 'polarity_tb', 'polarity_sia']
# all_features = all_features + yc_features
data = m[all_features + [target_variable]]

# like_ratio column has some 'infinity' values, we replace them with one.
data = data.replace(np.Inf, 1)

# Factorize revenue_range to get numbers instead of labels
factor = pd.factorize(data[target_variable])
data[target_variable] = factor[0]
definitions = factor[1]

# Use LabelEncoder to convert textual classifications to numeric. We will use the same encoder later to convert them back.
encoder = preprocessing.LabelEncoder()
if 'mpaa' in all_features:
    data['mpaa'] = encoder.fit_transform(data['mpaa'].astype(str))
if 'genre' in all_features:
    data['genre'] = encoder.fit_transform(data['genre'].astype(str))

# Split columns into independent/predictor variables vs dependent/response/outcome variable
X = np.array(data.drop([target_variable], 1))
y = np.array(data[target_variable])

# Scale the data. We will use the same scaler later for scoring function
scaler = preprocessing.StandardScaler().fit(X)
X = scaler.transform(X)

# Training - Test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.20, random_state=17)

# 5 fold stratified cross validation
kf = KFold(n_splits=5, shuffle=True, random_state=17)

In [3]:
number_of_features = len(all_features)
# Create function returning a compiled network
def create_network(optimizer='rmsprop'):
    
    # Start neural network
    network = models.Sequential()

    # Add fully connected layer with a ReLU activation function
    network.add(layers.Dense(units=16, activation='relu', input_shape=(number_of_features,)))

    # Add fully connected layer with a ReLU activation function
    network.add(layers.Dense(units=16, activation='relu'))

    # Add fully connected layer with a sigmoid activation function
    network.add(layers.Dense(units=1, activation='sigmoid'))

    # Compile neural network
    network.compile(loss='binary_crossentropy', # Cross-entropy
                    optimizer=optimizer, # Optimizer
                    metrics=['accuracy']) # Accuracy performance metric
    
    # Return compiled network
    return network

### Grid search 

In [4]:
# Wrap Keras model so it can be used by scikit-learn
neural_network = KerasClassifier(build_fn=create_network, verbose=0)

# Create hyperparameter space
epochs = [5, 10]
batches = [32, 64]
#momentum = [0.5, 0.7, 0.9]
optimizers = ['rmsprop', 'adam']

# Create hyperparameter options
hyperparameters = dict(optimizer=optimizers, epochs=epochs, batch_size=batches)

# Create grid search
grid = GridSearchCV(estimator=neural_network, param_grid=hyperparameters)

# Fit grid search
grid_result = grid.fit(X, y)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.


KeyboardInterrupt: 

### CV Score with selected parameters

In [5]:
neural_network = KerasClassifier(build_fn=create_network, epochs=50, batch_size=32, optimizer='adam', verbose=1)
cv_score = cross_val_score(neural_network, X_train, y_train, cv=5) 
print(cv_score)
print(np.mean(cv_score))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/5

In [None]:
grid_result.best_score_

In [None]:
grid_result.best_params_

In [None]:
allscores=grid_result.cv_results_
print(allscores)