In [7]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tflearn
import glob
from tflearn.data_utils import to_categorical, pad_sequences
from sklearn import preprocessing
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# load the IGN Dataset 


In [8]:
df = pd.read_csv("ign.csv")
print(df.head())

   Unnamed: 0 score_phrase                                              title  \
0           0      Amazing                            LittleBigPlanet PS Vita   
1           1      Amazing  LittleBigPlanet PS Vita -- Marvel Super Hero E...   
2           2        Great                               Splice: Tree of Life   
3           3        Great                                             NHL 13   
4           4        Great                                             NHL 13   

                                                 url          platform  score  \
0             /games/littlebigplanet-vita/vita-98907  PlayStation Vita    9.0   
1  /games/littlebigplanet-ps-vita-marvel-super-he...  PlayStation Vita    9.0   
2                          /games/splice/ipad-141070              iPad    8.5   
3                      /games/nhl-13/xbox-360-128182          Xbox 360    8.5   
4                           /games/nhl-13/ps3-128181     PlayStation 3    8.5   

        genre editors_choi

# Convert scores in a suitable form

In [9]:
# Our labels will be generated starting from data of column 'score_phrase'
labels = list(df['score_phrase'].unique())
print("Unique scores:\n", labels)
# To generate a baseline for the classification we convert them to positive and
# negative
positive = ['Masterpiece', 'Amazing', 'Great', 'Good', 'Okay']
negative = ['Awful', 'Mediocre', 'Bad', 'Painful', 'Unbearable', 'Disaster']

def convert_to_binary(val):
    if val in positive:
        return 1#'positive'
    elif val in negative:
        return 0#'negative'
    else:
        return val

df['binary'] = df['score_phrase'].apply(convert_to_binary)
#print(df['binary'].unique())


# Create a mapping to integers for the labels, to be used later with a more complicated RNN
le = preprocessing.LabelEncoder()
le.fit(labels)
map_labels = { l : i for l, i in zip(labels, le.transform(labels)) }
print("Mapping from scores to integers:\n", map_labels)
df['multiple'] = df['score_phrase'].map(map_labels)
#print(df['multiple'][:5])
#print(df['score_phrase'][:5])
#print(le.transform(['Amazing', 'Great']))

Unique scores:
 ['Amazing', 'Great', 'Good', 'Awful', 'Okay', 'Mediocre', 'Bad', 'Painful', 'Unbearable', 'Disaster', 'Masterpiece']
Mapping from scores to integers:
 {'Great': 5, 'Disaster': 3, 'Bad': 2, 'Good': 4, 'Painful': 9, 'Unbearable': 10, 'Awful': 1, 'Amazing': 0, 'Okay': 8, 'Mediocre': 7, 'Masterpiece': 6}


In [10]:
print(df.columns)
# We don't have a full review but we can combine several columns to get one.
# We assume url and release date are not relevant for the final score
print(df['editors_choice'].unique())
# 'editors_choice' is composed by Y and N only
print(df.loc[df['editors_choice'] == 'Y', 'binary'].value_counts())
print(df.loc[df['editors_choice'] == 'N', 'binary'].value_counts())
# We can see that such feature is quite important since whenever the game
# is an editor's choice it gets always a positive review. Since we want to
# perform sentiment analysis on reviwews it is better to convert 'Y' in something
# more clear like 'editors_choice'
df['editors_choice'] = df['editors_choice'].map({'Y' : 'editors_choice', 'N': ''})

to_merge = ['title', 'platform', 'genre', 'editors_choice']

def insert_space(val):
    if pd.isnull(val):
        return ' '
    else:
        return val + ' '

df['predictors'] = df['title'].apply(insert_space)
for col in to_merge[1:]:

    df['predictors'] += df[col].apply(insert_space)

#print(df[['predictors', 'title', 'platform', 'genre', 'editors_choice']].head())


Index(['Unnamed: 0', 'score_phrase', 'title', 'url', 'platform', 'score',
       'genre', 'editors_choice', 'release_year', 'release_month',
       'release_day', 'binary', 'multiple'],
      dtype='object')
['Y' 'N']
1    3517
Name: binary, dtype: int64
1    10801
0     4307
Name: binary, dtype: int64


# Prepare the NN

In [11]:
X = df['predictors']#word_vectors
y = df['binary']

# Now we can create our test and training set
trainX, testX, trainY, testY = train_test_split(X, y,
test_size=0.33, random_state=1)

# and Convert the 'predictors' column into vectors (bag of word)
vectorizer = CountVectorizer()
print(trainX[:2])

trainX = vectorizer.fit_transform(trainX)
testX  = vectorizer.transform(testX) # the use of vectorizer ensures that words not seen during training are ignored
print(trainX[:2])
print(trainX.todense())

print(trainX.shape)
vocab = vectorizer.get_feature_names()
#print(vocab)

word2idx = vectorizer.vocabulary_

#print(word2idx)

print(trainX.shape)

print(trainX[:2])

# Probably there is a direct way to deal with sparse matrices and embedding,
# for now we will create manually the input data
def input_from_sparse(matrix):
    nonzeros = [val.nonzero()[1] for val in matrix]
    return pad_sequences(nonzeros, maxlen=100, value=0.)
# Sequence padding
trainX = input_from_sparse(trainX)
testX  = input_from_sparse(testX)

print(trainX[:2])

# Converting labels to binary vectors
trainY = to_categorical(trainY, nb_classes=2)
testY = to_categorical(testY, nb_classes=2)

# Network building
tf.reset_default_graph()

net = tflearn.input_data([None, trainX.shape[1]])
net = tflearn.embedding(net, input_dim=len(vocab), output_dim=128)
net = tflearn.lstm(net, 128, dropout=0.8)
net = tflearn.fully_connected(net, 2, activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy')

model = tflearn.DNN(net, tensorboard_verbose=0)

12037    Tomb Raider Underworld PlayStation 3 Action  
616           Star Fox 64 Nintendo 64 Flight, Action  
Name: predictors, dtype: object
  (0, 208)	1
  (0, 4421)	1
  (0, 6141)	1
  (0, 4650)	1
  (0, 5911)	1
  (1, 2294)	1
  (1, 4037)	1
  (1, 154)	2
  (1, 2349)	1
  (1, 5507)	1
  (1, 208)	1
[[0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]
 ..., 
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]
 [0 0 0 ..., 0 0 0]]
(12478, 6625)
(12478, 6625)
  (0, 208)	1
  (0, 4421)	1
  (0, 6141)	1
  (0, 4650)	1
  (0, 5911)	1
  (1, 2294)	1
  (1, 4037)	1
  (1, 154)	2
  (1, 2349)	1
  (1, 5507)	1
  (1, 208)	1
[[   0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    0    0    0    0    0    0    0
     0    0    0    0    0    0    0    

# Train or load an already trained NN

In [12]:
print("Start training")
# Training
save_fn = './binary_model.tflearn'

if glob.glob(save_fn + '*'):
    print("*"*80)
    model.load(save_fn)
    print('Model loaded from file.')
else:
    model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True,
            batch_size=32)
    model.save(save_fn)
    print('Model successfully trained and saved.')




Training Step: 3900  | total loss: [1m[32m0.11814[0m[0m
| Adam | epoch: 010 | loss: 0.11814 - acc: 0.9385 | val_loss: 0.78709 - val_acc: 0.7672 -- iter: 12478/12478
Training Step: 3900  | total loss: [1m[32m0.11814[0m[0m
| Adam | epoch: 010 | loss: 0.11814 - acc: 0.9385 | val_loss: 0.78709 - val_acc: 0.7672 -- iter: 12478/12478
--
Model successfully trained and saved.


# Test Accuracy

In [13]:
predictions = model.predict(testX)

p0 = np.rint(np.array(predictions)[:, 0])
y0 = np.rint(testY[:, 0])

#print(p0[:2], y0[:2])
print("This should be very close to the val_acc obtained from the training")
print(accuracy_score(y0, p0))


print('\nManually compare some data')
for p,r in zip(predictions[:5], testY[:5]):
    print(p,r)

print('\nClassification report')
print(classification_report(y0, p0))


This should be very close to the val_acc obtained from the training
0.765414023101

Manually compare some data
[0.06874389946460724, 0.931256115436554] [ 0.  1.]
[4.274999355402542e-06, 0.9999957084655762] [ 0.  1.]
[0.0001945694093592465, 0.9998055100440979] [ 0.  1.]
[2.7417359888204373e-05, 0.9999725818634033] [ 0.  1.]
[1.7396172324879444e-06, 0.9999982118606567] [ 0.  1.]

Classification report
             precision    recall  f1-score   support

        0.0       0.83      0.87      0.85      4675
        1.0       0.51      0.44      0.47      1472

avg / total       0.75      0.77      0.76      6147



# Multiple labels


In [14]:
X = df['predictors']
y = df['multiple']

trainX, testX, trainY, testY = train_test_split(X, y,
test_size=0.33, random_state=1)


vectorizer = CountVectorizer()
trainX = vectorizer.fit_transform(trainX)
testX  = vectorizer.transform(testX) # the use of vectorizer ensures that words not seen during training are ignored
vocab = vectorizer.get_feature_names()
word2idx = vectorizer.vocabulary_

# Probably there is a direct way to deal with sparse matrices and embedding,
# for now we will create manually the input data
def input_from_sparse(matrix):
    nonzeros = [val.nonzero()[1] for val in matrix]
    # Here maxlen can be tweaked
    return pad_sequences(nonzeros, maxlen=100, value=0.)

# Sequence padding
trainX = input_from_sparse(trainX)
testX  = input_from_sparse(testX)

# Converting labels to full class vectors
trainY = to_categorical(trainY, nb_classes=len(labels))
testY = to_categorical(testY, nb_classes=len(labels))

print(trainX.shape, trainY.shape)
print(testX.shape, testY.shape)

# Network building
tf.reset_default_graph()

net = tflearn.input_data([None, trainX.shape[1]])
net = tflearn.embedding(net, input_dim=len(vocab), output_dim=128)
net = tflearn.lstm(net, 128, dropout=0.8)
net = tflearn.fully_connected(net, trainY.shape[1], activation='softmax')
net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                         loss='categorical_crossentropy')

model = tflearn.DNN(net, tensorboard_verbose=0)


(12478, 100) (12478, 11)
(6147, 100) (6147, 11)


# Train or load an already trained NN

In [15]:
print("Start training")
save_fn = './multiple_model.tflearn'

if glob.glob(save_fn + '*'):
    print("*"*80)
    model.load(save_fn)
    print('Model loaded from file.')
else:
    model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True, batch_size=32)
    model.save(save_fn)
    print('Model successfully trained and saved.')






Training Step: 3900  | total loss: [1m[32m1.09280[0m[0m
| Adam | epoch: 010 | loss: 1.09280 - acc: 0.5964 | val_loss: 1.70123 - val_acc: 0.3659 -- iter: 12478/12478
Training Step: 3900  | total loss: [1m[32m1.09280[0m[0m
| Adam | epoch: 010 | loss: 1.09280 - acc: 0.5964 | val_loss: 1.70123 - val_acc: 0.3659 -- iter: 12478/12478
--
Model successfully trained and saved.


# Test Accuracy

In [16]:
predictions = model.predict(testX)

for i in range(testY.shape[1]):
    p0 = np.rint(np.array(predictions)[:, i])
    y0 = np.rint(testY[:, i])

    #print(p0[:2], y0[:2])
    print("This shows how every single label is predicted")
    print(accuracy_score(y0, p0))
    print('\nClassification report')
    print(classification_report(y0, p0))


    
p = np.argmax(predictions, axis=1)
y = np.argmax(testY, axis=1)
#print(p[:2])
#print(predictions[:2])
print("This shows the accuracy if the label is chosen based on the largest probability, it should be similar to val_acc obtained from the training")
print(accuracy_score(y, p))
print('\nClassification report')
print(classification_report(y, p))




print('\nManually compare some data')
for p,r in zip(predictions[:5], testY[:5]):
    print(p,r)


from sklearn.metrics import log_loss
print("log_loss: ", log_loss(testY, predictions, normalize=False))
print("log_loss normalized: ", log_loss(testY, predictions, normalize=True))

#Manual check
#p = np.rint(predictions)
#print("log_loss normalized: ", log_loss(testY, p, normalize=True))
#result = [np.allclose(i,j) for i, j in zip(testY, p)]
#print(np.mean(result))

This shows how every single label is predicted
0.918984870669

Classification report
             precision    recall  f1-score   support

        0.0       0.98      0.93      0.95      5607
        1.0       0.53      0.78      0.63       540

avg / total       0.94      0.92      0.93      6147

This shows how every single label is predicted
0.965836993655

Classification report
             precision    recall  f1-score   support

        0.0       0.97      1.00      0.98      5937
        1.0       0.00      0.00      0.00       210

avg / total       0.93      0.97      0.95      6147

This shows how every single label is predicted
0.926793557833

Classification report
             precision    recall  f1-score   support

        0.0       0.93      1.00      0.96      5697
        1.0       0.00      0.00      0.00       450

avg / total       0.86      0.93      0.89      6147

This shows how every single label is predicted
0.999674638035

Classification report
             pr

  'precision', 'predicted', average, warn_for)
