# Twitch Image Classifier
Originally, I did a project in the data analysis class in which we classified Twitch images according to the game shown. Back then, we used SVMs and transfer learning.
Now, I want to do this project again - this time building my own network architecture. My goal is to get at least 80% accuracy.

## Setup
Please add the Twitch images to the data directory. If you do not have any preview images, you can use my Twitch preview image crawler, which is also available on my GitHub.

The following directory structure should be present

data/training/Game1/...

data/training/Game2/...

data/training/Game...

data/validation/Game1/...

data/validation/Game2/...

data/validation/Game...

## Code

### Check for networkx version
For hyperas networkx version 1.11 is required. Thus, we need to check this first!

In [1]:
import networkx

# hyperas needs exactly networkx 1.11 (see the note for this cell), so stop
# right away when any other version is installed.
required_networkx_version = '1.11'
assert networkx.__version__ == required_networkx_version, (
    'You need to install networkx version 1.11, otherwise hyperas does not work'
)

### Check directory structure
In the first step we'll just check whether the right directory structure is present. This will hopefully save some time if I come back to this later and need to set up the environment again.

In [2]:
import os
import glob

# Sanity checks for the on-disk layout expected by the rest of the notebook:
#   data/training/<Game>/<image files>
#   data/validation/<Game>/<image files>

# First check if data directory is present. This should be the case anyways, since this is set in the git repository,
# but we'll check :)
assert os.path.isdir('data'), 'The data directory is not present, but mandatory'

# Check if training and validation is present
assert os.path.isdir('data/training'), 'The training directory in the data directory is not present, but mandatory'
assert os.path.isdir('data/validation'), 'The validation directory in the data directory is not present, but mandatory'

# Collect the game directories (== classes) of both splits.
# os.listdir() returns entries in arbitrary order, so both lists are sorted
# before comparing; otherwise identical sets of games could fail the check.
training_games = sorted(
    game for game in os.listdir("data/training/")
    if os.path.isdir('data/training/{}'.format(game))
)
validation_games = sorted(
    game for game in os.listdir("data/validation/")
    if os.path.isdir('data/validation/{}'.format(game))
)
assert training_games == validation_games, 'The games must be the same in training and validation'

# Check how many games there are
assert len(training_games) >= 1, 'You need to add at least one game'
assert len(training_games) >= 2, 'For a real classification scenario you should add more than one game'

# Determine training and validation sample size (files exactly one level below a game directory)
training_samples = len([name for name in glob.glob('data/training/*/*') if os.path.isfile(name)])
validation_samples = len([name for name in glob.glob('data/validation/*/*') if os.path.isfile(name)])
assert training_samples > validation_samples, 'You should use more training samples than validation samples'
# ">=" so that the check agrees with its own message ("at least 32").
assert training_samples >= 32, 'You should use at least 32 files'

## Imports and Model Definition

In [3]:
from hyperopt import Trials, STATUS_OK, tpe
from hyperas import optim
from hyperas.distributions import uniform
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.datasets import cifar10
from keras.utils import np_utils

Using TensorFlow backend.


In [4]:
def model(train_generator, validation_generator):
    """Build, train and score one CNN candidate for the hyperas search.

    The network is three Conv2D/ReLU/MaxPooling stages followed by a dense
    head. The dropout rate is the only searched hyperparameter; it is written
    as a hyperas double-brace template and must stay exactly in that form.

    Reads the module-level globals ``batch_size``, ``epochs``, ``img_width``,
    ``img_height``, ``training_samples``, ``validation_samples`` and
    ``nb_classes`` that ``definitions()`` sets.

    Args:
        train_generator: batch generator passed to ``fit_generator`` as the
            training data (``data()`` builds it with ``flow_from_directory``).
        validation_generator: batch generator used for validation during
            training and for the final evaluation.

    Returns:
        dict with ``'loss'`` (the negated validation accuracy, because the
        search minimises the loss), ``'status'`` (``STATUS_OK``) and
        ``'model'`` (the trained network).
    """
    # Always run at least one step, even when a split holds fewer images than
    # one batch; a step count of 0 would leave nothing to train or evaluate on.
    training_steps = max(1, training_samples // batch_size)
    validation_steps = max(1, validation_samples // batch_size)

    # Same axis order as the target_size used in data().
    input_shape = (img_width, img_height, 3)

    net = Sequential()
    net.add(Conv2D(16, (3, 3), input_shape=input_shape))
    net.add(Activation('relu'))
    net.add(MaxPooling2D(pool_size=(2, 2)))

    net.add(Conv2D(32, (3, 3)))
    net.add(Activation('relu'))
    net.add(MaxPooling2D(pool_size=(2, 2)))

    net.add(Conv2D(64, (3, 3)))
    net.add(Activation('relu'))
    net.add(MaxPooling2D(pool_size=(2, 2)))

    net.add(Flatten())
    net.add(Dense(64))
    net.add(Activation('relu'))
    # hyperas template: replaced by a sampled dropout rate for every trial.
    net.add(Dropout({{uniform(0, 1)}}))
    net.add(Dense(nb_classes))
    # Each image shows exactly one game (class_mode='categorical'), so the
    # outputs must form a probability distribution: softmax, not sigmoid.
    net.add(Activation('softmax'))
    net.compile(loss='categorical_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])

    net.fit_generator(
        train_generator,
        steps_per_epoch=training_steps,
        epochs=epochs,
        validation_data=validation_generator,
        validation_steps=validation_steps)

    # Pass the step count explicitly: the generator loops forever, and older
    # Keras 2 releases require `steps` as an argument here.
    score, acc = net.evaluate_generator(validation_generator,
                                        steps=validation_steps)

    return {'loss': -acc, 'status': STATUS_OK, 'model': net}

## Data Definition
Here we define the data image size and the dataflows, so they can be used in the model.

In [None]:
def definitions():
    """Set the module-level training configuration and dataset statistics.

    Assigns the globals ``batch_size``, ``epochs``, ``img_width`` and
    ``img_height`` to fixed values, and derives ``training_samples``,
    ``validation_samples`` and ``nb_classes`` from the contents of
    ``data/training`` and ``data/validation`` relative to the current
    working directory.
    """
    global batch_size, epochs, img_width, img_height, training_samples, validation_samples, nb_classes

    def _count_files(pattern):
        # Number of regular files matching the glob pattern.
        return sum(1 for path in glob.glob(pattern) if os.path.isfile(path))

    batch_size, epochs = 32, 10
    img_width = 320
    img_height = 180

    training_samples = _count_files('data/training/*/*')
    validation_samples = _count_files('data/validation/*/*')

    # One class per sub-directory of the training directory.
    nb_classes = sum(
        1 for entry in os.listdir('data/training')
        if os.path.isdir('data/training/{}'.format(entry))
    )
    

def data():
    """Create the Keras directory generators for training and validation.

    Calls ``definitions()`` first so that ``batch_size``, ``img_width`` and
    ``img_height`` exist as globals, then builds one generator per split
    from ``data/training`` and ``data/validation`` with categorical labels.

    Returns:
        A ``(train_generator, validation_generator)`` tuple.

    NOTE(review): this function is handed to ``optim.minimize(data=...)``.
    The local names ``train_generator`` / ``validation_generator`` mirror the
    parameter names of ``model()``; presumably hyperas depends on that, so
    keep them unchanged -- confirm against the hyperas documentation.
    """
    # Load definitions
    definitions()
    
    train_data_dir = 'data/training'
    validation_data_dir = 'data/validation'

    # No rescaling and no augmentation is configured for either split.
    test_datagen = ImageDataGenerator(
        rescale=None)

    train_datagen = ImageDataGenerator(
        rescale=None)

    # NOTE(review): Keras documents target_size as (height, width). Passing
    # (img_width, img_height) therefore asks for 320-high by 180-wide inputs.
    # model() builds its input_shape in the same order, so the two agree, but
    # confirm that this orientation is intended; both places would have to
    # change together.
    train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='categorical')

    validation_generator = test_datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode='categorical')

    return train_generator, validation_generator


## Main-Loop
This is the main loop, starting keras with the hyperparameter optimization.

In [None]:
# Run the hyperas/hyperopt search: 15 evaluations of model() on the generators
# from data(), with definitions() passed along as a helper function.
search_trials = Trials()
best_run, best_model = optim.minimize(
    model=model,
    data=data,
    algo=tpe.suggest,
    max_evals=15,
    trials=search_trials,
    functions=[definitions],
    notebook_name='TwitchImageClassifier',
)

# Show the two values returned by the search.
print(best_run)
print(best_model)

>>> Imports:
#coding=utf-8

try:
    import networkx
except:
    pass

try:
    import os
except:
    pass

try:
    import glob
except:
    pass

try:
    from hyperopt import Trials, STATUS_OK, tpe
except:
    pass

try:
    from hyperas import optim
except:
    pass

try:
    from hyperas.distributions import uniform
except:
    pass

try:
    from keras.models import Sequential
except:
    pass

try:
    from keras.layers.core import Dense, Dropout, Activation, Flatten
except:
    pass

try:
    from keras.layers import Conv2D, MaxPooling2D
except:
    pass

try:
    from keras.optimizers import SGD
except:
    pass

try:
    from keras.preprocessing.image import ImageDataGenerator
except:
    pass

try:
    from keras.datasets import cifar10
except:
    pass

try:
    from keras.utils import np_utils
except:
    pass

>>> Hyperas search space:

def get_space():
    return {
        'Dropout': hp.uniform('Dropout', 0, 1),
    }

>>> Functions
  1: def definitions():
  2:     global