<a href="https://colab.research.google.com/github/cbalkig/Anomaly_Detection_in_Videos/blob/master/train_multi_thread.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import multiprocessing as mp
import sys
from multiprocessing import Process
# Print the number of CPUs reported by the system (useful when choosing how
# many worker processes to start).
print(mp.cpu_count())

In [None]:
import os
import pandas as pd
import numpy as np
import time

In [None]:
# Root folder for this project: the dataset folder, the log file and the saved
# models are all resolved relative to it.
# NOTE(review): the path presumably points at a Colab-mounted Google Drive —
# confirm the drive is mounted before running the cells below.
working_directory = '/content/drive/MyDrive/AnomalyDetectionInVideos'

In [None]:
class Config:
    """Run-wide settings shared by the notebook cells."""

    # --- Locations (resolved against ``working_directory``) ---
    # Folder with one sub-directory per class, holding the training images.
    EDIT_DATASET_PATH = os.path.join(working_directory, "regenerated_files")
    # Target of the optional stdout redirection.
    LOG_FILE = os.path.join(working_directory, "log.txt")

    # --- Training hyper-parameters ---
    IMAGE_SIZE = 256  # images are loaded as IMAGE_SIZE x IMAGE_SIZE
    BATCH_SIZE = 256
    EPOCHS = 10

    # --- Parallelism ---
    # Number of data partitions, and of worker processes started for them.
    THREAD_COUNT = 2

In [None]:
# Optional: uncomment to send everything printed to Config.LOG_FILE instead of
# the notebook output.
#sys.stdout=open(Config.LOG_FILE,"w")
# Registry of started worker processes, keyed by worker index; it is filled by
# the cell that launches training.
processes = {}

In [None]:
def show_batch(image_batch, label_batch):
    """Display up to the first 10 images of a batch in a 5x5 subplot grid.

    Args:
        image_batch: Batch of images indexed as ``image_batch[n, :, :, 0]``,
            i.e. a 4-axis array whose channel 0 is drawn in grayscale.
        label_batch: Labels of the batch. Currently unused; kept so existing
            callers do not break.

    NOTE(review): ``plt`` is not imported in the visible cells; presumably it
    is ``matplotlib.pyplot`` — confirm it is imported before calling this.
    """
    plt.figure(figsize=(10, 10))
    # Clamp to the batch length so a batch with fewer than 10 images (e.g. the
    # last batch of an epoch) does not raise IndexError.
    for n in range(min(10, len(image_batch))):
        plt.subplot(5, 5, n + 1)
        plt.imshow(image_batch[n, :, :, 0], cmap='gray')
        plt.axis('off')

In [None]:
def plot_acc_loss(trained):
    """Plot the training loss and accuracy curves side by side.

    Args:
        trained: Object exposing ``epoch`` (x values) and a ``history`` mapping
            with ``"loss"`` and ``"accuracy"`` entries (presumably the History
            object returned by ``model.fit`` — confirm against the caller).
    """
    _, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(15, 5))

    # Left panel: loss per epoch.
    loss_ax.set_title('loss')
    loss_ax.plot(trained.epoch, trained.history["loss"], label="Train loss")

    # Right panel: accuracy per epoch.
    acc_ax.set_title('acc')
    acc_ax.plot(trained.epoch, trained.history["accuracy"], label="Train acc")

    for panel in (loss_ax, acc_ax):
        panel.legend()

In [None]:
def train(id):
    """Train a small CNN binary classifier on partition ``id`` and save it.

    Reads the module-level ``dfs[id]`` data frame, streams augmented grayscale
    images from it, fits the model for ``Config.EPOCHS`` epochs and writes the
    result to ``model_<id>.hdf5`` inside ``working_directory``.

    Args:
        id: Index into ``dfs``; also used in the progress messages and in the
            name of the saved model file.
    """
    # Keras is imported inside the function, so the import runs in whichever
    # process calls train() (presumably to keep each worker's Keras state
    # separate — confirm).
    import keras
    from keras.layers import Conv2D, Flatten, Dense, Dropout, MaxPooling2D
    from keras.models import Sequential, load_model
    from keras.preprocessing.image import ImageDataGenerator

    side = Config.IMAGE_SIZE

    # Pixel rescaling plus random shear / rotation / brightness augmentation.
    augmenter = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        rotation_range=30,
        fill_mode='nearest',
        brightness_range=[0.4, 1.5],
    )

    # Batches are read lazily from the files listed in this partition's frame.
    batches = augmenter.flow_from_dataframe(
        dataframe=dfs[id],
        directory=Config.EDIT_DATASET_PATH,
        x_col="file_name",
        y_col="label",
        target_size=(side, side),
        batch_size=Config.BATCH_SIZE,
        shuffle=True,
        color_mode='grayscale',
        class_mode='binary',
    )
    print("Sample Count after ImageDataGenerator", batches.samples)

    print("I will start process", id)

    # One conv/pool stage followed by a dense head with a sigmoid output.
    model = Sequential([
        Conv2D(32, kernel_size=3, activation="relu", input_shape=(side, side, 1)),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(64, activation="relu"),
        Dropout(0.5),
        Dense(1, activation="sigmoid"),
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    print("Data is ready for process", id, model, batches)

    model.fit(batches, epochs=Config.EPOCHS, verbose=1)
    print("Model is ready for process", id)

    model_path = os.path.join(working_directory, "model_{}.hdf5".format(id))
    model.save(model_path)
    print("Model saved : ", model_path)

    print("I've finished process", id)

In [None]:
# Count every file below the dataset root, at any depth. os.walk yields
# (dirpath, dirnames, filenames) triples; only the filenames matter here.
sample_count = sum(
    len(filenames)
    for _dirpath, _dirnames, filenames in os.walk(Config.EDIT_DATASET_PATH)
)
print("Sample Count", sample_count)

In [None]:
# Build the (file_name, label) table: every sub-directory of the dataset root
# is treated as one class, and each entry inside it becomes one row labelled
# with that directory's name.
image_array = []

count = 0
for f in sorted(os.listdir(Config.EDIT_DATASET_PATH)):
    directory_path = os.path.join(Config.EDIT_DATASET_PATH, f)
    if not os.path.isdir(directory_path):
        continue  # only directories define classes
    class_name = f
    rows = [
        [os.path.join(directory_path, v), class_name]
        for v in sorted(os.listdir(directory_path))
    ]
    image_array.extend(rows)
    count += len(rows)

df = pd.DataFrame(image_array, columns=['file_name', 'label'])
print(df.head())

In [None]:
# Shuffle the frame and split it into Config.THREAD_COUNT contiguous,
# non-overlapping partitions (one per worker process).
#
# The boundaries are derived from the frame that is actually sliced, not from
# the separate on-disk file count, so every row lands in exactly one partition
# even if the two numbers differ.
total_rows = len(df)
slider = total_rows // Config.THREAD_COUNT

# sliders[i] is the first row of partition i; the final entry closes the last
# partition, which also absorbs the remainder of the integer division.
sliders = [i * slider for i in range(Config.THREAD_COUNT)]
sliders.append(total_rows)

print("Sliders", sliders)

df = df.iloc[np.random.permutation(total_rows)]

# iloc slices exclude their end position, so the upper bound must be used
# as-is; subtracting 1 would drop the last row of every partition.
dfs = [
    df.iloc[sliders[i]:sliders[i + 1]]
    for i in range(Config.THREAD_COUNT)
]

In [None]:
# Start one training process per data partition, wait for all of them, and
# report the total wall-clock time.
start_time = time.time()
for i in range(Config.THREAD_COUNT):
    print('registering process %d' % i)
    proc = Process(target=train, args=(i,))
    proc.start()
    processes[i] = proc

# join() blocks until the worker has exited. Unlike polling is_alive() with a
# 5-second sleep, it returns as soon as the worker is done, so the measured
# time is not inflated by the polling interval.
for proc in processes.values():
    proc.join()

print("Train Execution time:", time.time() - start_time, "seconds.")