In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D
from tensorflow.keras.layers import MaxPool2D, Flatten, Dense, Dropout
from tensorflow.keras.layers import concatenate
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Sequential
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd

from tensorflow import keras
from tensorflow.keras import layers

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

In [None]:
!git clone https://github.com/cyberspace-power/cs6313_yt_vid_classification.git

Cloning into 'cs6313_yt_vid_classification'...
remote: Enumerating objects: 15489, done.[K
remote: Counting objects: 100% (3821/3821), done.[K
remote: Compressing objects: 100% (3819/3819), done.[K
remote: Total 15489 (delta 4), reused 3814 (delta 1), pack-reused 11668[K
Receiving objects: 100% (15489/15489), 1.19 GiB | 27.05 MiB/s, done.
Resolving deltas: 100% (14/14), done.
Checking out files: 100% (15446/15446), done.


In [None]:
from tqdm import tqdm
image_filepath = 'cs6313_yt_vid_classification/dataset/category_wise_thumbnails/'

def read_image_data(categories = ('1', '2', '10', '17')):
    """Index the thumbnail files of the requested categories.

    No image is decoded here; the function only lists the files found in
    ``image_filepath + 'category_<id>'`` for every category id.

    Args:
        categories: iterable of category ids given as strings.

    Returns:
        A DataFrame with one row per file and two columns:
        'Video Id'  - path relative to ``image_filepath``
                      (``'category_<id>/<file name>'``),
        'Category'  - the category id the file was found under.

    Raises:
        FileNotFoundError: if a category directory does not exist
            (propagated from ``os.listdir``).
    """
    label_data = []
    title_data = []

    for x in categories:
        folder = 'category_' + x
        # os.listdir returns entries in arbitrary order; sorting makes the
        # resulting table (and every split derived from it) reproducible.
        for img in tqdm(sorted(os.listdir(image_filepath + folder)), desc = "loading"):
            label_data.append(x)
            title_data.append(folder + '/' + img)

    return pd.DataFrame({'Video Id': title_data, 'Category': label_data})

In [None]:
# Build the thumbnail index (relative file path + category id) for the default categories.
image_df = read_image_data()
image_df

loading: 100%|██████████| 3797/3797 [00:00<00:00, 364935.20it/s]
loading: 100%|██████████| 3797/3797 [00:00<00:00, 262455.05it/s]
loading: 100%|██████████| 3797/3797 [00:00<00:00, 279703.75it/s]
loading: 100%|██████████| 3797/3797 [00:00<00:00, 363369.82it/s]


Unnamed: 0,Video Id,Category
0,category_1/lxkG245Y8-M.jpg,1
1,category_1/lmqoGFXrINw.jpg,1
2,category_1/63pEpQ3nR0Q.jpg,1
3,category_1/QE6NLpi-pcU.jpg,1
4,category_1/udsWV3ZTXZ4.jpg,1
...,...,...
15183,category_17/7XH2OqODpCA.jpg,17
15184,category_17/Fwql_8ZaHO0.jpg,17
15185,category_17/QQAt8qy8Gpc.jpg,17
15186,category_17/iVavCU148rU.jpg,17


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# flow_from_dataframe splits by row position: with validation_split=0.2 the
# 'validation' subset is the first 20% of the rows and 'training' is the rest.
# image_df is ordered by category, so the rows are shuffled once (fixed seed)
# before splitting; otherwise the validation subset would hold a single category.
image_df_shuffled = image_df.sample(frac=1, random_state=42).reset_index(drop=True)

img_gen = ImageDataGenerator(
    # The MobileNetV2 ImageNet weights used by the model expect pixels scaled
    # to [-1, 1], which is what its own preprocess_input produces.
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input,
    validation_split=0.2,
)

# Options shared by the training and validation iterators.
flow_options = dict(
    directory='cs6313_yt_vid_classification/dataset/category_wise_thumbnails/',
    x_col='Video Id',
    y_col='Category',
    class_mode='categorical',
    # Must match the image input of the model, Input(shape=(224, 224, 3)).
    target_size=(224, 224),
)

img_iter = img_gen.flow_from_dataframe(
    image_df_shuffled,
    shuffle=True,
    batch_size=20,
    subset='training',
    **flow_options
)

img_iter_val = img_gen.flow_from_dataframe(
    image_df_shuffled,
    # A fixed order keeps validation batches aligned with img_iter_val.filenames.
    shuffle=False,
    batch_size=200,
    subset='validation',
    **flow_options
)

Found 12151 validated image filenames belonging to 4 classes.
Found 3037 validated image filenames belonging to 4 classes.


In [None]:
from tqdm import tqdm
text_filepath = 'cs6313_yt_vid_classification/dataset/category_wise_data/'

def read_text_data(categories = ('1', '2', '10', '17')):
  """Load the per-category metadata CSV files into a single DataFrame.

  For every category id the file ``text_filepath + 'category_<id>.csv'`` is
  read and tagged with a 'Category' column holding that id.

  Args:
    categories: iterable of category ids given as strings.

  Returns:
    The concatenation of all per-category frames, in the order of
    ``categories``, with a fresh 0..n-1 index so that row labels are unique
    across categories.

  Raises:
    FileNotFoundError: if a category CSV is missing (from ``pd.read_csv``).
    ValueError: if ``categories`` is empty (from ``pd.concat``).
  """
  frames = [
      pd.read_csv(os.path.join(text_filepath, 'category_' + x + '.csv')).assign(Category=x)
      for x in categories
  ]
  # ignore_index avoids every per-file index restarting at 0, which would
  # leave duplicated row labels in the combined frame.
  return pd.concat(frames, ignore_index=True)

In [None]:
# Load the per-category metadata CSVs into one frame tagged with a 'Category' column.
text_df = read_text_data()
text_df

Unnamed: 0,Video Id,Title,Description,Category
0,oV-bTfF09jo,Super Girls,My first amv on my new computer with a better ...,1
1,cdDkpbVwLPw,The Art of Flying,"Had to change the original soundtrack, because...",1
2,Udvj7HDeRcY,Kid vs. Kat Short - Run Coop Run,,1
3,inaFXNedo1g,Kwaku & Gloria; A Fairytale,Special thanks to every member of team Oneshot...,1
4,MZL2CSyLZ1c,Keep Holding On - Kagome and Sango,My ... 13th video 0.0 (bad number)\n\nMy frien...,1
...,...,...,...,...
3792,zyuvfwaIY8w,"'Lonnie B' on sparring in front of Floyd, Hopk...",HustleBoss.com catches up with light heavyweig...,17
3793,zyUxzZZUL6o,"Blake Paulson Fresno, CA",BMX racing 15 expert blake paulson,17
3794,ZzCDBLoRsAU,Ice pike,fishing for pike,17
3795,zzIPWNlexv4,James Harden Earns Second Triple-Double of Season,"James Harden records 31 points, 11 rebounds an...",17


In [None]:
# Keep only the text input (Title) and the target (Category).
text_df = text_df[['Title', 'Category']]
# A bare last expression uses the notebook's rich table display instead of plain text.
text_df.head()

                                Title Category
0                         Super Girls        1
1                   The Art of Flying        1
2    Kid vs. Kat Short - Run Coop Run        1
3         Kwaku & Gloria; A Fairytale        1
4  Keep Holding On - Kagome and Sango        1


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
train, test = train_test_split(text_df, test_size=0.2, random_state=42)

# Separate the model input (title text) from the target (category id) per partition.
X_train, y_train = train['Title'], train['Category']
X_test, y_test = test['Title'], test['Category']

# Report the size of each piece, one shape per line.
print(*(part.shape for part in (X_train, y_train, X_test, y_test)), sep='\n')

(12150,)
(12150,)
(3038,)
(3038,)


In [None]:
# Learn the vocabulary from the training titles only, so that no information
# from the held-out titles leaks into the features. Words that the tokenizer
# has not seen are skipped when the held-out titles are converted.
# NOTE: this cell overwrites X_train / X_test (text -> integer sequences);
# re-run the split cell before running it a second time.
tokenizer = Tokenizer(num_words=20000)
tokenizer.fit_on_texts(X_train)
X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)

In [None]:
# Fixed sequence length shared by the train and test title matrices.
maxlen = 200

# Pad with trailing zeros (or truncate) every token sequence to exactly `maxlen` entries.
X_train, X_test = (
    pad_sequences(sequences, padding='post', maxlen=maxlen)
    for sequences in (X_train, X_test)
)

In [None]:
# Sanity check: after padding, the titles should be a NumPy array rather than a list of lists.
type(X_train)

numpy.ndarray

In [None]:
# Inspect the training labels: they are still the category ids as strings
# (dtype object), not one-hot vectors.
y_train

2054    10
1530     2
3476    10
1303     2
477     10
        ..
1394     2
2027    17
1593     2
860      1
3473     2
Name: Category, Length: 12150, dtype: object

In [None]:
# Two-branch classifier: a frozen MobileNetV2 encodes the thumbnail, a stacked
# bidirectional LSTM encodes the tokenized title, and a small dense head maps
# the concatenated features to the 4 video categories.

# Vocabulary size of the embedding; must match the Tokenizer's num_words.
max_features = 20000

# ---- image branch ----
image_input = Input(shape=(224, 224, 3))

# include_top=False with pooling='avg' yields one feature vector per image,
# so no extra Flatten is needed after the backbone.
base_model = tf.keras.applications.MobileNetV2(
    input_shape=(224, 224, 3),
    alpha=1.0,
    include_top=False,
    weights="imagenet",
    pooling='avg',
)

# Use the pretrained backbone as a fixed feature extractor.
base_model.trainable = False
image_features = base_model(image_input, training=False)

# ---- text branch ----
text_input = Input(shape=(None,), dtype="int32")
text_features = layers.Embedding(max_features, 128)(text_input)
text_features = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(text_features)
# The last recurrent layer returns only its final state: one vector per title.
text_features = layers.Bidirectional(layers.LSTM(64))(text_features)

# ---- fusion and classification head ----
combined = concatenate([image_features, text_features])
z = Dense(64, activation="relu")(combined)
# One probability per category.
z = Dense(4, activation='softmax')(z)

# Input order is [image, title tokens].
model = Model(inputs=[image_input, text_input], outputs=z)

# Softmax output with one-hot targets calls for categorical cross-entropy;
# a percentage-error regression loss is not meaningful for class probabilities.
model.compile(loss="categorical_crossentropy", optimizer=Adam(learning_rate=1e-5), metrics=['accuracy'])

In [None]:
# Compare the containers of the two model inputs: the images come from a batch
# iterator, while the titles are a single in-memory array.
type(img_iter), type(X_train)

(keras.preprocessing.image.DataFrameIterator, numpy.ndarray)

In [None]:
# The model needs, for every sample, a thumbnail, the title of the SAME video
# and its label. The image iterators and the title arrays built earlier are
# shuffled/split independently, so they cannot be paired row by row. Instead,
# one aligned table is built here and split once.

def _thumbnail_video_id(relative_path):
    """Return the file stem of a thumbnail path such as 'category_1/<id>.jpg'."""
    return os.path.splitext(os.path.basename(relative_path))[0]


# Join thumbnails and titles on (video id, category).
# NOTE(review): assumes each thumbnail file stem equals the 'Video Id' column
# of the metadata CSVs - verify against the dataset.
thumbnails = image_df.rename(columns={'Video Id': 'Thumbnail'})
thumbnails['Video Id'] = thumbnails['Thumbnail'].map(_thumbnail_video_id)

titles = read_text_data()[['Video Id', 'Title', 'Category']]
titles = titles.assign(**{'Video Id': titles['Video Id'].astype(str)})
titles = titles.drop_duplicates(['Video Id', 'Category'])

paired_df = thumbnails.merge(titles, on=['Video Id', 'Category'], how='inner')
paired_df['Title'] = paired_df['Title'].fillna('').astype(str)
assert len(paired_df) > 0, "no thumbnail could be matched to a title row"

# Stable label encoding: category id (string) -> class index.
class_index = {name: i for i, name in enumerate(sorted(paired_df['Category'].unique()))}

# Image size expected by the model's first (image) input.
image_size = tuple(model.inputs[0].shape[1:3])


def _make_paired_dataset(frame, batch_size, training):
    """Build a tf.data pipeline of ((image, title_tokens), one_hot_label) batches.

    All three parts of an element come from the same row of `frame`.
    """
    paths = (image_filepath + frame['Thumbnail']).tolist()
    tokens = pad_sequences(tokenizer.texts_to_sequences(frame['Title']), padding='post', maxlen=maxlen)
    labels = to_categorical(frame['Category'].map(class_index).to_numpy(), num_classes=len(class_index))

    def _load(path, token_ids, label):
        image = tf.io.decode_jpeg(tf.io.read_file(path), channels=3)
        image = tf.image.resize(image, image_size)
        # Scale pixels the way the MobileNetV2 ImageNet weights expect.
        image = tf.keras.applications.mobilenet_v2.preprocess_input(image)
        return (image, token_ids), label

    dataset = tf.data.Dataset.from_tensor_slices((paths, tokens, labels))
    if training:
        # Shuffle the lightweight (path, tokens, label) records before decoding images.
        dataset = dataset.shuffle(len(frame), seed=42)
    return dataset.map(_load, num_parallel_calls=tf.data.AUTOTUNE).batch(batch_size).prefetch(tf.data.AUTOTUNE)


train_df, val_df = train_test_split(paired_df, test_size=0.2, random_state=42, stratify=paired_df['Category'])
train_ds = _make_paired_dataset(train_df, batch_size=32, training=True)
val_ds = _make_paired_dataset(val_df, batch_size=32, training=False)

# Both callbacks monitor 'val_accuracy', which only exists because
# validation_data is passed to fit() below.
checkpoint = ModelCheckpoint("lstm_model.h5", monitor='val_accuracy', verbose=1, save_best_only=True, save_weights_only=False, mode='auto')
# Patience must be smaller than the number of epochs for early stopping to be able to trigger.
early = EarlyStopping(monitor='val_accuracy', min_delta=0, patience=5, verbose=1, mode='auto')

# Batching is done by the datasets, so batch_size is not passed to fit().
hist = model.fit(train_ds, validation_data=val_ds, epochs=20, callbacks=[checkpoint, early])



In [None]:
# Plot the training curves. Accuracy and loss live on different scales, so
# each gets its own panel instead of sharing one axis labelled "Accuracy".
history = hist.history
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

# Validation curves are only present when fit() was given validation data.
for key, label in (('accuracy', "Accuracy"), ('val_accuracy', "Validation Accuracy")):
    if key in history:
        acc_ax.plot(history[key], label=label)
acc_ax.set(title="RNN (LSTM) Model Accuracy", xlabel="Epoch", ylabel="Accuracy")
acc_ax.legend()

for key, label in (('loss', "Loss"), ('val_loss', "Validation Loss")):
    if key in history:
        loss_ax.plot(history[key], label=label)
loss_ax.set(title="RNN (LSTM) Model Loss", xlabel="Epoch", ylabel="Loss")
loss_ax.legend()

fig.tight_layout()
# Save before show(): once the figure has been shown it is released, and a
# later savefig would write an empty image.
fig.savefig('rnn_lstm_acc.png')
plt.show()