In [34]:
!pip install opencv-python



In [35]:
import cv2
import os
import numpy as np
import keras
import matplotlib.pyplot as plt
from tensorflow.keras.applications import VGG16
from keras import backend as K
from keras.models import Model, Sequential, load_model
from keras.layers import Input, Dense, Activation, LSTM
import h5py

In [36]:
# Locations of the train / test / validation splits.
# NOTE(review): create_dataset() hands these to os.listdir(), which needs a
# directory, yet every path below names a .csv file - confirm the intended paths.
DIR_TR = r"C:\Users\wicm\train.csv"
DIR_TE = r"C:\Users\wicm\test.csv"
DIR_VA = r"C:\Users\wicm\validation.csv"

# Frames are resized to a square of IMG_SIZE x IMG_SIZE pixels
IMG_SIZE = 224
# Number of channels (RGB)
NUM_CHANNELS = 3
IMG_SIZE_TUPLE = (IMG_SIZE,) * 2
# Number of values in one flattened frame
IMG_SIZE_FLAT = IMG_SIZE * IMG_SIZE * NUM_CHANNELS

# Sub-classes for classification; create_dataset() compares folder names
# against these entries.
CLASSES_LIST = [
    'Healthy Lifestyle and Weight Loss',
    'Running',
    'Weight Lifting',
    'Yoga',
    'Makeup',
    'Haircare',
    'Outfit',
    'Skincare',
    'Accommodation',
    'Adventure',
    'Culture',
    'Food and drink',
]
NUM_CLASSES = 12
assert len(CLASSES_LIST) == NUM_CLASSES

# Number of frames sampled per chunk
SEQUENCE_LENGTH = 20

In [37]:
# Load the complete VGG16 network (top layers included) with ImageNet weights,
# then print its layer-by-layer summary.
image_model = VGG16(weights='imagenet', include_top=True)
image_model.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [38]:
# Build a second model that shares VGG16's input but stops at the 'fc2' layer,
# so predict() returns that layer's activations.
transfer_layer = image_model.get_layer('fc2')
image_model_transfer = Model(
    inputs=image_model.input,
    outputs=transfer_layer.output,
)

In [39]:
# Print the layer summary of the model truncated at 'fc2'.
image_model_transfer.summary()

Model: "model_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_4 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0   

In [40]:
def get_frames(vid_path):
    """Yield fixed-length chunks of preprocessed frames sampled from a video.

    The video is split into at most three chunks. From each chunk,
    ``SEQUENCE_LENGTH`` frames are sampled at a constant stride, converted from
    BGR to RGB, resized to ``IMG_SIZE_TUPLE`` and divided by 255.

    Args:
        vid_path: Path of the video, passed to ``cv2.VideoCapture``.

    Yields:
        numpy.ndarray: ``float16`` array stacking ``SEQUENCE_LENGTH`` frames.
        A chunk for which fewer than ``SEQUENCE_LENGTH`` frames could be read
        is skipped, so an unreadable video yields nothing.
    """
    cap = cv2.VideoCapture(vid_path)
    try:
        total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        # Between 1 and 3 chunks, depending on how many full sequences fit.
        split_size = int(min(3, max(total_frames // SEQUENCE_LENGTH, 1)))
        chunk_size = total_frames / split_size
        # Stride between sampled frames so one sequence spans roughly one chunk.
        skip_frame = max(1, chunk_size // SEQUENCE_LENGTH)

        for c in range(split_size):
            initial_frame = SEQUENCE_LENGTH * c * skip_frame

            frames_list = []
            for i in range(SEQUENCE_LENGTH):
                cap.set(cv2.CAP_PROP_POS_FRAMES, (i * skip_frame) + initial_frame)
                ret, frame = cap.read()
                if not ret:
                    break

                rgb_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames_list.append(
                    cv2.resize(rgb_img,
                               dsize=IMG_SIZE_TUPLE,
                               interpolation=cv2.INTER_CUBIC)
                )

            # Drop incomplete chunks so every yielded array has the same length.
            if len(frames_list) != SEQUENCE_LENGTH:
                continue

            # NOTE(review): Keras documents vgg16.preprocess_input as the input
            # preprocessing for VGG16; the 1/255 scaling is kept unchanged here
            # so existing features stay comparable - confirm which is intended.
            yield (np.array(frames_list) / 255).astype(np.float16)
    finally:
        # Runs when the generator is exhausted, closed early, or an error
        # propagates, so the capture handle is always released.
        cap.release()

In [41]:
def extracted_features(vid_path):
    """Yield the transfer-model output for each frame chunk of a video.

    Args:
        vid_path: Path of the video, forwarded to ``get_frames``.

    Yields:
        Whatever ``image_model_transfer.predict`` returns for one chunk of
        frames (presumably an array of shape ``(SEQUENCE_LENGTH, 4096)`` -
        TODO confirm against the model summary).
    """
    for chunk in get_frames(vid_path):
        # predict() returns its own output array, so no placeholder buffer is
        # allocated beforehand.
        yield image_model_transfer.predict(chunk)

In [42]:
def create_dataset(dataset_dir):
    """Extract features and one-hot labels for every video under a dataset folder.

    ``dataset_dir`` is expected to contain one sub-folder per class, named
    exactly like an entry of ``CLASSES_LIST``, each holding video files.
    Entries that are not a directory named after a known class are skipped
    (with a printed notice) so that ``features`` and ``labels`` always stay
    aligned.

    Args:
        dataset_dir: Path of the folder holding the per-class sub-folders.

    Returns:
        tuple: ``(features, labels)`` - two lists of equal length.
        ``features[i]`` is one chunk yielded by ``extracted_features`` and
        ``labels[i]`` is its one-hot class vector, a list of ints with one
        entry per element of ``CLASSES_LIST``.
    """
    features = []
    labels = []
    count = 0

    for class_name in os.listdir(dataset_dir):
        class_dir = os.path.join(dataset_dir, class_name)
        # Without this guard an unknown folder would add features with no
        # matching label, and a stray file would make os.listdir() raise.
        if class_name not in CLASSES_LIST or not os.path.isdir(class_dir):
            print("Skipping", class_dir, "- not a known class directory")
            continue

        class_index = CLASSES_LIST.index(class_name)
        one_hot = [int(i == class_index) for i in range(len(CLASSES_LIST))]

        videos_list = os.listdir(class_dir)
        for vid in videos_list:
            gen_path = os.path.join(class_dir, vid)
            for chunk in extracted_features(gen_path):
                features.append(chunk)
                # Copy so each sample owns an independent label list.
                labels.append(list(one_hot))
            count += 1
            # count accumulates over all classes; len(videos_list) is per class.
            print(gen_path, count, len(videos_list))

    return features, labels

In [43]:
# Build (features, labels) for the train, test and validation splits.
# NOTE(review): create_dataset() calls os.listdir() on its argument, so DIR_TR,
# DIR_TE and DIR_VA must be directories; they currently name .csv files, which
# is what raises the NotADirectoryError recorded in this cell's output.
features_tr, labels_tr = create_dataset(DIR_TR)
features_te, labels_te = create_dataset(DIR_TE)
features_va, labels_va = create_dataset(DIR_VA)

NotADirectoryError: [WinError 267] The directory name is invalid: 'C:\\Users\\wicm\\train.csv'

In [None]:
# Report how many feature chunks were extracted for each split. Uses the lists
# returned by create_dataset(); the X_tr / X_te / X_va names previously used
# here are not defined in the visible cells of this notebook.
print("Train features", len(features_tr))
print("Test features", len(features_te))
print("Validation features", len(features_va))

In [None]:
#import torch
#import pandas as pd

In [None]:
#dataset_path = r"C:\Users\wicm\dataset.csv"

In [None]:
#df = pd.read_csv(dataset_path)
#print(df.head(100))

In [None]:
import cv2

# Number of frames sampled per chunk
SEQUENCE_LENGTH = 20


# NOTE(review): this redefines get_frames from an earlier cell with the same
# logic and, once run, replaces that definition - consider keeping only one.
def get_frames(dataset_path):
    """Yield fixed-length chunks of preprocessed frames sampled from a video.

    The video is split into at most three chunks. From each chunk,
    ``SEQUENCE_LENGTH`` frames are sampled at a constant stride, converted from
    BGR to RGB, resized to ``IMG_SIZE_TUPLE`` and divided by 255.

    Args:
        dataset_path: Path passed to ``cv2.VideoCapture``, i.e. a video file
            despite the parameter's name.

    Yields:
        numpy.ndarray: ``float16`` array stacking ``SEQUENCE_LENGTH`` frames.
        A chunk for which fewer than ``SEQUENCE_LENGTH`` frames could be read
        is skipped, so an unreadable video yields nothing.
    """
    cap = cv2.VideoCapture(dataset_path)
    try:
        total_frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
        # Between 1 and 3 chunks, depending on how many full sequences fit.
        split_size = int(min(3, max(total_frames // SEQUENCE_LENGTH, 1)))
        chunk_size = total_frames / split_size
        # Stride between sampled frames so one sequence spans roughly one chunk.
        skip_frame = max(1, chunk_size // SEQUENCE_LENGTH)

        for c in range(split_size):
            initial_frame = SEQUENCE_LENGTH * c * skip_frame

            frames_list = []
            for i in range(SEQUENCE_LENGTH):
                cap.set(cv2.CAP_PROP_POS_FRAMES, (i * skip_frame) + initial_frame)
                ret, frame = cap.read()
                if not ret:
                    break

                rgb_img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames_list.append(
                    cv2.resize(rgb_img,
                               dsize=IMG_SIZE_TUPLE,
                               interpolation=cv2.INTER_CUBIC)
                )

            # Drop incomplete chunks so every yielded array has the same length.
            if len(frames_list) != SEQUENCE_LENGTH:
                continue

            yield (np.array(frames_list) / 255).astype(np.float16)
    finally:
        # Runs when the generator is exhausted, closed early, or an error
        # propagates, so the capture handle is always released.
        cap.release()

In [None]:
# NOTE(review): this redefines extracted_features from an earlier cell with the
# same logic and, once run, replaces that definition - consider keeping only one.
def extracted_features(dataset_path):
    """Yield the transfer-model output for each frame chunk of a video.

    Args:
        dataset_path: Path forwarded to ``get_frames``.

    Yields:
        Whatever ``image_model_transfer.predict`` returns for one chunk of
        frames (presumably an array of shape ``(SEQUENCE_LENGTH, 4096)`` -
        TODO confirm against the model summary).
    """
    for chunk in get_frames(dataset_path):
        # predict() returns its own output array, so no placeholder buffer is
        # allocated beforehand.
        yield image_model_transfer.predict(chunk)