In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')
# IPython magic: make the training-video folder the working directory.
%cd /content/drive/MyDrive/CS670/train/video

Mounted at /content/drive
/content/drive/.shortcut-targets-by-id/1dH6WlN66KdzdchbpckYYuKtQspkSx-Th/CS670/train/video


In [None]:
import cv2
import numpy as np
import os
import tensorflow
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.models import Model

# ImageNet-pretrained InceptionV3 without its classification head; the global
# average pooling layer makes it emit one flat feature vector per image.
base_model = InceptionV3(
    include_top=False,
    pooling='avg',
    weights='imagenet',
)
# Feature extractor that maps the backbone input to its pooled output.
model = Model(outputs=base_model.output, inputs=base_model.input)

def extract_features(frame, is_bgr=True):
    """Return the InceptionV3 feature vector for a single image.

    Args:
        frame: 3-channel image array, e.g. a frame decoded by OpenCV.
        is_bgr: True (default) when ``frame`` uses OpenCV's BGR channel
            order; pass False for images that are already RGB.

    Returns:
        The output of ``model.predict`` for a batch holding only this image.
    """
    if is_bgr:
        # OpenCV decodes to BGR, whereas the ImageNet weights expect RGB input.
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame = cv2.resize(frame, (299, 299))  # InceptionV3 input resolution
    # Cast explicitly so preprocess_input always scales a float array.
    frame = preprocess_input(frame.astype(np.float32))
    batch = np.expand_dims(frame, axis=0)  # Add batch dimension
    # verbose=0 keeps the per-call progress bar from flooding the cell output.
    features = model.predict(batch, verbose=0)
    return features

def compute_average_frame_and_features(video_path):
    """Average all frames of a video and extract features from that average.

    Every frame is resized to 299x299, the frames are averaged per pixel,
    and the averaged image is passed to ``extract_features``.

    Args:
        video_path: Path of the video file to read.

    Returns:
        The result of ``extract_features`` for the averaged frame, or None
        when no frame could be read from ``video_path``.
    """
    cap = cv2.VideoCapture(video_path)
    # float64 accumulator (as in average_frame) so long videos do not lose
    # precision while hundreds of 8-bit frames are summed.
    total_frame = np.zeros((299, 299, 3), dtype=np.float64)
    frame_count = 0

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # Resize to a consistent size before accumulating.
            total_frame += cv2.resize(frame, (299, 299))
            frame_count += 1
    finally:
        # Release the capture even if decoding or resizing raised.
        cap.release()

    # Nothing was decoded (missing, unreadable, or empty video).
    if frame_count == 0:
        return None

    # Per-pixel mean, converted back to an 8-bit image.
    average_frame = (total_frame / frame_count).astype(np.uint8)

    return extract_features(average_frame)


# Root folder that holds one sub-folder of videos per batch.
folder_path = "/content/drive/MyDrive/CS670/train/video/"
video_folders = os.listdir(folder_path)

# Class name (second "-"-separated token of a file name) -> integer class id.
labels={"advertisement":0,
        "drama":1,
        "entertainment": 2,
        "interview": 3,
        "live_broadcast": 4,
        "movie": 5,
        "play": 6,
        "recitation": 7,
        "singing": 8,
        "speech": 9,
        "vlog": 10}


# For every sub-folder: extract one feature vector per video and save the
# features, labels and file names as three .npy files tagged with the folder suffix.
for video_folder in video_folders:
    folder_full_path = os.path.join(folder_path, video_folder)
    # os.listdir() below raises on plain files, so skip anything that is not a folder.
    if not os.path.isdir(folder_full_path):
        print("Skipping non-directory entry:", video_folder)
        continue

    # The text after the first "_" of the folder name tags the output files.
    folder_parts = video_folder.split('_')
    if len(folder_parts) < 2:
        print("Skipping folder without '_' in its name:", video_folder)
        continue
    val = folder_parts[1]

    files = os.listdir(folder_full_path)
    feat=[]
    names=[]
    y=[]

    for name in files:
        print(name)

        # Resolve the label first so that a malformed file name is skipped
        # instead of raising after the expensive video pass.
        name_parts = name.split("-")
        label = labels.get(name_parts[1]) if len(name_parts) > 1 else None
        if label is None:
            print("Skipping file with unrecognized label:", name)
            continue

        video_path = os.path.join(folder_full_path, name)

        video_features = compute_average_frame_and_features(video_path)
        if video_features is not None:
            feat.append(video_features)
            names.append(name)
            y.append(label)
            print("Shape of video features array in "+video_folder+" :", video_features.shape)
        else:
            print("No frames read from video:", name)

    feat=np.array(feat)
    names=np.array(names)
    y=np.array(y)
    np.save('/content/drive/MyDrive/CS670_Project/video_features_'+str(val)+'.npy', feat)
    np.save('/content/drive/MyDrive/CS670_Project/labels_'+str(val)+'.npy', y)
    np.save('/content/drive/MyDrive/CS670_Project/names_'+str(val)+'.npy', names)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Shape of video features array in /content/drive/MyDrive/CS670/train/video/batch_46 : (1, 2048)
id30227-interview-02-034.mp4
Shape of video features array in /content/drive/MyDrive/CS670/train/video/batch_46 : (1, 2048)
id30227-interview-02-017.mp4
Shape of video features array in /content/drive/MyDrive/CS670/train/video/batch_46 : (1, 2048)
id30227-interview-02-005.mp4
Shape of video features array in /content/drive/MyDrive/CS670/train/video/batch_46 : (1, 2048)
id30227-interview-02-040.mp4
Shape of video features array in /content/drive/MyDrive/CS670/train/video/batch_46 : (1, 2048)
id30227-interview-01-093.mp4
Shape of video features array in /content/drive/MyDrive/CS670/train/video/batch_46 : (1, 2048)
id30227-interview-02-032.mp4
Shape of video features array in /content/drive/MyDrive/CS670/train/video/batch_46 : (1, 2048)
id30227-interview-02-031.mp4
Shape of video features array in /content/drive/MyDrive/CS670/train

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split
import numpy as np
import os
import cv2

path = "/content/drive/MyDrive/CS670_Project/video_frames/"

# Accumulators for the batch that is currently being filled.
X = []
y = []
names=[]

# Dictionary to map labels to integers
labels = {"advertisement": 0, "drama": 1, "entertainment": 2, "interview": 3, "live_broadcast": 4, "movie": 5,
          "play": 6, "recitation": 7, "singing": 8, "speech": 9, "vlog": 10}

# i counts the images in the current batch; j is the 1-based index of the batch file.
i,j=0,1
directories=os.listdir(path)
print(directories)
# Iterate over all frame images in the directory
for name in directories:
    image_path = os.path.join(path, name)
    image = cv2.imread(image_path)
    if image is not None:
        X.append(image)
        y.append(labels[name.split("-")[1]])
        names.append(name)
        i+=1
        print(j, i, name)
    else:
        print(f"Unable to read image: {name}")
    if i == 5000:
        # Batch is full: write it out and start a new one so that only one
        # batch of images is held in memory at a time.
        X = np.array(X)
        y = np.array(y)
        names = np.array(names)
        np.save('/content/drive/MyDrive/CS670_Project/frame_images'+str(j)+'.npy',X)
        np.save('/content/drive/MyDrive/CS670_Project/frame_labels'+str(j)+'.npy',y)
        np.save('/content/drive/MyDrive/CS670_Project/frame_name'+str(j)+'.npy',names)
        X = []
        y = []
        names=[]
        j+=1
        i=0


# Convert the leftover (partial) batch to numpy arrays
X = np.array(X)
y = np.array(y)
names = np.array(names)

# Save the leftover batch, but only when it holds something: if the image
# count is an exact multiple of 5000 there is nothing left to write.
if len(X) > 0:
    np.save('/content/drive/MyDrive/CS670_Project/frame_images'+str(j)+'.npy',X)
    np.save('/content/drive/MyDrive/CS670_Project/frame_labels'+str(j)+'.npy',y)
    np.save('/content/drive/MyDrive/CS670_Project/frame_name'+str(j)+'.npy',names)
else:
    print("No leftover images after the last full batch; nothing extra to save.")

# Split the data into training and testing sets.
# NOTE(review): only the final partial batch is still in memory at this point,
# so this split does not cover the batches already written to disk.
if len(X) > 1:
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
else:
    print("Fewer than two images left in memory; skipping train/test split.")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
7 4968 id30159-vlog-06-039.jpg
7 4969 id30159-vlog-06-054.jpg
7 4970 id30159-vlog-06-123.jpg
7 4971 id30159-vlog-07-017.jpg
7 4972 id30161-entertainment-01-008.jpg
7 4973 id30161-entertainment-02-021.jpg
7 4974 id30161-entertainment-02-013.jpg
7 4975 id30161-entertainment-03-005.jpg
7 4976 id30161-entertainment-04-002.jpg
7 4977 id30161-entertainment-04-046.jpg
7 4978 id30161-entertainment-04-063.jpg
7 4979 id30161-entertainment-04-080.jpg
7 4980 id30161-entertainment-04-078.jpg
7 4981 id30161-play-01-004.jpg
7 4982 id30161-play-01-022.jpg
7 4983 id30161-play-01-034.jpg
7 4984 id30161-play-01-052.jpg
7 4985 id30161-play-01-096.jpg
7 4986 id30161-play-01-114.jpg
7 4987 id30161-play-01-130.jpg
7 4988 id30161-play-01-137.jpg
7 4989 id30161-play-01-139.jpg
7 4990 id30161-speech-01-028.jpg
7 4991 id30161-speech-01-040.jpg
7 4992 id30161-speech-01-059.jpg
7 4993 id30161-speech-02-079.jpg
7 4994 id30161-speech-02-086.jpg
7 4995 

In [None]:
import cv2
import numpy as np
import os

def average_frame(video_path):
    """Compute the per-pixel mean of a video's frames in grayscale.

    Args:
        video_path: Path of the video file to read.

    Returns:
        A uint8 array holding the averaged grayscale frame, or None (after
        printing a message) when no frame could be read.
    """
    capture = cv2.VideoCapture(video_path)
    accumulator = None
    n_frames = 0

    while True:
        ok, bgr = capture.read()
        if not ok:
            break
        gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
        # The accumulator is sized lazily from the first decoded frame.
        if accumulator is None:
            accumulator = np.zeros_like(gray, dtype=np.float64)
        accumulator += gray.astype(np.float64)
        n_frames += 1

    capture.release()

    # Guard clause: nothing was decoded.
    if accumulator is None or n_frames <= 0:
        print(f"No frames found in video: {video_path}")
        return None

    return (accumulator / n_frames).astype(np.uint8)


def save_image(image_array, output_path):
    """Write ``image_array`` to ``output_path`` and report whether it worked.

    Args:
        image_array: Image data to encode.
        output_path: Destination file path; the extension selects the format.

    Returns:
        The boolean returned by ``cv2.imwrite`` (False when nothing was written).
    """
    written = cv2.imwrite(output_path, image_array)
    if not written:
        # imwrite signals failure through its return value instead of raising,
        # so surface it here rather than losing the image silently.
        print(f"Failed to write image: {output_path}")
    return written

def process_videos(directory, batch_size, output_path):
    """Turn every video under ``directory`` into an averaged 299x299 frame.

    Each averaged frame is written to ``output_path`` as ``<name>.jpg`` and is
    also collected, with its label and file name, into batches of
    ``batch_size`` items that are handed to ``save_batch``.

    Videos whose ``.jpg`` already exists are skipped and are not added to any
    batch. Batch numbering always starts at 1, so re-running after an
    interruption overwrites the batch files written by the earlier run.

    Args:
        directory: Root folder that is walked recursively for video files.
        batch_size: Number of frames per saved batch.
        output_path: Folder that receives the ``.jpg`` files.
    """
    labels = {"advertisement": 0, "drama": 1, "entertainment": 2, "interview": 3, "live_broadcast": 4,
              "movie": 5, "play": 6, "recitation": 7, "singing": 8, "speech": 9, "vlog": 10}
    X, y, names = [], [], []
    batch_index = 1

    # Make sure the target folder exists before any image is written to it.
    os.makedirs(output_path, exist_ok=True)

    for dirpath, dirnames, filenames in os.walk(directory):
        for filename in filenames:
            # Resolve the label first: a file without a known label would
            # otherwise raise only after the expensive averaging pass.
            name_parts = filename.split("-")
            label = labels.get(name_parts[1]) if len(name_parts) > 1 else None
            if label is None:
                print(f"Skipping file with unrecognized label: {filename}")
                continue

            video_path = os.path.join(dirpath, filename)
            output_name = os.path.join(output_path, filename.split('.')[0] + '.jpg')

            if os.path.exists(output_name):
                print(f"File {output_name} exists. Skipping.")
                continue

            print(f"Processing video: {video_path}")
            average_frame_image = average_frame(video_path)
            if average_frame_image is None:
                continue

            average_frame_image = cv2.resize(average_frame_image, (299, 299))
            X.append(average_frame_image)
            y.append(label)
            names.append(filename)
            save_image(average_frame_image, output_name)

            # Flush as soon as the batch is full to bound memory use.
            if len(X) == batch_size:
                save_batch(X, y, names, batch_index)
                batch_index += 1
                X, y, names = [], [], []

    if X:  # Save the remaining frames as the last (partial) batch
        save_batch(X, y, names, batch_index)

def save_batch(X, y, names, batch_index, output_dir='/content/drive/MyDrive/CS670_Project'):
    """Save one batch of frames, labels and file names as three .npy files.

    The files are named ``frame_images<k>.npy``, ``frame_labels<k>.npy`` and
    ``frame_name<k>.npy`` where ``<k>`` is ``batch_index``.

    Args:
        X: Sequence of frame images.
        y: Sequence of labels, parallel to ``X``.
        names: Sequence of source file names, parallel to ``X``.
        batch_index: Number appended to the three file names.
        output_dir: Folder that receives the files; defaults to the project
            folder on Google Drive.
    """
    X = np.array(X)
    y = np.array(y)
    names = np.array(names)
    np.save(os.path.join(output_dir, f'frame_images{batch_index}.npy'), X)
    np.save(os.path.join(output_dir, f'frame_labels{batch_index}.npy'), y)
    np.save(os.path.join(output_dir, f'frame_name{batch_index}.npy'), names)

# Average every training video into a JPEG and save the frames in batches of 5000.
process_videos(
    '/content/drive/MyDrive/CS670/train/video',
    batch_size=5000,
    output_path='/content/drive/MyDrive/CS670_Project/video_frames',
)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Processing video: /content/drive/MyDrive/CS670/train/video/batch_19/id30073-interview-01-114.mp4
Processing video: /content/drive/MyDrive/CS670/train/video/batch_19/id30073-interview-01-096.mp4
Processing video: /content/drive/MyDrive/CS670/train/video/batch_19/id30073-interview-01-095.mp4
Processing video: /content/drive/MyDrive/CS670/train/video/batch_19/id30073-interview-01-245.mp4
Processing video: /content/drive/MyDrive/CS670/train/video/batch_19/id30073-speech-01-011.mp4
Processing video: /content/drive/MyDrive/CS670/train/video/batch_19/id30073-speech-01-136.mp4
Processing video: /content/drive/MyDrive/CS670/train/video/batch_19/id30073-speech-02-038.mp4
Processing video: /content/drive/MyDrive/CS670/train/video/batch_19/id30073-speech-02-086.mp4
Processing video: /content/drive/MyDrive/CS670/train/video/batch_19/id30073-speech-02-129.mp4
Processing video: /content/drive/MyDrive/CS670/train/video/batch_19/id30073-s

KeyboardInterrupt: 

In [None]:
# NOTE(review): X_new is not assigned in any cell shown in this notebook, so
# this cell relies on leftover kernel state — confirm where it comes from.
X_new=np.array(X_new)
# Display the array's shape as the cell output.
X_new.shape

(0,)

In [None]:
# Display the current contents of X (debugging aid).
X

[]

In [None]:
import numpy as np
import os

X = []

video_features_path = '/content/drive/MyDrive/CS670_Project/video_features/'

# os.listdir() returns entries in arbitrary order; sorting makes the merge
# order reproducible from run to run.
for filename in sorted(os.listdir(video_features_path)):
    if filename.endswith(".npy"):
        filepath = os.path.join(video_features_path, filename)
        data = np.load(filepath)
        if data.size == 0:
            # A file saved from a folder without readable videos has no rows.
            continue
        # Flatten to (n_samples_i, n_features). Unlike np.squeeze this keeps
        # the sample axis when a file holds a single sample.
        data = data.reshape(-1, data.shape[-1])
        X.extend(data)  # one feature vector per row

X = np.array(X)

# Now X holds one row per sample across all loaded files


In [None]:
y=[]

video_labels_path='/content/drive/MyDrive/CS670_Project/video_labels/'

# os.listdir() returns entries in arbitrary order; sorting makes the merge
# order reproducible from run to run.
for filename in sorted(os.listdir(video_labels_path)):
    if filename.endswith(".npy"):
        filepath = os.path.join(video_labels_path, filename)
        data = np.load(filepath)
        y.extend(data)  # append every label stored in this file

y=np.array(y)

In [None]:
# Persist the merged feature matrix X and label vector y as single .npy files.
np.save('/content/drive/MyDrive/CS670_Project/video_features.npy', X)
np.save('/content/drive/MyDrive/CS670_Project/video_labels.npy', y)

In [None]:
import numpy as np
import os

names=[]

video_names_path='/content/drive/MyDrive/CS670_Project/video_names/'

# os.listdir() returns entries in arbitrary order; sorting makes the merge
# order reproducible from run to run.
for filename in sorted(os.listdir(video_names_path)):
    if filename.endswith(".npy"):
        filepath = os.path.join(video_names_path, filename)
        data = np.load(filepath)
        names.extend(data)  # append every file name stored in this file

names=np.array(names)
np.save('/content/drive/MyDrive/CS670_Project/video_names.npy', names)

In [None]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split
import numpy as np
import os
import cv2

path = "/content/drive/MyDrive/CS670_Project/video_frames/"

# Images and their integer labels, collected in parallel.
X = []
y = []

# Dictionary to map labels to integers
labels = {"advertisement": 0, "drama": 1, "entertainment": 2, "interview": 3, "live_broadcast": 4, "movie": 5,
          "play": 6, "recitation": 7, "singing": 8, "speech": 9, "vlog": 10}

# Iterate over all frame images in the directory
for name in os.listdir(path):
    image_path = os.path.join(path, name)
    image = cv2.imread(image_path)
    # cv2.imread returns None for unreadable files, so only touch .shape
    # after the check.
    if image is not None:
        print(name, image.shape)
        X.append(image)
        y.append(labels[name.split("-")[1]])
    else:
        print(f"Unable to read image: {name}")

# Convert lists to numpy arrays
X = np.array(X)
y = np.array(y)
np.save('/content/drive/MyDrive/CS670_Project/images.npy',X)
np.save('/content/drive/MyDrive/CS670_Project/image_labels.npy',y)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
id30140-speech-02-039.jpg (299, 299, 3)
id30146-live_broadcast-01-044.jpg (299, 299, 3)
id30146-live_broadcast-01-062.jpg (299, 299, 3)
id30146-live_broadcast-02-065.jpg (299, 299, 3)
id30146-live_broadcast-03-025.jpg (299, 299, 3)
id30146-live_broadcast-03-018.jpg (299, 299, 3)
id30146-live_broadcast-03-064.jpg (299, 299, 3)
id30146-live_broadcast-03-080.jpg (299, 299, 3)
id30146-live_broadcast-04-022.jpg (299, 299, 3)
id30146-live_broadcast-04-023.jpg (299, 299, 3)
id30146-live_broadcast-04-027.jpg (299, 299, 3)
id30146-live_broadcast-05-004.jpg (299, 299, 3)
id30146-live_broadcast-05-024.jpg (299, 299, 3)
id30146-live_broadcast-05-052.jpg (299, 299, 3)
id30146-live_broadcast-05-065.jpg (299, 299, 3)
id30146-live_broadcast-05-066.jpg (299, 299, 3)
id30146-live_broadcast-07-022.jpg (299, 299, 3)
id30146-live_broadcast-07-028.jpg (299, 299, 3)
id30146-live_broadcast-07-076.jpg (299, 299, 3)
id30146-live_broadcast-08-081.j

KeyboardInterrupt: 

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Count the entries in the folder of saved frame images.
path='/content/drive/MyDrive/CS670_Project/video_frames'
count = sum(1 for _ in os.listdir(path))
print(count)

39967


In [None]:
# Count the lines `ls` prints for everything matched under the video folder.
# NOTE(review): when the glob matches directories, `ls` also prints a header
# and a blank separator per directory, so this likely overcounts — confirm.
!ls /content/drive/MyDrive/CS670/train/video/* | wc -l

133738


In [None]:
# Count the entries inside every immediate sub-folder of `path`.
path='/content/drive/MyDrive/CS670'
count=0
for folder in os.listdir(path):
    # os.listdir() yields bare names; join with the parent so the lookup does
    # not depend on the current working directory.
    subdir = os.path.join(path, folder)
    if os.path.isdir(subdir):
        count += len(os.listdir(subdir))
print(count)

FileNotFoundError: [Errno 2] No such file or directory: 'train'