# **Pre-processing the Data**

## Importing the necessary libraries and connecting to Google Drive

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import cv2
import os
import zipfile

In [2]:
# Mount Google Drive at /content/drive so files kept in Drive can be read by path.
# google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Folder on the runtime's local disk that the dataset archive is extracted into
local_data_folder = "/content"
# exist_ok=True: do not fail when the folder is already present
os.makedirs(local_data_folder, exist_ok=True)

## Extracting the **RAVDESS** dataset in Colab

In [4]:
# Zipped dataset as stored on the mounted Drive
data_zip_file = "/content/drive/MyDrive/data.zip"

# Unpack every member of the archive under local_data_folder; the context
# manager closes the archive once extraction has finished.
with zipfile.ZipFile(data_zip_file, mode="r") as archive:
    archive.extractall(path=local_data_folder)

## Converting each video into frames at a target rate of 10 frames per second

In [None]:
def extract_frames(video_path, output_folder, target_fps=20):
    """Save frames of a video as PNG files, subsampled to roughly ``target_fps``.

    Every n-th frame is kept, where n is the source frame rate divided by
    ``target_fps`` (rounded, never below 1). Kept frames are written to
    ``output_folder`` as ``frame0000.png``, ``frame0001.png``, ... in read order.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        output_folder: Folder that receives the PNG files; created if missing.
        target_fps: Desired number of saved frames per second of video (> 0).

    Returns:
        Number of frames written; 0 when the video could not be opened.

    Raises:
        ValueError: If ``target_fps`` is not positive.
    """
    if target_fps <= 0:
        raise ValueError(f"target_fps must be positive, got {target_fps}")

    # Create the output folder if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print("Error opening video:", video_path)
            return 0

        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 and then
        # floor-dividing by 10 gives an interval of 2 (about 15 fps) instead of 3.
        video_fps = cap.get(cv2.CAP_PROP_FPS)

        if video_fps > 0:
            # max(1, ...) avoids a zero interval (and the resulting modulo by
            # zero) when the source frame rate is below the target.
            every_nth_frame = max(1, round(video_fps / target_fps))
        else:
            # Frame rate unavailable (0 or NaN): keep every frame.
            every_nth_frame = 1

        frame_number = 0
        extracted_frames = 0

        while cap.isOpened():
            ret, frame = cap.read()

            # read() reports failure once no further frame can be decoded
            if not ret:
                break

            if frame_number % every_nth_frame == 0:
                frame_file = os.path.join(output_folder, f"frame{extracted_frames:04d}.png")
                cv2.imwrite(frame_file, frame)
                extracted_frames += 1

            frame_number += 1

        return extracted_frames
    finally:
        # Release the capture even if reading or writing raised
        cap.release()

# Root of the extracted dataset and root under which frame folders are written
data_folder = "/content/data"
output_base_folder = "/content/Output_RAVDESS"

# The loop covers the folders Actor_01 through Actor_24
for actor in range(1, 25):
    actor_folder = "Actor_{:02d}".format(actor)

    # Input videos and output frames share the same per-actor folder name
    actor_data_folder = os.path.join(data_folder, actor_folder)
    actor_output_folder = os.path.join(output_base_folder, actor_folder)

    for file in os.listdir(actor_data_folder):
        # Anything that is not an .mp4 video is ignored
        if not file.endswith(".mp4"):
            continue

        video_path = os.path.join(actor_data_folder, file)

        # Frames of a clip go into a folder named after the clip, extension dropped
        clip_name = os.path.splitext(file)[0]
        video_output_folder = os.path.join(actor_output_folder, clip_name)

        extract_frames(video_path, video_output_folder, target_fps=10)


Saving the output archive to Google Drive for future reference

In [8]:
# Shell escape: recursively zip the extracted frames into an archive on the mounted Drive
!zip -r /content/drive/MyDrive/Output.zip /content/Output_RAVDESS

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: content/Output_RAVDESS/Actor_23/02-01-05-01-01-02-23/frame0036.png (deflated 11%)
  adding: content/Output_RAVDESS/Actor_23/02-01-05-01-01-02-23/frame0041.png (deflated 11%)
  adding: content/Output_RAVDESS/Actor_23/02-01-05-01-01-02-23/frame0030.png (deflated 12%)
  adding: content/Output_RAVDESS/Actor_23/02-01-05-01-01-02-23/frame0010.png (deflated 10%)
  adding: content/Output_RAVDESS/Actor_23/02-01-05-01-01-02-23/frame0021.png (deflated 11%)
  adding: content/Output_RAVDESS/Actor_23/02-01-05-01-01-02-23/frame0048.png (deflated 11%)
  adding: content/Output_RAVDESS/Actor_23/02-01-05-01-01-02-23/frame0027.png (deflated 10%)
  adding: content/Output_RAVDESS/Actor_23/02-01-05-01-01-02-23/frame0000.png (deflated 13%)
  adding: content/Output_RAVDESS/Actor_23/02-01-05-01-01-02-23/frame0009.png (deflated 11%)
  adding: content/Output_RAVDESS/Actor_23/02-01-05-01-01-02-23/frame0032.png (deflated 11%)
  adding: conte

## Resizing each frame to (224, 224) as required by the VGG model

In [25]:
import cv2
import os

input_dir = "Output_RAVDESS"
output_dir = "Preprocessed_Frames"

# Make sure the destination root exists before any frame is written
os.makedirs(output_dir, exist_ok=True)

# Layout walked here: <input_dir>/<actor folder>/<video folder>/<frame image>
for actor_folder in os.listdir(input_dir):
    actor_path = os.path.join(input_dir, actor_folder)

    for video in os.listdir(actor_path):
        video_path = os.path.join(actor_path, video)

        # Resized frames mirror the input layout underneath output_dir
        resized_video_dir = os.path.join(output_dir, actor_folder, video)

        for frame in os.listdir(video_path):
            frame_path = os.path.join(video_path, frame)
            image = cv2.imread(frame_path)

            # imread yields None for a file it cannot load: report it and move on
            if image is None:
                print("Error loading image:", frame_path)
                continue

            # 224 x 224 is the frame size used for the VGG model
            resized_image = cv2.resize(image, (224, 224))

            # The folder is created lazily, only once there is a frame to store
            os.makedirs(resized_video_dir, exist_ok=True)
            output_path = os.path.join(resized_video_dir, frame)
            cv2.imwrite(output_path, resized_image)
        

## Processing the input using TensorFlow and making the data ready for the VGG16 model

In [27]:
from tensorflow.keras.applications.vgg16 import preprocess_input

input_dir = "Preprocessed_Frames"
output_dir = "Preprocessed_Frames_VGG"

# Create the output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Number of frames passed to preprocess_input in one call
batch_size = 16


def _save_vgg_batch(frames, frame_paths, destination_folder):
    """Apply VGG16 preprocessing to one batch of frames and save each result.

    Args:
        frames: List of RGB images as NumPy arrays; they are expected to share
            one shape so they can be stacked into a single array.
        frame_paths: Source path of each frame, in the same order as ``frames``.
        destination_folder: Existing folder that receives one array file per frame.
    """
    # preprocess_input is documented for floating point input in [0, 255]
    preprocessed_frames = preprocess_input(np.asarray(frames, dtype=np.float32))

    for source_path, output_frame in zip(frame_paths, preprocessed_frames):
        # np.save appends ".npy" to a name that lacks it, so the array for
        # frame0000.png is stored as frame0000.png.npy
        output_path = os.path.join(destination_folder, os.path.basename(source_path))
        np.save(output_path, output_frame)


# Frames (and their source paths) collected for the batch currently being built
frames_list = []
frame_paths_list = []

# Layout walked here: <input_dir>/<actor folder>/<video folder>/<frame image>
for actor_folder in os.listdir(input_dir):
    actor_path = os.path.join(input_dir, actor_folder)

    # Create the corresponding actor folder in the output directory
    output_actor_folder = os.path.join(output_dir, actor_folder)
    os.makedirs(output_actor_folder, exist_ok=True)

    for subfolder in os.listdir(actor_path):
        subfolder_path = os.path.join(actor_path, subfolder)

        # Create the corresponding video folder in the output directory
        output_subfolder = os.path.join(output_actor_folder, subfolder)
        os.makedirs(output_subfolder, exist_ok=True)

        for frame in os.listdir(subfolder_path):
            frame_path = os.path.join(subfolder_path, frame)
            image = cv2.imread(frame_path)

            # An unreadable file would put None into the batch and break the
            # array conversion, so report it and skip it instead.
            if image is None:
                print("Error loading image:", frame_path)
                continue

            # OpenCV loads images in BGR channel order, while VGG16's
            # preprocess_input expects RGB (it performs the RGB->BGR swap and
            # the per-channel mean subtraction itself).
            frames_list.append(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
            frame_paths_list.append(frame_path)

            # Flush as soon as a full batch has been collected
            if len(frames_list) == batch_size:
                _save_vgg_batch(frames_list, frame_paths_list, output_subfolder)
                frames_list.clear()
                frame_paths_list.clear()

        # Flush the partial batch before moving to the next video so that no
        # frame is ever written into another video's folder. Because of this
        # flush the lists are always empty once the loops finish.
        if frames_list:
            _save_vgg_batch(frames_list, frame_paths_list, output_subfolder)
            frames_list.clear()
            frame_paths_list.clear()
