In [None]:
## Preview the source video inline (note: this cell only displays the .mpg file; it does not convert it to mp4)

from IPython.display import Video

# Path of the clip to preview.
video_path = "/content/umcp.mpg"
# embed=True asks IPython to store the video data inside the cell output
# instead of linking to the file on disk.
Video(video_path, embed=True)


# **Part 1: Video Summarisation**

In [1]:
import cv2
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
## Video summarisation is carried out in the steps listed below

## 1. extract_frames
## 2. preprocess_frames
## 3. compute_motion
## 4. apply_pca
## 5. select_key_frames
## 6. segment_and_summarize

# Step 1: Extract frames from the video
def extract_frames(video_path, frame_interval=1):
    """Read a video and keep every ``frame_interval``-th frame.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        frame_interval: Keep the frames whose zero-based index is a multiple
            of this value (1 keeps every frame). Must be >= 1.

    Returns:
        list: The kept frames in reading order, exactly as returned by
        ``cap.read()``. The list is empty when the very first read fails.

    Raises:
        ValueError: If ``frame_interval`` is smaller than 1.
    """
    # Validate before touching the file so a bad interval fails fast with a
    # clear message instead of a ZeroDivisionError inside the read loop.
    if frame_interval < 1:
        raise ValueError(f"frame_interval must be >= 1, got {frame_interval}")

    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        frame_count = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if frame_count % frame_interval == 0:
                frames.append(frame)
            frame_count += 1
    finally:
        # Release the capture handle even if reading raises part-way through.
        cap.release()
    return frames

# Step 2: Preprocess frames (to resize and convert to grayscale)
def preprocess_frames(frames, size=(128, 128)):
    """Turn each frame into a grayscale image resized to ``size``.

    Args:
        frames: Iterable of images accepted by
            ``cv2.cvtColor(..., cv2.COLOR_BGR2GRAY)``.
        size: Target size forwarded unchanged to ``cv2.resize``.

    Returns:
        list: One grayscale, resized image per input frame, in input order.
    """
    def _shrink_to_gray(image):
        # Colour conversion first, then the resize works on a single channel.
        return cv2.resize(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), size)

    return [_shrink_to_gray(image) for image in frames]

# Step 3: Compute motion between consecutive frames
def compute_motion(frames):
    """Return the absolute difference between each frame and its predecessor.

    Args:
        frames: Sequence of images that ``cv2.absdiff`` can compare pairwise.

    Returns:
        list: ``len(frames) - 1`` difference images (empty when fewer than
        two frames are given); entry ``k`` compares frame ``k + 1`` with
        frame ``k``.
    """
    return [
        cv2.absdiff(current, previous)
        for previous, current in zip(frames, frames[1:])
    ]

# Step 4: Apply PCA to identify key frames
def apply_pca(input_motion, n_components=10):
    """Fit a PCA model on flattened motion (frame-difference) images.

    Args:
        input_motion: Non-empty sequence of equally sized arrays; each one is
            flattened into a single row of the data matrix.
        n_components: Requested number of principal components. An integer
            request is capped at ``min(n_samples, n_features)`` so that short
            segments do not make the PCA fit fail. Non-integer values are
            forwarded to ``PCA`` untouched.

    Returns:
        The fitted ``PCA`` instance.

    Raises:
        ValueError: If ``input_motion`` is empty.
    """
    if len(input_motion) == 0:
        raise ValueError("input_motion is empty; at least one motion frame is required")

    motion_array = np.array(input_motion).reshape(len(input_motion), -1)

    # PCA cannot extract more components than the smaller data dimension;
    # without this cap a segment with fewer motion frames than n_components
    # raises inside the fit.
    if isinstance(n_components, (int, np.integer)) and not isinstance(n_components, bool):
        n_components = min(n_components, *motion_array.shape)

    pca = PCA(n_components=n_components)
    pca.fit(motion_array)
    return pca

# Step 5: Select key frames based on PCA
def select_key_frames(frames, pca, n_key_frames=10):
    """Pick the frames with the highest summed PCA projection.

    Each frame is flattened, projected with ``pca.transform`` and scored by
    the sum of its projected coordinates. The ``n_key_frames`` best-scoring
    frames are returned in their original (chronological) order so that a
    video stitched from them never jumps backwards in time.

    Args:
        frames: Sequence of equally sized image arrays.
        pca: Fitted model exposing ``transform``.
        n_key_frames: Maximum number of frames to return.

    Returns:
        list: Up to ``n_key_frames`` items of ``frames``; empty when
        ``frames`` is empty.
    """
    # NOTE(review): segment_and_summarize fits `pca` on frame differences but
    # passes the frames themselves here -- confirm that projecting raw frames
    # onto motion components is intended.
    if len(frames) == 0:
        return []

    frame_matrix = np.array(frames).reshape(len(frames), -1)
    scores = pca.transform(frame_matrix).sum(axis=1)

    # Highest scores first, truncated to the requested count ...
    best_indices = np.argsort(-scores)[:n_key_frames]
    # ... then restored to temporal order before returning.
    return [frames[i] for i in np.sort(best_indices)]

# Step 6: Summarize the video by stitching key frames
def summarize_video(key_frames, output_path="summarized_video.mp4", fps=1):
    """Write single-channel key frames to a video file.

    Args:
        key_frames: Non-empty sequence of 2-D (grayscale) frames that all
            share the shape of the first one.
        output_path: Destination file for the ``mp4v``-encoded video.
        fps: Frame rate of the written video (default 1 frame per second).

    Raises:
        ValueError: If ``key_frames`` is empty.
    """
    if len(key_frames) == 0:
        raise ValueError("key_frames is empty; nothing to write")

    height, width = key_frames[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')   # FourCC is a 4-byte code used to specify the video codec
    # VideoWriter takes the frame size as (width, height).
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height), isColor=False)
    try:
        for frame in key_frames:
            out.write(frame)
    finally:
        # Always finalise the file, even if a write fails.
        out.release()

# Step 7: Segment the video and extract key frames from each segment
def segment_and_summarize(video_path, n_segments=10, n_key_frames_per_segment=2,
                          output_path="summarized_video.mp4", n_components=10):
    """Split a video into equal segments and stitch key frames into a summary.

    The whole video is read into memory, converted to small grayscale frames
    and cut into ``n_segments`` consecutive segments (the last one absorbs any
    remainder). For every segment a PCA model is fitted on the frame
    differences and used to choose ``n_key_frames_per_segment`` key frames.
    All key frames are then written to ``output_path``.

    Args:
        video_path: Path of the input video.
        n_segments: Number of consecutive segments (>= 1).
        n_key_frames_per_segment: Key frames to keep from each segment.
        output_path: File the summary video is written to.
        n_components: Upper bound on PCA components per segment; it is
            lowered automatically for segments with little data.

    Raises:
        ValueError: If ``n_segments`` is smaller than 1 or no frame could be
            read from ``video_path``.
    """
    if n_segments < 1:
        raise ValueError(f"n_segments must be >= 1, got {n_segments}")

    # Step 1: Extract frames
    frames = extract_frames(video_path)

    # Step 2: Preprocess frames
    processed_frames = preprocess_frames(frames)
    if len(processed_frames) == 0:
        raise ValueError(f"No frames could be read from {video_path!r}")

    # Step 3: Divide frames into segments
    segment_size = len(processed_frames) // n_segments
    all_key_frames = []

    for i in range(n_segments):
        start = i * segment_size
        end = (i + 1) * segment_size if i < n_segments - 1 else len(processed_frames)
        segment_frames = processed_frames[start:end]

        # A motion image needs two frames. Segments shorter than that (which
        # happens when the video has few frames relative to n_segments)
        # contribute their frames directly instead of crashing the PCA step.
        if len(segment_frames) < 2:
            all_key_frames.extend(segment_frames[:n_key_frames_per_segment])
            continue

        # Step 4: Compute motion for the segment
        motion = compute_motion(segment_frames)

        # Step 5: Apply PCA to the segment, never asking for more components
        # than the segment's data can support.
        usable_components = min(n_components, len(motion), int(np.size(motion[0])))
        pca = apply_pca(motion, n_components=usable_components)

        # To inspect how much variance the components capture, plot
        # np.cumsum(pca.explained_variance_ratio_) against the component count.

        # Step 6: Select key frames for the segment
        key_frames = select_key_frames(segment_frames, pca, n_key_frames=n_key_frames_per_segment)
        all_key_frames.extend(key_frames)

    # Step 7: Summarize video using all key frames
    summarize_video(all_key_frames, output_path)
    print(f"Summarized video is saved as '{output_path}' with {len(all_key_frames)} key frames.")

# Run the summarisation pipeline on the input video: 10 segments with 2 key
# frames each (the recorded output below reports 20 key frames in total).
video_path = "/content/video.mp4"
segment_and_summarize(video_path, n_segments=10, n_key_frames_per_segment=2)

## Experiments with a different number of key frames per segment
## (uncomment one line at a time; every call writes to the same output file).
# segment_and_summarize(video_path, n_segments=10, n_key_frames_per_segment=3)
# segment_and_summarize(video_path, n_segments=10, n_key_frames_per_segment=5)
# segment_and_summarize(video_path, n_segments=10, n_key_frames_per_segment=7)
# segment_and_summarize(video_path, n_segments=10, n_key_frames_per_segment=9)

Summarized video is saved as 'summarized_video.mp4' with 20 key frames.


## **Below is the implementation of the PCA from scratch**

In [None]:
# Let's Implement PCA from Scratch
class LocalPCA:
    """Minimal PCA based on the eigen-decomposition of the covariance matrix.

    The interface mirrors the ``fit`` / ``transform`` / ``fit_transform``
    trio used elsewhere in this notebook. Note that ``components`` stores one
    component per *column*, i.e. it has shape ``(n_features, n_components)``.
    """

    def __init__(self, n_components):
        """Store the number of leading components to keep."""
        self.n_components = n_components
        self.mean = None        # per-feature mean, set by fit()
        self.components = None  # (n_features, n_components) matrix, set by fit()

    ## fit function
    def fit(self, X):
        """Learn the mean and the leading principal directions of ``X``.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.

        Returns:
            LocalPCA: ``self``, so calls can be chained.
        """
        X = np.asarray(X, dtype=float)

        # Step 1: We first need to center the data
        self.mean = np.mean(X, axis=0)
        X_centered = X - self.mean

        # Step 2: Then we need to compute covariance matrix
        # (atleast_2d keeps the single-feature case a 1x1 matrix).
        covariance_matrix = np.atleast_2d(np.cov(X_centered, rowvar=False))

        # Step 3: We need to compute eigenvalues and eigenvectors.
        # The covariance matrix is symmetric, so eigh is the right solver: it
        # always returns real values, whereas the general-purpose eig can
        # return complex results caused purely by round-off.
        eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)

        # Step 4: After that we will have to sort eigenvectors by eigenvalues in descending order
        sorted_indices = np.argsort(-eigenvalues)
        self.components = eigenvectors[:, sorted_indices[:self.n_components]]
        return self

    ## transform function
    def transform(self, X):
        """Project ``X`` onto the components learned by :meth:`fit`.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.

        Returns:
            numpy.ndarray: Array of shape ``(n_samples, n_components)``.

        Raises:
            RuntimeError: If called before :meth:`fit`.
        """
        if self.mean is None or self.components is None:
            raise RuntimeError("LocalPCA.transform() called before fit()")
        X_centered = np.asarray(X, dtype=float) - self.mean
        return np.dot(X_centered, self.components)

    ## fit transform function
    def fit_transform(self, X):
        """Fit on ``X`` and return its projection."""
        self.fit(X)
        return self.transform(X)


In [None]:
## Please uncomment the code below to run the pipeline with the LocalPCA implementation


# import cv2
# import numpy as np
# from sklearn.decomposition import PCA
# import matplotlib.pyplot as plt

# # Step 1: Extract frames from the video
# def extract_frames(video_path, frame_interval=1):
#     cap = cv2.VideoCapture(video_path)
#     frames = []
#     frame_count = 0
#     while True:
#         ret, frame = cap.read()
#         if not ret:
#             break
#         if frame_count % frame_interval == 0:
#             frames.append(frame)
#         frame_count += 1
#     cap.release()
#     return frames

# # Step 2: Preprocess frames (resize and convert to grayscale)
# def preprocess_frames(frames, size=(128, 128)):
#     processed_frames = []
#     for frame in frames:
#         gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
#         resized = cv2.resize(gray, size)
#         processed_frames.append(resized)
#     return processed_frames

# # Step 3: Compute motion between consecutive frames
# def compute_motion(frames):
#     motion = []
#     for i in range(1, len(frames)):
#         diff = cv2.absdiff(frames[i], frames[i-1])
#         motion.append(diff)
#     return motion

# # Step 4: Apply PCA to identify key frames
# # def apply_pca(motion, n_components=10):
# #     motion_array = np.array(motion).reshape(len(motion), -1)
# #     pca = PCA(n_components=n_components)
# #     pca.fit(motion_array)
# #     return pca

# def apply_pca_scratch(motion, n_components=10):
#     motion_array = np.array(motion).reshape(len(motion), -1)
#     pca = LocalPCA(n_components=n_components)
#     transformed_data = pca.fit_transform(motion_array)
#     return pca, transformed_data

# # Step 5: Select key frames based on PCA
# def select_key_frames(frames, pca, n_key_frames=10):
#     motion_array = np.array(frames).reshape(len(frames), -1)
#     transformed = pca.transform(motion_array)
#     key_frame_indices = np.argsort(-transformed.sum(axis=1))[:n_key_frames]
#     key_frames = [frames[i] for i in key_frame_indices]
#     return key_frames

# # Step 6: Summarize the video by stitching key frames
# def summarize_video(key_frames, output_path="summarized_video.mp4"):
#     height, width = key_frames[0].shape
#     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
#     out = cv2.VideoWriter(output_path, fourcc, 1, (width, height), isColor=False)
#     for frame in key_frames:
#         out.write(frame)
#     out.release()

# # Step 7: Segment the video and extract key frames from each segment
# def segment_and_summarize(video_path, n_segments=10, n_key_frames_per_segment=2):
#     # Step 1: Extract frames
#     frames = extract_frames(video_path)

#     # Step 2: Preprocess frames
#     processed_frames = preprocess_frames(frames)

#     # Step 3: Divide frames into segments
#     segment_size = len(processed_frames) // n_segments
#     all_key_frames = []

#     for i in range(n_segments):
#         start = i * segment_size
#         end = (i + 1) * segment_size if i < n_segments - 1 else len(processed_frames)
#         segment_frames = processed_frames[start:end]

#         # Step 4: Compute motion for the segment
#         motion = compute_motion(segment_frames)

#         # Step 5: Apply PCA to the segment
#         pca, _ = apply_pca_scratch(motion)  # apply_pca_scratch returns (pca, transformed_data)

#         # Step 6: Select key frames for the segment
#         key_frames = select_key_frames(segment_frames, pca, n_key_frames=n_key_frames_per_segment)
#         all_key_frames.extend(key_frames)

#     # Step 7: Summarize video using all key frames
#     summarize_video(all_key_frames)
#     print(f"Summarized video saved as 'summarized_video.mp4' with {len(all_key_frames)} key frames.")

# # Run the code
# video_path = "/content/video.mp4"  # Replace with your video path
# segment_and_summarize(video_path, n_segments=10, n_key_frames_per_segment=2)

# **Part2**

In [2]:
import numpy as np
import cv2
import os
from datetime import datetime


## Defining GaussianMixture
class GaussianMixture:
    """Per-pixel online mixture of 1-D Gaussians used to flag foreground pixels.

    Every pixel of a grayscale frame owns ``n_components`` Gaussians, each
    described by a weight, a mean and a variance. ``update`` adapts the
    mixture to a new frame and returns a boolean mask that is True where the
    pixel is considered foreground.

    Args:
        n_components: Number of Gaussians per pixel.
        learning_rate: Step size of the running updates of weights, means
            and variances.
        threshold: Stored on the instance (``BackgroundSubtractor`` reports
            it in ``parameters.txt``); it does not influence the mask
            returned by ``update``.
        var_threshold: Initial variance of every Gaussian.
        match_sigma: A pixel matches a Gaussian when it lies within this many
            standard deviations of the Gaussian's mean.
        foreground_prob: A pixel is foreground when its weighted mixture
            density is below this value. The comparison is on an absolute
            density, so the outcome also depends on the current variances.
        min_variance: Lower bound applied to the variances after every
            update so the density never divides by zero.
    """

    def __init__(self, n_components=3, learning_rate=0.01, threshold=0.7, var_threshold=16,
                 match_sigma=2.5, foreground_prob=0.1, min_variance=1e-6):   ## All defaults are tunable; these values worked best for the author's video
        self.n_components = n_components
        self.learning_rate = learning_rate
        self.threshold = threshold
        self.var_threshold = var_threshold
        self.match_sigma = match_sigma
        self.foreground_prob = foreground_prob
        self.min_variance = min_variance
        self.weights = None
        self.means = None
        self.covars = None
        self.initialized = False

    def initialize(self, first_frame):
        """Allocate the model; the first Gaussian of each pixel starts at the pixel's value.

        Raises:
            ValueError: If ``first_frame`` is not a 2-D (grayscale) array.
        """
        first_frame = np.asarray(first_frame, dtype=np.float64)
        if first_frame.ndim != 2:
            raise ValueError(
                f"GaussianMixture expects a 2-D grayscale frame, got shape {first_frame.shape}"
            )
        height, width = first_frame.shape
        self.weights = np.ones((height, width, self.n_components)) / self.n_components
        self.means = np.zeros((height, width, self.n_components))
        self.covars = np.ones((height, width, self.n_components)) * self.var_threshold
        self.means[:, :, 0] = first_frame
        self.initialized = True

    def gaussian_prob(self, x, mean, covar):
        """Density of a 1-D normal distribution with the given mean and variance."""
        diff = x - mean
        return np.exp(-diff * diff / (2 * covar)) / np.sqrt(2 * np.pi * covar)

    def update(self, frame):
        """Adapt the model to ``frame`` and return the foreground mask.

        Args:
            frame: 2-D grayscale frame with the same shape as the first one.

        Returns:
            numpy.ndarray: Boolean array of the frame's shape, True for
            foreground. The very first call only initialises the model and
            returns an all-True mask.

        Raises:
            ValueError: If the frame shape differs from the model's shape.
        """
        if not self.initialized:
            self.initialize(frame)
            return np.ones_like(frame, dtype=bool)

        frame = np.asarray(frame, dtype=np.float64)
        if frame.shape != self.means.shape[:2]:
            raise ValueError(
                f"frame shape {frame.shape} does not match model shape {self.means.shape[:2]}"
            )

        # Broadcast the frame against all components at once: (H, W, 1) vs (H, W, K).
        pixels = frame[:, :, np.newaxis]
        delta = pixels - self.means
        matches = np.abs(delta) < self.match_sigma * np.sqrt(self.covars)

        lr = self.learning_rate
        # Every weight decays; matched components are reinforced. The decay
        # must not depend on whether some *other* pixel matched the component.
        self.weights *= (1 - lr)
        self.weights[matches] += lr

        # Running mean / variance update, only where the pixel matched.
        matched_delta = delta[matches]
        self.means[matches] += lr * matched_delta
        self.covars[matches] += lr * (matched_delta * matched_delta - self.covars[matches])
        np.maximum(self.covars, self.min_variance, out=self.covars)

        # Normalize weights so they sum to one per pixel.
        self.weights /= self.weights.sum(axis=2, keepdims=True)

        # Weighted mixture density of the observed value at every pixel.
        background_prob = np.sum(
            self.weights * self.gaussian_prob(pixels, self.means, self.covars), axis=2
        )

        return background_prob < self.foreground_prob

class BackgroundSubtractor:
    """Split a video into background and foreground using ``GaussianMixture``.

    For every frame the original image, the background-only image and the
    foreground-only image are written as JPEG files into timestamped
    sub-directories of ``output_dir``; afterwards the background and
    foreground images are assembled into ``background.mp4`` and
    ``foreground.mp4`` and the model parameters are saved to
    ``parameters.txt``.
    """

    def __init__(self, video_path, output_dir="output"):
        """Remember the paths, create the model and the output directories."""
        self.video_path = video_path
        self.output_dir = output_dir
        self.gmm = GaussianMixture()
        # Frame rate for the generated videos; replaced by the source video's
        # rate in final_video_processor() when the source reports a usable one.
        self.fps = 30
        self.setup_output_directories()

    def setup_output_directories(self):
        """Create ``output_dir`` and the timestamped frame/background/foreground folders."""
        os.makedirs(self.output_dir, exist_ok=True)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.frames_dir = os.path.join(self.output_dir, f"frames_{timestamp}")
        self.bg_dir = os.path.join(self.output_dir, f"background_{timestamp}")
        self.fg_dir = os.path.join(self.output_dir, f"foreground_{timestamp}")

        for directory in (self.frames_dir, self.bg_dir, self.fg_dir):
            os.makedirs(directory, exist_ok=True)

    def final_video_processor(self):
        """Process the whole video, then build the videos and the parameter file.

        Raises:
            ValueError: If the video cannot be opened.
        """
        cap = cv2.VideoCapture(self.video_path)
        try:
            if not cap.isOpened():
                raise ValueError("Error opening video file")

            # Keep the playback speed of the source; fall back to the default
            # when the container reports no usable frame rate.
            source_fps = cap.get(cv2.CAP_PROP_FPS)
            if source_fps and np.isfinite(source_fps) and source_fps > 0:
                self.fps = source_fps

            frame_count = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # Convert to grayscale
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

                # Get foreground mask
                foreground_mask = self.gmm.update(gray)

                # Save frames
                cv2.imwrite(os.path.join(self.frames_dir, f"frame_{frame_count:04d}.jpg"), frame)

                # Create and save background/foreground: the 2-D mask blanks
                # all colour channels of the selected pixels.
                background = frame.copy()
                foreground = frame.copy()

                background[foreground_mask] = 0
                foreground[~foreground_mask] = 0

                cv2.imwrite(os.path.join(self.bg_dir, f"bg_{frame_count:04d}.jpg"), background)
                cv2.imwrite(os.path.join(self.fg_dir, f"fg_{frame_count:04d}.jpg"), foreground)

                frame_count += 1
        finally:
            # Release the capture even when opening or processing fails.
            cap.release()

        self.generate_videos()
        self.save_parameters()

    def generate_videos(self):
        """Assemble the saved background/foreground JPEGs into mp4 videos."""

        def frame_order(name):
            # Order by the numeric suffix ("bg_10000" after "bg_9999"); a plain
            # string sort breaks once the index outgrows the 4-digit padding.
            stem = os.path.splitext(name)[0]
            try:
                return (0, int(stem.rsplit("_", 1)[-1]), name)
            except ValueError:
                return (1, 0, name)

        def images_to_video(image_folder, output_path, fps=30):
            images = sorted(
                (img for img in os.listdir(image_folder) if img.endswith(".jpg")),
                key=frame_order,
            )

            if not images:
                return

            frame = cv2.imread(os.path.join(image_folder, images[0]))
            height, width = frame.shape[:2]

            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
            try:
                for image in images:
                    out.write(cv2.imread(os.path.join(image_folder, image)))
            finally:
                out.release()

        fps = getattr(self, "fps", 30)
        images_to_video(self.bg_dir, os.path.join(self.output_dir, "background.mp4"), fps=fps)
        images_to_video(self.fg_dir, os.path.join(self.output_dir, "foreground.mp4"), fps=fps)

    def save_parameters(self):
        """Write the model and run parameters to ``parameters.txt`` in ``output_dir``."""
        params = f"""Background Subtraction Parameters
========================
Timestamp: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}

GMM Parameters:
-------------
Number of components: {self.gmm.n_components}
Learning rate: {self.gmm.learning_rate}
Background threshold: {self.gmm.threshold}
Variance threshold: {self.gmm.var_threshold}

Processing Details:
-----------------
Input video: {self.video_path}
Output directory: {self.output_dir}

Additional Information:
--------------------
- Implementation uses Gaussian Mixture Model (GMM) from scratch
- Background is modeled using {self.gmm.n_components} Gaussian components
- Pixels are classified as foreground when their probability is below threshold
- Sequential frame processing with online parameter updates
"""

        with open(os.path.join(self.output_dir, "parameters.txt"), "w") as f:
            f.write(params)

# Create a background subtractor instance for the input video. Constructing it
# already creates the "output" directory and its timestamped sub-folders.
subtractor = BackgroundSubtractor("/content/video.mp4")

# Process the video: writes the per-frame JPEGs, then background.mp4,
# foreground.mp4 and parameters.txt into the output directory.
subtractor.final_video_processor()

In [3]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Authenticate the Colab user and create the PyDrive client.
# NOTE(review): this builds an API client only; no visible cell mounts Drive at
# /content/drive, which later cells use as a destination path -- confirm.
auth.authenticate_user()
gauth = GoogleAuth()
# Reuse the application-default credentials for PyDrive.
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)



In [4]:
# import shutil

# folder_name = '/content/output'
# shutil.make_archive(folder_name, 'zip', folder_name)
# shutil.move('/content/output.zip', '/content/drive/MyDrive/ELL784/compressed_folder.zip')

'/content/output.zip'

In [10]:


# shutil.move('/content/output.zip', '/content/drive/MyDrive/ELL784/compressed_folder.zip')


'/content/drive/MyDrive/ELL784/compressed_folder.zip'

In [11]:
# # # import numpy as np
# import cv2
# import os
# from datetime import datetime
# from sklearn.cluster import KMeans

# class GaussianMixture:
#     def __init__(self, n_components=3, learning_rate=0.01, threshold=0.7):
#         self.n_components = n_components
#         self.learning_rate = learning_rate
#         self.threshold = threshold
#         self.weights = None
#         self.means = None
#         self.covars = None
#         self.n_init = False

#     def initialize(self, first_frame):
#         height, width = first_frame.shape[:2]
#         self.weights = np.ones((height, width, self.n_components)) / self.n_components
#         self.means = np.zeros((height, width, self.n_components))
#         self.covars = np.ones((height, width, self.n_components)) * 30

#         # Initialize first Gaussian with first frame
#         self.means[:,:,0] = first_frame
#         self.n_init = True

#     def gaussian_prob(self, x, mean, covar):
#         diff = x - mean
#         return np.exp(-diff * diff / (2 * covar)) / np.sqrt(2 * np.pi * covar)

#     def update(self, frame):
#         if not self.n_init:
#             self.initialize(frame)
#             return np.ones_like(frame, dtype=bool)

#         height, width = frame.shape[:2]
#         matches = np.zeros((height, width, self.n_components), dtype=bool)

#         # Calculate match for each component
#         for i in range(self.n_components):
#             diff = np.abs(frame - self.means[:,:,i])
#             matches[:,:,i] = diff < 2.5 * np.sqrt(self.covars[:,:,i])

#         # Update parameters for matched components
#         for i in range(self.n_components):
#             match_pixels = matches[:,:,i]

#             if np.any(match_pixels):
#                 # Update weights
#                 self.weights[:,:,i] = (1 - self.learning_rate) * self.weights[:,:,i]
#                 self.weights[match_pixels,i] += self.learning_rate

#                 # Update means and variances for matched pixels
#                 pixel_diff = frame[match_pixels] - self.means[match_pixels,i]
#                 self.means[match_pixels,i] += self.learning_rate * pixel_diff
#                 self.covars[match_pixels,i] += self.learning_rate * (
#                     pixel_diff * pixel_diff - self.covars[match_pixels,i]
#                 )

#         # Normalize weights
#         weight_sum = np.sum(self.weights, axis=2)
#         np.divide(self.weights, weight_sum[:,:,np.newaxis], out=self.weights)

#         # Determine foreground pixels
#         sorted_indices = np.argsort(self.weights, axis=2)
#         sorted_weights = np.take_along_axis(self.weights, sorted_indices, axis=2)
#         cumsum = np.cumsum(sorted_weights, axis=2)
#         threshold_mask = cumsum < self.threshold

#         background_prob = np.zeros((height, width))
#         for i in range(self.n_components):
#             background_prob += self.weights[:,:,i] * self.gaussian_prob(
#                 frame, self.means[:,:,i], self.covars[:,:,i]
#             )

#         return background_prob < 0.1

# class BackgroundSubtractor:
#     def __init__(self, video_path, output_dir="output"):
#         self.video_path = video_path
#         self.output_dir = output_dir
#         self.gmm = GaussianMixture()
#         self.setup_output_directories()

#     def setup_output_directories(self):
#         if not os.path.exists(self.output_dir):
#             os.makedirs(self.output_dir)

#         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
#         self.frames_dir = os.path.join(self.output_dir, f"frames_{timestamp}")
#         self.bg_dir = os.path.join(self.output_dir, f"background_{timestamp}")
#         self.fg_dir = os.path.join(self.output_dir, f"foreground_{timestamp}")

#         for directory in [self.frames_dir, self.bg_dir, self.fg_dir]:
#             if not os.path.exists(directory):
#                 os.makedirs(directory)

#     def process_video(self):
#         cap = cv2.VideoCapture(self.video_path)
#         if not cap.isOpened():
#             raise ValueError("Error opening video file")

#         frame_count = 0

#         while True:
#             ret, frame = cap.read()
#             if not ret:
#                 break

#             # Convert to grayscale
#             gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

#             # Get foreground mask using GMM
#             foreground_mask_gmm = self.gmm.update(gray)

#             # Refine foreground mask using K-Means
#             foreground_mask_kmeans = self.refine_foreground_mask(frame, foreground_mask_gmm)

#             # Save frames
#             cv2.imwrite(f"{self.frames_dir}/frame_{frame_count:04d}.jpg", frame)

#             # Create and save background/foreground
#             background = frame.copy()
#             foreground = frame.copy()

#             background[foreground_mask_kmeans] = 0
#             foreground[~foreground_mask_kmeans] = 0

#             cv2.imwrite(f"{self.bg_dir}/bg_{frame_count:04d}.jpg", background)
#             cv2.imwrite(f"{self.fg_dir}/fg_{frame_count:04d}.jpg", foreground)

#             frame_count += 1

#         cap.release()
#         self.create_videos()
#         self.save_parameters()

#     def refine_foreground_mask(self, frame, foreground_mask_gmm):
#         # Apply K-Means clustering to refine the foreground mask
#         foreground_pixels = frame[foreground_mask_gmm]
#         if len(foreground_pixels) == 0:
#             return foreground_mask_gmm

#         kmeans = KMeans(n_clusters=2)
#         labels = kmeans.fit_predict(foreground_pixels.reshape(-1, 3))

#         # Determine which cluster corresponds to the foreground
#         cluster_centers = kmeans.cluster_centers_
#         foreground_cluster = np.argmax(np.linalg.norm(cluster_centers - np.mean(frame, axis=(0, 1)), axis=1))

#         # Create refined foreground mask
#         refined_mask = np.zeros_like(foreground_mask_gmm, dtype=bool)
#         refined_mask[foreground_mask_gmm] = labels == foreground_cluster

#         return refined_mask

#     def create_videos(self):
#         def images_to_video(image_folder, output_path, fps=30):
#             images = [img for img in os.listdir(image_folder) if img.endswith(".jpg")]
#             images.sort()

#             if not images:
#                 return

#             frame = cv2.imread(os.path.join(image_folder, images[0]))
#             height, width = frame.shape[:2]

#             fourcc = cv2.VideoWriter_fourcc(*'mp4v')
#             out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

#             for image in images:
#                 frame = cv2.imread(os.path.join(image_folder, image))
#                 out.write(frame)

#             out.release()

#         images_to_video(self.bg_dir, os.path.join(self.output_dir, "background.mp4"))
#         images_to_video(self.fg_dir, os.path.join(self.output_dir, "foreground.mp4"))

#     def save_parameters(self):
#         params = f"""Background Subtraction Parameters
# ========================
# Timestamp: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}

# GMM Parameters:
# -------------
# Number of components: {self.gmm.n_components}
# Learning rate: {self.gmm.learning_rate}
# Background threshold: {self.gmm.threshold}

# Processing Details:
# -----------------
# Input video: {self.video_path}
# Output directory: {self.output_dir}

# Additional Information:
# --------------------
# - Implementation uses Gaussian Mixture Model (GMM) from scratch
# - Foreground refinement uses K-Means clustering
# - Background is modeled using {self.gmm.n_components} Gaussian components
# - Pixels are classified as foreground when their probability is below threshold
# - Sequential frame processing with online parameter updates
# """

#         with open(os.path.join(self.output_dir, "parameters.txt"), "w") as f:
#             f.write(params)

# # Run the code
# video_path = "/content/video.mp4"  # Replace with your video path
# subtractor = BackgroundSubtractor(video_path)
# subtractor.process_video()

In [12]:
# import shutil

# folder_name = '/content/output'
# shutil.make_archive(folder_name, 'zip', folder_name)
# shutil.move('/content/output.zip', '/content/drive/MyDrive/ELL784/compressed_folder.zip')

'/content/output.zip'

# **Part3**

In [13]:
import numpy as np
import cv2
import os
from datetime import datetime
from sklearn.cluster import KMeans

class GaussianMixture:
    """Per-pixel online mixture of 1-D Gaussians used to flag foreground pixels.

    This definition reuses the name of the ``GaussianMixture`` class from the
    Part 2 cell, so once this cell has run, any code that looks up
    ``GaussianMixture`` gets this version (different defaults).

    Every pixel of a grayscale frame owns ``n_components`` Gaussians, each
    described by a weight, a mean and a variance. ``update`` adapts the
    mixture to a new frame and returns a boolean mask that is True where the
    pixel is considered foreground.

    Args:
        n_components: Number of Gaussians per pixel.
        learning_rate: Step size of the running updates of weights, means
            and variances.
        threshold: Stored on the instance (``BackgroundSubtractorSum``
            reports it in ``parameters.txt``); it does not influence the mask
            returned by ``update``.
        var_threshold: Initial variance of every Gaussian.
        match_sigma: A pixel matches a Gaussian when it lies within this many
            standard deviations of the Gaussian's mean.
        foreground_prob: A pixel is foreground when its weighted mixture
            density is below this value. The comparison is on an absolute
            density, so the outcome also depends on the current variances.
        min_variance: Lower bound applied to the variances after every
            update so the density never divides by zero.
    """

    def __init__(self, n_components=5, learning_rate=0.05, threshold=0.8, var_threshold=30,
                 match_sigma=2.0, foreground_prob=0.15, min_variance=1e-6):
        self.n_components = n_components
        self.learning_rate = learning_rate
        self.threshold = threshold
        self.var_threshold = var_threshold
        self.match_sigma = match_sigma
        self.foreground_prob = foreground_prob
        self.min_variance = min_variance
        self.weights = None
        self.means = None
        self.covars = None
        self.n_init = False  # becomes True once initialize() has run

    def initialize(self, first_frame):
        """Allocate the model; the first Gaussian of each pixel starts at the pixel's value.

        Raises:
            ValueError: If ``first_frame`` is not a 2-D (grayscale) array.
        """
        first_frame = np.asarray(first_frame, dtype=np.float64)
        if first_frame.ndim != 2:
            raise ValueError(
                f"GaussianMixture expects a 2-D grayscale frame, got shape {first_frame.shape}"
            )
        height, width = first_frame.shape
        self.weights = np.ones((height, width, self.n_components)) / self.n_components
        self.means = np.zeros((height, width, self.n_components))
        self.covars = np.ones((height, width, self.n_components)) * self.var_threshold

        # Initialize first Gaussian with first frame
        self.means[:, :, 0] = first_frame
        self.n_init = True

    def gaussian_prob(self, x, mean, covar):
        """Density of a 1-D normal distribution with the given mean and variance."""
        diff = x - mean
        return np.exp(-diff * diff / (2 * covar)) / np.sqrt(2 * np.pi * covar)

    def update(self, frame):
        """Adapt the model to ``frame`` and return the foreground mask.

        Args:
            frame: 2-D grayscale frame with the same shape as the first one.

        Returns:
            numpy.ndarray: Boolean array of the frame's shape, True for
            foreground. The very first call only initialises the model and
            returns an all-True mask.

        Raises:
            ValueError: If the frame shape differs from the model's shape.
        """
        if not self.n_init:
            self.initialize(frame)
            return np.ones_like(frame, dtype=bool)

        frame = np.asarray(frame, dtype=np.float64)
        if frame.shape != self.means.shape[:2]:
            raise ValueError(
                f"frame shape {frame.shape} does not match model shape {self.means.shape[:2]}"
            )

        # Broadcast the frame against all components at once: (H, W, 1) vs (H, W, K).
        pixels = frame[:, :, np.newaxis]
        delta = pixels - self.means
        matches = np.abs(delta) < self.match_sigma * np.sqrt(self.covars)

        lr = self.learning_rate
        # Every weight decays; matched components are reinforced. The decay
        # must not depend on whether some *other* pixel matched the component.
        self.weights *= (1 - lr)
        self.weights[matches] += lr

        # Running mean / variance update, only where the pixel matched.
        matched_delta = delta[matches]
        self.means[matches] += lr * matched_delta
        self.covars[matches] += lr * (matched_delta * matched_delta - self.covars[matches])
        np.maximum(self.covars, self.min_variance, out=self.covars)

        # Normalize weights so they sum to one per pixel.
        self.weights /= self.weights.sum(axis=2, keepdims=True)

        # Weighted mixture density of the observed value at every pixel.
        background_prob = np.sum(
            self.weights * self.gaussian_prob(pixels, self.means, self.covars), axis=2
        )

        return background_prob < self.foreground_prob

class BackgroundSubtractorSum:
    """Background/foreground split with a K-Means refinement of the GMM mask.

    For every frame the ``GaussianMixture`` mask is refined by clustering the
    colours of the flagged pixels into two groups and keeping the group whose
    centre is farthest from the frame's mean colour. The original,
    background-only and foreground-only images are written as JPEG files into
    timestamped sub-directories of ``output_dir``; afterwards the background
    and foreground images are assembled into ``background.mp4`` and
    ``foreground.mp4`` and the parameters are saved to ``parameters.txt``.
    """

    def __init__(self, video_path, output_dir="output"):
        """Remember the paths, create the model and the output directories."""
        self.video_path = video_path
        self.output_dir = output_dir
        self.gmm = GaussianMixture()
        # Frame rate for the generated videos; replaced by the source video's
        # rate in final_video_processor() when the source reports a usable one.
        self.fps = 30
        self.setup_output_directories()

    def setup_output_directories(self):
        """Create ``output_dir`` and the timestamped frame/background/foreground folders."""
        os.makedirs(self.output_dir, exist_ok=True)

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        self.frames_dir = os.path.join(self.output_dir, f"frames_{timestamp}")
        self.bg_dir = os.path.join(self.output_dir, f"background_{timestamp}")
        self.fg_dir = os.path.join(self.output_dir, f"foreground_{timestamp}")

        for directory in (self.frames_dir, self.bg_dir, self.fg_dir):
            os.makedirs(directory, exist_ok=True)

    def final_video_processor(self):
        """Process the whole video, then build the videos and the parameter file.

        Raises:
            ValueError: If the video cannot be opened.
        """
        cap = cv2.VideoCapture(self.video_path)
        try:
            if not cap.isOpened():
                raise ValueError("Error opening video file")

            # Keep the playback speed of the source; fall back to the default
            # when the container reports no usable frame rate.
            source_fps = cap.get(cv2.CAP_PROP_FPS)
            if source_fps and np.isfinite(source_fps) and source_fps > 0:
                self.fps = source_fps

            frame_count = 0
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                # Convert to grayscale
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

                # Get foreground mask using GMM
                foreground_mask_gmm = self.gmm.update(gray)

                # Refine foreground mask using K-Means
                foreground_mask_kmeans = self.refine_foreground_mask(frame, foreground_mask_gmm)

                # Save frames
                cv2.imwrite(os.path.join(self.frames_dir, f"frame_{frame_count:04d}.jpg"), frame)

                # Create and save background/foreground: the 2-D mask blanks
                # all colour channels of the selected pixels.
                background = frame.copy()
                foreground = frame.copy()

                background[foreground_mask_kmeans] = 0
                foreground[~foreground_mask_kmeans] = 0

                cv2.imwrite(os.path.join(self.bg_dir, f"bg_{frame_count:04d}.jpg"), background)
                cv2.imwrite(os.path.join(self.fg_dir, f"fg_{frame_count:04d}.jpg"), foreground)

                frame_count += 1
        finally:
            # Release the capture even when opening or processing fails.
            cap.release()

        self.generate_videos()
        self.save_parameters()

    def refine_foreground_mask(self, frame, foreground_mask_gmm):
        """Keep only the more distinctive of two colour clusters inside the GMM mask.

        Args:
            frame: Colour frame of shape ``(H, W, 3)``.
            foreground_mask_gmm: Boolean ``(H, W)`` mask from the GMM.

        Returns:
            numpy.ndarray: Boolean ``(H, W)`` mask. The input mask is returned
            unchanged when it selects fewer than two pixels, because two
            clusters cannot be formed from less than two samples.
        """
        # Apply K-Means clustering to refine the foreground mask
        foreground_pixels = frame[foreground_mask_gmm]
        if len(foreground_pixels) < 2:
            return foreground_mask_gmm

        # Fixed seed so repeated runs of the notebook give the same masks.
        kmeans = KMeans(n_clusters=2, random_state=0)
        labels = kmeans.fit_predict(foreground_pixels.reshape(-1, 3))

        # Determine which cluster corresponds to the foreground: the one whose
        # centre is farthest from the mean colour of the whole frame.
        cluster_centers = kmeans.cluster_centers_
        foreground_cluster = np.argmax(np.linalg.norm(cluster_centers - np.mean(frame, axis=(0, 1)), axis=1))

        # Create refined foreground mask
        refined_mask = np.zeros_like(foreground_mask_gmm, dtype=bool)
        refined_mask[foreground_mask_gmm] = labels == foreground_cluster

        return refined_mask

    def generate_videos(self):
        """Assemble the saved background/foreground JPEGs into mp4 videos."""

        def frame_order(name):
            # Order by the numeric suffix ("bg_10000" after "bg_9999"); a plain
            # string sort breaks once the index outgrows the 4-digit padding.
            stem = os.path.splitext(name)[0]
            try:
                return (0, int(stem.rsplit("_", 1)[-1]), name)
            except ValueError:
                return (1, 0, name)

        def images_to_video(image_folder, output_path, fps=30):
            images = sorted(
                (img for img in os.listdir(image_folder) if img.endswith(".jpg")),
                key=frame_order,
            )

            if not images:
                return

            frame = cv2.imread(os.path.join(image_folder, images[0]))
            height, width = frame.shape[:2]

            fourcc = cv2.VideoWriter_fourcc(*'mp4v')
            out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
            try:
                for image in images:
                    out.write(cv2.imread(os.path.join(image_folder, image)))
            finally:
                out.release()

        fps = getattr(self, "fps", 30)
        images_to_video(self.bg_dir, os.path.join(self.output_dir, "background.mp4"), fps=fps)
        images_to_video(self.fg_dir, os.path.join(self.output_dir, "foreground.mp4"), fps=fps)

    def save_parameters(self):
        """Write the model and run parameters to ``parameters.txt`` in ``output_dir``."""
        params = f"""Background Subtraction Parameters
========================
Timestamp: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}

GMM Parameters:
-------------
Number of components: {self.gmm.n_components}
Learning rate: {self.gmm.learning_rate}
Background threshold: {self.gmm.threshold}

Processing Details:
-----------------
Input video: {self.video_path}
Output directory: {self.output_dir}

Additional Information:
--------------------
- Implementation uses Gaussian Mixture Model (GMM) from scratch
- Foreground refinement uses K-Means clustering
- Background is modeled using {self.gmm.n_components} Gaussian components
- Pixels are classified as foreground when their probability is below threshold
- Sequential frame processing with online parameter updates
"""

        with open(os.path.join(self.output_dir, "parameters.txt"), "w") as f:
            f.write(params)



In [14]:

# Part 3 input: the summary video written by the Part 1 pipeline.
video_path = "/content/summarized_video.mp4"
# NOTE(review): output_dir defaults to "output", the same directory the Part 2
# run uses, so background.mp4, foreground.mp4 and parameters.txt there are
# overwritten by this run -- confirm this is intended.
subtractor2 = BackgroundSubtractorSum(video_path)
subtractor2.final_video_processor()

In [15]:
import shutil

# Zip the whole output folder. The archive is created next to the folder as
# '<folder_name>.zip' (the recorded cell output shows '/content/output.zip').
folder_name = '/content/output'
shutil.make_archive(folder_name, 'zip', folder_name)


'/content/output.zip'

In [16]:
# Move the archive produced by shutil.make_archive ('/content/output.zip') to
# Drive. The previous source path '/content/output_sum.zip' is not created by
# any visible cell, so the move failed on a fresh Restart & Run All.
shutil.move('/content/output.zip', '/content/drive/MyDrive/ELL784/output_sum_compress.zip')

'/content/drive/MyDrive/ELL784/output_sum_compress.zip'