<a href="https://colab.research.google.com/github/coldsober-irene/ASSIGNMENTS/blob/main/HAR_assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Metadata

In [136]:
# Mount Google Drive in the Colab runtime; every dataset path used in this notebook
# lives under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Packages

In [137]:
import os
import cv2
import random
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.svm import SVC
from tensorflow.keras.applications import ResNet50
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.applications.resnet50 import preprocess_input


# 1. **Frames sampling**


In [138]:
# Sampling settings handed to Sampling.Sampler() by the extraction cells.
samples = 20  # passed as num_samples when sampling_type is 'random'
rate = 7      # passed as sample_rate (frame step) when sampling_type is 'uniform'

In [139]:
class Sampling:
  """Sample frames from every training video and extract ResNet50 features from them.

  The training set is expected to be laid out as ``<base_dir>/<activity>/<video>.mp4``.
  Each activity folder name is translated to an integer label through the mapping
  table (one ``<label> <activity>`` pair per line). The frames picked by the chosen
  sampling strategy are pushed through ``Feature_extract`` and the per-frame features
  are average-pooled, so every video contributes exactly one feature map and one label.

  Args:
    base_dir: folder holding one sub-folder per activity.
    sampling_type: 'uniform' (every ``sample_rate``-th frame) or 'random'
      (``num_samples`` frame positions drawn without replacement).
    ref_mean: per-channel mean the frames are re-normalised to.
    ref_std: per-channel standard deviation the frames are re-normalised to.
    map_file: optional path of the mapping table; defaults to ``DEFAULT_MAP_FILE``.

  Raises:
    ValueError: if ``sampling_type`` is neither 'uniform' nor 'random'.
  """

  # Number of Sampling objects created so far; used to number the saved .npy files.
  count = 0

  # Mapping table (label -> activity name) on the mounted Drive.
  DEFAULT_MAP_FILE = '/content/drive/MyDrive/machine vision assignment 2/EE6222 train and validate 2023/mapping_table_23.txt'

  def __init__(self, base_dir, sampling_type = 'uniform', ref_mean=[0.07, 0.07, 0.07], ref_std=[0.1, 0.09, 0.08], map_file = None):
    # Fail early, before the (slow) ResNet50 weights are loaded.
    if sampling_type not in ('uniform', 'random'):
      raise ValueError(f"sampling_type must be 'uniform' or 'random', got {sampling_type!r}")

    self.data_path = base_dir
    self.sampling_type = sampling_type
    # Copy the reference statistics so the shared default lists can never be mutated.
    self.mean = list(ref_mean)
    self.std = list(ref_std)

    # READ MAPPING FILE TO KNOW THE LABEL FOR EACH CLASS
    if map_file is None:
      map_file = self.DEFAULT_MAP_FILE
    self.maps = {}
    with open(map_file, 'r') as f:
      for line in f:
        parts = line.split()
        if len(parts) < 2:
          # Blank or malformed line: nothing to map.
          continue
        self.maps[parts[1]] = int(parts[0])

    # CREATE EXTRACTOR OBJECT
    self.Extractor = self.Feature_extract(sampled_type = self.sampling_type)

    # subfolders
    self.activities = os.listdir(self.data_path)

    # EXTRACTED FEATURES FROM ALL THE VIDEOS (one pooled feature map per video)
    self.obtained_features = []
    # LABELS OF THE EXTRACTED FEATURES
    self.labels = []

    # Remember this object's own sequence number so saveFeatures() keeps writing to
    # the same files even if further Sampling objects are created before saving.
    type(self).count += 1
    self.index = type(self).count

  def saveFeatures(self):
    """Stack the per-video features and write them, with their labels, to disk.

    Files are written to ``<base_dir>/features/features<k>.npy`` and
    ``labels<k>.npy`` where ``k`` is this object's creation index.

    Raises:
      ValueError: if ``Sampler`` has not produced any features yet.
    """
    if not self.obtained_features:
      raise ValueError("no features to save - run Sampler() first")

    # CREATE VSTACK ARRAY OF ALL FEATURES EXTRACTED
    all_features = np.vstack(self.obtained_features)
    labels = np.array(self.labels)

    # SAVED THE EXTRACTED FEATURES and their corresponding labels FOR FUTURE USE
    features_dir = os.path.join(self.data_path, 'features')
    os.makedirs(features_dir, exist_ok = True)

    np.save(os.path.join(features_dir,f'features{self.index}.npy'), all_features)
    np.save(os.path.join(features_dir,f'labels{self.index}.npy'), labels)

    print("FEATURE EXTRACTION AND SAVING IS DONE!!!")

  def Sampler(self, sample_rate = 5, num_samples = 10):
    """Extract one pooled feature map per training video.

    Args:
      sample_rate: frame step used by uniform sampling.
      num_samples: number of frames drawn by random sampling.

    Raises:
      KeyError: if an activity folder that contains videos is missing from the
        mapping table.
    """
    # Sorted iteration makes the order of the saved features reproducible.
    for activity in sorted(self.activities):
      activity_folder = os.path.join(self.data_path, activity)
      if not os.path.isdir(activity_folder):
        # Stray files next to the activity folders are not classes.
        continue

      video_files = sorted(name for name in os.listdir(activity_folder) if name.endswith('.mp4'))
      if not video_files:
        # e.g. the 'features' output folder created by saveFeatures().
        continue

      # Resolve the label before any expensive work so a bad folder name fails fast.
      label = self.maps[activity]

      for video_file in video_files:
        video_path = os.path.join(activity_folder, video_file)
        video_features = self._video_features(video_path, sample_rate, num_samples)
        if video_features is None:
          print(f"WARNING: no frame could be read from {video_path}; video skipped")
          continue

        self.obtained_features.append(video_features)
        # POPULATE THE LABEL CORRESPONDING TO THE CURRENT VIDEO
        self.labels.append(label)

  def _video_features(self, video_path, sample_rate, num_samples):
    """Return the frame-averaged feature map of one video, or None if it has no readable frame."""
    cap = cv2.VideoCapture(video_path)
    try:
      frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
      if self.sampling_type == 'uniform':
        frames = self.UniformSampling(cap = cap, frameCount=frame_count, sample_rate = sample_rate)
      else:
        frames = self.RandomSampling(cap = cap, frameCount=frame_count, num_samples=num_samples)
    finally:
      # Always give the decoder handle back, even when sampling fails.
      cap.release()

    if not frames:
      return None

    per_frame = self.Extractor.features(frames = frames, ref_mean = self.mean, ref_std = self.std)
    # Average pooling over the frame axis; keepdims keeps a leading axis of 1 so
    # np.vstack() in saveFeatures() yields one row per video.
    return np.mean(per_frame, axis=0, keepdims=True)

  def UniformSampling(self, cap, sample_rate, frameCount):
    """Read every ``sample_rate``-th frame of ``cap``.

    Returns:
      List of the frames that could be decoded (possibly empty).

    Raises:
      ValueError: if ``sample_rate`` is smaller than 1.
    """
    if sample_rate < 1:
      raise ValueError("sample_rate must be a positive integer")

    frames = []
    for i in range(0, frameCount, sample_rate):
      cap.set(cv2.CAP_PROP_POS_FRAMES, i)
      ret, frame = cap.read()
      if ret:
        frames.append(frame)
    return frames

  def RandomSampling(self, cap,num_samples, frameCount):
    """Read up to ``num_samples`` distinct, randomly chosen frames of ``cap``.

    Returns:
      List of the frames that could be decoded, in increasing frame position
      (possibly empty).
    """
    # A video shorter than num_samples simply contributes all of its frames.
    wanted = min(num_samples, frameCount)
    if wanted <= 0:
      return []

    # Sorted positions keep the seeks moving forward through the file.
    sampled_indices = sorted(random.sample(range(frameCount), wanted))

    frames = []
    for i in sampled_indices:
      cap.set(cv2.CAP_PROP_POS_FRAMES, i)
      ret, frame = cap.read()
      if ret:
        frames.append(frame)
    return frames

  class Feature_extract:
    """Per-frame feature extractor built on an ImageNet pre-trained ResNet50."""

    def __init__(self, sampled_type = 'uniform'):
      # Load pre-trained ResNet50 without its classification head, so predict()
      # returns convolutional feature maps instead of class scores.
      self.model = ResNet50(weights='imagenet', include_top=False)
      self.sampled_type = sampled_type

    # Function to normalize a frame
    def normalize_frame(self, frame, ref_mean, ref_std):
      """Shift/scale each channel of ``frame`` to the reference mean and std.

      A channel with zero variance (e.g. a completely black frame) is mapped to
      the reference mean instead of producing NaN through a division by zero.
      """
      pixels = np.asarray(frame, dtype=np.float64)
      actual_mean = np.mean(pixels, axis=(0, 1), keepdims=True)
      actual_std = np.std(pixels, axis=(0, 1), keepdims=True)
      safe_std = np.where(actual_std > 0, actual_std, 1.0)
      target_std = np.asarray(ref_std, dtype=np.float64)
      target_mean = np.asarray(ref_mean, dtype=np.float64)
      return (pixels - actual_mean) / safe_std * target_std + target_mean

    # Function to preprocess frames and extract features using ResNet
    def features(self,frames, ref_mean, ref_std):
      """Return the ResNet50 feature maps of ``frames`` (one entry per frame).

      Raises:
        ValueError: if ``frames`` is empty.
      """
      if len(frames) == 0:
        raise ValueError("features() needs at least one frame")

      # NOTE(review): the re-normalised frames are on the scale of ref_mean/ref_std
      # (fractions of 1 with the defaults) and still in OpenCV's BGR channel order,
      # while Keras documents ResNet50's preprocess_input for RGB pixels in 0-255.
      # Confirm the intended scaling / channel order before relying on the features.
      processed_frames = [
        preprocess_input(self.normalize_frame(frame, ref_mean, ref_std))
        for frame in frames
      ]
      return self.model.predict(np.array(processed_frames))


RUN EXTRACTION OF THE TRAIN DATASETS

uniform sampling

In [140]:
%%capture
# Uniform sampling over the training videos: build the sampler, run the extraction
# with a frame step of `rate`, then write features and labels to <data_root>/features.
data_root = '/content/drive/MyDrive/machine vision assignment 2/EE6222 train and validate 2023/train'
sample = Sampling(base_dir = data_root)
sample.Sampler(sample_rate = rate)
sample.saveFeatures()


random sampling

In [141]:
%%capture
# Random sampling over the same training videos (`samples` is passed as num_samples).
# This is the second Sampling object, so its output is numbered 2.
sample2 = Sampling(base_dir = data_root, sampling_type = 'random')
sample2.Sampler(num_samples=samples)
sample2.saveFeatures()

# 2. **Validation data feature extraction**

In [142]:
class Sampling:
  """Sample frames from every validation video and extract ResNet50 features from them.

  The validation videos are expected to sit directly inside ``base_dir``. The label
  of each video is looked up by file name in the validation list, where every line
  carries the label in its second column and the video file name in its last column.
  The frames picked by the chosen sampling strategy are pushed through
  ``Feature_extract`` and the per-frame features are average-pooled, so every video
  contributes exactly one feature map and one label.

  Args:
    base_dir: folder holding the validation ``.mp4`` files.
    sampling_type: 'uniform' (every ``sample_rate``-th frame) or 'random'
      (``num_samples`` frame positions drawn without replacement).
    ref_mean: per-channel mean the frames are re-normalised to.
    ref_std: per-channel standard deviation the frames are re-normalised to.
    map_file: optional path of the validation list; defaults to ``DEFAULT_MAP_FILE``.

  Raises:
    ValueError: if ``sampling_type`` is neither 'uniform' nor 'random'.
  """

  # Number of Sampling objects created so far; used to number the saved .npy files.
  count = 0

  # Validation list (video file name -> label) on the mounted Drive.
  DEFAULT_MAP_FILE = '/content/drive/MyDrive/machine vision assignment 2/EE6222 train and validate 2023/validate.txt'

  def __init__(self, base_dir, sampling_type = 'uniform', ref_mean=[0.07, 0.07, 0.07], ref_std=[0.1, 0.09, 0.08], map_file = None):
    # Fail early, before the (slow) ResNet50 weights are loaded.
    if sampling_type not in ('uniform', 'random'):
      raise ValueError(f"sampling_type must be 'uniform' or 'random', got {sampling_type!r}")

    self.data_path = base_dir
    self.sampling_type = sampling_type
    # Copy the reference statistics so the shared default lists can never be mutated.
    self.mean = list(ref_mean)
    self.std = list(ref_std)

    # READ MAPPING FILE TO KNOW THE LABEL FOR EACH CLASS
    if map_file is None:
      map_file = self.DEFAULT_MAP_FILE
    self.maps = {}
    with open(map_file, 'r') as f:
      for line in f:
        parts = line.split()
        if len(parts) < 2:
          # Blank or malformed line: nothing to map.
          continue
        self.maps[parts[-1]] = int(parts[1])

    # CREATE EXTRACTOR OBJECT
    self.Extractor = self.Feature_extract(sampled_type = self.sampling_type)

    # EXTRACTED FEATURES FROM ALL THE VIDEOS (one pooled feature map per video)
    self.obtained_features = []
    # LABELS OF THE EXTRACTED FEATURES
    self.labels = []

    # Remember this object's own sequence number so saveFeatures() keeps writing to
    # the same files even if further Sampling objects are created before saving.
    type(self).count += 1
    self.index = type(self).count

  def saveFeatures(self):
    """Stack the per-video features and write them, with their labels, to disk.

    Files are written to ``<base_dir>/features/features<k>.npy`` and
    ``labels<k>.npy`` where ``k`` is this object's creation index.

    Raises:
      ValueError: if ``Sampler`` has not produced any features yet.
    """
    if not self.obtained_features:
      raise ValueError("no features to save - run Sampler() first")

    # CREATE VSTACK ARRAY OF ALL FEATURES EXTRACTED
    all_features = np.vstack(self.obtained_features)
    labels = np.array(self.labels)

    # SAVED THE EXTRACTED FEATURES and their corresponding labels FOR FUTURE USE
    features_dir = os.path.join(self.data_path, 'features')
    os.makedirs(features_dir, exist_ok = True)

    np.save(os.path.join(features_dir,f'features{self.index}.npy'), all_features)
    np.save(os.path.join(features_dir,f'labels{self.index}.npy'), labels)

    print("VALIDATION FEATURE EXTRACTION AND SAVING IS DONE!!!")

  def Sampler(self, sample_rate = 5, num_samples = 10):
    """Extract one pooled feature map per validation video.

    Args:
      sample_rate: frame step used by uniform sampling.
      num_samples: number of frames drawn by random sampling.

    Raises:
      KeyError: if a video file is missing from the validation list.
    """
    # Sorted iteration makes the order of the saved features reproducible.
    video_files = sorted(name for name in os.listdir(self.data_path) if name.endswith('.mp4'))

    for video_file in video_files:
      # Resolve the label before any expensive work so an unlisted video fails fast.
      label = self.maps[video_file]

      video_path = os.path.join(self.data_path, video_file)
      video_features = self._video_features(video_path, sample_rate, num_samples)
      if video_features is None:
        print(f"WARNING: no frame could be read from {video_path}; video skipped")
        continue

      self.obtained_features.append(video_features)
      # POPULATE THE LABEL CORRESPONDING TO THE CURRENT VIDEO
      self.labels.append(label)

  def _video_features(self, video_path, sample_rate, num_samples):
    """Return the frame-averaged feature map of one video, or None if it has no readable frame."""
    cap = cv2.VideoCapture(video_path)
    try:
      frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
      if self.sampling_type == 'uniform':
        frames = self.UniformSampling(cap = cap, frameCount=frame_count, sample_rate = sample_rate)
      else:
        frames = self.RandomSampling(cap = cap, frameCount=frame_count, num_samples=num_samples)
    finally:
      # Always give the decoder handle back, even when sampling fails.
      cap.release()

    if not frames:
      return None

    per_frame = self.Extractor.features(frames = frames, ref_mean = self.mean, ref_std = self.std)
    # Average pooling over the frame axis; keepdims keeps a leading axis of 1 so
    # np.vstack() in saveFeatures() yields one row per video.
    return np.mean(per_frame, axis=0, keepdims=True)

  def UniformSampling(self, cap, sample_rate, frameCount):
    """Read every ``sample_rate``-th frame of ``cap``.

    Returns:
      List of the frames that could be decoded (possibly empty).

    Raises:
      ValueError: if ``sample_rate`` is smaller than 1.
    """
    if sample_rate < 1:
      raise ValueError("sample_rate must be a positive integer")

    frames = []
    for i in range(0, frameCount, sample_rate):
      cap.set(cv2.CAP_PROP_POS_FRAMES, i)
      ret, frame = cap.read()
      if ret:
        frames.append(frame)
    return frames

  def RandomSampling(self, cap,num_samples, frameCount):
    """Read up to ``num_samples`` distinct, randomly chosen frames of ``cap``.

    Returns:
      List of the frames that could be decoded, in increasing frame position
      (possibly empty).
    """
    # A video shorter than num_samples simply contributes all of its frames.
    wanted = min(num_samples, frameCount)
    if wanted <= 0:
      return []

    # Sorted positions keep the seeks moving forward through the file.
    sampled_indices = sorted(random.sample(range(frameCount), wanted))

    frames = []
    for i in sampled_indices:
      cap.set(cv2.CAP_PROP_POS_FRAMES, i)
      ret, frame = cap.read()
      if ret:
        frames.append(frame)
    return frames

  class Feature_extract:
    """Per-frame feature extractor built on an ImageNet pre-trained ResNet50."""

    def __init__(self, sampled_type = 'uniform'):
      # Load pre-trained ResNet50 without its classification head, so predict()
      # returns convolutional feature maps instead of class scores.
      self.model = ResNet50(weights='imagenet', include_top=False)
      self.sampled_type = sampled_type

    # Function to normalize a frame
    def normalize_frame(self, frame, ref_mean, ref_std):
      """Shift/scale each channel of ``frame`` to the reference mean and std.

      A channel with zero variance (e.g. a completely black frame) is mapped to
      the reference mean instead of producing NaN through a division by zero.
      """
      pixels = np.asarray(frame, dtype=np.float64)
      actual_mean = np.mean(pixels, axis=(0, 1), keepdims=True)
      actual_std = np.std(pixels, axis=(0, 1), keepdims=True)
      safe_std = np.where(actual_std > 0, actual_std, 1.0)
      target_std = np.asarray(ref_std, dtype=np.float64)
      target_mean = np.asarray(ref_mean, dtype=np.float64)
      return (pixels - actual_mean) / safe_std * target_std + target_mean

    # Function to preprocess frames and extract features using ResNet
    def features(self,frames, ref_mean, ref_std):
      """Return the ResNet50 feature maps of ``frames`` (one entry per frame).

      Raises:
        ValueError: if ``frames`` is empty.
      """
      if len(frames) == 0:
        raise ValueError("features() needs at least one frame")

      # NOTE(review): the re-normalised frames are on the scale of ref_mean/ref_std
      # (fractions of 1 with the defaults) and still in OpenCV's BGR channel order,
      # while Keras documents ResNet50's preprocess_input for RGB pixels in 0-255.
      # Confirm the intended scaling / channel order before relying on the features.
      processed_frames = [
        preprocess_input(self.normalize_frame(frame, ref_mean, ref_std))
        for frame in frames
      ]
      return self.model.predict(np.array(processed_frames))


RUN EXTRACTION OF THE VALIDATION DATASETS

uniform sampling

In [143]:
%%capture
# Uniform sampling over the validation videos, using the Sampling class redefined
# for the validation layout. That redefinition restarts `count` at 0, so this first
# object writes features1.npy / labels1.npy under <val_root>/features.
val_root = '/content/drive/MyDrive/machine vision assignment 2/EE6222 train and validate 2023/validate'
sample_val = Sampling(base_dir = val_root)
sample_val.Sampler(sample_rate = rate)
sample_val.saveFeatures()

random sampling

In [144]:
%%capture
# Random sampling over the validation videos (`samples` is passed as num_samples).
# This is the second validation Sampling object, so its output is numbered 2.
sample_val2 = Sampling(base_dir = val_root, sampling_type = 'random')
sample_val2.Sampler(num_samples=samples)
sample_val2.saveFeatures()

### obtain the features of each frame using a pre-trained model and create feature vector

## fuse features extracted using average pooling

## Question: ***Describe in brief the pre-trained model leveraged and why the pre-trained model is selected. What is the dimension of the feature obtained. Remember to save the video features in order for subsequent training. (3 points)***

# 3. **Classifier Training and Evaluation**

**Possible classifiers to choose from, since the dataset is small (just 25 videos):**

1.   Support Vector Machines (SVM)
2.   Naive Bayes
3. Random Forest





In [145]:
# Features and labels written by Sampling.saveFeatures() for UNIFORM sampling
# (file suffix 1 = first Sampling object created for each split).
dataset_base = '/content/drive/MyDrive/machine vision assignment 2/EE6222 train and validate 2023'
val_base = f'{dataset_base}/validate'
feature_dir = f'{dataset_base}/train/features/features1.npy'
labels_dir = f'{dataset_base}/train/features/labels1.npy'
val_features = f'{val_base}/features/features1.npy'
val_labels = f'{val_base}/features/labels1.npy'

# The files hold plain numeric arrays produced by np.save(), so pickle support is
# not needed; leaving it disabled (NumPy's default) means a tampered .npy file on
# the shared Drive cannot execute code when it is loaded.
X_train = np.load(feature_dir)
y_train = np.load(labels_dir)
X_val = np.load(val_features)
y_val = np.load(val_labels)

In [157]:
# Fraction of the pooled samples that train_test_split() holds out as the validation part.
test_size = 0.64

In [158]:
# Pool the loaded train and validation sets (features along the sample axis, labels
# end to end) and cut them again into a fresh random train/validation split.
# NOTE(review): after this step the provided validation videos are no longer a
# held-out set - some of them end up in X_train. Confirm this is intended.
features = np.concatenate((X_train, X_val), axis=0)
labels = np.concatenate((y_train, y_val), axis=0)
X_train, X_val, y_train, y_val = train_test_split(
    features,
    labels,
    test_size=test_size,
    random_state=42,
)

## Model training on **uniformly** sampled data

In [159]:

# Small 2D-convolutional classification head trained on the saved ResNet50 feature maps.
# NOTE(review): sparse_categorical_crossentropy needs every label to lie in
# 0..num_classes-1 - confirm 5 matches the label values in the mapping table.
num_classes = 5

classifier_layers = [
    # One sample = one feature map, so the input shape is everything after the sample axis.
    layers.Input(shape=X_train.shape[1:]),
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    # One softmax output per class.
    layers.Dense(num_classes, activation='softmax'),
]
model = keras.Sequential(classifier_layers)

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7d697d163460>

### Model training on randomly sampled frames

In [160]:
# Features and labels written by Sampling.saveFeatures() for RANDOM sampling
# (file suffix 2 = second Sampling object created for each split).
dataset_base = '/content/drive/MyDrive/machine vision assignment 2/EE6222 train and validate 2023'
val_base = f'{dataset_base}/validate'
feature_dir = f'{dataset_base}/train/features/features2.npy'
labels_dir = f'{dataset_base}/train/features/labels2.npy'
val_features = f'{val_base}/features/features2.npy'
val_labels = f'{val_base}/features/labels2.npy'

# The files hold plain numeric arrays produced by np.save(), so pickle support is
# not needed; leaving it disabled (NumPy's default) means a tampered .npy file on
# the shared Drive cannot execute code when it is loaded.
X_train = np.load(feature_dir)
y_train = np.load(labels_dir)
X_val = np.load(val_features)
y_val = np.load(val_labels)

In [161]:

# Pool the loaded train and validation sets (features along the sample axis, labels
# end to end) and cut them again into a fresh random train/validation split.
# NOTE(review): after this step the provided validation videos are no longer a
# held-out set - some of them end up in X_train. Confirm this is intended.
features = np.concatenate((X_train, X_val), axis=0)
labels = np.concatenate((y_train, y_val), axis=0)
X_train, X_val, y_train, y_val = train_test_split(
    features,
    labels,
    test_size=test_size,
    random_state=42,
)

In [162]:
# Small 2D-convolutional classification head trained on the saved ResNet50 feature maps.
# NOTE(review): sparse_categorical_crossentropy needs every label to lie in
# 0..num_classes-1 - confirm 5 matches the label values in the mapping table.
num_classes = 5

classifier_layers = [
    # One sample = one feature map, so the input shape is everything after the sample axis.
    layers.Input(shape=X_train.shape[1:]),
    layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    # One softmax output per class.
    layers.Dense(num_classes, activation='softmax'),
]
model = keras.Sequential(classifier_layers)

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(X_train, y_train, epochs=10, validation_data=(X_val, y_val))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7d69900c5480>

### Discuss the pros and cons of the type of classifier selected

## evaluate the trained classifier

In [152]:
# from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# # Assuming you have a trained classifier, such as svm_classifier, as described in Section 3
# # X_val and y_val are the validation features and labels

# # Make predictions on the validation set
# y_pred = svm_classifier.predict(X_val)

# # Calculate evaluation metrics
# accuracy = accuracy_score(y_val, y_pred)
# precision = precision_score(y_val, y_pred)
# recall = recall_score(y_val, y_pred)
# f1 = f1_score(y_val, y_pred)
# confusion = confusion_matrix(y_val, y_pred)

# # Print the results
# print("Accuracy:", accuracy)
# print("Precision:", precision)
# print("Recall:", recall)
# print("F1 Score:", f1)
# print("Confusion Matrix:")
# print(confusion)


### You should repeat steps 1 and 2 for the validation videos to obtain their features and obtain their class predictions with the trained classifier. Compare the predictions with the ground truth label. What is the performance of the trained classifier? (3 points)

In [153]:
# # Step 1: Frame Sampling for Validation Videos
# # - You can follow the same frame sampling process as in Section 1 for your validation videos.
# # - Let's assume you have validation_uniform_frames and validation_random_frames for uniform and random sampling.

# # Step 2: Feature Extraction for Validation Videos
# # - Apply the same feature extraction process as in Section 2 for the validation videos.
# # - Assuming you have a pre-trained model (model) and reference mean and standard deviation (ref_mean, ref_std).

# # Extract features for validation videos
# validation_uniform_features = preprocess_and_extract_features(validation_uniform_frames, model, ref_mean, ref_std)
# validation_random_features = preprocess_and_extract_features(validation_random_frames, model, ref_mean, ref_std)

# # Step 3: Classifier Prediction and Evaluation
# # - Use the trained classifier to predict classes for validation features and compare with ground truth labels.
# # - Assuming you have validation_labels for ground truth labels.

# # Predict classes for validation features
# validation_uniform_predictions = svm_classifier.predict(validation_uniform_features)
# validation_random_predictions = svm_classifier.predict(validation_random_features)

# # Evaluate the performance for uniform sampling
# uniform_accuracy = accuracy_score(validation_labels, validation_uniform_predictions)
# uniform_precision = precision_score(validation_labels, validation_uniform_predictions)
# uniform_recall = recall_score(validation_labels, validation_uniform_predictions)
# uniform_f1 = f1_score(validation_labels, validation_uniform_predictions)

# # Evaluate the performance for random sampling
# random_accuracy = accuracy_score(validation_labels, validation_random_predictions)
# random_precision = precision_score(validation_labels, validation_random_predictions)
# random_recall = recall_score(validation_labels, validation_random_predictions)
# random_f1 = f1_score(validation_labels, validation_random_predictions)

# # Compare and print the results
# print("Performance for Uniform Sampling:")
# print("Accuracy:", uniform_accuracy)
# print("Precision:", uniform_precision)
# print("Recall:", uniform_recall)
# print("F1 Score:", uniform_f1)

# print("\nPerformance for Random Sampling:")
# print("Accuracy:", random_accuracy)
# print("Precision:", random_precision)
# print("Recall:", random_recall)
# print("F1 Score:", random_f1)


# 4. **Apply any image enhancement of your choice** and explore how it affects the performance of the trained classifier. Note that the reference mean and standard deviation of a normal video frame are mean = [0.485, 0.456, 0.406] and standard deviation = [0.229, 0.224, 0.225].

In [154]:
# import cv2
# import numpy as np
# from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# # Function to enhance and normalize a frame
# def enhance_and_normalize_frame(frame, ref_mean, ref_std):
#     # Enhance the frame (e.g., histogram equalization)
#     enhanced_frame = cv2.equalizeHist(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
#     enhanced_frame = cv2.cvtColor(enhanced_frame, cv2.COLOR_GRAY2BGR)

#     # Normalize the frame to have the reference mean and standard deviation
#     actual_mean = np.mean(enhanced_frame, axis=(0, 1))
#     actual_std = np.std(enhanced_frame, axis=(0, 1))
#     normalized_frame = (enhanced_frame - actual_mean) / actual_std * ref_std + ref_mean

#     return normalized_frame

# # Function to preprocess frames and extract features using a pre-trained model
# def preprocess_and_extract_features(frames, model, ref_mean, ref_std):
#     processed_frames = [enhance_and_normalize_frame(frame, ref_mean, ref_std) for frame in frames]
#     processed_frames = [preprocess_input(frame) for frame in processed_frames]
#     features = model.predict(np.array(processed_frames))
#     return features

# # Assuming you have validation_videos, validation_labels, a pre-trained model (model),
# # and a trained classifier (svm_classifier)

# # Without Image Enhancement
# features_without_enhancement = preprocess_and_extract_features(validation_videos, model, ref_mean=[0.485, 0.456, 0.406], ref_std=[0.229, 0.224, 0.225])
# predictions_without_enhancement = svm_classifier.predict(features_without_enhancement)

# # With Image Enhancement
# features_with_enhancement = preprocess_and_extract_features(validation_videos, model, ref_mean=[0.485, 0.456, 0.406], ref_std=[0.229, 0.224, 0.225])
# predictions_with_enhancement = svm_classifier.predict(features_with_enhancement)

# # Evaluate the performance without and with image enhancement
# accuracy_without_enhancement = accuracy_score(validation_labels, predictions_without_enhancement)
# f1_score_without_enhancement = f1_score(validation_labels, predictions_without_enhancement)

# accuracy_with_enhancement = accuracy_score(validation_labels, predictions_with_enhancement)
# f1_score_with_enhancement = f1_score(validation_labels, predictions_with_enhancement)

# # Print the results
# print("Performance without Image Enhancement:")
# print("Accuracy:", accuracy_without_enhancement)
# print("F1 Score:", f1_score_without_enhancement)

# print("\nPerformance with Image Enhancement:")
# print("Accuracy:", accuracy_with_enhancement)
# print("F1 Score:", f1_score_with_enhancement)


### Discuss in detail how the chosen image enhancement affects the performance of the trained classifier

### Provide sampled output frames resulting from the image enhancement. (6 points)

# 5. – Improving the HAR Model to Enable End-to-end Training. The aforementioned method is intuitive but is not end-to-end, which limits its applicability in real-world scenarios. Currently, most HAR models are designed end-to-end, without the need to explicitly store the video features. In this step you are to design or implement an HAR model that is end-to-end and evaluate your HAR model. Describe your HAR model in detail, including the structure along with the training and evaluation procedures. Compare your HAR model performance against the prior trained classifiers and discuss the pros and cons of your HAR model. (Additional 10 points)

In [155]:
# import numpy as np
# import tensorflow as tf
# from tensorflow.keras.layers import Conv2D, LSTM, Dense, Flatten, Input
# from tensorflow.keras.models import Model
# from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# # Assuming you have your training data in train_videos and train_labels
# # Assuming you have your validation data in validation_videos and validation_labels

# # Define the input shape
# input_shape = (sequence_length, frame_height, frame_width, num_channels)

# # Build the end-to-end HAR model
# input_layer = Input(shape=input_shape)
# conv_layer = Conv2D(32, (3, 3), activation='relu')(input_layer)
# lstm_layer = LSTM(64, return_sequences=True)(conv_layer)
# flatten_layer = Flatten()(lstm_layer)
# output_layer = Dense(num_classes, activation='softmax')(flatten_layer)

# model = Model(inputs=input_layer, outputs=output_layer)

# # Compile the model
# model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# # Train the model
# model.fit(train_videos, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(validation_videos, validation_labels))

# # Evaluate the model on the validation set
# validation_predictions = model.predict(validation_videos)
# validation_predictions = np.argmax(validation_predictions, axis=1)  # Assuming one-hot encoding of labels
# validation_labels = np.argmax(validation_labels, axis=1)  # Assuming one-hot encoding of labels

# accuracy = accuracy_score(validation_labels, validation_predictions)
# precision = precision_score(validation_labels, validation_predictions, average='weighted')
# recall = recall_score(validation_labels, validation_predictions, average='weighted')
# f1 = f1_score(validation_labels, validation_predictions, average='weighted')

# print("Accuracy:", accuracy)
# print("Precision:", precision)
# print("Recall:", recall)
# print("F1 Score:", f1)


In [156]:
# from shutil import rmtree
# b = '/content/drive/MyDrive/machine vision assignment 2/EE6222 train and validate 2023/validate/random_validation_sampled'
# c = '/content/drive/MyDrive/machine vision assignment 2/EE6222 train and validate 2023/validate/uniform_validation_sampled'
# # for file in os.listdir(b):
# #   os.unlink(os.path.join(b, file))
# rmtree(b)

# for f in os.listdir(c):
#   os.unlink(os.path.join(c, file))
  # rmtree(os.path.join(c, f))