<a href="https://colab.research.google.com/github/coldsober-irene/ASSIGNMENTS/blob/main/HAR_assignment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

##metadata

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

##packages

In [None]:
import cv2
import os
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# 1. **Frames sampling**


In [None]:
class Sampling:
    """Sample frames from the videos of every activity folder and save them as JPEGs.

    Input layout:  ``<base_dir>/<train_data_folder>/<activity>/<video file>``
    Output layout: ``<base_dir>/<train_data_folder>/<sampled_dir>/<activity>/<video name>/frame_<i>.jpg``
    """

    def __init__(self, base_dir, train_data_folder = 'train'):
        """Store the dataset location and list the entries of the training folder.

        Args:
            base_dir: Root directory of the dataset.
            train_data_folder: Name of the folder inside ``base_dir`` that
                holds one sub-folder per activity.
        """
        self.data_path = base_dir
        self.train_dir = train_data_folder
        # subfolders
        self.activities = os.listdir(os.path.join(self.data_path, self.train_dir))

    def Sampler(self, sampled_dir = 'sampled_frames', sampling_type = 'uniform', sample_rate = 5, num_samples = 10):
        """Sample frames from every video of every activity.

        Args:
            sampled_dir: Output folder (created inside the training folder).
            sampling_type: ``'uniform'`` keeps every ``sample_rate``-th frame;
                any other value draws ``num_samples`` random frames.
            sample_rate: Step between kept frames for uniform sampling.
            num_samples: Number of frames drawn per video for random sampling.
        """
        # Use the paths given to the constructor instead of notebook globals.
        train_root = os.path.join(self.data_path, self.train_dir)

        # Loop through each activity
        for activity in self.activities:
            activity_folder = os.path.join(train_root, activity)
            # Skip the output folder itself and stray files in the training folder.
            if activity == sampled_dir or not os.path.isdir(activity_folder):
                continue

            # Create a subfolder to save sampled frames
            self.sampled_frames_folder = os.path.join(train_root, sampled_dir, activity)
            os.makedirs(self.sampled_frames_folder, exist_ok=True)

            # Loop through video files in the activity folder
            for video_file in os.listdir(activity_folder):
                video_path = os.path.join(activity_folder, video_file)

                # Create a subfolder for each video to save sampled frames
                video_name = os.path.splitext(video_file)[0]
                self.video_sampled_frames_folder = os.path.join(self.sampled_frames_folder, video_name)
                os.makedirs(self.video_sampled_frames_folder, exist_ok=True)

                cap = cv2.VideoCapture(video_path)
                try:
                    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
                    if sampling_type == 'uniform':
                        self.UniformSampling(cap = cap, frameCount=frame_count, sample_rate = sample_rate)
                    else:
                        self.RandomSampling(cap = cap, frameCount=frame_count, num_samples=num_samples)
                finally:
                    # Always free the decoder handle, even if sampling fails.
                    cap.release()

    def UniformSampling(self, cap, sample_rate, frameCount):
        """Save every ``sample_rate``-th frame of ``cap`` into the current video folder."""
        self._save_frames(cap, range(0, frameCount, sample_rate))

    def RandomSampling(self, cap,num_samples, frameCount):
        """Save up to ``num_samples`` distinct, randomly chosen frames of ``cap``.

        Videos with fewer than ``num_samples`` frames contribute all their
        frames instead of raising ``ValueError``.
        """
        population = range(max(frameCount, 0))
        wanted = min(max(num_samples, 0), len(population))
        # Sorted so the capture seeks forward through the file.
        sampled_indices = sorted(random.sample(population, wanted))
        self._save_frames(cap, sampled_indices)

    def _save_frames(self, cap, frame_indices):
        """Seek ``cap`` to each index and write the decoded frame as ``frame_<i>.jpg``."""
        for i in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = cap.read()
            if ret:
                frame_filename = os.path.join(self.video_sampled_frames_folder, f'frame_{i}.jpg')
                cv2.imwrite(frame_filename, frame)




In [None]:
# Dataset root on the mounted drive and the folder that holds the training videos.
data_root = '/content/drive/MyDrive/machine vision assignment 2/EE6222 train and validate 2023'
parent_dir = 'train'
sample = Sampling(base_dir = data_root, train_data_folder = parent_dir)
# Uniform sampling: keeps every `sample_rate`-th frame (Sampler's default mode).
sample.Sampler(sampled_dir = "uniform_sampled")
# Random sampling has to be requested explicitly; Sampler defaults to
# 'uniform', which would otherwise fill this folder with uniform samples too.
sample.Sampler(sampled_dir = "random_sampled", sampling_type = "random")

## Question: ***Which is better, uniform or random sampling?***

# 2. **Feature extraction**

In [None]:
base = '/content/drive/MyDrive/machine vision assignment 2/EE6222 train and validate 2023/train'
# Only the activity folders count as classes. The training folder also holds
# folders generated by this notebook (sampled frames, *_features), and
# os.listdir() returns entries in arbitrary order, so slicing the raw listing
# could select the wrong folders.
_known_classes = ("stand", "sit", "jump", "run", "turn", "walk")
classes_dir = {
    class_.lower() : os.path.join(base, class_ + "_features")
    for class_ in sorted(os.listdir(base))
    if class_.lower() in _known_classes and os.path.isdir(os.path.join(base, class_))
}
anotation = {'jump':0, }
# exist_ok makes re-running the cell safe without swallowing real errors
# (e.g. a missing drive mount or a permission problem).
for v in classes_dir.values():
    os.makedirs(v, exist_ok=True)

In [None]:
class Feature_extract:
    """Extract per-frame ResNet50 features for lists of sampled frames.

    The ImageNet-pretrained ResNet50 is used without its classification head
    (``include_top=False``), so ``model.predict`` returns convolutional
    feature maps rather than class scores.
    """

    # Shared by all instances so the ImageNet weights are loaded only once.
    _shared_model = None

    def __init__(self, random_frames: "list | None" = None, uniform_frames: "list | None" = None):
        """Store the frame lists and load (or reuse) the pre-trained model.

        Args:
            random_frames: Frames obtained by random sampling.
            uniform_frames: Frames obtained by uniform sampling.
        """
        # Load pre-trained ResNet50
        if Feature_extract._shared_model is None:
            Feature_extract._shared_model = ResNet50(weights='imagenet', include_top=False)
        self.model = Feature_extract._shared_model
        # None instead of [] as default: a list default would be shared by all instances.
        self.random_frames = [] if random_frames is None else random_frames
        self.uniform_frames = [] if uniform_frames is None else uniform_frames

    def Get_features(self, unif = False):
        """Return the model output for the uniform frames (``unif=True``) or the random frames."""
        frames = self.uniform_frames if unif else self.random_frames
        return self.preprocess_and_extract_features(frames, self.model, ref_mean=[0.07, 0.07, 0.07], ref_std=[0.1, 0.09, 0.08])

    # Function to normalize a frame
    def normalize_frame(self, frame, ref_mean, ref_std):
        """Shift and scale a frame so each channel has the reference mean and std.

        Args:
            frame: Path of an image file (read with ``cv2.imread``) or an
                already decoded ``(H, W, C)`` array.
            ref_mean: Target per-channel mean.
            ref_std: Target per-channel standard deviation.

        Returns:
            The normalized frame as a float array of the same shape.

        Raises:
            ValueError: If ``frame`` is a path that cannot be read as an image.
        """
        if isinstance(frame, (str, os.PathLike)):
            path = os.fspath(frame)
            frame = cv2.imread(path)
            if frame is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise ValueError(f"Could not read image: {path}")
        actual_mean = np.mean(frame, axis=(0, 1), keepdims=True)
        actual_std = np.std(frame, axis=(0, 1))
        # A constant-colour channel has std 0; dividing by it would produce NaN/inf.
        safe_std = np.where(actual_std == 0, 1.0, actual_std)
        # NOTE(review): the reference statistics look [0, 1]-scaled, while cv2
        # images are 0-255 BGR and ResNet50's preprocess_input is documented
        # for 0-255 RGB input. Confirm the intended scale and channel order.
        normalized_frame = (frame - actual_mean) / safe_std * ref_std + ref_mean
        return normalized_frame

    # Function to preprocess frames and extract features using ResNet
    def preprocess_and_extract_features(self,frames, model, ref_mean, ref_std):
        """Normalize every frame, apply ResNet50 preprocessing and run ``model.predict``.

        All frames are stacked into one batch, so they must share the same size.
        """
        processed_frames = [self.normalize_frame(frame, ref_mean, ref_std) for frame in frames]
        processed_frames = [preprocess_input(frame) for frame in processed_frames]
        features = model.predict(np.array(processed_frames))
        return features


dir_rand = "/content/drive/MyDrive/machine vision assignment 2/EE6222 train and validate 2023/train/random_sampled"
dir_unif = "/content/drive/MyDrive/machine vision assignment 2/EE6222 train and validate 2023/train/uniform_sampled"


def _frames_by_video(sampled_root):
    """Map every ``<sampled_root>/<class>/<video>`` folder to the frame file names inside it."""
    return {
        os.path.join(sampled_root, class_, last_dir): os.listdir(os.path.join(sampled_root, class_, last_dir))
        for class_ in os.listdir(sampled_root)
        for last_dir in os.listdir(os.path.join(sampled_root, class_))
    }


rand_frames = _frames_by_video(dir_rand)
# The uniform frames must come from dir_unif (previously dir_rand was listed twice).
unif_frames = _frames_by_video(dir_unif)
train_data = {"stand" : [], "sit" : [], "jump" : [], "run" : [], "turn" : [], "walk" : []}
uniform_train_data = {"stand" : [], "sit" : [], "jump" : [], "run" : [], "turn" : [], "walk" : []}
annotation = {"stand" : 3, "sit" : 2, "jump" : 0, "run" : 1, "turn" : 4, "walk" : 5}

# One extractor for all videos: creating one per video re-creates the model every time.
extractor = Feature_extract()


def _collect_features(frames_by_video, store, unif):
    """Append ``(label, features)`` for every video folder to ``store[class]``."""
    for video_dir, frame_names in frames_by_video.items():
        # The class is the folder that contains the video folder.
        class_ = os.path.basename(os.path.dirname(video_dir)).lower()
        if class_ not in annotation or not frame_names:
            # Unknown class folder or a video without sampled frames: nothing to extract.
            continue
        frames = [os.path.join(video_dir, img) for img in frame_names]
        if unif:
            extractor.uniform_frames = frames
        else:
            extractor.random_frames = frames
        features = extractor.Get_features(unif=unif)
        store[class_].append((annotation[class_], features))


def _as_object_array(samples_by_class):
    """Pack the per-class sample lists into a 1-D object array (one entry per class)."""
    packed = np.empty(len(samples_by_class), dtype=object)
    for i, samples in enumerate(samples_by_class.values()):
        packed[i] = samples
    return packed


_collect_features(rand_frames, train_data, unif=False)
_collect_features(unif_frames, uniform_train_data, unif=True)

# A dict_values view cannot be pickled by np.save, so pack the lists explicitly.
full_train_rand_datasets = _as_object_array(train_data)
full_train_unif_datasets = _as_object_array(uniform_train_data)
# Save the per-class (label, features) lists as NumPy object arrays
# (load them back with np.load(..., allow_pickle=True)).
np.save('uniform_pooled_features.npy', full_train_unif_datasets)
np.save('random_pooled_features.npy', full_train_rand_datasets)

In [None]:
# Number of 'jump' videos with extracted features. `features_dict` is not
# defined in this notebook; the per-class results are stored in `train_data`.
len(train_data['jump'])

### obtain the features of each frame using a pre-trained model and create feature vector

In [None]:

# Show the (video folder -> frame file names) pairs stored in rand_frames.
rand_frames.items()


## fuse features extracted using average pooling

In [None]:

# Average-pool a stack of features along its first axis
def average_pooling(features):
    """Return the element-wise mean of ``features`` taken over axis 0."""
    stacked = np.asanyarray(features)
    return stacked.mean(axis=0)

# Apply average pooling to the uniform and random features
# NOTE(review): `uniform_features` and `random_features` are not assigned at
# module level anywhere in the visible notebook (they only appear as local
# names inside Feature_extract.Get_features). Confirm they are defined
# before this cell runs.
uniform_pooled_feature = average_pooling(uniform_features)
random_pooled_feature = average_pooling(random_features)

# Each result is the mean of its input over axis 0.


## Question: ***Briefly describe the pre-trained model you used and why it was selected. What is the dimension of the feature obtained? Remember to save the video features for subsequent training. (3 points)***

In [None]:
import numpy as np
import pickle

# Persist both pooled feature sets: first as NumPy .npy files ...
_npy_targets = (
    ('uniform_pooled_features.npy', uniform_pooled_feature),
    ('random_pooled_features.npy', random_pooled_feature),
)
for _npy_path, _pooled in _npy_targets:
    np.save(_npy_path, _pooled)

# ... and then, as an alternative format, as pickled objects.
_pkl_targets = (
    ('uniform_pooled_features.pkl', uniform_pooled_feature),
    ('random_pooled_features.pkl', random_pooled_feature),
)
for _pkl_path, _pooled in _pkl_targets:
    with open(_pkl_path, 'wb') as file:
        pickle.dump(_pooled, file)


# 3. **Classifier Training and Evaluation**

**Possible classifiers to choose from, since my dataset is small (just 25 videos):**

1.   Support Vector Machines (SVM)
2.   Naive Bayes
3. Random Forest





In [None]:


# Create a dataset (features and labels)
# NOTE(review): the labels below mark the sampling strategy (0 = uniform,
# 1 = random), not the activity class. Confirm this is the intended target
# for a human-activity classifier.
X = np.vstack((uniform_features, random_features))
# scikit-learn estimators require a 2-D (n_samples, n_features) matrix, so any
# extra feature axes (e.g. convolutional feature maps) are flattened per
# sample. This is a no-op for input that is already 2-D.
X = X.reshape(len(X), -1)
y = np.array([0] * len(uniform_features) + [1] * len(random_features))

# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Train an SVM classifier
svm_classifier = SVC(kernel='linear')
svm_classifier.fit(X_train, y_train)

# Make predictions and evaluate the model
y_pred = svm_classifier.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
report = classification_report(y_val, y_pred)
print("Accuracy:", accuracy)
print(report)


### Discuss the pros and cons of the type of classifier selected

## evaluate the trained classifier

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Score the fitted classifier (`svm_classifier`, trained in Section 3) on the
# held-out validation split (`X_val`, `y_val`).

# Predictions for the validation features
y_pred = svm_classifier.predict(X_val)


def _score(metric):
    """Evaluate one scikit-learn metric on the validation labels and predictions."""
    return metric(y_val, y_pred)


# Evaluation metrics
accuracy = _score(accuracy_score)
precision = _score(precision_score)
recall = _score(recall_score)
f1 = _score(f1_score)
confusion = _score(confusion_matrix)

# Report
for _caption, _value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
):
    print(_caption, _value)
print("Confusion Matrix:")
print(confusion)


### You should repeat steps 1 and 2 for the validation videos to obtain their features and obtain their class predictions with the trained classifier. Compare the predictions with the ground truth label. What is the performance of the trained classifier? (3 points)

In [None]:
# Step 1: Frame Sampling for Validation Videos
# - Follow the same frame sampling process as in Section 1 for the validation videos.
# - validation_uniform_frames and validation_random_frames are assumed to be
#   lists of frame image paths (Feature_extract reads frames with cv2.imread).

# Step 2: Feature Extraction for Validation Videos
# - Reuse the Section 2 extractor so validation frames go through the same
#   model and reference statistics as the training frames. No free function
#   `preprocess_and_extract_features`, `model`, `ref_mean` or `ref_std`
#   exists at this point in the notebook.
validation_extractor = Feature_extract(
    random_frames=validation_random_frames,
    uniform_frames=validation_uniform_frames,
)
validation_uniform_features = validation_extractor.Get_features(unif=True)
validation_random_features = validation_extractor.Get_features()

# scikit-learn expects 2-D (n_samples, n_features) input.
validation_uniform_features = validation_uniform_features.reshape(len(validation_uniform_features), -1)
validation_random_features = validation_random_features.reshape(len(validation_random_features), -1)

# Step 3: Classifier Prediction and Evaluation
# - Use the trained classifier to predict classes for validation features and compare with ground truth labels.
# - Assuming you have validation_labels for ground truth labels (one label per
#   predicted sample — TODO confirm it lines up with the extracted frames).

# Predict classes for validation features
validation_uniform_predictions = svm_classifier.predict(validation_uniform_features)
validation_random_predictions = svm_classifier.predict(validation_random_features)


def _weighted_scores(y_true, y_hat):
    """Return (accuracy, precision, recall, f1) using support-weighted averaging.

    The default ``average='binary'`` raises for more than two classes;
    'weighted' works for both binary and multi-class labels.
    """
    return (
        accuracy_score(y_true, y_hat),
        precision_score(y_true, y_hat, average='weighted', zero_division=0),
        recall_score(y_true, y_hat, average='weighted', zero_division=0),
        f1_score(y_true, y_hat, average='weighted', zero_division=0),
    )


# Evaluate the performance for uniform sampling
uniform_accuracy, uniform_precision, uniform_recall, uniform_f1 = _weighted_scores(
    validation_labels, validation_uniform_predictions
)

# Evaluate the performance for random sampling
random_accuracy, random_precision, random_recall, random_f1 = _weighted_scores(
    validation_labels, validation_random_predictions
)

# Compare and print the results
print("Performance for Uniform Sampling:")
print("Accuracy:", uniform_accuracy)
print("Precision:", uniform_precision)
print("Recall:", uniform_recall)
print("F1 Score:", uniform_f1)

print("\nPerformance for Random Sampling:")
print("Accuracy:", random_accuracy)
print("Precision:", random_precision)
print("Recall:", random_recall)
print("F1 Score:", random_f1)


# 4. **Apply any image enhancement of your choice** and explore how it affects the performance of the trained classifier. Note that the reference mean and standard deviation of a normal video frame are "mean" [0.485,0.456,0.406], "standard deviation" [0.229,0.224,0.225].

In [None]:
import cv2
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Function to enhance and normalize a frame
def enhance_and_normalize_frame(frame, ref_mean, ref_std, enhance=True):
    """Optionally histogram-equalize a frame, then match it to reference statistics.

    Args:
        frame: Decoded ``(H, W, 3)`` image array (BGR order when ``enhance`` is used).
        ref_mean: Target per-channel mean.
        ref_std: Target per-channel standard deviation.
        enhance: When True (default) the frame is converted to grayscale,
            histogram-equalized and expanded back to three identical channels.
            When False only the normalization is applied, which gives the
            "no enhancement" baseline.

    Returns:
        Float array with the same shape as ``frame``.
    """
    if enhance:
        # Enhance the frame (histogram equalization on the grayscale image)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        enhanced_frame = cv2.cvtColor(cv2.equalizeHist(gray), cv2.COLOR_GRAY2BGR)
    else:
        enhanced_frame = np.asarray(frame)

    # Normalize the frame to have the reference mean and standard deviation
    actual_mean = np.mean(enhanced_frame, axis=(0, 1))
    actual_std = np.std(enhanced_frame, axis=(0, 1))
    # A constant channel has std 0; dividing by it would produce NaN/inf.
    safe_std = np.where(actual_std == 0, 1.0, actual_std)
    normalized_frame = (enhanced_frame - actual_mean) / safe_std * ref_std + ref_mean

    return normalized_frame

# Function to preprocess frames and extract features using a pre-trained model
def preprocess_and_extract_features(frames, model, ref_mean, ref_std, enhance=True):
    """Normalize (and optionally enhance) frames, preprocess them and run ``model.predict``.

    Args:
        frames: Iterable of decoded image arrays of identical size.
        model: Object exposing ``predict`` on a batch of frames.
        ref_mean: Target per-channel mean passed to the normalization.
        ref_std: Target per-channel standard deviation passed to the normalization.
        enhance: Forwarded to ``enhance_and_normalize_frame``.

    Returns:
        Whatever ``model.predict`` returns for the stacked frames.
    """
    processed_frames = [enhance_and_normalize_frame(frame, ref_mean, ref_std, enhance=enhance) for frame in frames]
    processed_frames = [preprocess_input(frame) for frame in processed_frames]
    features = model.predict(np.array(processed_frames))
    return features

# Assuming you have validation_videos (decoded frames), validation_labels,
# a pre-trained model (model), and a trained classifier (svm_classifier)

_REF_MEAN = [0.485, 0.456, 0.406]
_REF_STD = [0.229, 0.224, 0.225]


def _normalize_only(frame, ref_mean, ref_std):
    """Match a frame to the reference mean/std without any enhancement step."""
    frame = np.asarray(frame)
    actual_mean = np.mean(frame, axis=(0, 1))
    actual_std = np.std(frame, axis=(0, 1))
    # Guard constant channels against division by zero.
    safe_std = np.where(actual_std == 0, 1.0, actual_std)
    return (frame - actual_mean) / safe_std * ref_std + ref_mean


def _flatten_per_sample(features):
    """Reshape features to the 2-D (n_samples, n_features) layout scikit-learn expects."""
    return features.reshape(len(features), -1)


# Without Image Enhancement: same normalization, but no histogram equalization.
# Previously both runs went through the enhancing pipeline, so the two
# results could never differ.
_plain_frames = [preprocess_input(_normalize_only(frame, _REF_MEAN, _REF_STD)) for frame in validation_videos]
features_without_enhancement = _flatten_per_sample(model.predict(np.array(_plain_frames)))
predictions_without_enhancement = svm_classifier.predict(features_without_enhancement)

# With Image Enhancement
features_with_enhancement = _flatten_per_sample(
    preprocess_and_extract_features(validation_videos, model, ref_mean=_REF_MEAN, ref_std=_REF_STD)
)
predictions_with_enhancement = svm_classifier.predict(features_with_enhancement)

# Evaluate the performance without and with image enhancement.
# average='weighted' works for binary and multi-class labels; the default
# 'binary' raises when there are more than two classes.
accuracy_without_enhancement = accuracy_score(validation_labels, predictions_without_enhancement)
f1_score_without_enhancement = f1_score(validation_labels, predictions_without_enhancement, average='weighted')

accuracy_with_enhancement = accuracy_score(validation_labels, predictions_with_enhancement)
f1_score_with_enhancement = f1_score(validation_labels, predictions_with_enhancement, average='weighted')

# Print the results
print("Performance without Image Enhancement:")
print("Accuracy:", accuracy_without_enhancement)
print("F1 Score:", f1_score_without_enhancement)

print("\nPerformance with Image Enhancement:")
print("Accuracy:", accuracy_with_enhancement)
print("F1 Score:", f1_score_with_enhancement)


### Discuss in detail how the chosen image enhancement affects the performance of the trained classifier

### Provide sampled output frames resulting from the image enhancement. (6 points)

# 5. – Improving the HAR Model to Enable End-to-end Training. The aforementioned method is intuitive but is not end-to-end, which limits its applicability in real-world scenarios. Currently, most HAR models are designed end-to-end, without the need to explicitly store the video features. In this step you are to design or implement an HAR model that is end-to-end and evaluate your HAR model. Describe your HAR model in detail, including the structure along with the training and evaluation procedures. Compare your HAR model performance against the prior trained classifiers and discuss the pros and cons of your HAR model. (Additional 10 points)

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, LSTM, Dense, Flatten, Input
from tensorflow.keras.models import Model
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Assuming you have your training data in train_videos and train_labels
# Assuming you have your validation data in validation_videos and validation_labels
# Also assumed to be defined beforehand: sequence_length, frame_height,
# frame_width, num_channels, num_classes, epochs and batch_size.
from tensorflow.keras.layers import GlobalAveragePooling2D, TimeDistributed

# Define the input shape: one sample is a sequence of frames
input_shape = (sequence_length, frame_height, frame_width, num_channels)

# Build the end-to-end HAR model
input_layer = Input(shape=input_shape)
# TimeDistributed applies the 2-D convolution to every frame of the sequence.
conv_layer = TimeDistributed(Conv2D(32, (3, 3), activation='relu'))(input_layer)
# Collapse each frame's feature map to one vector so the LSTM receives the
# 3-D (batch, time, features) input it requires.
pooled_layer = TimeDistributed(GlobalAveragePooling2D())(conv_layer)
lstm_layer = LSTM(64, return_sequences=True)(pooled_layer)
flatten_layer = Flatten()(lstm_layer)
output_layer = Dense(num_classes, activation='softmax')(flatten_layer)

model = Model(inputs=input_layer, outputs=output_layer)

# Compile the model (categorical cross-entropy expects one-hot labels)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(train_videos, train_labels, epochs=epochs, batch_size=batch_size, validation_data=(validation_videos, validation_labels))

# Evaluate the model on the validation set
validation_predictions = model.predict(validation_videos)
validation_predictions = np.argmax(validation_predictions, axis=1)  # Class index with the highest probability
# Keep validation_labels one-hot (model.fit needs that form when the cell is
# re-run); store the class indices under a separate name.
validation_true_classes = np.argmax(validation_labels, axis=1)  # Assuming one-hot encoding of labels

accuracy = accuracy_score(validation_true_classes, validation_predictions)
precision = precision_score(validation_true_classes, validation_predictions, average='weighted')
recall = recall_score(validation_true_classes, validation_predictions, average='weighted')
f1 = f1_score(validation_true_classes, validation_predictions, average='weighted')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)
