In [11]:
#imports
import matplotlib.pyplot as plt
import librosa.feature as lf
import librosa
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, precision_recall_curve, roc_curve, auc
import numpy as np


from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    average_precision_score,
    accuracy_score,
)


import sys

sys.path.append("..")
from utils.MuppetDataset import MuppetDataset
from utils.visual_evaluation import (
    plot_confusion_matrix,
    plot_roc_curve,
    plot_precision_recall_curve,
    plot_global_confusion_matrix,
)


In [None]:
#libraries to install for the jupyter notebook - execute before the imports when first running it
!pip install matplotlib
!pip install librosa
!pip install numpy
!pip install pandas
!pip install scikit-learn
!pip install seaborn
!pip install tqdm

In [None]:
%run -i ..\\utils\\extract_frames.py 
#run video frame extraction script


In [None]:
#visual kermit detection

import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

def get_dominant_color(image):
    """Return the dominant colour of an image as a 3-element uint8 array.

    All pixels are flattened to rows of three channel values and clustered with
    OpenCV k-means using a single cluster (K=1); the centre of that cluster is
    returned, cast to uint8.

    Args:
        image: Array whose elements can be regrouped into rows of three channel
            values. Callers in this notebook pass frames loaded with cv2.imread.

    Returns:
        The single cluster centre, in the channel order of the input.
    """
    samples = np.float32(image.reshape(-1, 3))

    # Stop after 200 iterations or once the centre moves by less than 0.2
    stop_flags = cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER
    termination = (stop_flags, 200, 0.2)

    # cv2.kmeans returns (compactness, labels, centers); only the centers are needed
    kmeans_result = cv2.kmeans(samples, 1, None, termination, 10, cv2.KMEANS_RANDOM_CENTERS)
    centers = kmeans_result[2]
    return centers[0].astype(np.uint8)

def get_hog_features(image):
    """Compute the HOG (histogram of oriented gradients) descriptor of an image.

    Uses 8 orientation bins, 4x4-pixel cells and 1x1-cell blocks. The HOG
    visualisation image is deliberately not requested: it was discarded by the
    previous implementation, so rendering it for every frame was wasted work.

    Args:
        image: Image to describe. Callers in this notebook pass a grayscale frame.

    Returns:
        The feature vector produced by ``skimage.feature.hog``.
    """
    return hog(image, orientations=8, pixels_per_cell=(4, 4), cells_per_block=(1, 1))


def plot_confusion_matrix(y_true, y_pred, classes):
    """Show the confusion matrix of ``y_pred`` against ``y_true`` as a blue heat map.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        classes: Display names used as tick labels on both axes.

    NOTE(review): this local definition shadows the ``plot_confusion_matrix``
    imported from ``utils.visual_evaluation`` in the import cell - confirm which
    one is meant to be used.
    """
    matrix = confusion_matrix(y_true, y_pred)

    plt.imshow(matrix, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('Confusion Matrix')
    plt.colorbar()

    # Same tick positions and names on both axes
    positions = np.arange(len(classes))
    for set_ticks in (plt.xticks, plt.yticks):
        set_ticks(positions, classes)

    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.show()
# Ground-truth annotation files, one episode per split
train_csv_path = '../data/Muppets-02-01-01.csv'        # training data: episode 02-01-01
test_csv_path = '../data/Muppets-02-04-04.csv'         # test data: episode 02-04-04
validation_csv_path = '../data/Muppets-03-04-03.csv'   # validation data: episode 03-04-03

# Load the three ground-truth tables in training / test / validation order
train_df, test_df, validation_df = (
    pd.read_csv(csv_path)
    for csv_path in (train_csv_path, test_csv_path, validation_csv_path)
)

# Directories that are assumed to hold the extracted frame images
training_frames_directory = '../data/training_frames/'
test_frames_directory = '../data/test_frames/'
# NOTE(review): hyphen here vs. underscore in the two directories above - confirm the folder name
validation_frames_directory = '../data/validation-frames/'

# Extract features for the training, test and validation datasets.
# The ground-truth CSVs are ';'-separated but were read with the default separator, so each
# row sits in a single string column whose name is the complete header line.
RAW_GROUND_TRUTH_COLUMN = 'Video;Frame_number;Kermit;Pigs;Audio_Pigs;Cook;Audio_Cook;StatlerWaldorf;Audio_StatlerWaldorf;Audio_MissPiggy'
FRAME_STEP = 100  # only analyzing every 100th frame - time reasons


def extract_features_and_labels(ground_truth_df, frames_directory, label_column, frame_step=FRAME_STEP):
    """Build the feature vectors and labels for one dataset split.

    The raw ';'-joined column is split into named columns once, up front (the
    previous code repeated this whole-frame split for every sampled row). Every
    row index divisible by ``frame_step`` is then turned into one feature vector:
    the dominant colour followed by the HOG descriptor of the grayscale frame.

    Args:
        ground_truth_df: DataFrame holding the single raw ';'-joined column. It is
            modified in place: the split columns are added, and 'Frame_number' and
            ``label_column`` are converted to numeric.
        frames_directory: Directory containing the ``frame_<index>.png`` images.
        label_column: Name of the ground-truth column used as the label.
        frame_step: Only row indices divisible by this value are processed.

    Returns:
        Tuple ``(features, labels)`` of two equally long lists.

    Raises:
        FileNotFoundError: If the image of a sampled frame cannot be read.
    """
    if ground_truth_df.empty:
        return [], []

    column_names = RAW_GROUND_TRUTH_COLUMN.split(';')
    ground_truth_df[column_names] = ground_truth_df[RAW_GROUND_TRUTH_COLUMN].str.split(';', expand=True)
    ground_truth_df['Frame_number'] = pd.to_numeric(ground_truth_df['Frame_number'])
    ground_truth_df[label_column] = pd.to_numeric(ground_truth_df[label_column])

    features = []
    labels = []
    for index in ground_truth_df.index:
        if index % frame_step != 0:
            continue

        frame_path = os.path.join(frames_directory, f"frame_{index}.png")
        print(frame_path)

        frame = cv2.imread(frame_path)
        if frame is None:
            # cv2.imread returns None instead of raising when the file is missing or unreadable
            raise FileNotFoundError(f"Could not read frame image: {frame_path}")

        # The label is looked up by row index, the same index that names the frame file
        labels.append(ground_truth_df.at[index, label_column])
        dominant_color = get_dominant_color(frame)
        hog_features = get_hog_features(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
        features.append(np.concatenate([dominant_color, hog_features]))

    return features, labels


train_features, train_labels = extract_features_and_labels(train_df, training_frames_directory, 'Kermit')
test_features, test_labels = extract_features_and_labels(test_df, test_frames_directory, 'Kermit')
validation_features, validation_labels = extract_features_and_labels(validation_df, validation_frames_directory, 'Kermit')

# Fit a support vector machine (default settings) on the training split
svm_classifier = SVC()
svm_classifier.fit(train_features, train_labels)

# Predict the test and the validation split with the fitted model
test_predictions, validation_predictions = (
    svm_classifier.predict(split_features)
    for split_features in (test_features, validation_features)
)

# Accuracy on the test split
test_accuracy = accuracy_score(test_labels, test_predictions)
print(f'Test Accuracy: {test_accuracy}')

# Accuracy on the validation split
validation_accuracy = accuracy_score(validation_labels, validation_predictions)
print(f'Validation Accuracy: {validation_accuracy}')

# Precision, recall and F1 score on the test split
test_precision, test_recall, test_f1 = (
    metric(test_labels, test_predictions)
    for metric in (precision_score, recall_score, f1_score)
)

print(f'Test Precision: {test_precision}')
print(f'Test Recall: {test_recall}')
print(f'Test F1 Score: {test_f1}')

# Confusion matrix of the test split
plot_confusion_matrix(test_labels, test_predictions, classes=['Absent', 'Present'])

# Visualize the ROC and Precision-Recall curves for the test split.
# Both curves sweep a decision threshold, so they need a continuous score per sample.
# Hard 0/1 predictions offer only one threshold and collapse each curve to a single
# operating point, so the SVM's decision-function values are used instead.
test_scores = svm_classifier.decision_function(test_features)

# ROC curve
fpr, tpr, _ = roc_curve(test_labels, test_scores)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')  # chance-level diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

# Precision-Recall curve
precision, recall, _ = precision_recall_curve(test_labels, test_scores)

plt.figure()
plt.step(recall, precision, color='b', where='post')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.show()

In [None]:
#visual waldorf&statler detection - trying it the same way as with kermit, needs refinement & other methods as well - to be done!

import os
import cv2
import numpy as np
import pandas as pd
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

def get_dominant_color(image):
    """Return the dominant colour of an image as a 3-element uint8 array.

    All pixels are flattened to rows of three channel values and clustered with
    OpenCV k-means using a single cluster (K=1); the centre of that cluster is
    returned, cast to uint8.

    Args:
        image: Array whose elements can be regrouped into rows of three channel
            values. Callers in this notebook pass frames loaded with cv2.imread.

    Returns:
        The single cluster centre, in the channel order of the input.
    """
    samples = np.float32(image.reshape(-1, 3))

    # Stop after 200 iterations or once the centre moves by less than 0.2
    stop_flags = cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER
    termination = (stop_flags, 200, 0.2)

    # cv2.kmeans returns (compactness, labels, centers); only the centers are needed
    kmeans_result = cv2.kmeans(samples, 1, None, termination, 10, cv2.KMEANS_RANDOM_CENTERS)
    centers = kmeans_result[2]
    return centers[0].astype(np.uint8)

def get_hog_features(image):
    """Compute the HOG (histogram of oriented gradients) descriptor of an image.

    Uses 8 orientation bins, 4x4-pixel cells and 1x1-cell blocks. The HOG
    visualisation image is deliberately not requested: it was discarded by the
    previous implementation, so rendering it for every frame was wasted work.

    Args:
        image: Image to describe. Callers in this notebook pass a grayscale frame.

    Returns:
        The feature vector produced by ``skimage.feature.hog``.
    """
    return hog(image, orientations=8, pixels_per_cell=(4, 4), cells_per_block=(1, 1))


def plot_confusion_matrix(y_true, y_pred, classes):
    """Show the confusion matrix of ``y_pred`` against ``y_true`` as a blue heat map.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        classes: Display names used as tick labels on both axes.

    NOTE(review): this local definition shadows the ``plot_confusion_matrix``
    imported from ``utils.visual_evaluation`` in the import cell - confirm which
    one is meant to be used.
    """
    matrix = confusion_matrix(y_true, y_pred)

    plt.imshow(matrix, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('Confusion Matrix')
    plt.colorbar()

    # Same tick positions and names on both axes
    positions = np.arange(len(classes))
    for set_ticks in (plt.xticks, plt.yticks):
        set_ticks(positions, classes)

    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')
    plt.show()
# Ground-truth annotation files, one episode per split
train_csv_path = '../data/Muppets-02-01-01.csv'        # training data: episode 02-01-01
test_csv_path = '../data/Muppets-02-04-04.csv'         # test data: episode 02-04-04
validation_csv_path = '../data/Muppets-03-04-03.csv'   # validation data: episode 03-04-03

# Load the three ground-truth tables in training / test / validation order
train_df, test_df, validation_df = (
    pd.read_csv(csv_path)
    for csv_path in (train_csv_path, test_csv_path, validation_csv_path)
)

# Directories that are assumed to hold the extracted frame images
training_frames_directory = '../data/training_frames/'
test_frames_directory = '../data/test_frames/'
# NOTE(review): hyphen here vs. underscore in the two directories above - confirm the folder name
validation_frames_directory = '../data/validation-frames/'

# Extract features for the training, test and validation datasets.
# The ground-truth CSVs are ';'-separated but were read with the default separator, so each
# row sits in a single string column whose name is the complete header line.
RAW_GROUND_TRUTH_COLUMN = 'Video;Frame_number;Kermit;Pigs;Audio_Pigs;Cook;Audio_Cook;StatlerWaldorf;Audio_StatlerWaldorf;Audio_MissPiggy'
FRAME_STEP = 100  # only analyzing every 100th frame - time reasons


def extract_features_and_labels(ground_truth_df, frames_directory, label_column, frame_step=FRAME_STEP):
    """Build the feature vectors and labels for one dataset split.

    The raw ';'-joined column is split into named columns once, up front (the
    previous code repeated this whole-frame split for every sampled row). Every
    row index divisible by ``frame_step`` is then turned into one feature vector:
    the dominant colour followed by the HOG descriptor of the grayscale frame.

    Args:
        ground_truth_df: DataFrame holding the single raw ';'-joined column. It is
            modified in place: the split columns are added, and 'Frame_number' and
            ``label_column`` are converted to numeric.
        frames_directory: Directory containing the ``frame_<index>.png`` images.
        label_column: Name of the ground-truth column used as the label.
        frame_step: Only row indices divisible by this value are processed.

    Returns:
        Tuple ``(features, labels)`` of two equally long lists.

    Raises:
        FileNotFoundError: If the image of a sampled frame cannot be read.
    """
    if ground_truth_df.empty:
        return [], []

    column_names = RAW_GROUND_TRUTH_COLUMN.split(';')
    ground_truth_df[column_names] = ground_truth_df[RAW_GROUND_TRUTH_COLUMN].str.split(';', expand=True)
    ground_truth_df['Frame_number'] = pd.to_numeric(ground_truth_df['Frame_number'])
    ground_truth_df[label_column] = pd.to_numeric(ground_truth_df[label_column])

    features = []
    labels = []
    for index in ground_truth_df.index:
        if index % frame_step != 0:
            continue

        frame_path = os.path.join(frames_directory, f"frame_{index}.png")
        print(frame_path)

        frame = cv2.imread(frame_path)
        if frame is None:
            # cv2.imread returns None instead of raising when the file is missing or unreadable
            raise FileNotFoundError(f"Could not read frame image: {frame_path}")

        # The label is looked up by row index, the same index that names the frame file
        labels.append(ground_truth_df.at[index, label_column])
        dominant_color = get_dominant_color(frame)
        hog_features = get_hog_features(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
        features.append(np.concatenate([dominant_color, hog_features]))

    return features, labels


train_features, train_labels = extract_features_and_labels(train_df, training_frames_directory, 'StatlerWaldorf')
test_features, test_labels = extract_features_and_labels(test_df, test_frames_directory, 'StatlerWaldorf')
validation_features, validation_labels = extract_features_and_labels(validation_df, validation_frames_directory, 'StatlerWaldorf')

# Fit a support vector machine (default settings) on the training split
svm_classifier = SVC()
svm_classifier.fit(train_features, train_labels)

# Predict the test and the validation split with the fitted model
test_predictions, validation_predictions = (
    svm_classifier.predict(split_features)
    for split_features in (test_features, validation_features)
)

# Accuracy on the test split
test_accuracy = accuracy_score(test_labels, test_predictions)
print(f'Test Accuracy: {test_accuracy}')

# Accuracy on the validation split
validation_accuracy = accuracy_score(validation_labels, validation_predictions)
print(f'Validation Accuracy: {validation_accuracy}')

# Precision, recall and F1 score on the test split
test_precision, test_recall, test_f1 = (
    metric(test_labels, test_predictions)
    for metric in (precision_score, recall_score, f1_score)
)

print(f'Test Precision: {test_precision}')
print(f'Test Recall: {test_recall}')
print(f'Test F1 Score: {test_f1}')

# Confusion matrix of the test split
plot_confusion_matrix(test_labels, test_predictions, classes=['Absent', 'Present'])

# Visualize the ROC and Precision-Recall curves for the test split.
# Both curves sweep a decision threshold, so they need a continuous score per sample.
# Hard 0/1 predictions offer only one threshold and collapse each curve to a single
# operating point, so the SVM's decision-function values are used instead.
test_scores = svm_classifier.decision_function(test_features)

# ROC curve
fpr, tpr, _ = roc_curve(test_labels, test_scores)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')  # chance-level diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

# Precision-Recall curve
precision, recall, _ = precision_recall_curve(test_labels, test_scores)

plt.figure()
plt.step(recall, precision, color='b', where='post')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
plt.show()

"# Detecting Kermit and Waldorf & Statler based on visual features\n",
    "\n",
    "This notebook documents our approach to detect Kermit and Waldorf and Statler based on visual feature-engineering.  \n",
    "We employ a support vector machine to predict the characters based on visual features chosen for each character. I have run into a lot of issues so far that still need to be fixed. \n",
    "\n",
    "## Time sheet for this notebook\n",
    "\n",
    "**Alina Ehart:**\n",
    "\n",
    "<table>\n",
    "<thead>\n",
    "  <tr>\n",
    "    <th>Date</th>\n",
    "    <th>Task</th>\n",
    "    <th>Hours</th>\n",
    "\n",
    "  </tr>\n",
    "</thead>\n",
    "<tbody>\n",
    "  <tr>\n",
    "    <td>6.12.2023</td>\n",
    "    <td>Set up notebook, dependencies, venv and installations; experimented with video frame extraction - ran into a lot of trouble</td>\n",
    "    <td>6</td>\n",
    "  </tr>\n",
    "  <tr>\n",
    "    <td>7.12.2023</td>\n",
    "    <td>Implementing video frame extraction further</td>\n",
    "    <td>1</td>\n",
    "\n",
    "  </tr>\n",
    "  <tr>\n",
    "    <td>8.12.2023</td>\n",
    "    <td>Implementing video frame extraction further, experimenting with visual feature extraction </td>\n",
    "    <td>2</td>\n",
    "  </tr>\n",
    "  <tr>\n",
    "    <td>9.12.2023</td>\n",
    "    <td>Improving video frame extraction, visual feature engineering</td>\n",
    "    <td>4</td>\n",
    "  </tr>\n",
    "  <tr>\n",
    "    <td>10.12.2023</td>\n",
    "    <td>Visual Feature Engineering (dominant colour & texture), classification (SVM) </td>\n",
    "    <td>8</td>\n",
    "  </tr>\n",
    "</tbody>\n",
    "</table>\n"
   ]
  },

In [None]:
#fixing issues
!pip install --upgrade numpy
!pip install --upgrade scipy
!pip install --upgrade scikit-learn
!pip install --upgrade audioread
!pip install --upgrade numba
!pip install --upgrade resampy
