In [None]:
# %load C:\Users\sachu\Desktop\My_Documents\Thesis\Base_model\Existing_model\gesture_classification\run_classification.py
from argparse import ArgumentParser
import logging

from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.strategies import DDPStrategy
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks import LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger

from gesture_classification.datasets import SnippetClassificationLightningDataset
from gesture_classification.model import LitModel
from gesture_classification.helpers import (
    get_num_frames, get_subsample_rate,
    get_accelerator, parse_use_keypoints
)
from gesture_classification.constants import SEED

logging.getLogger("lightning").setLevel(logging.WARNING)




In [None]:
import re
import time

# ---------------------------------------------------------------------------
# Run configuration: dataset / checkpoint locations and Trainer settings.
# ---------------------------------------------------------------------------
dataset_home = r'/beegfs/.global0/ws/sapo684c-sac_space/Ellen_Dataset_with_Optical_Flow_Masking_Final/ellen_show_length_1300_sample_rate_1_iou_0.55'
logger_name = "gesture_classification"
logger_folder = r'/beegfs/ws/0/sapo684c-sac_space/Gesture_Classification/Logger_File'
batch_size = 1
model_name = "videomae"
pretrained_model = "MCG-NJU/videomae-base-finetuned-ssv2"
#zero_normalisation=True
nodes = 1
gpus = 1
epochs = 2
num_workers = 16 * gpus
accumulate_batches = 8
learning_rate = 1e-5
save_top_k = 1
precision = 16
use_keypoints = 0
checkpoint_path = r'/beegfs/ws/0/sapo684c-sac_space/Gesture_Classification/Logger_File/gesture_classification/version_162/checkpoints/checkpoint-epoch=135-val_acc=0.79.ckpt'

# The 'version_<N>' component of the checkpoint path is reused by later cells
# as the name of the output sub-directory. Fail early, with a readable message,
# if the path does not contain one (instead of an AttributeError on None).
match = re.search(r'version_(\d+)', checkpoint_path)
if match is None:
    raise ValueError(
        f"No 'version_<N>' component found in checkpoint path: {checkpoint_path}"
    )
version_string = match.group(0)

# ---------------------------------------------------------------------------
# Data module and model.
# ---------------------------------------------------------------------------
seed_everything(SEED, workers=True)
subsample_rate = get_subsample_rate(dataset_home)
num_frames = get_num_frames(dataset_home, subsample_rate)
accelerator = get_accelerator()
dm = SnippetClassificationLightningDataset(
    dataset_home,
    batch_size,
    num_workers,
    subsample_rate,
    num_frames,
    #zero_normalisation,
    use_keypoints,
)
model = LitModel(
    model_name, pretrained_model, num_frames, learning_rate, use_keypoints
)
model.save_hyperparameters()

checkpoint_f1 = ModelCheckpoint(
    save_top_k=save_top_k, mode="max", monitor="val_f1",
    filename="checkpoint-{epoch:02d}-{val_f1:.2f}"
)
checkpoint_acc = ModelCheckpoint(
    save_top_k=save_top_k, mode="max", monitor="val_acc",
    filename="checkpoint-{epoch:02d}-{val_acc:.2f}"
)
lr_monitor = LearningRateMonitor(logging_interval="step")

# load_from_checkpoint is a classmethod that returns a NEW model; call it on
# the class rather than on the `model` instance (newer Lightning releases
# reject the instance call outright).
loaded_model = LitModel.load_from_checkpoint(checkpoint_path, num_frames=num_frames)

#logger = TensorBoardLogger(
 #   name=logger_name,
  #  save_dir=logger_folder)
# Trainer arguments are carried over from the training script this cell was
# loaded from; only trainer.test() is run here.
trainer = Trainer(
    accelerator=accelerator,
    devices=gpus,
    num_nodes=nodes,
    max_epochs=epochs,
    strategy='dp',
    precision=precision,
    enable_progress_bar=False,
    callbacks=[checkpoint_f1, checkpoint_acc, lr_monitor],
    #logger=logger,
    accumulate_grad_batches=accumulate_batches,
)

# Evaluate the restored model on the data module's *train* dataloader and time it.
#test_results = trainer.test(loaded_model, dm)
start_time = time.time()
test_results = trainer.test(loaded_model, dm.train_dataloader())
end_time = time.time()
total_time = end_time - start_time
print(f"Total Time: {total_time:.2f} seconds")
    

In [3]:
# Per-batch predictions stored on the model object
# (presumably filled in during trainer.test — confirm in LitModel).
predictions = loaded_model.predictions_new

In [4]:
# Per-batch ground-truth labels stored on the model object
# (presumably filled in during trainer.test — confirm in LitModel).
ground_truth = loaded_model.ground

In [5]:
# Per-batch snippet filenames stored on the model object
# (presumably filled in during trainer.test — confirm in LitModel).
filenames = loaded_model.filenames

In [6]:
# Inspect the raw structure: a list of one-element lists of '.npz' names.
print(filenames)

[['000000000_2014-11-29_939-1223.npz'], ['000000001_2014-11-29_939-1223.npz'], ['000000002_2014-11-29_939-1223.npz'], ['000000003_2014-11-29_939-1223.npz'], ['000000004_2014-11-29_939-1223.npz'], ['000000005_2014-11-29_939-1223.npz'], ['000000006_2014-11-29_939-1223.npz'], ['000000007_2014-11-29_939-1223.npz'], ['000000008_2014-11-29_939-1223.npz'], ['000000009_2014-11-29_939-1223.npz'], ['000000010_2014-11-29_939-1223.npz'], ['000000011_2014-11-29_939-1223.npz'], ['000000012_2014-11-29_939-1223.npz'], ['000000013_2014-11-29_939-1223.npz'], ['000000014_2014-11-29_939-1223.npz'], ['000000015_2014-11-29_939-1223.npz'], ['000000016_2014-11-29_939-1223.npz'], ['000000017_2014-11-29_939-1223.npz'], ['000000018_2014-11-29_939-1223.npz'], ['000000019_2014-11-29_939-1223.npz'], ['000000020_2014-11-29_939-1223.npz'], ['000000021_2014-11-29_939-1223.npz'], ['000000022_2014-11-29_939-1223.npz'], ['000000023_2014-11-29_939-1223.npz'], ['000000024_2014-11-29_939-1223.npz'], ['000000025_2014-11-29_9

In [7]:
# Flatten the per-batch lists of filenames into one flat list.
flattened_list = [
    name
    for batch_names in filenames
    for name in batch_names
]

In [8]:
# Flatten the per-batch ground-truth labels into one flat list.
flattened_ground_list = [
    label
    for batch_labels in ground_truth
    for label in batch_labels
]

In [None]:
# Display the flattened ground-truth labels (shows the whole list).
flattened_ground_list

In [10]:
# Flatten the per-batch predictions into one flat list.
flattened_prediction_list = [
    score
    for batch_scores in predictions
    for score in batch_scores
]

In [None]:
# Display the flattened predictions (shows the whole list).
flattened_prediction_list

In [12]:
# Check the flattening: the output should now be a flat list of '.npz' names.
print(flattened_list)

['000000000_2014-11-29_939-1223.npz', '000000001_2014-11-29_939-1223.npz', '000000002_2014-11-29_939-1223.npz', '000000003_2014-11-29_939-1223.npz', '000000004_2014-11-29_939-1223.npz', '000000005_2014-11-29_939-1223.npz', '000000006_2014-11-29_939-1223.npz', '000000007_2014-11-29_939-1223.npz', '000000008_2014-11-29_939-1223.npz', '000000009_2014-11-29_939-1223.npz', '000000010_2014-11-29_939-1223.npz', '000000011_2014-11-29_939-1223.npz', '000000012_2014-11-29_939-1223.npz', '000000013_2014-11-29_939-1223.npz', '000000014_2014-11-29_939-1223.npz', '000000015_2014-11-29_939-1223.npz', '000000016_2014-11-29_939-1223.npz', '000000017_2014-11-29_939-1223.npz', '000000018_2014-11-29_939-1223.npz', '000000019_2014-11-29_939-1223.npz', '000000020_2014-11-29_939-1223.npz', '000000021_2014-11-29_939-1223.npz', '000000022_2014-11-29_939-1223.npz', '000000023_2014-11-29_939-1223.npz', '000000024_2014-11-29_939-1223.npz', '000000025_2014-11-29_939-1223.npz', '000000026_2014-11-29_939-1223.npz', 

# Writing CSV for ELAN software

In [13]:
import pandas as pd

In [None]:

import os

# One CSV per recording ("file group") is written below <output>/<version_string>/.
output = r'/beegfs/.global0/ws/sapo684c-sac_space/ELAN_Mapping_prediction/Train_dataset'
output_dir = os.path.join(output, version_string)
os.makedirs(output_dir, exist_ok=True)  # no check-then-create race

# Length of one snippet on the ELAN time axis
# (presumably milliseconds, matching 'length_1300' in the dataset name — confirm).
snippet_length = 1300

# Collect rows per file group first. Writing on every group *change* (as the
# previous version did) overwrote a group's CSV whenever that group reappeared
# later in the sequence; grouping in a dict keeps all rows of a group together.
rows_by_group = {}
for filename, ground, prediction in zip(flattened_list, flattened_ground_list, flattened_prediction_list):
    # '000000000_2014-11-29_939-1223.npz' -> '2014-11-29_939-1223'
    file_group = '_'.join(filename.split('_')[1:]).split('.')[0]
    rows = rows_by_group.setdefault(file_group, [])

    # Times are synthetic: the k-th snippet of a group covers
    # [k * snippet_length, (k + 1) * snippet_length).
    start_time = len(rows) * snippet_length
    end_time = start_time + snippet_length
    rows.append([filename, ground, prediction, start_time, end_time])

# Dicts preserve insertion order, so files are written in first-seen order.
for file_group, rows in rows_by_group.items():
    output_filename = f'{file_group}.csv'
    df = pd.DataFrame(rows, columns=['Filename', 'Ground Truth', 'Predictions', 'Start Time', 'End Time'])
    df.to_csv(os.path.join(output_dir, output_filename), index=False)
    print(f"CSV file '{output_filename}' generated successfully.")

# Writing CSV for ELAN software, taking start/end times from the filename

In [14]:
import pandas as pd

In [None]:
import os

# One CSV per recording ("file group") is written below <output>/<version_string>/.
output = r'/beegfs/.global0/ws/sapo684c-sac_space/ELAN_Mapping_prediction'
output_dir = os.path.join(output, version_string)
os.makedirs(output_dir, exist_ok=True)  # no check-then-create race

# Collect rows per file group first. Writing on every group *change* (as the
# previous version did) overwrote a group's CSV whenever that group reappeared
# later in the sequence; grouping in a dict keeps all rows of a group together.
rows_by_group = {}
for filename, ground, prediction in zip(flattened_list, flattened_ground_list, flattened_prediction_list):
    parts = filename.split('_')
    # This cell needs at least five '_'-separated fields:
    #   parts[0]   snippet index
    #   parts[1:3] file group
    #   parts[3]   start time
    #   parts[4]   end time (+ file extension)
    # Names with fewer fields used to die with a bare IndexError.
    if len(parts) < 5:
        raise ValueError(
            f"Filename {filename!r} has only {len(parts)} '_'-separated fields; "
            "start/end times cannot be read from it (at least 5 fields required)."
        )

    file_group = '_'.join(parts[1:3]).split('.')[0]
    start_time = parts[3]
    end_time = parts[4].split('.')[0]  # strip the file extension

    rows_by_group.setdefault(file_group, []).append(
        [filename, ground, prediction, start_time, end_time]
    )

# Dicts preserve insertion order, so files are written in first-seen order.
for file_group, rows in rows_by_group.items():
    output_filename = f'{file_group}.csv'
    df = pd.DataFrame(rows, columns=['Filename', 'Ground Truth', 'Predictions', 'Start Time', 'End Time'])
    df.to_csv(os.path.join(output_dir, output_filename), index=False)
    print(f"CSV file '{output_filename}' generated successfully.")

# Precision-Recall Curve

In [13]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve, auc

In [14]:
# Ground-truth labels under the name used for the sklearn metric calls below.
y_true = flattened_ground_list

In [15]:
# Model predictions under the name used for the sklearn metric calls below.
y_scores = flattened_prediction_list

In [16]:
# Precision/recall pairs over all score thresholds, then the area under that curve.
# NOTE: `precision` rebinds the Trainer precision setting defined in the setup cell.
pr_curve = precision_recall_curve(y_true, y_scores)
precision, recall, thresholds = pr_curve
area = auc(x=recall, y=precision)

In [None]:
# Report the area under the precision-recall curve, rounded to two decimals.
print(f"AUC value: {area:.2f}")

In [None]:
# Precision-recall curve for the current run, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(recall, precision, label=f"Masking:-(AUC = {area:.2f})")
ax.set_xlabel('Recall')
ax.set_ylabel('Precision')
ax.set_title('Precision-Recall Curve')
ax.grid(True)
ax.legend(loc="best")
plt.show()

# ROC curve

In [19]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split

In [20]:
# False/true positive rates over all score thresholds, then the area under the ROC curve.
roc_points = roc_curve(y_true, y_scores)
fpr, tpr, _ = roc_points
roc_auc = auc(x=fpr, y=tpr)

In [None]:
# ROC curve for the current run, with the diagonal (dashed) for reference.
fig, ax = plt.subplots()
ax.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic')
ax.legend(loc="lower right")
plt.show()

In [22]:
import csv


In [None]:
# Disabled code kept inside a string literal so it does not execute: it would
# write (y_true, y_scores) pairs to a new CSV with the header 'GT-106','PT-106'.
'''fp="/beegfs/.global0/ws/sapo684c-sac_space/ELAN_Mapping_prediction/prec_rec_v106_train.csv"
data = list(zip(y_true,y_scores))
with open(fp,mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['GT-106','PT-106'])
    writer.writerows(data)'''

In [None]:
# Append this run's ground truth / predictions as two new columns to an
# existing comparison CSV (the file is read, extended and rewritten in place).
csv_filename = r"/beegfs/.global0/ws/sapo684c-sac_space/ELAN_Mapping_prediction/prec_rec_1300_train_msec.csv"
new_columns = ["GT-v106", "PT-v106"]

# newline='' is what the csv module expects for both reading and writing.
with open(csv_filename, 'r', newline='') as csvfile:
    csv_reader = csv.reader(csvfile)
    header = next(csv_reader, None)
    data = list(csv_reader)

if header is None:
    raise ValueError(f"CSV file '{csv_filename}' is empty; expected a header row.")

print(f"Read {len(data)} existing rows with columns {header}")

if any(column in header for column in new_columns):
    # Re-running the cell used to append the same columns again and again.
    print(f"CSV file '{csv_filename}' already contains {new_columns}; nothing to do.")
else:
    # zip() stops at the shortest input, so a length mismatch would silently
    # drop rows from the file that is about to be overwritten. Refuse instead.
    if not (len(data) == len(y_true) == len(y_scores)):
        raise ValueError(
            f"Row count mismatch: CSV has {len(data)} rows, "
            f"y_true has {len(y_true)}, y_scores has {len(y_scores)}."
        )

    new_header = header + new_columns
    combined_data = [
        existing_row + [val1, val2]
        for existing_row, val1, val2 in zip(data, y_true, y_scores)
    ]

    with open(csv_filename, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(new_header)
        csv_writer.writerows(combined_data)

    print(f"CSV file '{csv_filename}' updated successfully.")
    


In [None]:
 # Replace with your actual y_scores data

# Create a fresh two-column CSV (header row + one row per sample) for this run.
csv_filename = r"/beegfs/.global0/ws/sapo684c-sac_space/ELAN_Mapping_prediction/prec_rec_250_without_NA_layer_msec.csv"

header_row = ["GT-v230", "PT-v230"]  # column names for y_true and y_scores
value_rows = [list(pair) for pair in zip(y_true, y_scores)]
new_data = [header_row] + value_rows

with open(csv_filename, 'w', newline='') as csvfile:
    csv.writer(csvfile).writerows(new_data)

print(f"CSV file '{csv_filename}' created successfully.")

# Precision-Recall curves for all versions together

In [24]:
# Number of scored samples in this run.
len(y_scores)

1656

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve, auc

# Comparison CSV holding one "GT-<version>" / "PT-<version>" column pair per run.
csv_filename = "/beegfs/.global0/ws/sapo684c-sac_space/ELAN_Mapping_prediction/prec_rec_250_msec.csv"  # Update with your file name
df = pd.read_csv(csv_filename)

# Run version -> legend label.
# (alternative set: {'v104': '3-OF', 'v98': '3-VC', 'v106': '6-channel'})
version_labels = {'v228': '3-OF', 'v225': '3-VC', 'v237': '6-channel'}
versions = [106, 104, 98]

fig, ax = plt.subplots()

# One precision-recall curve per version, all on the same axes.
for version in version_labels:
    precision, recall, _ = precision_recall_curve(
        df[f"GT-{version}"], df[f"PT-{version}"]
    )
    area = auc(recall, precision)
    ax.plot(recall, precision, label=f"{version_labels[version]} (AUC = {area:.2f})")

ax.set_xlabel("Recall")
ax.set_ylabel("Precision")
ax.set_title("Precision-Recall Curves for Different Versions")
ax.legend(loc="best")
ax.grid(True)

# Zoom in on the region of interest.
ax.set_xlim(0, 1.02)
ax.set_ylim(0.3, 1.02)

plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc

# Comparison CSV holding one "GT-<version>" / "PT-<version>" column pair per run.
csv_filename = "/beegfs/.global0/ws/sapo684c-sac_space/ELAN_Mapping_prediction/prec_rec_250_msec.csv"  # Update with your file name
df = pd.read_csv(csv_filename)

# Run version -> legend label.
version_labels = {'v228': '3-OF', 'v225': '3-VC', 'v237': '6-channel'}

fig, ax = plt.subplots()

# One ROC curve per version, all on the same axes.
for version in version_labels:
    fpr, tpr, _ = roc_curve(df[f"GT-{version}"], df[f"PT-{version}"])
    area = auc(fpr, tpr)
    ax.plot(fpr, tpr, label=f"{version_labels[version]} (AUC = {area:.2f})")

ax.set_xlabel("False Positive Rate")
ax.set_ylabel("True Positive Rate")
ax.set_title("ROC Curves for Different Versions")
ax.legend(loc="best")
ax.grid(True)
plt.show()


# Probability Distribution

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Total number of predictions.
print(len(flattened_prediction_list))

In [None]:
# Two independent, empty containers for scores below / at-or-above the threshold.
neg_list, pos_list = [], []

In [None]:
# Split predictions at the 0.5 decision threshold.
# The lists are rebuilt from scratch, so re-running this cell no longer appends
# duplicates to lists created in an earlier cell.
pos_list = [score for score in flattened_prediction_list if score >= 0.5]
# `not (score >= 0.5)` mirrors the original else-branch exactly (NaN lands here too).
neg_list = [score for score in flattened_prediction_list if not (score >= 0.5)]

In [None]:
# Number of predictions at or above 0.5.
print(len(pos_list))

In [None]:
# Number of predictions below 0.5.
print(len(neg_list))

In [None]:
# Overlay two histograms on the same axes: scores below 0.5 (neg_list)
# and scores at or above 0.5 (pos_list), 10 bins each.
plt.hist(neg_list, bins=10, histtype= 'bar')
plt.hist(pos_list, bins=10)

In [None]:
# Histogram of all predictions, 10 bins.
plt.hist(flattened_prediction_list, bins=10)

In [None]:
# Histogram of the ground-truth labels, 10 bins.
plt.hist(flattened_ground_list, bins=10)

In [None]:
# Display the flattened ground-truth labels (shows the whole list).
flattened_ground_list

In [None]:
# Display the flattened predictions (shows the whole list).
flattened_prediction_list

# Correct Probability Distribution

In [22]:
import matplotlib.pyplot as plt

In [None]:
# Print one line per snippet: ground truth, prediction, filename.
for triple in zip(flattened_ground_list, flattened_prediction_list, flattened_list):
    print(*triple)

In [24]:
# Partition the predictions by their TRUE label:
# `neg` gets scores of samples labelled 0.0, `pos` gets all the others.
neg, pos = [], []
for label, score in zip(flattened_ground_list, flattened_prediction_list):
    bucket = neg if label == 0.0 else pos
    bucket.append(score)
      
        

In [None]:
# Number of samples whose ground-truth label is 0.0.
print(len(neg))

In [None]:
# Number of samples whose ground-truth label is not 0.0.
print(len(pos))

In [None]:
# Overlaid, semi-transparent histograms of the predictions per true class.
fig, ax = plt.subplots()
ax.hist(neg, bins=40, histtype='bar', alpha=0.5, label='non-gesture')
ax.hist(pos, bins=40, histtype='bar', alpha=0.5, label='gesture')
ax.set_xlabel("predictions")
ax.set_title('Distribution of predictions')
ax.set_ylabel("frequency")
ax.legend()
ax.set_ylim(0, 1500)
plt.show()

In [28]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Kernel-density estimate of the predictions, one curve per true class.
data1 = neg
data2 = pos
sns.set(style="whitegrid")
# `fill=True` replaces the deprecated `shade=True` (same filled-area rendering);
# `fill` is available in every seaborn release that supports `bw_adjust`.
sns.kdeplot(data1, fill=True, label="Non gesture", bw_adjust=0.4)
sns.kdeplot(data2, fill=True, label="gesture", bw_adjust=0.4)
plt.xlabel("predictions")
plt.ylabel("frequency")
plt.title("Prediction analysis")
plt.legend(loc='upper center')
plt.ylim(0, 10)
plt.show()


# Analysis of FN, FP

Writing all predictions for all snippets

In [None]:
import json

# One JSON record per snippet: ground truth, prediction and filename.
# The builtin float() replaces np.float, which was only an alias for it and
# was removed in NumPy 1.24 (AttributeError on current NumPy).
full = [
    {'GT': float(val1), 'PT': float(val2), 'File': val3}
    for val1, val2, val3 in zip(flattened_ground_list, flattened_prediction_list, flattened_list)
]

# NOTE(review): the file name says 'version98' while the checkpoint loaded above
# is 'version_162' — confirm this is the intended output file.
newfp = r'/beegfs/.global0/ws/sapo684c-sac_space/Testing_from_models/False_positive_negative_analysis/version98.json'

with open(newfp, 'w') as file:
    json.dump(full, file)

Writing predictions for FP, FN

In [None]:
# Collect misclassified snippets at the 0.5 decision threshold:
#   FP - true label 0, prediction >= 0.5
#   FN - true label 1, prediction <  0.5
# The builtin float() replaces np.float, which was only an alias for it and
# was removed in NumPy 1.24 (AttributeError on current NumPy).
FN = []
FP = []
for val1, val2, val3 in zip(flattened_ground_list, flattened_prediction_list, flattened_list):
    dist = {'GT': float(val1), 'PT': float(val2), 'File': val3}
    if val1 == 0 and val2 >= 0.5:
        FP.append(dist)
    if val1 == 1 and val2 < 0.5:
        FN.append(dist)

In [None]:
# All misclassified snippets: false positives first, then false negatives.
wrong = [*FP, *FN]

In [None]:
# Destination file for the misclassified (FP + FN) records.
newfp = r'/beegfs/.global0/ws/sapo684c-sac_space/Testing_from_models/False_positive_negative_analysis/version98_FP_FN.json'

In [None]:
import json

# Serialise the misclassified records to the JSON file at `newfp`.
with open(newfp, 'w') as out_file:
    json.dump(wrong, out_file)

In [None]:

# Read the JSON file back and print its contents as a round-trip check.
with open(newfp, 'r') as in_file:
    loaded_data = json.load(in_file)

print(loaded_data)