In [2]:
import pandas as pd, numpy as np, os, math, sklearn.metrics as skm, seaborn as sns, pickle
from matplotlib import pyplot as plt

# This cell can be used to calculate statistics
# related to object detection and classification

# add some basic info necessary for calculations

# Lookup tables keyed by "<file><rank>" strings (see add_og_info for how keys are built).
# NOTE(review): pickle.load can execute arbitrary code -- only load files produced by this project.
with open('./useful_dicts/file_to_nearest_neighbor.pkl', 'rb') as nn_handle:
    file_to_nn = pickle.load(nn_handle)

with open('./useful_dicts/file_to_scaling.pkl', 'rb') as scaling_handle:
    file_to_scaling = pickle.load(scaling_handle)

def add_info(df, path_obj):
    """Enrich a raw results frame with rank, predicted class, match flag, IoU and confidence.

    The enriched frame is cached as ``./{root}/rich_data/<name>_corrected.csv``
    (``<name>`` is ``path_obj.name`` without its last four characters). If that
    file already exists it is read and returned instead of being recomputed.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw results. Reads the columns ``file``, ``cls``, ``gt``, ``lgt``,
        ``rgt``, ``lpd``, ``rpd`` and the positional columns 8..16.
        NOTE(review): columns 8..16 are presumably the per-class scores
        starting with ``bg`` -- confirm against the input schema.
    path_obj : os.DirEntry-like
        Only ``path_obj.name`` is used.

    Returns
    -------
    pandas.DataFrame
        ``df`` itself (modified in place), or the previously cached frame.

    Notes
    -----
    Relies on the module-level ``root``.
    """
    # One path for both the cache check and the write, so the cache can actually hit.
    cache_path = f'./{root}/rich_data/{path_obj.name[:-4]}_corrected.csv'
    if os.path.isfile(cache_path):
        return pd.read_csv(cache_path)

    def IoU(df):
        """1-D intersection-over-union of the [lgt, rgt] and [lpd, rpd] intervals."""
        right_i = df[['rgt', 'rpd']].min(axis=1)
        left_i = df[['lgt', 'lpd']].max(axis=1)

        right_u = df[['rgt', 'rpd']].max(axis=1)
        left_u = df[['lgt', 'lpd']].min(axis=1)
        return (right_i - left_i) / (right_u - left_u)

    # Score columns are addressed by position; grab their names before adding new columns.
    score_cols = df.columns[8:17]

    # Left-to-right rank of each row within its file, used to correlate
    # scaled results data with the original data
    df['rank'] = df.groupby('file').cumcount()

    # Extract predicted class
    df['pcls'] = df[score_cols].idxmax(axis=1)
    # Rows without a ground-truth class are labelled 'phi'. Plain assignment
    # instead of chained fillna(inplace=True), which may act on a temporary.
    df['cls'] = df['cls'].fillna('phi')
    # Counts as a match if the predicted class equals the true class
    df['match'] = (df['pcls'] == df['cls'])

    # 'bg' is renamed to 'phi' AFTER matches are picked, so a background
    # prediction on a row without ground truth is not counted as a match.
    df.loc[df['pcls'] == 'bg', 'pcls'] = 'phi'

    # IoU is only defined where a GT box exists and something was predicted
    IoU_mask = (df['gt'] == 1) & (df['pcls'] != 'phi')
    df.loc[IoU_mask, 'IoU'] = IoU(df)

    # Confidence of the predicted class; inf marks "no prediction" rows
    pcls_mask = df['pcls'] != 'phi'
    df['pcls_prob'] = float('inf')
    df.loc[pcls_mask, 'pcls_prob'] = df[score_cols].max(axis=1)

    os.makedirs(os.path.dirname(cache_path), exist_ok=True)
    df.to_csv(cache_path, index=False)
    return df

# calculate confusion matrix
def confusion_matrix(df, mechs):
    """Return the confusion matrix of ``df['cls']`` (true) vs ``df['pcls']`` (predicted).

    Rows/columns follow the order of ``mechs``; each row is normalized over the
    true label (``normalize='true'``).
    """
    return skm.confusion_matrix(
        y_true=df['cls'],
        y_pred=df['pcls'],
        labels=mechs,
        normalize='true',
    )

def plot_conf_mat(df, cm, labels, title):
    """Draw ``cm`` as an annotated heatmap, title it with an accuracy figure and save it.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``match``, ``cls``, ``pcls``, ``gt`` and ``pd`` columns; used
        only to compute the accuracy shown in the title.
    cm : array-like
        Confusion matrix to plot.
    labels : list
        Tick labels for both axes.
    title : str
        Output path handed to ``savefig`` (not the figure title).
    """
    fig, ax = plt.subplots(figsize=(8, 5))
    sns.heatmap(cm, annot=True, fmt='.2f', xticklabels=labels, yticklabels=labels, cmap='viridis', ax=ax)
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')

    tp = len(df.loc[(df['match']) & (df['cls'] != 'phi')])
    # NOTE(review): this term counts rows with neither a GT box nor a prediction,
    # despite the name "fn" -- kept as-is so reported accuracies do not change.
    fn = len(df.loc[(df['gt'] == 0) & (df['pd'] == 0)])
    fp1 = len(df.loc[(df['gt'] == 0) & (df['pd'] == 1) & (df['pcls'] != 'phi')])
    fp2 = len(df.loc[(df['gt'] == 1) & (df['pd'] == 1) & (df['match'] == False)])
    total = fn + fp1 + fp2 + tp
    accuracy = tp / total if total else 0.0  # empty frame -> 0 instead of ZeroDivisionError
    ax.set_title(round(accuracy, 4))

    fig.savefig(title, dpi=200)
    # Close rather than clear: every call creates a new figure, and clf() alone
    # would leave them all open when this runs in a loop.
    plt.close(fig)

# calculates stats related to object detection results
def obj_det_stats(df, save=False, path=None, train_noise=0, test_noise=0, scan_rates=6):
    """Compute object-detection counts plus precision, recall and F1.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``gt`` (1 where a ground-truth box exists) and
        ``pd`` (1 where a box was predicted).
    save : bool
        When true, write the stats to
        ``{path}/Object_Detection_Stats_{train_noise}_{test_noise}_{scan_rates}.csv``.
    path : str or None
        Output directory, only used when ``save`` is true.
    train_noise, test_noise, scan_rates
        Only used to label the CSV column and file name.

    Returns
    -------
    dict
        Keys: predicted_pos, population, false_neg, false_pos, true_pos,
        precision, recall, f1. Ratios with a zero denominator are reported
        as 0.0 instead of raising ZeroDivisionError.
    """
    def safe_div(numerator, denominator):
        return numerator / denominator if denominator else 0.0

    has_gt = df['gt'] == 1
    has_pd = df['pd'] == 1

    stats = {}
    stats['predicted_pos'] = len(df.loc[has_pd]) # TP, FP
    stats['population'] = len(df.loc[has_gt]) # total GT bounds
    stats['false_neg'] = len(df.loc[has_gt & (df['pd'] == 0)]) # FN
    stats['false_pos'] = len(df.loc[(df['gt'] == 0) & has_pd])
    stats['true_pos'] = len(df.loc[has_gt & has_pd]) # TP

    # Object Detection Stats
    stats['precision'] = safe_div(stats['true_pos'], stats['predicted_pos'])
    stats['recall'] = safe_div(stats['true_pos'], stats['population'])
    stats['f1'] = safe_div(2*(stats['precision']*stats['recall']), stats['precision'] + stats['recall'])

    if save:
        od_df = pd.DataFrame(stats.values(), index=stats.keys(), columns=[f'{train_noise}_{test_noise}'])
        od_df.to_csv(f'{path}/Object_Detection_Stats_{train_noise}_{test_noise}_{scan_rates}.csv', index=True)
    return stats

# calculates stats related to classification results
def class_stats(df, save=False, path=None, train_noise=0, test_noise=0, scan_rates=6):
    """Compute overall, per-event-count, per-neighbour-count and per-mechanism classification stats.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``file``, ``gt``, ``pd``, ``cls``, ``pcls``,
        ``match``, ``rank``, ``IoU`` and ``pcls_prob``. The columns ``count``
        (rows per file) and ``neighbors`` (0, 1 or 2) are ADDED to ``df`` in
        place.
    save : bool
        When true, write the stats to
        ``{path}/Overall_Stats_{train_noise}_{test_noise}_{scan_rates}.csv``.
    path : str or None
        Output directory, only used when ``save`` is true.
    train_noise, test_noise, scan_rates
        Only used to label the CSV column and file name.

    Returns
    -------
    dict
        Statistic name -> value, in a fixed insertion order. Ratios with a zero
        denominator are reported as 0.0 instead of raising ZeroDivisionError.

    Notes
    -----
    False positives for classes are only misclassifications; the caller is
    expected to have dropped background rows already.
    """
    def safe_div(numerator, denominator):
        return numerator / denominator if denominator else 0.0

    def f1_from(precision, recall):
        return safe_div(2 * (precision * recall), precision + recall)

    stats = {}
    stats['predicted_pos'] = len(df.loc[(df['pd'] == 1)]) # TP, FP
    stats['population'] = len(df.loc[df['gt'] == 1]) # total GT bounds
    stats['false_neg'] = len(df.loc[(df['gt'] == 1) & (df['pd'] == 0)]) # FN
    stats['false_pos'] = len(df.loc[(df['pd'] == 1) & (df['match'] == False)])
    stats['true_pos'] = len(df.loc[df['match']]) # Classification corrected TP
    stats['OD_matches'] = len(df.loc[(df['gt'] == 1) & (df['pd'] == 1)])

    # Classification Stats
    stats['overall_precision'] = safe_div(stats['true_pos'], stats['predicted_pos'])
    stats['overall_recall'] = safe_div(stats['true_pos'], stats['population'])
    stats['overall_f1'] = f1_from(stats['overall_precision'], stats['overall_recall'])

    def calc_f1(df, label, val):
        """F1 restricted to the rows where ``df[label] == val`` (0.0 if that subset is empty)."""
        lcl = df.loc[df[label] == val]
        true_pos = len(lcl.loc[lcl['match']])
        pp = len(lcl.loc[(lcl['pd'] == 1)])
        pop = len(lcl.loc[lcl['gt'] == 1])
        return f1_from(safe_div(true_pos, pp), safe_div(true_pos, pop))

    # Event Count F1s: 'count' is the number of rows sharing the row's file
    value_counts = df['file'].value_counts()
    df['count'] = df['file'].map(value_counts.get)
    for i in range(4):
        stats[f'f1_{i+1}_event'] = calc_f1(df, 'count', i+1)

    # Neighbor Count F1s: first/last row of a multi-row file has 1 neighbour,
    # interior rows have 2, rows alone in their file have 0
    df['neighbors'] = 0
    df.loc[(df['count'] >= 2) & ((df['rank'] == 0) | (df['rank'] == df['count']-1)), 'neighbors'] = 1
    df.loc[(df['count'] > 1) & (df['neighbors'] == 0), 'neighbors'] = 2
    for i in range(3):
        stats[f'f1_{i}_neighbors'] = calc_f1(df, 'neighbors', i)

    # Keeping here to make sure it differs from precision
    stats['stats_accuracy'] = safe_div(stats['true_pos'], stats['predicted_pos'] + stats['false_neg'])
    stats['acc_on_OD_TPs'] = safe_div(stats['true_pos'], stats['OD_matches'])

    # IoU
    stats['average_IoU'] = np.mean(df.loc[df['match']].IoU)

    # Prediction Confidence (pcls_prob == inf marks rows without a prediction)
    has_conf = df['pcls_prob'] < float('inf')
    stats['prediction_confidence'] = np.mean(df.loc[has_conf, 'pcls_prob'])
    stats['pred_conf_correct'] = np.mean(df.loc[(df['match']) & has_conf, 'pcls_prob'])
    stats['pred_conf_incorrect'] = np.mean(df.loc[(df['match'] == False) & has_conf, 'pcls_prob'])

    # Mech by mech things
    mechs = ['E','ECb','ECa','ECE','DISP','SR','T','ECP']
    prec_numerator = 0
    prec_denominator = 0
    rec_numerator = 0
    rec_denominator = 0
    for mech in mechs:
        # Mean IoU over correctly classified events of this mechanism (NaN when there are none)
        matched = df.loc[(df['cls'] == mech) & (df['match'])]
        stats[f'IoU_{mech}'] = np.mean(matched['IoU'])

        # Class precision. A mechanism with zero hits still contributes its
        # predictions to the micro-average denominator; skipping it would make
        # the cross-check below disagree with the overall figures.
        predicted = df.loc[df['pcls'] == mech]
        prec_hits = len(predicted.loc[predicted['cls'] == mech])
        stats[f'prec_{mech}'] = safe_div(prec_hits, len(predicted))
        prec_numerator += prec_hits
        prec_denominator += len(predicted)

        # Class recall
        actual = df.loc[df['cls'] == mech]
        rec_hits = len(actual.loc[actual['pcls'] == mech])
        stats[f'rec_{mech}'] = safe_div(rec_hits, len(actual))
        rec_numerator += rec_hits
        rec_denominator += len(actual)

        # Class f1
        stats[f'F1_{mech}'] = f1_from(stats[f'prec_{mech}'], stats[f'rec_{mech}'])

    # these are to double check that overall prec and rec align
    stats['overall_prec_2'] = safe_div(prec_numerator, prec_denominator)
    stats['overall_rec_2'] = safe_div(rec_numerator, rec_denominator)

    if save:
        cls_df = pd.DataFrame(stats.values(), index=stats.keys(), columns=[f'{train_noise}_{test_noise}'])
        cls_df.to_csv(f'{path}/Overall_Stats_{train_noise}_{test_noise}_{scan_rates}.csv', index=True)

    return stats

# adds info from og metadata files to results
def add_og_info(df):
    """Attach ``scaling`` and ``closest_redox`` columns looked up from the metadata dictionaries.

    The lookup key for a row is the last ``/``-separated component of ``file``
    with its final four characters removed, followed by ``str(rank)``.
    NOTE(review): this key format must match the keys stored in
    ``file_to_scaling`` / ``file_to_nn`` -- confirm against the script that
    builds those pickles.

    ``df`` is modified in place and also returned. Keys that are missing from
    a dictionary yield missing values. The key is built with vectorised string
    operations so that an empty frame (no errors of this kind) passes through
    instead of failing inside a row-wise ``apply``.
    """
    stems = df['file'].str.split('/').str[-1].str[:-4]
    keys = stems + df['rank'].astype(str)
    df['scaling'] = keys.map(file_to_scaling.get)
    df['closest_redox'] = keys.map(file_to_nn.get)
    return df

root = '230515_sr_results'

# Every to_csv below writes into one of these; create them up front so a fresh
# results folder does not fail on the first write.
for out_dir in ('rich_data', 'stats', 'errs'):
    os.makedirs(f'./{root}/{out_dir}', exist_ok=True)

for path_obj in os.scandir(f'./{root}/data'):
    file_path = path_obj.path
    file_name = path_obj.name
    print(file_name)
    details = file_name.split('_')
    # sr = 6
    # scan-rate count = last '_'-separated part of the file name, minus its 4-char extension
    sr = details[-1][:-4]
    train_noise = 0.01
    # train_noise = details[-1][:-4]
    test_noise = train_noise
    full_data = pd.read_csv(file_path)
    full_data = add_info(full_data, path_obj)

    # calculate object detection related information
    od_stats = obj_det_stats(full_data, save=True, path=f'./{root}/stats', train_noise=train_noise, test_noise=test_noise, scan_rates=sr)
    notdetected = full_data.loc[(full_data['gt'] == 1) & (full_data['pd'] == 0)].copy()
    notdetected.reset_index(drop=True, inplace=True)
    notdetected = add_og_info(notdetected)
    notdetected.to_csv(f'./{root}/errs/notdetected_{train_noise}_{test_noise}_{sr}.csv', index=False)

    # calculate overall (OD + classification) related information
    # Drop rows with neither a GT box nor a real prediction. The .copy() matters:
    # class_stats adds columns to the frame it receives, and writing into a
    # .loc slice is ambiguous (SettingWithCopyWarning).
    full_data = full_data.loc[~((full_data['gt'] == 0) & (full_data['pcls'] == 'phi'))].copy()
    cls_stats = class_stats(full_data, save=True, path=f'./{root}/stats', train_noise=train_noise, test_noise=test_noise, scan_rates=sr)
    misclasses = full_data.loc[(full_data['gt'] == 1) & (full_data['pd'] == 1) & (full_data['pcls'] != full_data['cls'])].copy()
    misclasses = misclasses.loc[misclasses['pcls'] != 'phi']
    misclasses.reset_index(drop=True, inplace=True)
    misclasses = add_og_info(misclasses)
    misclasses.to_csv(f'./{root}/errs/misclasses_{train_noise}_{test_noise}_{sr}.csv', index=False)

    print(file_name + ': completed')

scan_rate_3.txt
scan_rate_3.txt: completed
scan_rate_2.txt
scan_rate_2.txt: completed
scan_rate_1.txt
scan_rate_1.txt: completed
scan_rate_5.txt
scan_rate_5.txt: completed
scan_rate_4.txt
scan_rate_4.txt: completed
scan_rate_6.txt
scan_rate_6.txt: completed


In [132]:
import os, pandas as pd, pickle

# Builds the "<File><rank>" -> closest-neighbour-distance and -> scaling
# dictionaries from the original report files and pickles them where the
# analysis cells read them from (./useful_dicts).

path = '/Volumes/LaCie/20230310/Reports'
file_to_neighbor = {}
file_to_scaling = {}
for obj in os.scandir(path):
   if not obj.name.startswith('info'):
      continue
   df = pd.read_csv(obj.path)

   # Position of each event within its file.
   # NOTE(review): the neighbour distances below use shift(), so rows of one
   # file are assumed to be contiguous and ordered left to right -- confirm.
   df['rank'] = df.groupby('File').cumcount()

   # number of events in the row's file
   df['eventcount'] = df['File'].map(df['File'].value_counts().get)

   # rows that have a neighbour on the left / right within the same file
   left_mask = df['rank'] > 0
   right_mask = df['rank'] < df['eventcount']-1

   # gap to the previous event's right edge (inf when there is no left neighbour)
   df['leftneighbor'] = float('inf')
   df.loc[left_mask, 'leftneighbor'] = df['Merge_Left'] - df['Merge_Right'].shift(1)

   # gap to the next event's left edge (inf when there is no right neighbour)
   df['rightneighbor'] = float('inf')
   df.loc[right_mask, 'rightneighbor'] = df['Merge_Left'].shift(-1) - df['Merge_Right']

   # distance to the closest neighbour on either side
   df['closest_redox'] = df[['leftneighbor', 'rightneighbor']].min(axis=1)

   df['key'] = df['File'] + df['rank'].astype(str)
   file_to_neighbor.update(df[['key', 'closest_redox']].set_index('key')['closest_redox'].to_dict())
   file_to_scaling.update(df[['key', 'Scaling']].set_index('key')['Scaling'].to_dict())

# The other cells load these from ./useful_dicts, so write them there instead
# of into the working directory.
os.makedirs('./useful_dicts', exist_ok=True)
with open('./useful_dicts/file_to_nearest_neighbor.pkl', 'wb') as f:
   pickle.dump(file_to_neighbor, f)
with open('./useful_dicts/file_to_scaling.pkl', 'wb') as f:
   pickle.dump(file_to_scaling, f)

In [3]:
import os, pickle, pandas as pd, matplotlib.pyplot as plt

def plot_trends(data, xlabel, ylabel, zlabel, labels, title):
   """Plot ``ylabel`` against ``xlabel`` as one line per value of ``zlabel`` and save the figure.

   Parameters
   ----------
   data : pandas.DataFrame
      Must contain the columns ``xlabel``, ``ylabel`` and ``zlabel``.
   xlabel, ylabel, zlabel : str
      Column names for the x axis, the y axis and the line grouping.
   labels : iterable
      Values of ``zlabel`` to draw. Values with no rows in ``data`` are skipped.
   title : str
      Output path handed to ``savefig`` (the plot title is generated).
   """
   for label in labels:
      cur = data.loc[data[zlabel] == label]
      if cur.empty:
         # zip(*[]) below cannot be unpacked into x, y -- nothing to draw for this label
         continue
      # going through a dict keeps one y per x (the last one wins on duplicates)
      cur = cur[[xlabel, ylabel]].set_index(xlabel)[ylabel].to_dict()
      x, y = zip(*sorted(cur.items()))
      plt.plot(x, y, label=label)
   plt.title(f'{ylabel} vs. {xlabel}')
   plt.xlabel(xlabel)
   plt.ylabel(ylabel)
   plt.legend(title=zlabel)
   plt.savefig(title)
   plt.clf()

def noise_box_plots(type, train_noise, file):
   """Box-plot ``scaling`` and ``closest_redox`` of one error type across test-noise levels.

   Reads ``./230505_noise_results/errs/{type}_{train_noise}_{test_noise}_6.csv``
   for every test-noise level and writes one PNG per column to
   ``{file}_{train_noise}_{label}.png``. Infinite values are left out.
   """
   test_noises = [0.0, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0]

   def finite_values(test_noise, column):
      errs = pd.read_csv(f'./230505_noise_results/errs/{type}_{train_noise}_{test_noise}_6.csv')
      return errs.loc[errs[column] < float('inf'), column]

   for label in ('scaling', 'closest_redox'):
      per_level = [finite_values(level, label) for level in test_noises]

      plt.boxplot(per_level, labels=test_noises, showmeans=True, meanline=True)
      plt.title(f'{type} Events versus Test Noise, Train Noise {train_noise}')
      plt.ylabel(label)
      plt.xlabel('Test Noise')
      plt.savefig(f'{file}_{train_noise}_{label}.png', dpi=200)
      plt.clf()

def sr_box_plots(type, file):
   """Box-plot ``scaling`` and ``closest_redox`` of one error type across scan-rate counts.

   Reads ``./230505_sr_results/errs/{type}_0.01_0.01_{sr}.csv`` for
   ``sr`` in 1..6 and writes one PNG per column to ``{file}_{label}.png``.
   Infinite values are left out.

   NOTE(review): the input folder is hard-coded and differs from the
   ``230515_sr_results`` root used by the stats cell -- confirm which is meant.
   """
   # The error files read here are the train=test=0.01 runs. Defined locally so
   # the title no longer depends on a global left behind by another cell.
   train_noise = 0.01
   labels = ['scaling', 'closest_redox']
   scan_rates = [1,2,3,4,5,6]
   for label in labels:
      data = []
      for sr in scan_rates:
         misclasses = pd.read_csv(f'./230505_sr_results/errs/{type}_{train_noise}_{train_noise}_{sr}.csv')
         misclasses = misclasses.loc[misclasses[label] < float('inf')]
         data.append(misclasses[label])
      plt.boxplot(data, labels=scan_rates, showmeans=True, meanline=True)
      plt.title(f'{type} Events Scan Rate Count, Train Noise {train_noise}')
      plt.ylabel(label)
      plt.xlabel('Number Scan Rates')
      plt.savefig(f'{file}_{label}.png', dpi=200)
      plt.clf()

def make_table(data, title):
   """Render the first two columns of ``data`` as a Metric/Value table image.

   Parameters
   ----------
   data : pandas.DataFrame
      Column 0 holds the metric names, column 1 the values.
   title : str
      Output path handed to ``savefig``.

   Notes
   -----
   Sets ``figure.figsize`` and ``figure.autolayout`` in ``plt.rcParams``;
   those settings persist after the call.
   """
   plt.rcParams["figure.figsize"] = [7.00, 3.50]
   plt.rcParams["figure.autolayout"] = True
   fig, axs = plt.subplots(1, 1)
   fig.set_figheight(10)
   axs.axis('off')
   text = list(zip(data.iloc[:, 0], data.iloc[:, 1]))
   axs.table(cellText=text, colLabels=['Metric', 'Value'], loc='center')
   fig.savefig(title, dpi=150)
   # A new figure is created per call; close it so loops over many stats files
   # do not pile up open figures.
   plt.close(fig)

def plot_distributions(og_data, data, label, title):
   """Overlay the histogram of an error subset on the histogram of the full population.

   Parameters
   ----------
   og_data : dict
      Values for the whole data set; only ``og_data.values()`` is used.
   data : pandas.DataFrame
      Error rows; the column ``label`` is plotted.
   label : str
      Column of ``data`` to plot.
   title : str
      Output path handed to ``savefig``.

   Infinite values are dropped from both inputs. The two histograms use
   separate y axes (``twinx``), so their heights are not directly comparable.
   """
   og_data = [d for d in og_data.values() if d < float('inf')]
   data = data.loc[data[label] < float('inf')]
   fig, ax1 = plt.subplots()
   # og_data is the full population and data the error subset; the legend
   # labels were previously the wrong way round.
   ax1.hist(og_data, bins=10, color='blue', label='all')
   ax2 = ax1.twinx()
   ax2.hist(data[label], bins=10, color='red', alpha=0.5, label='errs')
   fig.legend(loc="upper right", bbox_to_anchor=(1,1), bbox_transform=ax1.transAxes)
   fig.savefig(title)
   # close instead of clf(): one figure is created per call
   plt.close(fig)

def do_general(base):
   """Produce confusion matrices, stats tables and error distributions for one results folder.

   For every file in ``{base}/rich_data`` two confusion matrices are saved
   (with and without the 'phi' class); every file in ``{base}/stats`` becomes a
   table image; every file in ``{base}/errs`` gets a scaling and a
   closest_redox distribution plot. All images go to ``{base}/figures``.
   """
   figures = f'{base}/figures'
   all_mechs = ['E','ECb','ECa','ECE','DISP','SR','T','ECP', 'phi']

   for entry in os.scandir(f'{base}/rich_data'):
      stem = entry.name[:-4]
      rich = pd.read_csv(entry.path)
      full_cm = confusion_matrix(rich, all_mechs)
      plot_conf_mat(rich, full_cm, all_mechs, f'{figures}/{stem}.png')

      # same again restricted to rows where something was predicted
      predicted_only = rich.loc[rich['pcls'] != 'phi']
      real_mechs = all_mechs[:-1]
      reduced_cm = confusion_matrix(predicted_only, real_mechs)
      plot_conf_mat(predicted_only, reduced_cm, real_mechs, f'{figures}/{stem}_no_phi.png')

   for entry in os.scandir(f'{base}/stats'):
      make_table(pd.read_csv(entry.path), f'{figures}/{entry.name[:-4]}_table.png')

   with open('./useful_dicts/file_to_nearest_neighbor.pkl', 'rb') as nn_handle:
      all_closest_redox = pickle.load(nn_handle)
   with open('./useful_dicts/file_to_scaling.pkl', 'rb') as scaling_handle:
      all_scalings = pickle.load(scaling_handle)

   for entry in os.scandir(f'{base}/errs'):
      errs = pd.read_csv(entry.path)
      stem = entry.name[:-4]
      plot_distributions(all_scalings, errs, 'scaling', f'{figures}/{stem}_scaling.png')
      plot_distributions(all_closest_redox, errs, 'closest_redox', f'{figures}/{stem}_closest_redox.png')

def do_noise(base):
   """Plot noise-sweep trends and error box plots for one results folder.

   Every CSV in ``{base}/stats`` is read once. Its train noise, test noise and
   scan-rate count are taken from the last three ``_``-separated parts of the
   file name. Files whose name starts with 'Object' feed the object-detection
   table, all others the classification table. Trend plots and box plots are
   written to ``{base}/figures``.
   """
   od_records = []
   cls_records = []
   for file in os.scandir(f'{base}/stats'):
      if not file.name.endswith('.csv'):
         # e.g. hidden files dropped in by the OS; their names cannot be parsed below
         continue
      details = file.name.split('_')
      stats_df = pd.read_csv(file.path)
      # Keyed by metric name rather than by position, so every value lands in
      # its own column even if two files list their metrics in a different order.
      record = dict(zip(stats_df.iloc[:, 0], stats_df.iloc[:, 1]))
      record['train_noise'] = float(details[-3])
      record['test_noise'] = float(details[-2])
      record['scan_rates'] = int(details[-1][:-4])
      if file.name.startswith('Object'):
         od_records.append(record)
      else:
         cls_records.append(record)
   od_df = pd.DataFrame(od_records)
   cls_df = pd.DataFrame(cls_records)

   train_noises = [0.0, 0.01, 0.05, 0.1, 0.2]
   od_metrics = ['false_neg', 'false_pos', 'f1']
   for metric in od_metrics:
      title = f'{base}/figures/{metric}_od.png'
      plot_trends(od_df, 'test_noise', metric, 'train_noise', train_noises, title)
   cls_metrics = ['overall_f1', 'average_IoU', 'acc_on_OD_TPs', 'prediction_confidence']
   for metric in cls_metrics:
      title = f'{base}/figures/{metric}_cls.png'
      plot_trends(cls_df, 'test_noise', metric, 'train_noise', train_noises, title)

   err_types = ['misclasses', 'notdetected']
   train_noises = [0.01, 0.05]
   for err_type in err_types:
      for train_noise in train_noises:
         title = f'{base}/figures/{err_type}'
         noise_box_plots(err_type, train_noise, title)

def do_sr(base):
   """Plot scan-rate trends and error box plots for one results folder.

   Every CSV in ``{base}/stats`` is read once. The scan-rate count is taken
   from the last ``_``-separated part of the file name; train and test noise
   are fixed at 0.01. Files whose name starts with 'Object' feed the
   object-detection table, all others the classification table. Trend plots
   and box plots are written to ``{base}/figures``.
   """
   train_noise = 0.01
   test_noise = 0.01
   od_records = []
   cls_records = []
   for file in os.scandir(f'{base}/stats'):
      if not file.name.endswith('.csv'):
         # e.g. hidden files dropped in by the OS; their names cannot be parsed below
         continue
      details = file.name.split('_')
      stats_df = pd.read_csv(file.path)
      # Keyed by metric name rather than by position, so every value lands in
      # its own column even if two files list their metrics in a different order.
      record = dict(zip(stats_df.iloc[:, 0], stats_df.iloc[:, 1]))
      record['train_noise'] = train_noise
      record['test_noise'] = test_noise
      record['scan_rates'] = int(details[-1][:-4])
      if file.name.startswith('Object'):
         od_records.append(record)
      else:
         cls_records.append(record)
   od_df = pd.DataFrame(od_records)
   cls_df = pd.DataFrame(cls_records)

   od_metrics = ['f1', 'precision', 'recall']
   for metric in od_metrics:
      title = f'{base}/figures/{metric}_od.png'
      plot_trends(od_df, 'scan_rates', metric, 'train_noise', [0.01], title)
   cls_metrics = ['overall_f1', 'average_IoU', 'prediction_confidence', 'false_neg', 'false_pos']
   for metric in cls_metrics:
      title = f'{base}/figures/{metric}_cls.png'
      plot_trends(cls_df, 'scan_rates', metric, 'train_noise', [0.01], title)

   sr_box_plots('notdetected', f'{base}/figures/notdetected')
   sr_box_plots('misclasses', f'{base}/figures/misclasses')

# Walk the result folders next to the notebook (names starting with '23') and
# run the analysis that matches each folder's name.
for obj in os.scandir(r'./'):
   # plain files that happen to start with '23' are not result folders
   if not (obj.is_dir() and obj.name.startswith('23')):
      continue

   base = obj.path
   os.makedirs(f'{base}/figures', exist_ok=True)

   # Only the scan-rate analysis is switched on; uncomment a call to re-run the others.
   if 'general' in obj.name:
      pass
      # do_general(base)

   if 'noise' in obj.name:
      pass
      # do_noise(base)

   if 'sr' in obj.name:
      do_sr(base)

<Figure size 432x288 with 0 Axes>

In [11]:
# for path_obj in os.scandir('.'):
#     if not path_obj.name.endswith('csv'):
#         continue
#     file_path = path_obj.path
#     file_name = path_obj.name
#     details = file_name.split('_')
#     train_noise = details[2]
#     test_noise = details[3][:-4]
#     print(file_name)
#     if os.path.isfile(f'./230505_noise_results/noise_errs/notdetected_{train_noise}_{test_noise}.csv'):
#         continue
#     full_data = pd.read_csv(file_path)
#     full_data = add_info(full_data, path_obj)
#     od_stats = obj_det_stats(full_data)
#     cls_stats = class_stats(full_data, file_name)
#     od_df = pd.DataFrame(od_stats.values(), index=od_stats.keys(), columns=[f'{train_noise}_{test_noise}'])
#     od_df.to_csv(f'./230505_noise_results/noise_stats/Object_Detection_Stats_{train_noise}_{test_noise}.csv', index=True)
#     cls_df = pd.DataFrame(cls_stats.values(), index=cls_stats.keys(), columns=[f'{train_noise}_{test_noise}'])
#     cls_df.to_csv(f'./230505_noise_results/noise_stats/Overall_Stats_{train_noise}_{test_noise}.csv', index=True)
#     with open('file_to_meta.pkl', 'rb') as f:
#         file_to_meta = pickle.load(f) 
#     # note: background dropped from full_data at this point
#     misclasses = full_data.loc[(full_data['gt'] == 1) & (full_data['pd'] == 1) & (full_data['pcls'] != full_data['cls'])].copy()
#     misclasses.reset_index(drop=True, inplace=True)
#     notdetected = full_data.loc[(full_data['gt'] == 1) & (full_data['pd'] == 0)].copy()
#     notdetected.reset_index(drop=True, inplace=True)
#     misclasses = add_og_info(misclasses)
#     notdetected = add_og_info(notdetected)
#     notdetected.to_csv(f'./230505_noise_results/noise_errs/notdetected_{train_noise}_{test_noise}.csv', index=False)
#     misclasses.to_csv(f'./230505_noise_results/noise_errs/misclasses_{train_noise}_{test_noise}.csv', index=False)
#     print(file_name + ': completed')

# noise = 0.1
# big_df = pd.DataFrame()
# for obj in os.scandir('./230505_noise_results/rich_data'):
    # details = obj.name.split('_')
    # train_noise = details[2]
    # test_noise = details[3]
    # if float(train_noise) != noise or float(test_noise) > noise:
    #     continue
    # data = pd.read_csv(obj.path)
    # data.loc[data['gt'] == 0, 'cls'] = 'phi'
    # data.loc[data['pcls'] == 'FN', 'pcls'] = 'phi'
    # data['noise'] = test_noise
    # big_df = pd.concat([big_df, data])
# conf_mat, labels = confusion_matrix(big_df)
# plot_conf_mat(big_df, conf_mat, labels, f'{noise}_results_confmat.png')
import pandas as pd, matplotlib.pyplot as plt, pickle, numpy as np, os
import matplotlib.patches as mpatches
# Draw sample voltammograms with their ground-truth and predicted boxes.
df = pd.read_csv('./230510_general_results/data/result_noise_final_0.01.txt')
files = list(df['file'])
# number of '_'-separated name parts -> bucket number
# NOTE(review): presumably the bucket is the number of events in the file -- confirm.
indexes = {5:1, 7:2, 9:3, 11:4}
all_files = [set() for _ in range(4)]
for file in files:
    count = len(file.split('_'))
    all_files[indexes[count]-1].add(file)

samples = []
for subset in all_files:
    # up to 15 arbitrary files per bucket; popping from an exhausted set would raise KeyError
    samples.append([subset.pop() for _ in range(min(15, len(subset)))])

data_path = '/Volumes/LaCie/20230310/Generated_Data'
results = pd.read_csv('./230510_general_results/rich_data/result_noise_final_0.01_corrected.csv')
# makedirs also creates the parent folder and tolerates an existing one
os.makedirs('../Graphs_w_Predictions/onlygt', exist_ok=True)
for count, lst in enumerate(samples):
    # only the last bucket is plotted at the moment
    if count < 3:
        continue
    # if not os.path.isdir(f'../Graphs_w_Predictions/{count+1}'):
    #     os.mkdir(f'../Graphs_w_Predictions/{count+1}')
    for file in lst:
        print(file)
        # NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
        with open(f'{data_path}/{file}', 'rb') as f:
            raw_data = pickle.load(f)

        # rescale the potential axis so its maximum is 1000
        max_V = raw_data.loc[:, 'V'].max(axis=0)
        shrink_factor = 1000/max_V
        raw_data['V'] = shrink_factor*raw_data['V']
        len_0 = len(results.loc[results['file'] == file])
        # keep rows that have both a ground-truth class and a predicted class;
        # reset_index returns a fresh frame instead of mutating a slice of `results`
        cur = results.loc[(results['file'] == file) & (results['cls'] != 'phi') & (results['pcls'] != 'phi')].reset_index(drop=True)
        len_1 = len(cur)
        if len_1 == 0:
            continue
        noise_mag = set(cur['noise_mag']).pop()
        noise = noise_mag*np.random.randn(len(raw_data))
        raw_data['A'] += noise

        colors = ['#344499' ,'#3A54A1', '#4161AA', '#4474B6', '#4A9AD2', '#42C2EE']
        for j, v in enumerate(raw_data['v'].unique()):
            t = raw_data.loc[raw_data['v'] == v]
            # wrap around instead of raising IndexError when there are more than six distinct 'v' values
            plt.plot(t['V'], t['A'], c=colors[j % len(colors)], linewidth=3)


        max_height = raw_data.loc[:, 'A'].max(axis=0)
        min_height = raw_data.loc[:, 'A'].min(axis=0)
        overall = max_height-min_height

        title_parts = []
        for i in range(len(cur)):
            lcl_chunk = f'{cur.loc[i, "cls"]}, {cur.loc[i, "pcls"]} {round(100*cur.loc[i, "pcls_prob"], 1)}'
            title_parts.append(lcl_chunk)
            lgt, rgt = cur.loc[i, 'lgt'], cur.loc[i, 'rgt']
            lpd, rpd = cur.loc[i, 'lpd'], cur.loc[i, 'rpd']
            # ground-truth box (maroon)
            rect=mpatches.Rectangle((lgt,min_height+0.02*overall),(rgt-lgt),0.96*overall,
                            fill = False,
                            color = "maroon",
                            linewidth = 2,
                            linestyle='dashed',
                            zorder=i+10)
            plt.gca().add_patch(rect)
            # predicted box (red)
            rect=mpatches.Rectangle((lpd,min_height+0.02*overall),(rpd-lpd),0.96*overall,
                fill = False,
                linestyle='dashed',
                color = "red",
                linewidth = 2,
                zorder=i+10)
            plt.gca().add_patch(rect)

        title_parts.append(f'{len_0}_{len_1}')
        title = "–".join(title_parts)
        plt.title(title,fontdict={'size':10})
        img_name = file.split('/')[-1][:-4]
        plt.savefig(f'../Graphs_w_Predictions/onlygt/{img_name}.png', transparent=True, dpi=600)
        #plt.show()
        plt.clf()
results

20230310_30000/EC_2035_E_1403_EC_1759_T_50650_153329960_data
20230310_80000/SR_2941_E_513_SR_521_EC_2074_223909189_data
20230310_15000/EC_442_EC_419_E_10120_T_5003_133735326_data
20230310_30000/ECE_3327_E_9177_DISP_812_SR_537_155544437_data
20230310_20000/T_3252_E_8136_SR_4608_E_9365_143106806_data
20230310_5000/ECP_9917_EC_1046_E_3047_E_2538_120647220_data
20230310_70000/E_3004_EC_2331_CE_5281_E_7867_212224192_data
20230310_25000/EC_2299_DISP_1374_E_4303_SR_4492_150917804_data
20230310_35000/E_11855_DISP_12500_ECE_12870_DISP_9902_160629568_data
20230310_70000/E_5695_EC_3029_SR_4284_DISP_5352_212832161_data
20230310_5000/SR_3613_ECE_6115_EC_1380_EC_1029_12060265_data
20230310_75000/DISP_10781_E_12871_EC_3445_SR_270_214233823_data
20230310_20000/ECP_51705_E_6807_DISP_8510_E_1820_141959582_data
20230310_10000/EC_2106_ECE_5470_SR_250_EC_2879_131825487_data
20230310_5000/E_1470_SR_4909_ECE_10555_EC_3161_122048441_data


Unnamed: 0,file,gt,lgt,rgt,cls,pd,lpd,rpd,bg,E,...,ECP,DISP,SR,T,noise_mag,rank,pcls,match,IoU,pcls_prob
0,20230310_10000/ECP_5181_SR_2388_E_11102_124834...,1,39.0,178.0,ECP,1,38.860275,176.480865,0.001,0.000,...,0.996,0.000,0.000,0.000,0.006779,0,ECP,True,0.988078,0.996
1,20230310_10000/ECP_5181_SR_2388_E_11102_124834...,1,333.0,476.0,SR,1,331.668121,475.545807,0.002,0.001,...,0.001,0.001,0.994,0.001,0.006779,1,SR,True,0.987625,0.994
2,20230310_10000/ECP_5181_SR_2388_E_11102_124834...,1,668.0,883.0,E,1,667.826843,882.961609,0.000,0.992,...,0.000,0.001,0.001,0.001,0.006779,2,E,True,0.999017,0.992
3,20230310_5000/ECP_3213_E_5131_CE_1612_12073566...,1,19.0,135.0,ECP,1,19.239174,136.436768,0.002,0.000,...,0.985,0.001,0.001,0.001,0.005818,0,ECP,True,0.985729,0.985
4,20230310_5000/ECP_3213_E_5131_CE_1612_12073566...,1,276.0,484.0,E,1,265.285461,473.615540,0.002,0.991,...,0.000,0.001,0.001,0.001,0.005818,1,E,True,0.903532,0.991
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21246,20230310_45000/ECE_6013_E_3579_EC_1198_SR_3177...,1,526.0,672.0,ECb,1,521.102600,671.911560,0.090,0.012,...,0.003,0.140,0.002,0.003,0.006441,2,ECb,True,0.966959,0.747
21247,20230310_45000/ECE_6013_E_3579_EC_1198_SR_3177...,1,745.0,841.0,SR,1,745.793579,841.146362,0.001,0.001,...,0.001,0.001,0.994,0.001,0.006441,3,SR,True,0.990224,0.994
21248,20230310_80000/ECE_7036_SR_1306_DISP_5902_2216...,1,168.0,549.0,ECE,1,163.029099,552.245300,0.000,0.001,...,0.000,0.001,0.000,0.001,0.007737,0,ECE,True,0.978890,0.995
21249,20230310_80000/ECE_7036_SR_1306_DISP_5902_2216...,1,632.0,685.0,SR,1,632.024719,684.597107,0.001,0.001,...,0.000,0.000,0.998,0.000,0.007737,1,SR,True,0.991932,0.998


<Figure size 432x288 with 0 Axes>

In [37]:
import pickle, matplotlib.pyplot as plt, numpy.random as rand
# One example data set drawn at eight noise levels on a 4x2 grid.
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open('/Volumes/LaCie/20230310/Generated_Data/20230310_20000/ECP_51705_E_6807_DISP_8510_E_1820_141959582_data', 'rb') as f:
    data = pickle.load(f)

noise_levels = [0.0, 0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1.0]
fig, ax = plt.subplots(4,2)
# Resize first: tight_layout() computes the spacing for the size the figure has
# when it is called.
fig.set_figheight(8)
for i, lvl in enumerate(noise_levels):
    row, col = divmod(i, 2)
    noise = lvl*rand.randn(len(data))
    cur = data.copy()
    cur['A'] += noise
    ax[row, col].scatter(cur['V'], cur['A'], c=cur['v'], s=0.01)
    ax[row, col].set_xticks([])
    ax[row, col].set_yticks([])
    ax[row, col].set_title(f'Noise Level: {lvl}')
fig.tight_layout(pad=0)
fig.savefig('./noise_examples/ECP_51705_E_6807_DISP_8510_E_1820_141959582_data_all.png', dpi=300)
plt.close(fig)

<Figure size 432x576 with 0 Axes>