## Random Policy

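This notebook analyses the errors of person detections obtained with a random tile-selection policy, and compares them with the detections of the learned policy and with the ground-truth counts.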

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from math import sqrt, ceil, floor
%matplotlib inline
from sklearn.metrics import mean_absolute_error, mean_squared_error
from icecream import ic

In [3]:
import pandas as pd
# Print the installed pandas version so the environment that produced the outputs is recorded.
print(pd.__version__)

1.3.5


#### 0- Function to calculate errors: Mean Absolute Error, Mean Squared Error, Difference, and Absolute Difference between two series.

In [4]:
def calculate_metrics(df_ground : pd.DataFrame, df_calculated : pd.DataFrame, estimated_column_name: str) -> dict:
    """Compare ground-truth counts with estimated counts, matched by timestamp.

    Parameters
    ----------
    df_ground : pd.DataFrame
        Must contain a 'timestamp' column and a 'count' column (the true values).
    df_calculated : pd.DataFrame
        Must contain a 'timestamp' column and the column named by
        `estimated_column_name` (the estimated values).
    estimated_column_name : str
        Name of the column in `df_calculated` that holds the estimates.

    Returns
    -------
    dict
        "mae": mean of the absolute differences,
        "mse": mean of the squared differences,
        "all_absolute-errors": Series of |true - estimated| indexed by timestamp,
        "all_difference": Series of (true - estimated) indexed by timestamp.

    Notes
    -----
    All four results are derived from the same timestamp-aligned series, so
    "mae" is always the mean of "all_absolute-errors". Timestamps present in
    only one of the frames yield NaN in the two series and are skipped by the
    means; if no timestamp is shared, "mae" and "mse" are NaN. Extra columns in
    either frame are ignored.
    """
    # Select only the two value columns and index them by timestamp so that the
    # subtraction below pairs rows by time rather than by row position.
    true_counts = df_ground.set_index('timestamp')["count"].sort_index()
    estimated_counts = df_calculated.set_index('timestamp')[estimated_column_name].sort_index()

    difference_all = true_counts - estimated_counts
    ae_all = abs(difference_all)

    # Computed from the aligned differences (not from the raw frames) so the
    # summary metrics always agree with the per-timestamp series returned below.
    mae = ae_all.mean()
    mse = (difference_all ** 2).mean()

    return {
        "mae": mae,
        "mse": mse,
        "all_absolute-errors": ae_all,
        "all_difference": difference_all
    }

In [85]:
def percentage(part, whole):
    """Return `part` expressed as a percentage of `whole`.

    Both arguments are converted with float(), so ints, numpy scalars and
    numeric strings are accepted. When `whole` is zero the share is undefined
    and float('nan') is returned instead of raising ZeroDivisionError, so one
    empty denominator does not abort the analysis loop.
    """
    denominator = float(whole)
    if denominator == 0:
        return float("nan")
    return 100 * float(part) / denominator


### Error Analysis and Generate Plots

#### 1- Main function

In [114]:
## Code for plots of all policy percentages and cameras.
import seaborn as sn
#from pandas_ml import ConfusionMatrix
from pycm import *
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import cohen_kappa_score
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D

# Parameters

cams = ['jervskogen_1', 'jervskogen_2', 'nilsbyen_2', 'nilsbyen_3', 'skistua']
policy_percentages = [100, 80, 60, 40, 20]
image_num_grid_tiles = 64
# Month suffixes; the first `number_of_months_to_include` of them are concatenated into `all_months`.
MONTHS = ['_2021-12', '_2022-01', '_2022-02', '_2022-03']
number_of_months_to_include = 4
DRAW_SUBFIGURES = False

# Analysed period
start_date = '2021-12-01'  # first day of the period
end_date = '2022-04-01'    # day after the last day of the period

# File locations
database_path = '../data/datasets/'
exp_name_folder_random = 'Experiment_operational_random/'
exp_name_folder_learned = 'Experiment_operational_egreedy_with_alpha_0.5/'

# Single string made of the selected month suffixes, in order
all_months = ''.join(str(MONTHS[month_idx]) for month_idx in range(number_of_months_to_include))

# Containers for the loaded data frames
data_ground = {}               # ground-truth data frames
data_policy_random = {}        # random-policy data frames
data_policy_learned = {}       # learned-policy data frames
data_policy_temp_random = {}
data_policy_temp_learned = {}

# Containers for the error values of all policies of all cameras
mean_dae_cams_policy_random = {}
mean_dae_cams_policy_learned = {}


# Iterate over all cameras/devices

# Plot styling per camera. Defined once, before the loop, because it does not
# depend on the camera being processed.
cam_markers = {
    "jervskogen_1": "^",
    "jervskogen_2": "v",
    "nilsbyen_2":   "*",
    "nilsbyen_3":   "8",
    "skistua":      ".",
    "ronningen_1":  "s"       # was "S", which is not a valid matplotlib marker
}
cam_lines = {
    "jervskogen_1": "dashed",
    "jervskogen_2": "solid",
    "nilsbyen_2":   "dashed",
    "nilsbyen_3":   "solid",
    "skistua":      "solid",
    "ronningen_1":  "solid"   # was "S", which is not a valid linestyle; NOTE(review): confirm intended style
}


def _read_counts_in_period(csv_path):
    """Read a counts CSV and keep the rows with start_date <= timestamp < end_date."""
    df_all = pd.read_csv(csv_path, parse_dates=['timestamp'])
    return df_all[(df_all['timestamp'] >= start_date) & (df_all['timestamp'] < end_date)]


def _policy_csv_stem(exp_name_folder, cam, policy_percentage):
    """Build the path of a policy result file, relative to the 'local' folder and without '.csv'."""
    return exp_name_folder + '/' + cam + '_all_timestamps_count_' + '_policy_' + str(policy_percentage) + '_tiles_' + str(image_num_grid_tiles) + "_MONTHS" + all_months


for cam in cams:
    # Ground truth for this camera, restricted to the analysed period
    data_ground[cam] = _read_counts_in_period(database_path + 'local/all/' + cam + '_all_timestamps_count' + '.csv')
    data_ground[cam] = data_ground[cam][['timestamp', 'count']]

    total_person_detected_ground = data_ground[cam]["count"].sum()

    # For error Random (reset for every camera)
    mae_policy_random = dict()
    mse_policy_random = dict()
    dae_policy_random = dict()

    # For error Learned (reset for every camera)
    mae_policy_learned = dict()
    mse_policy_learned = dict()
    dae_policy_learned = dict()

    # Iterate over all percentages (100, 80, 60, 40, 20)
    for policy_percentage in policy_percentages:

        policy_column_name = "count"

        ## Random
        # Read the policy csv file and select the samples from the required period.
        # database_path is used here as well so the data root is configured in one place.
        path_string_timestamps_random = _policy_csv_stem(exp_name_folder_random, cam, policy_percentage)
        data_policy_random[policy_percentage] = _read_counts_in_period(database_path + 'local/' + path_string_timestamps_random + '.csv')

        # Count total persons detected via policy
        total_person_detected_policy_random = data_policy_random[policy_percentage]["count"].sum()

        data_policy_temp_random[policy_percentage] = data_policy_random[policy_percentage][['timestamp', policy_column_name]]
        df_random = data_policy_temp_random[policy_percentage]
        # NOTE(review): this subtraction pairs rows by DataFrame index label (CSV row
        # number), not by timestamp; presumably the ground and policy files list the
        # same timestamps in the same order -- confirm.
        df_random_difference = data_ground[cam]["count"] - df_random["count"]

        #---------------------------------------------------#

        ## Learned
        path_string_timestamps_learned = _policy_csv_stem(exp_name_folder_learned, cam, policy_percentage)
        data_policy_learned[policy_percentage] = _read_counts_in_period(database_path + 'local/' + path_string_timestamps_learned + '.csv')

        # Count total persons detected via policy
        total_person_detected_policy_learned = data_policy_learned[policy_percentage]["count"].sum()

        data_policy_temp_learned[policy_percentage] = data_policy_learned[policy_percentage][['timestamp', policy_column_name]]
        df_learned = data_policy_temp_learned[policy_percentage]
        df_learned_difference = data_ground[cam]["count"] - df_learned["count"]

        #########################-------------------------#############################
        # Here we calculate the difference of person detected from all samples
        print ("\nCam = {} and Percentage = {}".format(cam, policy_percentage))
        print ("Total person detected Ground: ", total_person_detected_ground)

        # Random
        tp_random_missed = total_person_detected_ground - total_person_detected_policy_random
        tp_random_missed_percent = percentage(tp_random_missed, total_person_detected_ground)

        # Count Samples
        print ("Random\nTotal person detected Random with errors: " , total_person_detected_policy_random)
        random_total_samples = df_random["count"].count()
        print ("Total Samples: {}".format(random_total_samples))

        # np.where returns *positions*, so the matching rows must be selected with
        # .iloc. Plain [] indexing is label-based on an integer index and picks the
        # wrong rows (or raises KeyError) when the filtered index does not start at 0.
        random_indices_underestimated = np.where(df_random_difference > 0)
        random_indices_overestimated = np.where(df_random_difference < 0)
        random_indices_correct = np.where(df_random_difference == 0)

        random_underestimated_differences = df_random_difference.iloc[random_indices_underestimated[0]]
        random_overestimated_differences = df_random_difference.iloc[random_indices_overestimated[0]]
        random_correct_differences = df_random_difference.iloc[random_indices_correct[0]]

        #--With Samples Count--#
        # Underestimated
        random_underestimated_person_count = random_underestimated_differences.count()
        print ("Count\nUnderestimated count: {} and Percentage: {} ".format(random_underestimated_person_count,(random_underestimated_person_count/ random_total_samples)*100))

        # Overestimated/False Positives
        random_overestimated_person_count = random_overestimated_differences.count()
        print ("Overestimated/False positives count: {} and Percentage: {} ".format(random_overestimated_person_count,(random_overestimated_person_count/ random_total_samples)*100))

        # Correct
        random_correct_person_count = random_correct_differences.count()
        print ("Correct count: {} and Percentage: {} ".format(random_correct_person_count,(random_correct_person_count/ random_total_samples)*100))

        #--With Person Sum--#
        # Underestimated
        random_underestimated_person_sum = random_underestimated_differences.sum()
        print ("Sum\nUnderestimated sum: {} and Percentage: {} ".format(random_underestimated_person_sum,(random_underestimated_person_sum/ total_person_detected_ground)*100))

        # Overestimated/False Positives (differences are negative here, hence abs)
        random_overestimated_person_sum = abs(random_overestimated_differences.sum())
        print ("Overestimated/False positives sum: {} and Percentage: {} ".format(random_overestimated_person_sum,(random_overestimated_person_sum/ total_person_detected_ground)*100))

        # Correct: persons reported by the policy minus the falsely reported ones
        random_correct_detections = total_person_detected_policy_random - random_overestimated_person_sum
        print ("Correct sum: {} and Percentage: {} ".format(random_correct_detections,(random_correct_detections/ total_person_detected_ground)*100))

        # To store value and later make plot
        dae_policy_random[policy_percentage] = tp_random_missed_percent

        #---------------------------------------------------#

        # Learned
        print ("Learned\nTotal person detected Learned with errors: ", total_person_detected_policy_learned)
        tp_learned_missed = total_person_detected_ground - total_person_detected_policy_learned
        print ("Difference b/w Ground and Learned: ", (total_person_detected_ground - total_person_detected_policy_learned))
        tp_learned_missed_percent = percentage(tp_learned_missed, total_person_detected_ground)
        print ("Learned missed percentage: ", (tp_learned_missed_percent))

        # To store value and later make plot
        dae_policy_learned[policy_percentage] = tp_learned_missed_percent

        #########################-------------------------#############################
        # From here onwards we compare the ground truth with the policies (random and learned)

        # We consider samples where we detect the persons in the ground truth
        df_ground_wo = data_ground[cam].loc[data_ground[cam]['count'] != 0]

        # Then, we get only those timestamps from random policy array
        df_random_wo = df_random[df_random['timestamp'].isin(df_ground_wo["timestamp"].tolist())]
        df_difference_error_random = df_ground_wo["count"] - df_random_wo["count"]

        # Next, we get only those timestamps from learned policy array
        df_learned_wo = df_learned[df_learned['timestamp'].isin(df_ground_wo["timestamp"].tolist())]
        df_difference_error_learned = df_ground_wo["count"] - df_learned_wo["count"]

    # Here we store the missed percentage of all policy percentages for this camera.
    # Stored as lists (not dict views) so they can be plotted and indexed directly.
    mean_dae_cams_policy_random[cam] = list(dae_policy_random.values())
    mean_dae_cams_policy_learned[cam] = list(dae_policy_learned.values())

  
   
# Plot the missed-person percentage of every camera for both policies
plt.close()

# One x position per policy percentage. Taken from the configuration list
# instead of the dicts left over from the last loop iteration.
x = np.arange(len(policy_percentages))

plt.ylabel ('Missed no. of persons (%)') #Mean of Errors (Persons)
plt.xlabel ('Percentage of tiles send (%)')

for cam in cams:
    # list(...) accepts both lists and dict views, so the y values are always a plain sequence.
    # Random policy: dashed line, learned policy: solid line; the marker identifies the camera.
    plt.plot(x, list(mean_dae_cams_policy_random[cam]),  color = "black", marker = cam_markers[cam],linestyle= "dashed", linewidth=1, alpha = 0.8, mfc='none', label = cam + "R")
    plt.plot(x, list(mean_dae_cams_policy_learned[cam]),  color = "black", marker = cam_markers[cam],linestyle= "solid", linewidth=1, alpha = 0.8, mfc='none', label = cam)

plt.xticks (ticks = x, labels = list(policy_percentages), rotation = 0)

# Custom legend handles: one legend explains the line styles (policy),
# the other explains the markers (camera location).
custom_policy = [Line2D([0], [0],  linestyle = "dashed",color="black",  lw=1),
                Line2D([0], [0],  linestyle = "solid", color="black", lw=1)]
# Handles and labels are derived from `cams` so the legend always matches the plotted cameras.
custom_cams = [Line2D([0], [0],  marker = cam_markers[cam_name], mfc='none', color="black") for cam_name in cams]
custom_cam_labels = [cam_name.replace('_', ' ').capitalize() for cam_name in cams]

leg1 = plt.legend(custom_policy, ['Random', 'Learned'], ncol=1, loc='upper center',title= "Policy",  fancybox=False, shadow=False, frameon=False, bbox_to_anchor=(0.44, 1)) #(0.87, 1) cohen kappa
leg2 = plt.legend(custom_cams, custom_cam_labels, title= "Locations", loc='upper right', fancybox=False, shadow=False, frameon=False, bbox_to_anchor=(0.33, 1)) #(0.33, 0.5) cohen kappa
# The second plt.legend call replaces the first legend, so manually add the first legend back
plt.gca().add_artist(leg1)
plt.savefig(database_path + 'local/' + exp_name_folder_learned + '/' + 'figures/' + 'all_cams_difference_results' + '.pdf', dpi=300, format='pdf', bbox_inches='tight')
plt.show()





Cam = jervskogen_1 and Percentage = 100
Total person detected Ground:  4202
Random
Total person detected Random with errors:  4202
Total Samples: 13597
Underestimated count: 0 and Percentage: 0.0 
Overestimated/False positives count: 0 and Percentage: 0.0 
Correct count: 13597 and Percentage: 100.0 
Underestimated sum: 0 and Percentage: 0.0 
Overestimated/False positives sum: 0 and Percentage: 0.0 
Correct sum: 4202 and Percentage: 100.0 
Learned
Total person detected Learned with errors:  4202
Difference b/w Ground and Learned:  0
Learned missed percentage:  0.0

Cam = jervskogen_1 and Percentage = 80
Total person detected Ground:  4202
Random
Total person detected Random with errors:  3015
Total Samples: 13597
Underestimated count: 1164 and Percentage: 8.560711921747444 
Overestimated/False positives count: 208 and Percentage: 1.529749209384423 
Correct count: 12225 and Percentage: 89.90953886886814 
Underestimated sum: 1434 and Percentage: 34.12660637791528 
Overestimated/False pos

KeyboardInterrupt: 