In [1]:
import json
import pandas as pd
import numpy as np
import boto3

In [2]:
# Get list of .json files in "results" directory
s3 = boto3.client("s3")

bucket = "w210-poverty-mapper"
directory_path = "modeling/results/"

# A single list call returns at most 1,000 keys, so walk every page of the
# listing. Pages without a "Contents" entry (e.g. an empty prefix) contribute
# nothing instead of raising KeyError.
paginator = s3.get_paginator("list_objects_v2")
contents = [
    obj
    for page in paginator.paginate(Bucket=bucket, Prefix=directory_path)
    for obj in page.get("Contents", [])
]

directory_items = [obj["Key"] for obj in contents]

# Keep only keys that actually end in ".json" (not keys that merely contain it)
result_jsons = [key for key in directory_items if key.endswith(".json")]

In [3]:
# Build the combined results table: one row per (result file, epoch).

# Model-spec fields copied onto every output row
spec_fields = ["split_name", "num_classes", "bin_method", "pretrained", "freeze_layers", "epochs", "learning_rate",
               "gamma", "step_size", "batch_size", "num_workers"]

# Metrics stored once per result file
single_metrics_list = ["test_acc"]

# Metrics stored as a list with one value per epoch
multi_metrics_list = ["train_losses", "train_accs", "val_losses", "val_accs"]


def _confusion_metrics(cm, num_classes, prefix):
    """Derive summary metrics from a single confusion matrix.

    Parameters
    ----------
    cm : array-like
        Confusion matrix as read from the result file.
    num_classes : int
        Number of classes the model was trained with.
    prefix : str
        Name used to prefix the output columns ("train", "val" or "test").

    Returns
    -------
    (dict, bool)
        A mapping of column name -> value, and a flag that is True when
        ``cm`` is not ``num_classes x num_classes`` (every metric is NaN
        in that case).

    Notes
    -----
    Binary case: precision is ``cm[0, 0]`` over the sum of row 0, recall is
    ``cm[0, 0]`` over the sum of column 0, and f1 is their harmonic mean.
    Original author's note: FP and FN are reversed due to the order of
    arguments provided to the sklearn confusion matrix function in
    trainer.py, and zero is the positive case (trainer.py is not visible
    here -- TODO confirm).

    Multi-class case: per-class "accuracy" is the diagonal divided by the
    column sums.
    """
    cm = np.array(cm)
    # Comparing the whole shape tuple also rejects matrices that are not 2-D
    is_valid = cm.shape == (num_classes, num_classes)

    if num_classes == 2:
        if not is_valid:
            return {
                "{}_precision".format(prefix): np.nan,
                "{}_recall".format(prefix): np.nan,
                "{}_f1".format(prefix): np.nan,
            }, True

        # Zero denominators yield NaN/inf, same values as plain division;
        # errstate only silences the RuntimeWarning.
        with np.errstate(divide="ignore", invalid="ignore"):
            precision = cm[0, 0] / (cm[0, 0] + cm[0, 1])
            recall = cm[0, 0] / (cm[0, 0] + cm[1, 0])
            f1 = (2 * precision * recall) / (precision + recall)

        return {
            "{}_precision".format(prefix): precision,
            "{}_recall".format(prefix): recall,
            "{}_f1".format(prefix): f1,
        }, False

    if not is_valid:
        return {
            "{}_class_{}_accuracy".format(prefix, idx): np.nan
            for idx in range(num_classes)
        }, True

    with np.errstate(divide="ignore", invalid="ignore"):
        accuracies = cm.diagonal() / cm.sum(axis=0)

    return {
        "{}_class_{}_accuracy".format(prefix, idx): acc
        for idx, acc in enumerate(accuracies)
    }, False


# Rows are collected as plain dicts and turned into a DataFrame once at the
# end; concatenating inside the loop re-copies the whole table every time.
records = []

# `count` is the 1-based position of the file in result_jsons and becomes the run id
for count, file in enumerate(result_jsons, start=1):

    response = s3.get_object(Bucket=bucket, Key=file)
    result = json.loads(response["Body"].read())

    model_spec = result["model_spec_content"]
    num_classes = model_spec["num_classes"]

    # Values shared by every epoch row of this result file
    base_row = {field: model_spec[field] for field in spec_fields}
    for field in single_metrics_list:
        base_row[field] = result[field]

    # The test confusion matrix does not depend on the epoch: compute it once
    test_metrics, test_invalid = _confusion_metrics(result["test_cf"], num_classes, "test")

    for epoch in range(model_spec["epochs"]):
        row = dict(base_row)
        row["epoch"] = epoch

        # Get metrics with one record per epoch
        for field in multi_metrics_list:
            row[field] = result[field][epoch]

        # Set to 1 below if any of the train/val/test matrices has an unexpected shape
        row["any_cm_len_invalid"] = 0

        # Process train, val confusion matrices
        for cf in ["train", "val"]:
            metrics, invalid = _confusion_metrics(result["{}_cfs".format(cf)][epoch], num_classes, cf)
            row.update(metrics)
            if invalid:
                row["any_cm_len_invalid"] = 1

        # Attach the (epoch-independent) test metrics
        row.update(test_metrics)
        if test_invalid:
            row["any_cm_len_invalid"] = 1

        row["id"] = count

        records.append(row)

# Columns appear in order of first appearance across records; the index is a
# plain 0..n-1 range rather than a repeated 0.
results_df = pd.DataFrame(records)



In [4]:
# Total the invalid-confusion-matrix flags over all rows
invalid_flags = results_df["any_cm_len_invalid"].tolist()
sum(invalid_flags)

483

In [5]:
# Show only the rows flagged as having an invalid confusion matrix
has_invalid_cm = results_df["any_cm_len_invalid"] == 1
results_df.loc[has_invalid_cm]

Unnamed: 0,split_name,num_classes,bin_method,pretrained,freeze_layers,epochs,learning_rate,gamma,step_size,batch_size,...,id,train_precision,train_recall,train_f1,val_precision,val_recall,val_f1,test_precision,test_recall,test_f1
0,leave_one_out_tajikistan_5k_50d,2,across,resnet18,no,20,0.0010,0.1,10,448,...,287,0.143113,0.071187,0.095080,,,,0.0,,
0,leave_one_out_tajikistan_5k_50d,2,across,resnet18,no,20,0.0010,0.1,5,448,...,288,0.204713,0.128315,0.157751,,,,0.0,,
0,leave_one_out_tajikistan_5k_50d,5,across,resnet18,no,20,0.0010,0.1,10,448,...,303,,,,,,,,,
0,within_country_tajikistan_10k_50d,2,across,resnet18,no,10,0.0001,0.1,10,448,...,821,0.000000,,,,,,,,
0,within_country_tajikistan_10k_50d,2,across,resnet18,no,10,0.0001,0.1,10,448,...,821,,,,0.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,within_country_tajikistan_5k_50d,5,across,resnet18,no,20,0.0010,0.1,5,448,...,888,,,,,,,,,
0,within_country_tajikistan_5k_50d,5,across,resnet18,no,20,0.0010,0.1,5,448,...,888,,,,,,,,,
0,within_country_tajikistan_5k_50d,5,across,resnet18,no,20,0.0010,0.1,5,448,...,888,,,,,,,,,
0,within_country_tajikistan_5k_50d,5,across,resnet18,no,20,0.0010,0.1,5,448,...,888,,,,,,,,,


In [6]:
# Add split category column: the split name with its last two "_"-separated
# tokens removed (e.g. "within_country_tajikistan_5k_50d" -> "within_country_tajikistan").
# Series.map works on the one column directly instead of building a row object per record.
results_df["split"] = results_df["split_name"].map(lambda name: "_".join(name.split("_")[:-2]))

def extract_split_group(string):
    """Return the coarse split group for a split name.

    The group is the matching prefix ("leave_one_out" or "within_country");
    any other name falls into "similar geography".
    """
    for prefix in ("leave_one_out", "within_country"):
        if string.startswith(prefix):
            return prefix
    return "similar geography"

# Add split group column; map the function over the one column it needs
# rather than applying it row by row across the whole frame.
results_df["split_group"] = results_df["split_name"].map(extract_split_group)

In [7]:
# Best validation F1 reached by each run (id) within its split / num_classes / bin_method,
# stored as "max_val_f1" (input to the top-n indicator)
grouped = (
    results_df
    .groupby(["split", "num_classes", "bin_method", "id"])["val_f1"]
    .max()
    .reset_index()
    .rename(columns={"val_f1": "max_val_f1"})
)

def get_top_n(col, in_name, out_name, k):
    """Flag the rows whose ``in_name`` value is among the ``k`` largest.

    Parameters
    ----------
    col : pandas.DataFrame
        Frame (typically one groupby group) holding the ``in_name`` column.
    in_name : str
        Column to rank on.
    out_name : str
        Name of the 0/1 indicator column to add.
    k : int
        Number of top values to flag. If fewer than ``k`` non-missing values
        exist, all of them are flagged. ``k < 1`` flags nothing.

    Returns
    -------
    pandas.DataFrame
        A copy of ``col`` with ``out_name`` added. Rows whose value is >= the
        k-th largest non-missing value get 1 (ties at the threshold are all
        flagged); everything else, including missing values, gets 0.
    """
    # Work on a copy so the caller's (or groupby's) frame is not modified
    flagged = col.copy()

    # Missing values must not take part in the ranking: NaN does not compare
    # consistently, so sorting a list containing it gives an unreliable order.
    ranked = flagged[in_name].dropna().sort_values(ascending=False)

    if k < 1 or ranked.empty:
        flagged[out_name] = 0
        return flagged

    thresh = ranked.iloc[min(k, len(ranked)) - 1]
    # NaN >= thresh is False, so rows with a missing value are never flagged
    flagged[out_name] = (flagged[in_name] >= thresh).astype("int64")
    return flagged

# Within each (split, num_classes, bin_method) group, flag the runs whose best
# val F1 is among the top 3. The groups are iterated explicitly and
# concatenated so that the grouping columns stay ordinary columns (never index
# levels) regardless of how groupby.apply treats group keys in the installed
# pandas version; the merge below needs them as columns.
group_cols = ["split", "num_classes", "bin_method"]
flagged_groups = [
    get_top_n(group, "max_val_f1", "top_n_max_val_f1", 3)
    for _, group in grouped.groupby(group_cols)
]
if flagged_groups:
    grouped = pd.concat(flagged_groups)
else:
    # Nothing to rank: still provide the indicator column for the merge
    grouped = grouped.assign(top_n_max_val_f1=0)

# Attach max_val_f1 and the top-n indicator to every epoch row of the matching run
joined = results_df.merge(grouped, on=["split", "num_classes", "bin_method", "id"], how="outer")

In [8]:
# Save the merged table to S3 as a CSV file, leaving out the index column
output_uri = "s3://w210-poverty-mapper/modeling/results/combined_results.csv"
joined.to_csv(output_uri, index=False)

In [9]:
# Distinct split names among the rows flagged with an invalid confusion matrix
invalid_rows = joined["any_cm_len_invalid"] == 1
joined.loc[invalid_rows, "split_name"].unique()

array(['leave_one_out_tajikistan_5k_50d',
       'within_country_tajikistan_10k_50d',
       'within_country_tajikistan_5k_50d'], dtype=object)