## Notebook for assigning official labels to pseudolabeled data in SSAL experiments

This notebook loads the predictions from a specific experiment. The predictions file contains information about the model's predictions on the prediction set, including which predictions were used as pseudolabels. The notebook finds the rows that were used as pseudolabels and looks each image up in the master labeling CSV. If it finds an official label there, it uses that label to update the predictions file. If it doesn't find an official label, it requests one from a human by displaying the image on screen and prompting for input. Finally, once the user confirms, it saves the updated predictions file, with its new human-gathered official labels, and saves the master label CSV.

In [1]:
import psycopg2 as pg
import pandas.io.sql as psql
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from skimage.transform import resize
from IPython.display import clear_output
import time
import sys

from psycopg2.extensions import register_adapter, AsIs

#helps postgres convert  data types without errors
def addapt_numpy_float64(numpy_float64):
    """Adapter for numpy float64 values: wraps the value in psycopg2's AsIs."""
    adapted_value = AsIs(numpy_float64)
    return adapted_value
def addapt_numpy_int32(numpy_int32):
    """Adapter for numpy int32 values: wraps the value in psycopg2's AsIs."""
    adapted_value = AsIs(numpy_int32)
    return adapted_value

def addapt_numpy_int64(numpy_int64):
    """Adapter for numpy int64 values: wraps the value in psycopg2's AsIs."""
    adapted_value = AsIs(numpy_int64)
    return adapted_value
#tell psycopg2 which adapter function to use for each numpy scalar type
for _numpy_type, _adapter in (
    (np.float64, addapt_numpy_float64),
    (np.int32, addapt_numpy_int32),
    (np.int64, addapt_numpy_int64),
):
    register_adapter(_numpy_type, _adapter)



In [2]:
#adds a new row to the labeled data csv after getting a label from humans
def update_local_label_df(df, image_name, official_label):
    """Return a copy of ``df`` with one extra row for a newly labeled image.

    Parameters
    ----------
    df : pandas.DataFrame
        The label table to extend. It is not modified.
    image_name : str
        Value stored in the new row's ``image_name`` column.
    official_label : object
        Value stored in the new row's ``official_label`` column.

    Returns
    -------
    pandas.DataFrame
        ``df`` followed by the new row. The new row's ``citizen_label`` is NaN
        and its index label is 0 (the index is not renumbered here).
    """
    new_row = pd.DataFrame(
        columns = ["image_name","citizen_label","official_label"],
        data = [[image_name, np.nan, official_label]],
    )

    #DataFrame.append was removed in pandas 2.0; pd.concat with sort=False
    #gives the same result (rows stacked, original index labels kept)
    return pd.concat([df, new_row], sort = False)
    
#saves the passed dataframe to a csv with the passed name    
def save_local_label_df(df, name):
    """Renumber ``df``'s index and write it to the csv file ``name``.

    The index reset is done in place, so the caller's dataframe is modified.
    Returns True when the file was written and False when any exception was
    raised; the exception is reported with print() and not re-raised. This
    function does not wait or retry itself.
    """
    saved = False

    try:
        #drop the old index so the csv gets a clean 0..n-1 index column
        df.reset_index(drop = True, inplace = True)
        df.to_csv(name)
        print("saving local df")
        saved = True

    except PermissionError:
        print("found permission error, you probably have the file open, waiting 10 seconds")

    except Exception as err:
        print("could not save, found exception", err)

    return saved
        
#updates a dataframe with a new label        
def set_official_label_pred_csv(df, name, label):
    """Set ``official_label`` to ``label`` on every row of ``df`` whose ``image_name`` equals ``name``.

    ``df`` is modified in place and nothing is returned.
    """
    matching_rows = df["image_name"] == name
    df.loc[matching_rows, ["official_label"]] = label
    
def save_label_pred_csv(df, path):
    """Write ``df`` to the csv file at ``path``, keeping its current index.

    Returns True when the file was written and False when any exception was
    raised; the exception is reported with print() and not re-raised. This
    function does not wait or retry itself.
    """
    saved = False

    try:
        df.to_csv(path)
        saved = True

    except PermissionError:
        print("found permission error, you probably have the file open, waiting 10 seconds")

    except Exception as err:
        print("could not save, found exception", err)

    return saved
    
def userInput():
    """Read one line typed by the user and return it unchanged as a string."""
    #no prompt text is shown and the entry is not validated here
    return input()

In [4]:
#display names keyed by the entry a labeler can type
#(not referenced by the code visible in this notebook)
title_dict = {
    "0":"0 - Closed Forest",
    "1":"1 - Woodland",
    "2":"2 - Shrubland/Thicket",
    "3":"3 - Dwarf Shrubland",
    "4":"4 - Herbaceous Veg",
    "5":"5 - Barren",
    "6":"6 - Wetland",
    "7":"7 - Open Water",
    "8":"8 - Cultivated Land",
    "9":"9 - Urban",
    "x":"x",
    "f":"f",
    "couldnt load":"couldnt load",
}

#master csv of labels gathered so far
labeled_data_path = "./data/labeled_data_globe.csv"
labeled_images = pd.read_csv(labeled_data_path, index_col = 0)

#predictions csv of the experiment iteration being labeled
path_to_predictions = "./SSAL_outputs/GLOBE_SSAL_base_att1/GLOBE_SSAL_base_att1_0/iteration_predictions/GLOBE_SSAL_base_att1_0_predictions_2.csv"
preds_df = pd.read_csv(path_to_predictions, index_col = 0)

#rows that were used as pseudolabels and do not have an official label yet
needs_label_mask = (preds_df["used_as_pseudolabel"] == 1) & preds_df["official_label"].isna()
predictions_pseudolabels = preds_df[needs_label_mask]

#nothing left to label, stop here
if(len(predictions_pseudolabels) == 0):

    print("already labeled")

    sys.exit()


#directory the images are read from (same for every image, so set once)
image_dir = "./data/images/all_images/"

#for each image, check if label already exists in labeled_data.csv
for _, row in predictions_pseudolabels.iterrows():

    image_name = row["image_name"]

    #if label exists, update predictions csv with label
    if(image_name in labeled_images.image_name.values):

        print(image_name,"has a label")
        #use the first matching row if the image appears more than once
        official_label = labeled_images[labeled_images["image_name"] == image_name].official_label.values[0]

        set_official_label_pred_csv(preds_df, image_name, official_label)

        print("official label updated from local csv")

    #label doesn't exist, get one from human
    else:

        image_path = image_dir + image_name

        #load image; if it cannot be read, report why and move on to the
        #next image, leaving this one unlabeled
        try:

            img = plt.imread(image_path)
            img = resize(img, (900,900))

        #Exception (not a bare except) so KeyboardInterrupt / SystemExit
        #still stop the labeling session
        except Exception as e:
            print("couldnt load",image_path, e)
            continue

        #plot image
        plt.ion()
        fig = plt.figure(figsize = (12,8))
        plt.axis("off")

        plt.imshow(img)

        #plot key (an empty white line is drawn only to carry the legend text)
        plt.plot([],[],label = "Assign new label \n0 - Closed Forest\n1 - Woodland\n2 - Shrubland/Thicket\n3 - Dwarf Shrubland/Thickect\n4 - Herbaceous Veg\n5 - Barren\n6 - Wetland\n7 - Open Water\n8 - Cultivated Land\n9 - Urban\n\nenter - pass", color = "white")
        plt.legend(loc = 1, bbox_to_anchor = (1.85,1.0), fontsize = 14)

        #plot name
        plt.text(400,875,image_name, color = "white", fontsize = 10)

        #show plot
        plt.show(block = False)

        #get user input, then release the figure so open figures do not
        #pile up over a long labeling session
        try:
            user_label = userInput()
        finally:
            plt.close(fig)

        #NOTE(review): the entry is stored exactly as typed (a string), and an
        #empty entry ("enter - pass") is stored as an empty-string label
        #rather than skipped - confirm this is intended
        set_official_label_pred_csv(preds_df, image_name, user_label)
        labeled_images = update_local_label_df(labeled_images, image_name, user_label)

        #clear output for next image
        clear_output(wait = False)
        
        

print("done with batch, commit changes to preds csv and local csv file?")
#ignore stray whitespace / capitalisation around the answer
ans = input().strip().lower()


if(ans == "yes"):

    #both saves are always attempted; each helper returns False (after
    #printing the reason) instead of raising when it cannot write its file.
    #there is no automatic retry: fix the problem and rerun this cell
    local_saved = save_local_label_df(labeled_images, labeled_data_path)
    preds_saved = save_label_pred_csv(preds_df,path_to_predictions)

    #only claim success when both files were actually written
    if(local_saved and preds_saved):

        print("saved local file and commited ")

    else:

        print("save incomplete - local csv saved:", local_saved, "- preds csv saved:", preds_saved)


done with batch, commit changes to preds csv and local csv file?
yes
saving local df
saved local file and commited 
