# 1 - Discrete experiments

## 1.1 - Small data

In [2]:
import pandas as pd
import json
import statistics as stats
from os import listdir
import numpy as np

# Collect, for every missing-percentage level, one DataFrame holding the mean
# imputation error (1 - accuracy, averaged over the runs) of each method on
# each small discrete dataset. The frames are stored in `accuracy_error_dfs`,
# one per level, in increasing level order.
methods = [
    "MEAN",
    "MICE",
    "GLFM",
    "HIVAE",
    "VBSEM",
    "glsl_EMPTY",
]
max_percentage = 6  # exclusive upper bound: levels 1 .. max_percentage - 1 are read
n_runs = 5  # runs are stored under the keys "run_1" .. "run_<n_runs>"
directories = ["../../missing_results/discrete/"]
accuracy_error_dfs = []
data_names = [
    "hiv_test",
    "hayes_roth",
    "balance_scale",
    "car_evaluation",
    "nursery",
    "breast_cancer",
    "web_phishing",
    "solar_flare",
    "zoo",
    "vote",
    "spect_heart",
    "alarm",
]


# Iterate through the missing percentage values, and for each dataset, recover the methods' results
for i in range(1, max_percentage):
    miss_percentage_string = "0" + str(i)

    # Rows are gathered in a plain list and turned into a DataFrame once:
    # DataFrame.append was removed in pandas 2.0 and copied the frame per call.
    rows = []

    for directory in directories:
        for data_name in data_names:
            error_results = {"dataset": data_name}

            for method_name in methods:
                json_name = data_name + "_" + miss_percentage_string + "_results_" + method_name + ".json"
                full_path = directory + data_name + "/" + json_name
                try:
                    with open(full_path, encoding="utf-8") as json_file:
                        runs_data = json.load(json_file)["runs"]

                    errors = [
                        1.0 - runs_data["run_" + str(j)]["accuracy"]
                        for j in range(1, n_runs + 1)
                    ]
                    error_results[method_name] = stats.mean(errors)
                except (OSError, ValueError, KeyError, TypeError):
                    # Best effort: a missing or malformed result file is
                    # reported and the method is left empty (NaN) in this row.
                    print("[" + method_name + "]: " + full_path)

            rows.append(error_results)

    df_error_results = pd.DataFrame(rows)
    accuracy_error_dfs.append(df_error_results)

## 1.2 - Large data

In [3]:
import pandas as pd
import json
import statistics as stats
from os import listdir
import numpy as np

# Collect, for every missing-percentage level, one DataFrame holding the mean
# imputation error (1 - accuracy, averaged over the runs) of each method on
# each large discrete dataset. The frames are stored in `accuracy_error_dfs`,
# one per level, in increasing level order.
methods = [
    "MEAN",
    "MICE",
    "GLFM",
    "HIVAE",
    "VBSEM",
    # "glsl_EMPTY" is disabled for this group of datasets
]
max_percentage = 6  # exclusive upper bound: levels 1 .. max_percentage - 1 are read
n_runs = 5  # runs are stored under the keys "run_1" .. "run_<n_runs>"
directories = ["../../missing_results/discrete/"]
accuracy_error_dfs = []
data_names = [
    "coil_42",
    "news_100",
    "webkb_336",
]


# Iterate through the missing percentage values, and for each dataset, recover the methods' results
for i in range(1, max_percentage):
    miss_percentage_string = "0" + str(i)

    # Rows are gathered in a plain list and turned into a DataFrame once:
    # DataFrame.append was removed in pandas 2.0 and copied the frame per call.
    rows = []

    for directory in directories:
        for data_name in data_names:
            error_results = {"dataset": data_name}

            for method_name in methods:
                json_name = data_name + "_" + miss_percentage_string + "_results_" + method_name + ".json"
                full_path = directory + data_name + "/" + json_name
                try:
                    with open(full_path, encoding="utf-8") as json_file:
                        runs_data = json.load(json_file)["runs"]

                    errors = [
                        1.0 - runs_data["run_" + str(j)]["accuracy"]
                        for j in range(1, n_runs + 1)
                    ]
                    error_results[method_name] = stats.mean(errors)
                except (OSError, ValueError, KeyError, TypeError):
                    # Best effort: a missing or malformed result file is
                    # reported and the method is left empty (NaN) in this row.
                    print("[" + method_name + "]: " + full_path)

            rows.append(error_results)

    df_error_results = pd.DataFrame(rows)
    accuracy_error_dfs.append(df_error_results)

# 2 - Continuous experiments

## 2.1 - Small data

In [4]:
import pandas as pd
import json
import statistics as stats
from os import listdir
import numpy as np

# Collect, for every missing-percentage level, one DataFrame holding the mean
# of (1 - nrmse) over the runs of each method on each small continuous
# dataset. The frames are stored in `accuracy_error_dfs`, one per level, in
# increasing level order.
methods = [
    "MEAN",
    "MICE",
    "GLFM",
    "HIVAE",
    "VBSEM",
    "glsl_EMPTY",
]
max_percentage = 6  # exclusive upper bound: levels 1 .. max_percentage - 1 are read
n_runs = 5  # runs are stored under the keys "run_1" .. "run_<n_runs>"
directories = ["../../missing_results/continuous/"]
accuracy_error_dfs = []
data_names = [
    "real_state_valuation",
    "buddymove",
    "qsar_fish_toxicity",
    "qsar_aqua_toxicity",
    "ilpd",
    "alcohol",
    "travel_reviews",
    "wine_quality_white",
    "wine",
    "leaf",
    "nba",
    "wdbc",
]


# Iterate through the missing percentage values, and for each dataset, recover the methods' results
for i in range(1, max_percentage):
    miss_percentage_string = "0" + str(i)

    # Rows are gathered in a plain list and turned into a DataFrame once:
    # DataFrame.append was removed in pandas 2.0 and copied the frame per call.
    rows = []

    for directory in directories:
        for data_name in data_names:
            error_results = {"dataset": data_name}

            for method_name in methods:
                json_name = data_name + "_" + miss_percentage_string + "_results_" + method_name + ".json"
                full_path = directory + data_name + "/" + json_name
                try:
                    with open(full_path, encoding="utf-8") as json_file:
                        runs_data = json.load(json_file)["runs"]

                    # The run index is `j` so that it no longer shadows the
                    # outer missing-percentage index `i`.
                    # NOTE(review): nrmse is usually already an error measure,
                    # so `1.0 - nrmse` may be unintended - confirm before
                    # interpreting these values as errors.
                    errors = [
                        1.0 - runs_data["run_" + str(j)]["nrmse"]
                        for j in range(1, n_runs + 1)
                    ]
                    # The mean is computed once, after all runs were read.
                    error_results[method_name] = stats.mean(errors)
                except (OSError, ValueError, KeyError, TypeError):
                    # Best effort: a missing or malformed result file is
                    # reported and the method is left empty (NaN) in this row.
                    print("[" + method_name + "]: " + full_path)

            rows.append(error_results)

    df_error_results = pd.DataFrame(rows)
    accuracy_error_dfs.append(df_error_results)

## 2.2 - Large data

In [5]:
import pandas as pd
import json
import statistics as stats
from os import listdir
import numpy as np

# Collect, for every missing-percentage level, one DataFrame holding the mean
# of (1 - nrmse) over the runs of each method on each large continuous
# dataset. The frames are stored in `accuracy_error_dfs`, one per level, in
# increasing level order.
methods = [
    "MEAN",
    "MICE",
    "GLFM",
    "HIVAE",
    "VBSEM",
    # "glsl_EMPTY" is disabled for this group of datasets
]
max_percentage = 6  # exclusive upper bound: levels 1 .. max_percentage - 1 are read
n_runs = 5  # runs are stored under the keys "run_1" .. "run_<n_runs>"
directories = ["../../missing_results/continuous/"]
accuracy_error_dfs = []
data_names = [
    "waveform",
    "100_plants",
    "geo_music",
]


# Iterate through the missing percentage values, and for each dataset, recover the methods' results
for i in range(1, max_percentage):
    miss_percentage_string = "0" + str(i)

    # Rows are gathered in a plain list and turned into a DataFrame once:
    # DataFrame.append was removed in pandas 2.0 and copied the frame per call.
    rows = []

    for directory in directories:
        for data_name in data_names:
            error_results = {"dataset": data_name}

            for method_name in methods:
                json_name = data_name + "_" + miss_percentage_string + "_results_" + method_name + ".json"
                full_path = directory + data_name + "/" + json_name
                try:
                    with open(full_path, encoding="utf-8") as json_file:
                        runs_data = json.load(json_file)["runs"]

                    # The run index is `j` so that it no longer shadows the
                    # outer missing-percentage index `i`.
                    # NOTE(review): nrmse is usually already an error measure,
                    # so `1.0 - nrmse` may be unintended - confirm before
                    # interpreting these values as errors.
                    errors = [
                        1.0 - runs_data["run_" + str(j)]["nrmse"]
                        for j in range(1, n_runs + 1)
                    ]
                    error_results[method_name] = stats.mean(errors)
                except (OSError, ValueError, KeyError, TypeError):
                    # Best effort: a missing or malformed result file is
                    # reported and the method is left empty (NaN) in this row.
                    print("[" + method_name + "]: " + full_path)

            rows.append(error_results)

    df_error_results = pd.DataFrame(rows)
    accuracy_error_dfs.append(df_error_results)

[HIVAE]: ../../missing_results/continuous/geo_music/geo_music_01_results_HIVAE.json
[HIVAE]: ../../missing_results/continuous/geo_music/geo_music_02_results_HIVAE.json
[HIVAE]: ../../missing_results/continuous/geo_music/geo_music_03_results_HIVAE.json
[HIVAE]: ../../missing_results/continuous/geo_music/geo_music_04_results_HIVAE.json
[HIVAE]: ../../missing_results/continuous/geo_music/geo_music_05_results_HIVAE.json


# 3 - Mixed experiments

## 3.1 - Small data

In [6]:
import pandas as pd
import json
import statistics as stats
from os import listdir
import numpy as np

# Collect, for every missing-percentage level, one DataFrame holding the mean
# of (1 - average_error) over the runs of each method on each small mixed
# dataset. The frames are stored in `accuracy_error_dfs`, one per level, in
# increasing level order.
methods = [
    "MEAN",
    "MICE",
    "GLFM",
    "HIVAE",
    "VBSEM",
    "glsl_EMPTY",
]
max_percentage = 6  # exclusive upper bound: levels 1 .. max_percentage - 1 are read
n_runs = 5  # runs are stored under the keys "run_1" .. "run_<n_runs>"
directories = ["../../missing_results/mixed/"]
accuracy_error_dfs = []
data_names = [
    "haberman",
    "iris",
    "user_knowledge",
    "vertebral",
    "ecoli",
    "planning_relax",
    "thoracic_surgery",
    "vehicle",
    "thyroid",
    "parkinsons",
    "autos",
    "ionosphere",
]


# Iterate through the missing percentage values, and for each dataset, recover the methods' results
for i in range(1, max_percentage):
    miss_percentage_string = "0" + str(i)

    # Rows are gathered in a plain list and turned into a DataFrame once:
    # DataFrame.append was removed in pandas 2.0 and copied the frame per call.
    rows = []

    for directory in directories:
        for data_name in data_names:
            error_results = {"dataset": data_name}

            for method_name in methods:
                json_name = data_name + "_" + miss_percentage_string + "_results_" + method_name + ".json"
                full_path = directory + data_name + "/" + json_name
                try:
                    with open(full_path, encoding="utf-8") as json_file:
                        runs_data = json.load(json_file)["runs"]

                    # NOTE(review): the field is named "average_error", so
                    # `1.0 - average_error` may be unintended - confirm before
                    # interpreting these values as errors.
                    errors = [
                        1.0 - runs_data["run_" + str(j)]["average_error"]
                        for j in range(1, n_runs + 1)
                    ]
                    error_results[method_name] = stats.mean(errors)
                except (OSError, ValueError, KeyError, TypeError):
                    # Best effort: a missing or malformed result file is
                    # reported and the method is left empty (NaN) in this row.
                    print("[" + method_name + "]: " + full_path)

            rows.append(error_results)

    df_error_results = pd.DataFrame(rows)
    accuracy_error_dfs.append(df_error_results)

## 3.2 - Large data

In [7]:
import pandas as pd
import json
import statistics as stats
from os import listdir
import numpy as np

# Collect, for every missing-percentage level, one DataFrame holding the mean
# of (1 - average_error) over the runs of each method on each large mixed
# dataset. The frames are stored in `accuracy_error_dfs`, one per level, in
# increasing level order.
methods = [
    "MEAN",
    "MICE",
    "GLFM",
    "HIVAE",
    "VBSEM",
    # "glsl_EMPTY" is disabled for this group of datasets
]
max_percentage = 6  # exclusive upper bound: levels 1 .. max_percentage - 1 are read
n_runs = 5  # runs are stored under the keys "run_1" .. "run_<n_runs>"
directories = ["../../missing_results/mixed/"]
accuracy_error_dfs = []
data_names = [
    "qsar_biodeg",
    "housing_prices",
    "census_india",
]


# Iterate through the missing percentage values, and for each dataset, recover the methods' results
for i in range(1, max_percentage):
    miss_percentage_string = "0" + str(i)

    # Rows are gathered in a plain list and turned into a DataFrame once:
    # DataFrame.append was removed in pandas 2.0 and copied the frame per call.
    rows = []

    for directory in directories:
        for data_name in data_names:
            error_results = {"dataset": data_name}

            for method_name in methods:
                json_name = data_name + "_" + miss_percentage_string + "_results_" + method_name + ".json"
                full_path = directory + data_name + "/" + json_name
                try:
                    with open(full_path, encoding="utf-8") as json_file:
                        runs_data = json.load(json_file)["runs"]

                    # NOTE(review): the field is named "average_error", so
                    # `1.0 - average_error` may be unintended - confirm before
                    # interpreting these values as errors.
                    errors = [
                        1.0 - runs_data["run_" + str(j)]["average_error"]
                        for j in range(1, n_runs + 1)
                    ]
                    error_results[method_name] = stats.mean(errors)
                except (OSError, ValueError, KeyError, TypeError):
                    # Best effort: a missing or malformed result file is
                    # reported and the method is left empty (NaN) in this row.
                    print("[" + method_name + "]: " + full_path)

            rows.append(error_results)

    df_error_results = pd.DataFrame(rows)
    accuracy_error_dfs.append(df_error_results)