In [2]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import r2_score
from sklearn.linear_model import LinearRegression
from numpy import genfromtxt
import os

In [3]:
# Folds with the best results, grouped by experiment (SM, SV, ST, SM-SV-ST) and indicator:
#SM:
#income: FOLD_SM_LATLONG_RIDGE_RANDOM_14Jul23_2058_income/3/
#population: FOLD_SM_RIDGE_RANDOM_30May23_2233_population
#literacy: FOLD_SM_RIDGE_RANDOM_30May23_2206_literacy
#longevity: FOLD_SM_RIDGE_RANDOM_30May23_2219_longevity


#SV:
#income: FOLD_SV_GROUPED_RIDGE_RANDOM_14Jul23_1827_income
#population:
#literacy:
#longevity:


#ST:
#income: FOLD_SAT_GRP_RIDGE_RANDOM_31Jul23_1307_income
#population: FOLD_SAT_GRP_RIDGE_RANDOM_04Aug23_1455_population
#literacy: FOLD_SAT_GRP_RIDGE_RANDOM_04Aug23_1457_literacy
#longevity: FOLD_SAT_GRP_RIDGE_RANDOM_04Aug23_1459_longevity


#SM-SV-ST:
#income: FOLD_SV_SM_SAT_AGG_RIDGE_RANDOM_21Aug23_0113_income/4/
#population: FOLD_SV_SM_SAT_AGG_RIDGE_RANDOM_14Aug23_2039_population
#literacy: FOLD_SV_SM_SAT_AGG_RIDGE_RANDOM_14Aug23_2039_literacy
#longevity: FOLD_SV_SM_SAT_AGG_RIDGE_RANDOM_14Aug23_2040_longevity

In [4]:
# One entry per experiment. 'name' labels the plot title and the output file
# name; every other key is an indicator mapped to the results folder that
# read_data_and_generate_graph reads real.csv and pred.csv from.
# NOTE(review): the trailing number in each path is presumably the fold
# index -- confirm against the training output layout.
experiments = [
    dict(
        name='SM',
        income='FOLD_SM_LATLONG_RIDGE_RANDOM_14Jul23_2058_income/3/',
        population='FOLD_SM_BALANCE_RIDGE_RANDOM_28Aug23_2020_population/3',
        literacy='FOLD_SM_BALANCE_RIDGE_RANDOM_28Aug23_2020_literacy/0',
        longevity='FOLD_SM_BALANCE_RIDGE_RANDOM_28Aug23_2021_longevity/3',
    ),
    dict(
        name='SV',
        income='FOLD_SV_AGG_500_RIDGE_RANDOM_28Aug23_2211_income/0',
        population='FOLD_SV_AGG_500_RIDGE_RANDOM_29Aug23_0022_population/2',
        literacy='FOLD_SV_AGG_500_RIDGE_RANDOM_28Aug23_2319_literacy/2',
        longevity='FOLD_SV_AGG_500_RIDGE_RANDOM_29Aug23_0127_longevity/4',
    ),
    dict(
        name='ST',
        income='FOLD_SAT_GRP_RIDGE_RANDOM_31Jul23_1307_income/0/',
        population='FOLD_SAT_GRP_RIDGE_RANDOM_04Aug23_1455_population/0',
        literacy='FOLD_SAT_GRP_RIDGE_RANDOM_04Aug23_1457_literacy/2',
        longevity='FOLD_SAT_GRP_RIDGE_RANDOM_04Aug23_1459_longevity/1',
    ),
    dict(
        name='ST-SM-SV',
        income='FOLD_SV_SM_SAT_AGG_RIDGE_RANDOM_21Aug23_0113_income/4/',
        population='FOLD_SV_SM_SAT_AGG_550_RIDGE_RANDOM_28Aug23_2051_population/4',
        literacy='FOLD_SV_SM_SAT_AGG_550_RIDGE_RANDOM_28Aug23_2051_literacy/3',
        longevity='FOLD_SV_SM_SAT_AGG_550_RIDGE_RANDOM_28Aug23_2052_longevity/3',
    ),
]

In [5]:
# Indicators to plot; each one is also a key of every entry in `experiments`.
indicators = [
    'income',
    'population',
    'literacy',
    'longevity',
]


In [6]:
# Folder the generated plot images are written to (relative to the working directory).
output_folder = "./plots/"

In [7]:
# Create the output folder if it doesn't exist; exist_ok=True keeps this cell
# safe to re-run when the folder is already there.
os.makedirs(output_folder, exist_ok=True)

In [8]:
def read_data_and_generate_graph(experiment, indicator):
    """Plot estimated vs. actual values for one experiment/indicator and save it.

    Reads ``real.csv`` and ``pred.csv`` from the folder stored under
    ``experiment[indicator]``, draws a scatter plot of estimated against
    actual values together with a dashed identity reference line and a fitted
    linear trend line, annotates the R-squared score, and writes the figure as
    a JPG into the module-level ``output_folder``.

    Parameters
    ----------
    experiment : dict
        Must contain a ``'name'`` key (used in the plot title and file name)
        and a key equal to ``indicator`` whose value is the results folder.
    indicator : str
        Key to look up in ``experiment``; also used in the title and file name.

    Returns
    -------
    None
        The only result is the image file written to ``output_folder``.
    """
    y_test_path = f"./{experiment[indicator]}/real.csv"
    y_predicted_path = f"./{experiment[indicator]}/pred.csv"

    y_test = genfromtxt(y_test_path, delimiter=',')
    y_predicted = genfromtxt(y_predicted_path, delimiter=',')

    fig, ax = plt.subplots()
    try:
        ax.scatter(y_test, y_predicted)
        # Identity reference line; it only spans [0, 1].
        # NOTE(review): presumably the values are normalized to that range -- confirm.
        ax.plot([0, 1], [0, 1], 'k--', lw=1)
        ax.set_xlabel('Actual Value')
        ax.set_ylabel('Estimated Value')

        # scikit-learn expects 2-D inputs, so work on column vectors from here on.
        y_test_reshaped = y_test.reshape(-1, 1)
        y_predicted_reshaped = y_predicted.reshape(-1, 1)

        # Trend line: least-squares fit of the estimates on the actual values.
        regression = LinearRegression().fit(y_test_reshaped, y_predicted_reshaped)
        ax.plot(y_test_reshaped, regression.predict(y_test_reshaped))

        # Compute the score once and reuse it for the annotation and the file name.
        r2 = r2_score(y_test_reshaped, y_predicted_reshaped)
        # The annotation is anchored at data coordinates (0, 1).
        ax.annotate(f"r-squared = {r2:.2f}", (0, 1))

        ax.set_title(f"{experiment['name']} {indicator.capitalize()}")
        filename = f"{experiment['name']}-{indicator}-r2-score-{r2:.2f}.jpg"
        filepath = os.path.join(output_folder, filename)

        # Save through the figure itself instead of pyplot's "current figure".
        fig.savefig(filepath)
    finally:
        # Always release the figure, even if fitting, scoring or saving fails;
        # otherwise open figures accumulate across the plotting loop.
        plt.close(fig)

In [9]:
# Generate and save one plot for every (experiment, indicator) combination.
for experiment in experiments:
    for indicator in indicators:
        read_data_and_generate_graph(experiment, indicator)