In [1]:
import pickle
import pandas as pd
from sklearn.preprocessing import StandardScaler
import os

In [2]:
def load_model(indicator, cluster_id, model_dir='../models/pklFiles/'):
    """Load the pickled model (and, for LSTM, its scaler) for an indicator/cluster pair.

    Model files are looked up in ``model_dir`` by the filename prefix
    ``{indicator}_cluster_{cluster_id}_model_``; whatever follows the prefix
    (minus a trailing ``.pkl``) is taken as the model name.

    Args:
        indicator: Indicator name as it appears in the model filenames.
        cluster_id: Cluster identifier as it appears in the model filenames.
        model_dir: Directory that holds the pickled model and scaler files.

    Returns:
        A dict with keys ``"name"`` (model name parsed from the filename),
        ``"model"`` (the unpickled object) and ``"scaler"`` (the unpickled
        scaler for LSTM models when its file exists, otherwise ``None``).
        If no model file matches, a human-readable message string is returned
        instead of a dict, so callers must check the return type.

    Raises:
        FileNotFoundError: If ``model_dir`` does not exist (from ``os.listdir``).
    """
    prefix = f"{indicator}_cluster_{cluster_id}_model_"

    # Match on the filename prefix rather than on a substring, so that e.g.
    # indicator "use%" cannot pick up "Tobacco use%_cluster_..." by accident.
    # os.listdir order is arbitrary; sorting makes the choice deterministic
    # when more than one file matches.
    candidates = sorted(
        entry for entry in os.listdir(model_dir) if entry.startswith(prefix)
    )
    if not candidates:
        return f"No model found for indicator '{indicator}' and cluster '{cluster_id}'."

    model_filename = candidates[0]

    # Model name is the part after the prefix, without the ".pkl" extension.
    model_name = model_filename[len(prefix):]
    if model_name.endswith(".pkl"):
        model_name = model_name[:-len(".pkl")]

    # NOTE(security): pickle.load can execute arbitrary code; only point
    # model_dir at files you trust.
    model_path = os.path.join(model_dir, model_filename)
    with open(model_path, 'rb') as model_file:
        model = pickle.load(model_file)

    result = {
        "name": model_name,
        "model": model,
        "scaler": None
    }

    # LSTM models are stored together with a separate scaler pickle.
    if model_name == "LSTM":
        scaler_filename = f"{indicator}_cluster_{cluster_id}_scaler_{model_name}.pkl"
        scaler_path = os.path.join(model_dir, scaler_filename)
        if os.path.exists(scaler_path):
            with open(scaler_path, 'rb') as scaler_file:
                result["scaler"] = pickle.load(scaler_file)
        else:
            # Best effort: the model is still returned, with "scaler" left as None.
            print(f"Scaler file not found for LSTM model: {scaler_filename}")

    return result


def predict_value(year, country, indicator, data_path='ClusterDataForTimeSeries.csv'):
    """Predict an indicator value for a country in a given year.

    The country's cluster is read from the CSV at ``data_path``; the model for
    that (indicator, cluster) pair is obtained via ``load_model`` and asked to
    predict from a single-row input containing only the year.

    Args:
        year: Year to predict for.
        country: Country name, matched against the CSV's ``name`` column.
        indicator: Indicator name, forwarded to ``load_model``.
        data_path: CSV file with at least ``name`` and ``cluster`` columns.

    Returns:
        The first element of the model's prediction. If the country is not in
        the dataset, no model is found, or an LSTM model has no scaler, a
        human-readable message string is returned instead.
    """
    df = pd.read_csv(data_path)

    country_clusters = df.loc[df['name'] == country, 'cluster']
    if country_clusters.empty:
        return f"Country {country} not found in the dataset."
    cluster = country_clusters.iloc[0]

    model = load_model(indicator, cluster)
    # load_model signals "no model" by returning a message string instead of
    # a dict; pass that message through rather than failing on model['name'].
    if isinstance(model, str):
        return model

    # Prepare input data: a single row whose only feature is the year.
    input_data = pd.DataFrame({'year': [year]})
    if model['name'] == 'LSTM':
        scaler = model['scaler']
        if scaler is None:
            # load_model leaves the scaler as None when its file is missing.
            return (
                f"Scaler not found for LSTM model of indicator '{indicator}' "
                f"and cluster '{cluster}'."
            )
        # NOTE(review): the same scaler transforms the year input and
        # inverse-transforms the model output; confirm this matches how the
        # scaler was fitted during training.
        scaled_input = scaler.transform(input_data.values)
        # Reshape to 3-D (rows, 1, 1) before handing it to the LSTM model.
        scaled_input = scaled_input.reshape((scaled_input.shape[0], 1, 1))
        prediction = model['model'].predict(scaled_input)
        prediction = scaler.inverse_transform(prediction)
    else:
        # Non-LSTM models take the unscaled DataFrame directly.
        prediction = model['model'].predict(input_data)

    return prediction[0]  # Return the first (and only) prediction


In [6]:
# Run the prediction for Pakistan's 'Tobacco use%' indicator in 2050 and print it.
predicted_value = predict_value(
    year=2050,
    country='Pakistan',
    indicator='Tobacco use%',
)
print(f"\n\nPredicted value: {predicted_value}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 673ms/step


Predicted value: [375.08878]


In [7]:
# Load the cluster dataset and display up to the first 25 rows for Pakistan.
df = pd.read_csv('ClusterDataForTimeSeries.csv')

df.loc[df['name'].eq('Pakistan')].head(25)

Unnamed: 0.1,Unnamed: 0,name,health_expenditure,who_region,world_bank_income_level,population growth rate%,year,population,life_expectancy,health_life_expectancy,Number of new HIV infections,Suicide deaths,Adult obesity%,Tobacco use%,Alcohol consumption,Prevalence of hypertension%,cluster
3250,3250,Pakistan,2.91,2,2,1.6,2000,155.0,60.1,52.3,0.876806,8.8,2.806799,26.129849,4.193742,36.39768,7
3251,3251,Pakistan,2.91,2,2,1.6,2001,159.0,60.35,52.52,0.880171,8.9,10.977025,23.602304,5.392418,42.37036,7
3252,3252,Pakistan,2.91,2,2,1.6,2002,163.0,60.6,52.74,0.895943,9.1,25.204582,24.187423,0.964502,36.305572,7
3253,3253,Pakistan,2.91,2,2,1.6,2003,167.0,60.85,52.96,0.886901,9.1,18.21275,20.229564,3.284292,43.319696,7
3254,3254,Pakistan,2.91,2,2,1.6,2004,171.0,61.1,53.18,0.890266,9.2,17.245495,22.62915,0.230046,44.865296,7
3255,3255,Pakistan,2.91,2,2,1.6,2005,175.0,61.35,53.4,0.856412,9.0,9.116674,20.257851,4.419986,40.471023,7
3256,3256,Pakistan,2.91,2,2,1.6,2006,180.0,61.6,53.62,0.869924,9.2,20.516757,26.576266,0.775412,39.15303,7
3257,3257,Pakistan,2.91,2,2,1.6,2007,184.0,61.85,53.84,0.848476,9.1,2.624432,27.253043,4.354216,38.257258,7
3258,3258,Pakistan,2.91,2,2,1.6,2008,189.0,62.1,54.06,0.824767,9.0,12.552708,26.278817,1.61922,36.200621,7
3259,3259,Pakistan,2.91,2,2,1.6,2009,194.0,62.35,54.28,0.788653,8.8,15.878959,29.720947,3.692865,41.769594,7
