In [1]:
import pickle
import pandas as pd
from sklearn.preprocessing import StandardScaler
import os

In [4]:
def load_model(indicator, cluster_id, model_dir='../models/pklFiles/'):
    """Load the pickled model (and, for LSTM, its scaler) for an indicator/cluster pair.

    Model files are looked up in ``model_dir`` by the naming scheme
    ``{indicator}_cluster_{cluster_id}_model_{model_name}.pkl``. When several
    files match, the alphabetically first one is used so the choice does not
    depend on the (arbitrary) order returned by ``os.listdir``.

    Args:
        indicator: Indicator name used as the file-name prefix.
        cluster_id: Cluster identifier embedded in the file name.
        model_dir: Directory containing the pickled models and scalers.

    Returns:
        dict: ``{"name": <model name>, "model": <unpickled object>,
        "scaler": <unpickled scaler or None>}`` when a model file is found.
        str: A human-readable message when no matching model file exists
        (kept for backward compatibility; callers must check the type).

    Raises:
        FileNotFoundError: If ``model_dir`` does not exist.
    """
    prefix = f"{indicator}_cluster_{cluster_id}_model_"
    suffix = ".pkl"

    # Anchor the match at both ends: a plain substring test would let
    # "Tobacco use%" match "Adult Tobacco use%_cluster_..." and would also
    # pick up stray non-pickle files sharing the prefix.
    candidates = sorted(
        entry for entry in os.listdir(model_dir)
        if entry.startswith(prefix) and entry.endswith(suffix)
    )

    if not candidates:
        return f"No model found for indicator '{indicator}' and cluster '{cluster_id}'."

    model_filename = candidates[0]
    # The model name is whatever sits between the prefix and the extension.
    model_name = model_filename[len(prefix):-len(suffix)]

    # SECURITY: pickle.load executes arbitrary code embedded in the file.
    # Only point model_dir at files produced by a trusted training pipeline.
    model_path = os.path.join(model_dir, model_filename)
    with open(model_path, 'rb') as model_file:
        model = pickle.load(model_file)

    result = {
        "name": model_name,
        "model": model,
        "scaler": None
    }

    # Only LSTM models ship with a companion scaler file.
    if model_name == "LSTM":
        scaler_filename = f"{indicator}_cluster_{cluster_id}_scaler_{model_name}.pkl"
        scaler_path = os.path.join(model_dir, scaler_filename)
        if os.path.exists(scaler_path):
            with open(scaler_path, 'rb') as scaler_file:
                result["scaler"] = pickle.load(scaler_file)
        else:
            # Best effort: report the problem and leave "scaler" as None.
            print(f"Scaler file not found for LSTM model: {scaler_filename}")

    return result


def predict_value(year, country, indicator,
                  data_path='../models/pklFiles/ClusterDataForTimeSeries.csv',
                  model_dir='../models/pklFiles/'):
    """Predict an indicator value for a country in a given year.

    The country is mapped to its 'Assigned Cluster' via the CSV at
    ``data_path``; the model for that (indicator, cluster) pair is then loaded
    with ``load_model`` and asked to predict from the year alone.

    Args:
        year: Year to predict for; used as the single input feature.
        country: Country name, matched against the CSV's 'name' column.
        indicator: Indicator name, forwarded to ``load_model``.
        data_path: CSV file holding the country-to-cluster assignment.
        model_dir: Directory holding the pickled models and scalers.

    Returns:
        The first element of the model's prediction on success.
        str: A descriptive message when the country is unknown, no model
        exists for the indicator/cluster, or an LSTM model has no scaler.
    """
    df = pd.read_csv(data_path)

    cluster_matches = df.loc[df['name'] == country, 'Assigned Cluster']
    if cluster_matches.empty:
        return f"Country {country} not found in the dataset."
    cluster = cluster_matches.iloc[0]

    model = load_model(indicator, cluster, model_dir=model_dir)
    # load_model reports "no model found" by returning a message string
    # instead of a dict; pass it through rather than crash on model['name'].
    if not isinstance(model, dict):
        return model

    # Prepare input data
    input_data = pd.DataFrame({'year': [year]})
    if model['name'] == 'LSTM':
        scaler = model['scaler']
        if scaler is None:
            # load_model leaves the scaler as None when its file is missing.
            return (f"Scaler not found for LSTM model of indicator "
                    f"'{indicator}' and cluster '{cluster}'.")
        # NOTE(review): the same scaler is applied to the input year and to
        # the predicted value. The recorded run for 'Tobacco use%' produced
        # ~304.87 while the observed values shown in the notebook are ~20-30,
        # so this may not match how the model was trained - confirm against
        # the training code.
        scaled_input = scaler.transform(input_data.values)
        # Reshape to a 3-D array of one time step with one feature per sample.
        scaled_input = scaled_input.reshape((scaled_input.shape[0], 1, 1))
        prediction = model['model'].predict(scaled_input)
        prediction = scaler.inverse_transform(prediction)
    else:
        # Non-LSTM models
        prediction = model['model'].predict(input_data)

    return prediction[0]  # Return the first (and only) prediction


In [5]:
# Forecast the tobacco-use percentage for Pakistan in 2050 and show it.
predicted_value = predict_value(
    year=2050,
    country='Pakistan',
    indicator='Tobacco use%',
)
print(f"\n\nPredicted value: {predicted_value}")

2024-11-16 17:48:27.821047: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-16 17:48:27.836166: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-11-16 17:48:27.878674: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1731761307.974203  124490 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1731761308.001344  124490 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-16 17:48:28.089407: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 642ms/step


Predicted value: [304.87207]


In [6]:
# Reload the cluster dataset and display up to the first 25 rows for Pakistan.
df = pd.read_csv('../models/pklFiles/ClusterDataForTimeSeries.csv')

df.loc[df['name'].eq('Pakistan')].head(25)

Unnamed: 0.1,Unnamed: 0,health_expenditure,who_region,world_bank_income_level,population growth rate%,year,population,life_expectancy,health_life_expectancy,Number of new HIV infections,Suicide deaths,Adult obesity%,Tobacco use%,Alcohol consumption,Prevalence of hypertension%,name,Assigned Cluster
3250,3250,2.91,2,2,1.6,2000,155.0,60.1,52.3,1.029753,8.8,2.806799,26.129849,4.193742,36.39768,Pakistan,1
3251,3251,2.91,2,2,1.6,2001,159.0,60.35,52.52,1.041693,8.9,10.977025,23.602304,5.392418,42.37036,Pakistan,1
3252,3252,2.91,2,2,1.6,2002,163.0,60.6,52.74,1.065573,9.1,25.204582,24.187423,0.964502,36.305572,Pakistan,1
3253,3253,2.91,2,2,1.6,2003,167.0,60.85,52.96,1.065573,9.1,18.21275,20.229564,3.284292,43.319696,Pakistan,1
3254,3254,2.91,2,2,1.6,2004,171.0,61.1,53.18,1.077513,9.2,17.245495,22.62915,0.230046,44.865296,Pakistan,1
3255,3255,2.91,2,2,1.6,2005,175.0,61.35,53.4,1.053633,9.0,9.116674,20.257851,4.419986,40.471023,Pakistan,1
3256,3256,2.91,2,2,1.6,2006,180.0,61.6,53.62,1.077513,9.2,20.516757,26.576266,0.775412,39.15303,Pakistan,1
3257,3257,2.91,2,2,1.6,2007,184.0,61.85,53.84,1.065573,9.1,2.624432,27.253043,4.354216,38.257258,Pakistan,1
3258,3258,2.91,2,2,1.6,2008,189.0,62.1,54.06,1.053633,9.0,12.552708,26.278817,1.61922,36.200621,Pakistan,1
3259,3259,2.91,2,2,1.6,2009,194.0,62.35,54.28,1.029753,8.8,15.878959,29.720947,3.692865,41.769594,Pakistan,1
