# In [5]:
import pandas as pd
import numpy as np
import json
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from tensorflow.keras.models import load_model

def load_preprocessor(params_file):
    """Load preprocessing parameters from a saved JSON file.

    Args:
        params_file: Path to the JSON file holding the preprocessing
            parameters.

    Returns:
        The decoded JSON content. The other functions in this file read the
        keys 'numerical_features', 'categorical_features', 'scaler_mean',
        'scaler_scale' and 'encoder_categories' from it.

    Raises:
        FileNotFoundError: If ``params_file`` does not exist.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON is UTF-8 by specification; pin the encoding so non-ASCII category
    # names decode the same way regardless of the platform's locale default.
    with open(params_file, 'r', encoding='utf-8') as f:
        return json.load(f)

def validate_input_data(data, params):
    """Check that the input frame can be fed to the preprocessing step.

    Args:
        data: DataFrame holding the observations to validate.
        params: Mapping with the lists 'numerical_features' and
            'categorical_features' naming the columns the model needs.

    Returns:
        True when every required column is present and free of NaN.

    Raises:
        ValueError: If a required column is missing, or if any required
            column contains NaN.
    """
    required_columns = params['numerical_features'] + params['categorical_features']
    missing_columns = [col for col in required_columns if col not in data.columns]

    if missing_columns:
        raise ValueError(f"Missing required columns: {missing_columns}")

    # Only the feature columns are consumed downstream, so NaN in unrelated
    # extra columns (ids, notes, ...) must not reject otherwise valid input.
    if data[required_columns].isnull().any().any():
        raise ValueError("Input data contains NaN values. Please clean your data first.")

    return True

def preprocess_data(data, params):
    """Apply the saved scaling and one-hot encoding to ``data`` by hand.

    Numerical columns are standardised as ``(x - scaler_mean) / scaler_scale``
    and each categorical column is expanded into one indicator column per
    entry of its category list. A value that is not in the category list
    yields an all-zero row for that feature.

    Args:
        data: DataFrame containing at least the feature columns named in
            ``params``.
        params: Mapping with the keys 'numerical_features',
            'categorical_features', 'scaler_mean', 'scaler_scale' and
            'encoder_categories' (one category list per categorical feature,
            in the same order as 'categorical_features').

    Returns:
        2-D float array with the scaled numerical columns first, followed by
        the one-hot blocks in 'categorical_features' order.

    Raises:
        ValueError: If validation fails or a numerical column cannot be
            converted to float.
    """
    # Validate input first
    validate_input_data(data, params)

    # Force a float array: object / nullable-dtype columns would otherwise
    # give an object array that leaks through hstack into the model input.
    numerical_values = data[params['numerical_features']].to_numpy(dtype=float)
    scaler_mean = np.asarray(params['scaler_mean'], dtype=float)
    scaler_scale = np.asarray(params['scaler_scale'], dtype=float)
    numerical_scaled = (numerical_values - scaler_mean) / scaler_scale

    # Apply OneHotEncoder transformation manually
    categorical_encoded = []
    for i, feature in enumerate(params['categorical_features']):
        feature_categories = params['encoder_categories'][i]

        # Build the category -> column lookup once per feature instead of two
        # linear scans per cell; setdefault keeps the first occurrence, which
        # matches what list.index() would return for duplicated categories.
        column_of = {}
        for column, category in enumerate(feature_categories):
            column_of.setdefault(category, column)

        feature_values = data[feature].to_numpy()
        encoded_feature = np.zeros((len(feature_values), len(feature_categories)))
        for row, value in enumerate(feature_values):
            column = column_of.get(value)
            # Unknown category: leave the row all zeros
            # (handle_unknown='ignore' behavior)
            if column is not None:
                encoded_feature[row, column] = 1

        categorical_encoded.append(encoded_feature)

    # Combine numerical and categorical features
    if not categorical_encoded:
        return numerical_scaled

    return np.hstack([numerical_scaled, *categorical_encoded])

def predict_unemployment(new_data, model_path='Unemployment_AI_Optimized.keras', 
                        params_path='preprocessing_params_optimized.json'):
    """
    Make unemployment prediction on new data.

    The input is validated and preprocessed before the model is loaded, so
    malformed input fails fast without paying for the model load.

    Args:
        new_data: DataFrame with same columns as training data (except target)
        model_path: Path to saved Keras model
        params_path: Path to preprocessing parameters JSON

    Returns:
        Predicted unemployment rate. Only the first element of the flattened
        prediction array is returned, so pass one observation per call.

    Raises:
        ValueError: If the input fails validation or contains no rows.
    """
    # Cheap steps first: load the parameters and preprocess the new data
    params = load_preprocessor(params_path)
    new_data_processed = preprocess_data(new_data, params)

    # Without this guard an empty frame would only fail later, either inside
    # model.predict or at the [0] index below, with a less helpful error.
    if len(new_data_processed) == 0:
        raise ValueError("Input data contains no rows; nothing to predict.")

    # Load the model only once the input is known to be usable
    model = load_model(model_path)

    # Make prediction
    prediction = model.predict(new_data_processed, verbose=0)

    return prediction.flatten()[0]

if __name__ == "__main__":
    # A single illustrative observation, written as feature -> value and
    # expanded below into the one-row column lists the DataFrame is built from.
    sample_features = {
        'Region': 'Europe and Central Asia',
        'Trade union density': 78.699997,
        'Combined corporate income tax rate': 28.0,
        'Education spending': 0.0734319847255705,
        'Health spending': 0.0631525528524754,
        'Housing spending': 0.0057497428086187,
        'Community development spending': 0.0025634702523358,
        'IRLT': 5.1075,
        'Population, total': 8895960.0,
        'GDP per capita (current US$)': 27259.4806735435,
        'Inflation, consumer prices (annual %)': 2.40595834145438,
        'Gini index': 26.5,
    }
    sample_frame = pd.DataFrame(
        {name: [value] for name, value in sample_features.items()}
    )

    # Run the prediction and report either the result or a readable error.
    try:
        rate = predict_unemployment(sample_frame)
        print(f"Predicted Unemployment Rate: {rate:.2f}%")

    except FileNotFoundError as missing_file:
        # Raised when the model or the parameter file is not found.
        print(f"Error: Could not find required file - {missing_file}")
        print("Make sure both 'Unemployment_AI_Optimized.keras' and 'preprocessing_params_optimized.json' are in the same directory")

    except ValueError as invalid_input:
        # Raised by the input validation (missing columns / NaN values).
        print(f"Data validation error: {invalid_input}")

    except Exception as unexpected:
        # Last-resort handler for the script entry point.
        print(f"Unexpected error making prediction: {unexpected}")
        print("Please check your input data format and file paths")
    


# Output: Predicted Unemployment Rate: 6.61%
