In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
file_path = "/Users/eva/Downloads/World Happiness Dataset.csv"  # Update with correct path if needed
df = pd.read_csv(file_path)

# Handle missing values by filling with median
df.fillna(df.median(numeric_only=True), inplace=True)

# Convert 'happiness_score' into categorical labels (Low: 0, Medium: 1, High: 2)
df['happiness_label'] = pd.qcut(df['happiness_score'], q=3, labels=[0, 1, 2])

# Encode categorical feature 'region'
label_encoder = LabelEncoder()
df['region_encoded'] = label_encoder.fit_transform(df['region'])

# Selecting relevant features for classification
features = ['gdp_per_capita', 'social_support', 'healthy_life_expectancy',
            'freedom_to_make_life_choices', 'generosity', 'perceptions_of_corruption', 'region_encoded']
X = df[features]
y = df['happiness_label']

# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into training and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)

# Define and train the MLP model with optimizations
mlp = MLPClassifier(hidden_layer_sizes=(64, 32), activation='relu', solver='adam',
                    max_iter=1000, random_state=42, learning_rate_init=0.001,
                    batch_size=64, early_stopping=True)
mlp.fit(X_train, y_train)

# Make predictions
y_pred = mlp.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
classification_rep = classification_report(y_test, y_pred, target_names=['Low', 'Medium', 'High'])

# Print results
print(f"Accuracy: {accuracy * 100:.2f}%")
print("Classification Report:\n", classification_rep)


Accuracy: 67.88%
Classification Report:
               precision    recall  f1-score   support

         Low       0.74      0.88      0.81        92
      Medium       0.62      0.25      0.36        91
        High       0.64      0.90      0.75        91

    accuracy                           0.68       274
   macro avg       0.67      0.68      0.64       274
weighted avg       0.67      0.68      0.64       274

