In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import r2_score
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.preprocessing import OneHotEncoder

# Set random seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

# Load the dataset
file_path = 'global_meta.csv'  # Update with actual file path
data = pd.read_csv(file_path)

# Handle missing values by filling with the median for numerical columns and mode for categorical
for col in data.select_dtypes(include=['number']).columns:
    data[col] = data[col].fillna(data[col].median())
for col in data.select_dtypes(include=['object']).columns:
    data[col] = data[col].fillna(data[col].mode()[0])

# Assuming the last column is the target variable
target_column = data.columns[-12]  # Replace with name if known
X = data.drop(columns=[target_column])
y = data[target_column]

# Convert categorical columns to numerical using one-hot encoding
categorical_cols = X.select_dtypes(include=['object']).columns
X = pd.get_dummies(X, columns=categorical_cols, drop_first=True)

# Convert target variable to numerical if it's categorical
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# One-Hot Encode the target variable
onehot_encoder = OneHotEncoder(sparse_output=False) # Specify sparse=False
y = onehot_encoder.fit_transform(y.reshape(-1, 1))

# Ensure dataset is not empty after preprocessing
if X.isnull().sum().sum() > 0:
    X = X.fillna(X.median())  # Fill any remaining NaNs in numerical columns

print(f"Shape of X: {X.shape}, Shape of y: {y.shape}")  # Debugging step

if X.shape[0] == 0:
    raise ValueError("No valid data available after preprocessing.")

# Splitting the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the teacher model (larger model)
teacher = keras.Sequential([
    keras.layers.InputLayer(input_shape=(X_train.shape[1],)),
    keras.layers.Dense(256, activation='relu'),
    keras.layers.Dropout(0.5),  # Dropout layer to prevent overfitting
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dense(y.shape[1], activation='softmax')  # Output probabilities
])
teacher.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) # Changed loss/metrics

# Define learning rate and training rate
learning_rate = 0.001  # Learning rate
training_rate = 100  # Number of epochs for training

# Train the teacher model with EarlyStopping and ReduceLROnPlateau
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

teacher.fit(X_train, y_train, epochs=training_rate, batch_size=32, validation_split=0.1, verbose=1, callbacks=[early_stopping, reduce_lr])

# Define the student model (smaller model)
student = keras.Sequential([
    keras.layers.InputLayer(input_shape=(X_train.shape[1],)),
    keras.layers.Dense(128, activation='relu'),
    keras.layers.Dropout(0.3),  # Dropout layer to prevent overfitting
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(y.shape[1], activation='softmax')  # Output probabilities
])

# Define knowledge distillation loss function
class DistillationLoss(keras.losses.Loss):
    def __init__(self, temperature=3.0):
        super().__init__()
        self.temperature = temperature

    def call(self, y_true, y_pred):
        # Softmax probabilities for teacher and student
        teacher_probs = tf.nn.softmax(y_true / self.temperature, axis=1)
        student_probs = tf.nn.softmax(y_pred / self.temperature, axis=1)

        # Calculate KL divergence
        kl_divergence = tf.keras.losses.KLDivergence()(teacher_probs, student_probs)
        return kl_divergence

# Compile and train the student model
student.compile(optimizer=keras.optimizers.Adam(learning_rate=learning_rate), loss=DistillationLoss(), metrics=['accuracy'])  # Changed metric

# Train with early stopping and learning rate reduction
student.fit(X_train, y_train, epochs=training_rate, batch_size=32, validation_split=0.1, verbose=1, callbacks=[early_stopping, reduce_lr])

# Evaluate the student model
loss, accuracy = student.evaluate(X_test, y_test, verbose=1)
accuracy_percentage = accuracy * 100
print(f'Student Model Accuracy: {accuracy_percentage:.2f}%')

# Make predictions - Decode One-Hot Encoded Predictions
predictions = student.predict(X_test)
predicted_labels = np.argmax(predictions, axis=1)

# Decode Labels
original_labels = label_encoder.inverse_transform(predicted_labels)

print('Predictions:', original_labels[:10])

# Step 1: Create the results table
results = {
    "Learning Rate": [learning_rate],
    "Training Rate": [training_rate],
    "Student Model Accuracy (%)": [accuracy_percentage],
}

results_table = pd.DataFrame(results)

# Display the table of results
print("\nModel Training Results Table:")
print(results_table)


  return np.nanmean(a, axis, out=out, keepdims=keepdims)


Shape of X: (1716, 6039), Shape of y: (1716, 2)


  updated_mean = (last_sum + new_sum) / updated_sample_count
  T = new_sum / new_sample_count
  new_unnormalized_variance -= correction**2 / new_sample_count


Epoch 1/100
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - accuracy: 0.9984 - loss: 0.6836 - val_accuracy: 0.9855 - val_loss: 0.6562 - learning_rate: 0.0010
Epoch 2/100
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.9984 - loss: 0.6461 - val_accuracy: 0.9855 - val_loss: 0.6213 - learning_rate: 0.0010
Epoch 3/100
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.9984 - loss: 0.6108 - val_accuracy: 0.9855 - val_loss: 0.5885 - learning_rate: 0.0010
Epoch 4/100
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.9984 - loss: 0.5775 - val_accuracy: 0.9855 - val_loss: 0.5578 - learning_rate: 0.0010
Epoch 5/100
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.9984 - loss: 0.5463 - val_accuracy: 0.9855 - val_loss: 0.5289 - learning_rate: 0.0010
Epoch 6/100
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1