Imitative Learning

In [None]:
# Load the raw dialogue export into a DataFrame.
# The path is relative, so it resolves against the notebook's working directory.
import pandas as pd

DATA_CSV = 'FINAL_MATHDIAL - final.csv'
dataset = pd.read_csv(DATA_CSV)

In [None]:
# Split the frame into model inputs (state) and supervised targets (action).
# Rows with no recorded next action cannot be used as training examples, so they are dropped first.
has_action = dataset['next_action_id'].notna()
dataset = dataset[has_action]

state_columns = [
    'misconception_type',
    'convo_turn',
    'done',
    'listen_to_feedback',
    'problem_progress',
    'progress_delta',
    'correct_solution',
    'next_action_hint_strength',
]
state = dataset.loc[:, state_columns]
# Kept as a one-column DataFrame (not a Series), so `.values` is 2-D: (n_rows, 1).
action = dataset.loc[:, ['next_action_id']]

In [None]:
# State columns grouped by how they are preprocessed downstream:
# categorical columns go through pd.get_dummies, continuous ones through a scaler.
categorical_cols = [
    'misconception_type',
    'done',
    'listen_to_feedback',
    'correct_solution',
    'next_action_hint_strength',
]
continuous_cols = [
    'convo_turn',
    'problem_progress',
    'progress_delta',
]

In [None]:
from sklearn.preprocessing import StandardScaler
import torch  # used below; previously only imported in a later cell, which broke Restart & Run All

# --- Categorical features: one-hot encode -------------------------------------
# NOTE(review): with no `columns=` argument, get_dummies only expands
# object/string/category columns; numerically coded categoricals pass through
# unchanged. Confirm that matches the dtypes in the CSV.
state_cat = pd.get_dummies(state[categorical_cols], drop_first=False).reset_index(drop=True)

# --- Continuous features: standardise -----------------------------------------
# The scaler must be instantiated before use (the original referenced an
# undefined `scaler`). It is kept as a module-level name so the same fitted
# transform can be reapplied to new states.
scaler = StandardScaler()
state_cont = pd.DataFrame(
    scaler.fit_transform(state[continuous_cols]),
    columns=continuous_cols
).reset_index(drop=True)

# Both halves carry a fresh 0..n-1 index, so the column-wise concat aligns row by row.
state_processed = pd.concat([state_cat, state_cont], axis=1)

# Cast explicitly to float32: one-hot columns may be bool, and a bool/float mix
# can come out of `.values` as an object array, which torch.tensor cannot convert.
state_tensor = torch.tensor(state_processed.to_numpy(dtype='float32'), dtype=torch.float32)
# `action` is a one-column DataFrame, so this tensor has shape (n_rows, 1).
action_tensor = torch.tensor(action.values, dtype=torch.long)

In [None]:
from torch.utils.data import Dataset, DataLoader
import torch

class ImitationDataset(Dataset):
    """Map-style dataset that pairs each state with its recorded action.

    `states` and `actions` are stored exactly as passed in and are indexed
    in parallel. The reported length comes from `states` alone.
    """

    def __init__(self, states, actions):
        self.states, self.actions = states, actions

    def __len__(self):
        n_samples = len(self.states)
        return n_samples

    def __getitem__(self, idx):
        # Same index into both containers -> one (state, action) pair.
        state_at_idx = self.states[idx]
        action_at_idx = self.actions[idx]
        return state_at_idx, action_at_idx

# Wrap the tensors and serve them in reshuffled mini-batches of 64.
# NOTE(review): this rebinds `dataset`, which the loading cell bound to the raw
# DataFrame. Re-running the earlier DataFrame cells after this one will fail
# until the loading cell is re-run.
dataset = ImitationDataset(states=state_tensor, actions=action_tensor)
dataloader = DataLoader(dataset=dataset, shuffle=True, batch_size=64)

In [None]:
import torch
import torch.nn as nn
import torch.nn.init as init

class PolicyNetwork(nn.Module):
    """Feed-forward policy: state features in, one raw score (logit) per action out.

    Architecture: Linear(state_dim, 256) -> ReLU -> Linear(256, 128) -> ReLU
    -> Linear(128, action_dim). No softmax is applied in `forward`.

    Args:
        state_dim: Number of input features per state.
        action_dim: Number of output scores (one per action class).
    """

    def __init__(self, state_dim, action_dim):
        super().__init__()

        self.fc1 = nn.Linear(state_dim, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, action_dim)
        self.relu = nn.ReLU()

        # The two hidden layers feed a ReLU, so their weights use Kaiming-uniform
        # init; the output layer uses Xavier-uniform. All biases start at zero.
        for hidden_layer in (self.fc1, self.fc2):
            init.kaiming_uniform_(hidden_layer.weight, nonlinearity='relu')
            init.zeros_(hidden_layer.bias)
        init.xavier_uniform_(self.fc3.weight)
        init.zeros_(self.fc3.bias)

    def forward(self, x):
        """Return the unnormalised action scores for a batch of states `x`."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [None]:
import torch.optim as optim

# Network sizes are derived from the prepared tensors.
input_dim = state_tensor.shape[1]
output_dim = action_tensor.max().item() + 1

# Class indices for CrossEntropyLoss are made zero-based by subtracting the
# smallest action id. The offset is computed ONCE over the whole dataset:
# a per-batch minimum would map the same action id to different class indices
# depending on which samples the shuffle put in the batch.
action_offset = action_tensor.min()

model = PolicyNetwork(input_dim, output_dim)
optimizer = optim.Adam(model.parameters(), lr=1e-2)
criterion = nn.CrossEntropyLoss()

num_epochs = 100

for epoch in range(num_epochs):
    total_loss = 0
    for batch_states, batch_actions in dataloader:
        # reshape(-1) flattens the batch's targets to 1-D and still gives shape
        # (1,) for a final batch of one sample; a bare squeeze() would collapse
        # that case to a 0-d tensor that no longer matches the (1, C) logits.
        targets = (batch_actions - action_offset).reshape(-1)

        outputs = model(batch_states)

        loss = criterion(outputs, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Sum of per-batch mean losses over the epoch (not a per-sample average).
    print(f"Epoch {epoch+1}/{num_epochs}, Total Loss: {total_loss:.4f}")

Epoch 1/100, Total Loss: 503.3179
Epoch 2/100, Total Loss: 454.2665
Epoch 3/100, Total Loss: 439.9753
Epoch 4/100, Total Loss: 419.4604
Epoch 5/100, Total Loss: 422.8289
Epoch 6/100, Total Loss: 422.4335
Epoch 7/100, Total Loss: 407.5324
Epoch 8/100, Total Loss: 413.5846
Epoch 9/100, Total Loss: 416.0070
Epoch 10/100, Total Loss: 413.1643
Epoch 11/100, Total Loss: 403.7424
Epoch 12/100, Total Loss: 414.7692
Epoch 13/100, Total Loss: 401.1116
Epoch 14/100, Total Loss: 411.7721
Epoch 15/100, Total Loss: 407.0780
Epoch 16/100, Total Loss: 399.3377
Epoch 17/100, Total Loss: 397.3970
Epoch 18/100, Total Loss: 394.2145
Epoch 19/100, Total Loss: 401.7543
Epoch 20/100, Total Loss: 407.2274
Epoch 21/100, Total Loss: 398.0978
Epoch 22/100, Total Loss: 418.2607
Epoch 23/100, Total Loss: 404.8504
Epoch 24/100, Total Loss: 412.8212
Epoch 25/100, Total Loss: 399.0020
Epoch 26/100, Total Loss: 404.3587
Epoch 27/100, Total Loss: 402.4156
Epoch 28/100, Total Loss: 402.6145
Epoch 29/100, Total Loss: 399