In [1]:

import json

# Load the annotated dialogue. Later cells index `data` by position and read
# the 'responder' and 'states' keys of each entry.
# The encoding is given explicitly because open() otherwise falls back to the
# platform's locale encoding, which is not necessarily UTF-8.
with open('final_updated_classification.json', encoding='utf-8') as f:
    data = json.load(f)
    
# Vector index assigned to every teacher state label. The position of a label
# in the list below is its index (0-15).
teacher_state_positions = {
    label: index
    for index, label in enumerate([
        'Topic Open',
        'Topic Ask',
        'Importance',
        'Short Explanation',
        'Detailed Explanation',
        'Fact',
        'Example',
        'Story',
        'Clarification',
        'Answer',
        'Open Ask',
        'Question Ask',
        'Answer Respond',
        'Connect',
        'Branch',
        'Other',
    ])
}

# Vector index assigned to every student state label. Numbering continues
# right after the teacher block, i.e. 16-32. Note that 'Answer', 'Connect'
# and 'Other' also appear in the teacher vocabulary with different indices.
student_state_positions = {
    label: index
    for index, label in enumerate([
        'Topic Request',
        'Request',
        'Open Response',
        'Answer',
        'Correction',
        'Aware',
        'Unaware',
        'Unclear',
        'Misunderstood',
        'Understood',
        'Agree',
        'Disagree',
        'Ask Question',
        'Learn Emotional',
        'Pondering',
        'Connect',
        'Other',
    ], start=16)
}

# Vocabulary sizes; they determine the widths of the vectors built below.
no_of_teacher_states = len(teacher_state_positions)
no_of_student_states = len(student_state_positions)

n = len(data)

# Each sample is (input_vector, output_vector):
#   input_vector  - multi-hot over teacher + student states, marking the
#                   states of turn i (teacher) and turn i+1 (reply)
#   output_vector - multi-hot over teacher states, marking the states of
#                   turn i+2 (the teacher's next turn)
input_output_pairs = []

# Slide a three-turn window over the dialogue; range(n - 2) guarantees that
# i + 2 is always a valid index (and yields nothing when n < 3).
for i in range(n - 2):
    curr, nex, nexnex = data[i], data[i + 1], data[i + 2]

    # Keep only Teacher -> non-Teacher -> Teacher windows. Checking just the
    # first turn is not enough: 'Answer', 'Connect' and 'Other' exist in both
    # vocabularies, so a misaligned window (e.g. two teacher turns in a row)
    # would be encoded at the wrong indices without raising any error.
    if (
        curr['responder'] != 'Teacher'
        or nex['responder'] == 'Teacher'
        or nexnex['responder'] != 'Teacher'
    ):
        continue

    input_vector = [0] * (no_of_teacher_states + no_of_student_states)
    output_vector = [0] * no_of_teacher_states

    for state in curr['states']:
        input_vector[teacher_state_positions[state]] = 1

    # Student indices are already offset past the teacher block, so they can
    # be used directly on the combined input vector.
    for state in nex['states']:
        input_vector[student_state_positions[state]] = 1

    for state in nexnex['states']:
        output_vector[teacher_state_positions[state]] = 1

    input_output_pairs.append((input_vector, output_vector))

print(f"Prepared {len(input_output_pairs)} input-output pairs")


Prepared 86 input-output pairs


In [2]:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np

# Split the (input_vector, output_vector) pairs into two float32 matrices:
# X holds one input vector per row, Y the matching output vector.
X = np.array([features for features, _ in input_output_pairs], dtype=np.float32)
Y = np.array([labels for _, labels in input_output_pairs], dtype=np.float32)

class TeachingStateDataset(Dataset):
    """Dataset pairing each input row with the target row at the same index.

    Attributes:
        X: tensor built from ``inputs``.
        Y: tensor built from ``targets``.
    """

    def __init__(self, inputs, targets):
        """Convert ``inputs`` and ``targets`` to tensors and store them.

        ``torch.tensor`` copies its argument, so the stored tensors do not
        share memory with the objects passed in.
        """
        self.Y = torch.tensor(targets)
        self.X = torch.tensor(inputs)

    def __len__(self):
        """Return the number of samples, i.e. the size of X's first dimension."""
        return self.X.shape[0]

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair stored at position ``idx``."""
        sample = self.X[idx]
        label = self.Y[idx]
        return sample, label

# Wrap the arrays in a Dataset and serve them in shuffled mini-batches of 8.
dataset = TeachingStateDataset(inputs=X, targets=Y)
train_loader = DataLoader(dataset, shuffle=True, batch_size=8)


In [3]:

class StatePredictorMLP(nn.Module):
    """Feed-forward network: input_size -> 64 -> 32 -> output_size.

    Hidden layers use ReLU, with dropout (p=0.2) after the first one. The
    final Sigmoid squashes every output unit independently into [0, 1].
    """

    def __init__(self, input_size, output_size):
        """Create the layer stack.

        Args:
            input_size: number of features in each input vector.
            output_size: number of output units.
        """
        super().__init__()
        layers = [
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, output_size),
            nn.Sigmoid(),
        ]
        # Positional construction keeps the sub-module names '0'..'6', so
        # state_dict keys stay 'net.0.weight', 'net.3.weight', 'net.5.weight'.
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the sigmoid outputs."""
        probabilities = self.net(x)
        return probabilities

# Seed PyTorch's global RNG before the model is created so that the weight
# initialisation (and later draws from the global RNG) repeat when the
# notebook is re-run from a fresh kernel.
torch.manual_seed(42)

# Layer widths come from the state vocabularies instead of the literals
# 33 / 16, so the model keeps matching the vectors if a state label is added:
#   input  = teacher states + student states
#   output = teacher states
model = StatePredictorMLP(
    input_size=no_of_teacher_states + no_of_student_states,
    output_size=no_of_teacher_states,
)
# BCELoss expects probabilities, which the model's final Sigmoid provides.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [4]:

num_epochs = 50

# Train for a fixed number of passes over train_loader.
for epoch_number in range(1, num_epochs + 1):
    model.train()  # training mode, so the Dropout layer is active
    running_loss = 0.0

    for batch_inputs, batch_targets in train_loader:
        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()

        batch_loss = criterion(model(batch_inputs), batch_targets)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()

    # The reported figure is the sum of the per-batch losses for this epoch,
    # not an average over batches.
    print(f"Epoch {epoch_number}/{num_epochs}, Loss: {running_loss:.4f}")


Epoch 1/50, Loss: 7.7091
Epoch 2/50, Loss: 7.5075
Epoch 3/50, Loss: 7.1811
Epoch 4/50, Loss: 6.6435
Epoch 5/50, Loss: 5.8012
Epoch 6/50, Loss: 4.8108
Epoch 7/50, Loss: 4.1678
Epoch 8/50, Loss: 3.8403
Epoch 9/50, Loss: 3.7825
Epoch 10/50, Loss: 3.6940
Epoch 11/50, Loss: 3.6669
Epoch 12/50, Loss: 3.6236
Epoch 13/50, Loss: 3.6168
Epoch 14/50, Loss: 3.6056
Epoch 15/50, Loss: 3.5828
Epoch 16/50, Loss: 3.5298
Epoch 17/50, Loss: 3.5358
Epoch 18/50, Loss: 3.5108
Epoch 19/50, Loss: 3.4600
Epoch 20/50, Loss: 3.4308
Epoch 21/50, Loss: 3.4100
Epoch 22/50, Loss: 3.3728
Epoch 23/50, Loss: 3.3630
Epoch 24/50, Loss: 3.3278
Epoch 25/50, Loss: 3.2464
Epoch 26/50, Loss: 3.2446
Epoch 27/50, Loss: 3.2111
Epoch 28/50, Loss: 3.1454
Epoch 29/50, Loss: 3.1645
Epoch 30/50, Loss: 3.0822
Epoch 31/50, Loss: 3.0938
Epoch 32/50, Loss: 3.0295
Epoch 33/50, Loss: 3.0006
Epoch 34/50, Loss: 3.0132
Epoch 35/50, Loss: 2.9567
Epoch 36/50, Loss: 2.9535
Epoch 37/50, Loss: 2.8968
Epoch 38/50, Loss: 2.8864
Epoch 39/50, Loss: 2.

In [5]:

# Spot check: compare thresholded predictions with the targets for the first
# sample of one batch. The batch comes from train_loader, so this looks at
# training data, not at held-out data.
model.eval()  # inference mode, so the Dropout layer is disabled
with torch.no_grad():
    first_batch = next(iter(train_loader), None)
    if first_batch is not None:  # an empty loader prints nothing
        inputs, targets = first_batch
        outputs = model(inputs)
        # A state counts as predicted when its output exceeds 0.5.
        predictions = (outputs > 0.5).int()
        print("Predicted:", predictions[0])
        print("Actual   :", targets[0])


Predicted: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0], dtype=torch.int32)
Actual   : tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0.])


In [7]:
# Save the model's state_dict (not the module object itself) to disk.
trained_weights = model.state_dict()
torch.save(trained_weights, 'teacher_state_predictor.pth')