In [6]:
# 1. Imports and GPU Setup

import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

# Pick the compute device: CUDA when PyTorch reports a usable GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


Using device: cuda


In [7]:
# 2. Dataset Preparation

# Load the grading dataset; the path is relative to the notebook's working directory.
df = pd.read_csv("Cleaned_Expanded_Students_Grading_Dataset.csv")

# Columns that, together, form the per-student feature vector used as the state.
selected_features = [
    'Attendance (%)', 'Midterm_Score', 'Final_Score', 'Assignments_Avg',
    'Quizzes_Avg', 'Participation_Score', 'Projects_Score', 'Total_Score',
    'Stress_Level (1-10)', 'Sleep_Hours_per_Night', 'life_satisfaction',
    'has_close_friends', 'is_bullied', 'disrespected_by_peers', 'participates_in_activities'
]

# Columns recoded from 'Yes'/'No' to 1/0 below.
binary_columns = ['has_close_friends', 'is_bullied', 'disrespected_by_peers', 'participates_in_activities']

# Fixed-seed sample of 2000 rows so the subset is the same on every run.
df_subset = df.sample(n=2000, random_state=42).reset_index(drop=True)

# Keep the ID next to the features; .copy() so the recoding below never touches df_subset.
student_profiles = df_subset[['Student_ID'] + selected_features].copy()
yes_no_codes = {'Yes': 1, 'No': 0}
for col in binary_columns:
    student_profiles[col] = student_profiles[col].map(yes_no_codes)

# Missing values become 0. This also covers any binary entry that was not exactly
# 'Yes' or 'No', because Series.map() yields NaN for unmapped values.
student_profiles = student_profiles.fillna(0).reset_index(drop=True)

# Initialize environment before model loading
# NOTE(review): StudentAllocationEnv is defined in a later cell of this notebook.
# On a fresh kernel (Restart & Run All) this line raises NameError unless that
# cell has already been executed - consider moving it below the class definition.
env = StudentAllocationEnv(student_profiles)

In [8]:
# 3. Environment Class

class StudentAllocationEnv:
    """Sequential environment that places students into classrooms one at a time.

    Every episode visits each row of ``student_profiles`` exactly once, in a
    random order. The state is the feature vector of the student currently
    waiting to be placed; the action is the id of the classroom they are put in.

    Students are identified internally by their *positional* row index in
    ``student_profiles`` (all lookups use ``.iloc``).
    """

    def __init__(self, student_profiles, num_classrooms=10):
        """Store the profiles and start the first episode.

        Args:
            student_profiles: DataFrame with a ``'Student_ID'`` column plus the
                feature columns. The reward reads ``'Total_Score'``,
                ``'Stress_Level (1-10)'``, ``'life_satisfaction'``,
                ``'has_close_friends'``, ``'is_bullied'`` and
                ``'disrespected_by_peers'``.
            num_classrooms: Number of classrooms, i.e. the size of the action space.

        Raises:
            ValueError: If there are no students or no classrooms to allocate.
        """
        if len(student_profiles) == 0:
            raise ValueError("student_profiles must contain at least one student")
        if num_classrooms < 1:
            raise ValueError("num_classrooms must be at least 1")
        self.student_profiles = student_profiles
        self.num_students = len(student_profiles)
        self.num_classrooms = num_classrooms
        self.reset()

    def reset(self):
        """Empty every classroom, reshuffle the students and return the first state."""
        self.unassigned_students = list(range(self.num_students))
        self.classrooms = {i: [] for i in range(self.num_classrooms)}
        random.shuffle(self.unassigned_students)
        self.current_student_idx = self.unassigned_students.pop()
        return self.get_state()

    def get_state(self):
        """Return the current student's features (everything except ``Student_ID``).

        The result is a 1-D float32 tensor placed on the notebook-level ``device``.
        """
        row = self.student_profiles.iloc[self.current_student_idx].drop('Student_ID')
        # Convert through float32 explicitly: a row taken from a mixed-dtype frame
        # can come back as an object array, which torch.tensor() cannot ingest.
        student_features = row.to_numpy(dtype=np.float32)
        return torch.tensor(student_features, dtype=torch.float32, device=device)

    def step(self, action):
        """Place the current student in classroom ``action`` and advance.

        Args:
            action: Classroom id, a key of ``self.classrooms``.

        Returns:
            ``(next_state, reward, done)``. ``next_state`` is ``None`` once the
            last student has been placed (``done`` is ``True``).

        Raises:
            KeyError: If ``action`` is not a valid classroom id.
        """
        classroom_id = action
        self.classrooms[classroom_id].append(self.current_student_idx)
        # The reward must be computed before current_student_idx moves on,
        # because calculate_reward() scores the student that was just placed.
        reward = self.calculate_reward(classroom_id)
        done = len(self.unassigned_students) == 0
        if done:
            return None, reward, done
        self.current_student_idx = self.unassigned_students.pop()
        return self.get_state(), reward, done

    def calculate_reward(self, classroom_id):
        """Score the placement of the current student into ``classroom_id``.

        Returns 0 while the classroom holds fewer than two students. Otherwise:

        * +10 per classmate with close friends, if the current student has close friends;
        * +10 per classmate with close friends, if the current student is bullied;
        * -10 per classmate flagged ``disrespected_by_peers``, if the current student is too;
        * +5 if the classroom's mean ``Total_Score`` lies in [60, 80];
        * +5 if its mean stress lies in [4, 7] and mean life satisfaction is >= 5.

        The per-classmate terms deliberately exclude the current student. They
        are already in the classroom list when this runs, and comparing them
        with themself would inflate every count by one. The classroom averages
        do include the current student.
        """
        members = self.classrooms[classroom_id]
        if len(members) < 2:
            return 0

        profiles = self.student_profiles
        current_student = profiles.iloc[self.current_student_idx]
        roster = profiles.iloc[members]
        peers = profiles.iloc[[idx for idx in members if idx != self.current_student_idx]]

        peers_with_friends = int((peers['has_close_friends'] == 1).sum())
        peers_disrespected = int((peers['disrespected_by_peers'] == 1).sum())

        friends_in_class = peers_with_friends if current_student['has_close_friends'] == 1 else 0
        # NOTE(review): a bullied student is rewarded per classmate who has close
        # friends. Kept as in the original; confirm this is the intended pairing
        # rather than a check of the classmate's own is_bullied flag.
        bullied_in_class = peers_with_friends if current_student['is_bullied'] == 1 else 0
        disrespect_in_class = peers_disrespected if current_student['disrespected_by_peers'] == 1 else 0

        avg_total_score = np.mean(roster['Total_Score'].to_numpy())
        avg_stress = np.mean(roster['Stress_Level (1-10)'].to_numpy())
        avg_life_satisfaction = np.mean(roster['life_satisfaction'].to_numpy())

        reward = 10 * friends_in_class + 10 * bullied_in_class
        if 60 <= avg_total_score <= 80:
            reward += 5
        if 4 <= avg_stress <= 7 and avg_life_satisfaction >= 5:
            reward += 5
        reward -= 10 * disrespect_in_class
        return reward

In [9]:
# 4. Q-Network & Agent

class QNetwork(nn.Module):
    """Three-layer fully connected network: state vector in, one Q-value per action out."""

    def __init__(self, state_size, action_size):
        """Create the layers: ``state_size`` -> 128 -> 128 -> ``action_size``."""
        super().__init__()
        hidden_units = 128
        # The attribute names fc1/fc2/fc3 are the state_dict keys of saved checkpoints.
        self.fc1 = nn.Linear(state_size, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, action_size)

    def forward(self, x):
        """Apply ReLU after each hidden layer; the output layer is left linear."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden)

class DQNAgent:
    """Epsilon-greedy Q-learning agent that updates online, one transition at a time.

    There is no replay buffer and no separate target network: ``train`` performs
    a single gradient step on the transition it is given, using ``self.model``
    both for the prediction and for the bootstrap value.
    """

    def __init__(self, state_size, action_size, learning_rate=0.001, gamma=0.99, epsilon_start=1.0, epsilon_end=0.1, epsilon_decay=0.995):
        """Build the Q-network, its Adam optimizer and the exploration schedule.

        Args:
            state_size: Length of the state vector fed to the network.
            action_size: Number of discrete actions (network outputs).
            learning_rate: Adam learning rate.
            gamma: Discount factor applied to the bootstrapped next-state value.
            epsilon_start: Initial probability of choosing a random action.
            epsilon_end: Floor that epsilon is decayed towards.
            epsilon_decay: Multiplicative decay applied to epsilon after each ``train`` call.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.gamma = gamma
        self.epsilon = epsilon_start
        self.epsilon_min = epsilon_end
        self.epsilon_decay = epsilon_decay
        self.model = QNetwork(state_size, action_size).to(device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        self.loss_fn = nn.MSELoss()

    def get_action(self, state):
        """Return a random action with probability ``epsilon``, else the arg-max Q action.

        Note that a freshly constructed agent has ``epsilon == epsilon_start``
        (1.0 by default), so every action is random until epsilon is decayed
        by training or lowered by the caller.
        """
        if np.random.rand() < self.epsilon:
            return random.randrange(self.action_size)
        with torch.no_grad():
            return torch.argmax(self.model(state)).item()

    def train(self, state, action, reward, next_state, done):
        """Take one gradient step on a single transition, then decay epsilon.

        Args:
            state: 1-D state tensor the action was chosen in.
            action: Index of the action taken.
            reward: Scalar reward received.
            next_state: 1-D tensor of the following state; may be ``None`` when ``done``.
            done: ``True`` if the transition ended the episode (no bootstrapping).
        """
        q_values = self.model(state.unsqueeze(0))
        # Only the taken action's target differs from the current prediction, so
        # the other outputs contribute zero error to the MSE loss.
        q_target = q_values.clone().detach()
        if done:
            q_target[0, action] = reward
        else:
            # The bootstrap value is used as a plain number, so skip building
            # an autograd graph for this forward pass.
            with torch.no_grad():
                best_next_q = torch.max(self.model(next_state.unsqueeze(0))).item()
            q_target[0, action] = reward + self.gamma * best_next_q

        self.optimizer.zero_grad()
        loss = self.loss_fn(q_values, q_target)
        loss.backward()
        self.optimizer.step()

        if self.epsilon > self.epsilon_min:
            # Clamp so the last decay step cannot push epsilon below its floor.
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

In [10]:
# 5. Load Trained Model

MODEL_PATH = 'classroom_allocation_dqn_model_final.pth'

# Network dimensions come from the environment, so they match the data prepared above.
state_size = env.get_state().shape[0]
action_size = env.num_classrooms
agent = DQNAgent(state_size, action_size)

# map_location moves the checkpoint tensors onto the active device, so a checkpoint
# saved on a GPU machine still loads on a CPU-only one.
# weights_only=True restricts unpickling to tensors and plain containers, which is
# all a state_dict needs, and avoids executing arbitrary pickled code.
state_dict = torch.load(MODEL_PATH, map_location=device, weights_only=True)
agent.model.load_state_dict(state_dict)
agent.model.eval()
print("Trained model loaded successfully.")

Trained model loaded successfully.


  agent.model.load_state_dict(torch.load('classroom_allocation_dqn_model_final.pth'))


In [11]:
# 6. Sample Allocation Test

# Allocate a few students using trained model

def test_allocation(env, agent, num_students=10):
    env.reset()
    assigned = []
    for _ in range(num_students):
        state = env.get_state()
        action = agent.get_action(state)
        next_state, reward, done = env.step(action)
        student_id = env.student_profiles.iloc[env.current_student_idx]['Student_ID']
        assigned.append((student_id, action))
        if done:
            break
    return assigned

In [12]:
# Allocate ten students and list which classroom each one was given.
allocations = test_allocation(env, agent, num_students=10)
report_lines = [f"Student {sid} → Classroom {cls}" for sid, cls in allocations]
print("\nSample Allocations:")
for line in report_lines:
    print(line)


Sample Allocations:
Student 20588.0 → Classroom 9
Student 6657.0 → Classroom 8
Student 16068.0 → Classroom 4
Student 9388.0 → Classroom 4
Student 15870.0 → Classroom 8
Student 1785.0 → Classroom 9
Student 17222.0 → Classroom 5
Student 1098.0 → Classroom 7
Student 19234.0 → Classroom 3
Student 6365.0 → Classroom 2
