In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from typing import List, Tuple, Dict
from dataclasses import dataclass
import random
from torch.utils.data import Dataset, DataLoader

In [2]:
# Constants:
# Display names of the faculties; UniversityEnvironment looks names up here by faculty index.
FACULTY_NAMES = ['Computer Science', 'Economics', 'Psychology', 'Law', 'Art']

In [3]:

class UniversityMLP(nn.Module):
    """Feed-forward network used for the university's decisions.

    Maps a batch of applicant feature vectors to one raw (unbounded) score
    per faculty through a single 32-unit ReLU hidden layer.
    """

    def __init__(self, n_features: int, n_faculties: int):
        super().__init__()
        hidden_units = 32
        # Layer order is kept so that state-dict keys stay network.0.* / network.2.*
        layers = [
            nn.Linear(n_features, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, n_faculties),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the per-faculty scores for the batch ``x``."""
        scores = self.network(x)
        return scores

class ApplicantMLP(nn.Module):
    """Feed-forward classifier used for the applicants' decisions.

    A single 32-unit ReLU hidden layer followed by a softmax over faculties,
    so ``forward`` returns probabilities (each row sums to 1), not logits.
    """

    def __init__(self, n_features: int, n_faculties: int):
        super().__init__()
        hidden_units = 32
        # Layer order is kept so that state-dict keys stay network.0.* / network.2.*
        layers = [
            nn.Linear(n_features, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, n_faculties),
            nn.Softmax(dim=1),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return the faculty probability distribution for each row of ``x``."""
        probabilities = self.network(x)
        return probabilities

In [4]:
@dataclass
class FacultyParams:
    """Parameters for each faculty."""
    name: str  # Display name of the faculty
    utility_vector: np.ndarray  # Hidden vector that determines student success
    # Number of spots available. Annotated as float (not int) because the
    # environment stores np.inf here to mean "unlimited".
    capacity: float

@dataclass
class SupplierParams:
    """Parameters for each preparation supplier"""
    name: str  # Label of the supplier
    diff_vector: np.ndarray  # How this supplier modifies student features (added element-wise)

class UniversityEnvironment:
    def __init__(
        self,
        n_features: int = 10,  # Number of student features (e.g., math, english, etc.)
        n_faculties: int = 5,  # Number of different faculties
        n_suppliers: int = 7,  # Number of preparation suppliers
        noise_range: Tuple[float, float] = (-5, 5)  # Range for uniform noise
    ):
        """Build the faculties and suppliers of a simulated university.

        Args:
            n_features: Number of numeric features describing each student.
            n_faculties: Number of faculties. Names come from FACULTY_NAMES;
                faculties beyond that list are named ``Faculty_<i>``.
            n_suppliers: Number of preparation suppliers.
            noise_range: (low, high) bounds of the uniform noise added to grades.
        """
        self.n_features = n_features
        self.n_faculties = n_faculties
        self.n_suppliers = n_suppliers
        self.noise_range = noise_range

        # Initialize faculties with normalized random utility vectors.
        # Fall back to a generated name instead of raising IndexError when
        # more faculties are requested than FACULTY_NAMES provides.
        self.faculties = [
            FacultyParams(
                name=FACULTY_NAMES[i] if i < len(FACULTY_NAMES) else f"Faculty_{i}",
                utility_vector=self._create_normalized_vector(n_features),
                capacity=np.inf  # As per description, infinite capacity
            )
            for i in range(n_faculties)
        ]

        # Initialize suppliers: each one adds 20 points to a single randomly
        # chosen feature and leaves every other feature unchanged.
        self.suppliers = []
        for i in range(n_suppliers):
            # Two distinct indices are still drawn (only the first is used) so
            # the random stream matches the original implementation.
            boosted_idx, _ = np.random.choice(n_features, size=2, replace=False)
            diff_vector = np.zeros(n_features, dtype=int)
            diff_vector[boosted_idx] = 20
            self.suppliers.append(
                SupplierParams(name=f"Supplier_{i}", diff_vector=diff_vector)
            )

        # Populated by generate_past_applicants / generate_current_applicants.
        self.past_applicants_df = None
        self.current_applicants_df = None

    def _create_normalized_vector(self, size: int) -> np.ndarray:
        """
        Create a normalized random vector of given size.
        Normalization ensures ||vector|| = 1
        """
        vector = np.random.uniform(0, 1, size)
        # Normalize the vector to unit length
        return vector / np.linalg.norm(vector, ord=1)
    
    def _generate_truncated_normal_features(self, n_samples: int) -> np.ndarray:
        """
        Generate features using truncated normal distribution between 55 and 100.
        Uses mean at center of range (77.5) and std that makes the distribution fit well in the range.
        """
        mean = 70
        std = 25 # This ensures ~99.7% of values fall within range before truncation
        
        features = np.random.normal(mean, std, (n_samples, self.n_features))
        
        # Truncate values to be within [40, 100]
        features = np.clip(features, 40, 100)
        
        return features
    
    def generate_past_applicants(
        self,
        n_applicants: int = 1000
    ) -> pd.DataFrame:
        """Generate dataset of past applicants with their outcomes"""
        # Generate random feature vectors
        features = self._generate_truncated_normal_features(n_applicants)
        
        # Randomly assign faculty for each applicant
        df = pd.DataFrame(features, columns=[f"feature_{i}" for i in range(self.n_features)])
        df['assigned_faculty'] = np.random.randint(0, self.n_faculties, n_applicants)
        
        # Calculate grade only for assigned faculty
        faculty_vectors = np.array([f.utility_vector for f in self.faculties])
        grades = np.zeros(n_applicants)
        # Get faculty vectors for each applicant based on their assigned faculty
        faculty_vectors_per_applicant = faculty_vectors[df['assigned_faculty']]
        
        # Calculate base grades using matrix multiplication
        base_grades = np.sum(features * faculty_vectors_per_applicant, axis=1)
        
        # Generate noise for all applicants at once
        noise = np.random.uniform(*self.noise_range, size=n_applicants)
        
        # Calculate final grades
        grades = base_grades + noise
            
        df['final_grade'] = grades
        self.past_applicants_df = df
        return df

    def generate_current_applicants(
        self,
        n_applicants: int = 100
    ) -> pd.DataFrame:
        """Generate dataset of current applicants"""
        # Generate random feature vectors
        features = self._generate_truncated_normal_features(n_applicants)
        
        # Create DataFrame
        feature_cols = [f"feature_{i}" for i in range(self.n_features)]
        df = pd.DataFrame(features, columns=feature_cols)
        
        # Add desired faculty (random)
        df['desired_faculty'] = np.random.randint(0, self.n_faculties, n_applicants)
        
        self.current_applicants_df = df
        return df
    
    def train_applicant_model(
        self,
        past_data: pd.DataFrame = None
    ) -> ApplicantMLP:
        """Train applicant model on past data"""
        if past_data is None:
            past_data = self.past_applicants_df
        
        if past_data is None:
            raise ValueError("No past data available. Generate past applicants first.")
        
        # Create and train applicant's MLP model
        feature_cols = [f"feature_{i}" for i in range(self.n_features)]
        X_train = torch.FloatTensor(past_data[feature_cols].values)
        y_train = torch.LongTensor(past_data['assigned_faculty'].values)
        
        model = ApplicantMLP(self.n_features, self.n_faculties)
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
        
        # Train the model
        model.train()
        for epoch in range(100):  # Quick training, adjust epochs as needed
            optimizer.zero_grad()
            outputs = model(X_train)
            loss = criterion(outputs, y_train)
            print(f'loss: {loss} at epoch {epoch} at applicants training')
            loss.backward()
            optimizer.step()
        
        return model

    def choose_supplier_for_applicant(
        self,
        applicant_features: np.ndarray,
        desired_faculty: int,
        applicant_model: ApplicantMLP = None
    ) -> Tuple[int, np.ndarray]:
        """
        Choose the best supplier for an applicant based on past data and supplier effects.
        
        Args:
            applicant_features: The current features of the applicant
            desired_faculty: The faculty index the applicant wants to get into
            past_data: Optional past data to train on. If None, uses self.past_applicants_df
        
        Returns:
            Tuple of (chosen_supplier_idx, modified_features)
        """
        # Evaluate each supplier's effect
        applicant_model.eval()
        best_probability = -1
        best_supplier_idx = -1
        best_modified_features = None
        
        original_features = torch.FloatTensor(applicant_features).unsqueeze(0)
        
        with torch.no_grad():
            # Try each supplier
            for i, supplier in enumerate(self.suppliers):
                # Apply supplier's modification
                modified_features_unclipped = original_features + torch.FloatTensor(supplier.diff_vector)
                modified_features = np.clip(modified_features_unclipped, 40, 100)
                
                # Get probability distribution over faculties
                probabilities = applicant_model(modified_features)
                
                # Check probability for desired faculty
                prob_desired = probabilities[0, int(desired_faculty)].item()
                
                if prob_desired > best_probability:
                    best_probability = prob_desired
                    best_supplier_idx = i
                    best_modified_features = modified_features.squeeze(0).numpy()
        
        if best_supplier_idx == -1:
            # If no supplier improves probability, return original features with no supplier
            return (-1, applicant_features)
        
        return (best_supplier_idx, best_modified_features)
        
    def recommend(
        self,
        student_features: np.ndarray,
        recommended_faculties: np.ndarray
    ) -> np.ndarray:
        """Calculate final grades for students given their features and recommended faculties
        
        Args:
            student_features: Features matrix of shape (n_students, n_features)
            recommended_faculties: Array of faculty indices of shape (n_students,)
            
        Returns:
            Array of final grades of shape (n_students,)
        """
        # Get utility vectors for all recommended faculties
        faculty_vectors = np.array([self.faculties[f].utility_vector for f in recommended_faculties])
        
        print(f'faculty_vectors: {faculty_vectors}')
        print(f'student_features: {student_features}')
        
        # Calculate base grades using batch matrix multiplication
        base_grades = np.sum(student_features * faculty_vectors, axis=1)
        
        # Generate noise for all students at once
        noise = np.random.uniform(*self.noise_range, size=len(student_features))
        
        return base_grades + noise

    def train_university_model(
        self,
        past_data: pd.DataFrame = None
    ) -> UniversityMLP:
        """
        Train university model on past data.
        
        Args:
            past_data: Optional past data to train on. If None, uses self.past_applicants_df
        
        Returns:
            Trained UniversityMLP model
        """
        if past_data is None:
            past_data = self.past_applicants_df
        
        if past_data is None:
            raise ValueError("No past data available. Generate past applicants first.")
        
        # Prepare training data
        feature_cols = [f"feature_{i}" for i in range(self.n_features)]
        X_train = torch.FloatTensor(past_data[feature_cols].values)
        
        # Create and train university model
        model = UniversityMLP(self.n_features, self.n_faculties)
        optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

        # Custom loss function that only considers the assigned faculty's grade
        def custom_loss(predictions, targets, assigned_faculties):
            batch_size = predictions.size(0)
            indices = torch.arange(batch_size)
            predicted_assigned_grades = predictions[indices, assigned_faculties]
            return torch.mean((predicted_assigned_grades - targets) ** 2)
        
        # Train the model
        model.train()
        batch_size = 128
        n_epochs = 200
        
        for epoch in range(n_epochs):
            # Process in batches
            permutation = torch.randperm(len(X_train))
            for i in range(0, len(X_train), batch_size):
                indices = permutation[i:i + batch_size]
                batch_x = X_train[indices]
                batch_y = torch.FloatTensor(past_data['final_grade'].values[indices])
                batch_assigned = torch.LongTensor(past_data['assigned_faculty'].values[indices])
                
                optimizer.zero_grad()
                predictions = model(batch_x)
                loss = custom_loss(predictions, batch_y, batch_assigned)
                print(f'loss: {loss} at epoch {epoch}')
                loss.backward()
                optimizer.step()
        
        return model

    def assign_applicants_to_faculties(
        self,
        model: UniversityMLP,
        current_applicants_features: np.ndarray
    ) -> Tuple[np.ndarray, np.ndarray, float]:
        """
        Use trained model to make faculty recommendations for current applicants.
        
        Args:
            model: Trained UniversityMLP model
            current_applicants_features: Modified features of current applicants (n_applicants x n_features)
        
        Returns:
            Tuple of (chosen_faculties, final_grades, mean_grade)
            - chosen_faculties: Array of faculty indices chosen for each applicant
            - final_grades: Array of final grades received by each applicant
            - mean_grade: Average grade across all applicants
        """
        model.eval()
        with torch.no_grad():
            current_features = torch.FloatTensor(current_applicants_features)
            predicted_grades = model(current_features)
            
            # Choose best faculty for each applicant based on predicted grades
            chosen_faculties = torch.argmax(predicted_grades, dim=1).numpy()
        
        return chosen_faculties



In [5]:
def run_example():
    """Run a single-round simulation and print summary statistics.

    Generates past and current applicants, lets every current applicant pick
    a supplier using an applicant model trained on the past data, trains the
    university model, assigns faculties from the modified features, and
    reports the acceptance rate and the grades obtained with the applicants'
    original features.
    """
    # Create environment
    env = UniversityEnvironment()

    # Generate past applicants
    past_df = env.generate_past_applicants(1000)
    print("Past applicants shape:", past_df.shape)

    # Generate current applicants
    current_df = env.generate_current_applicants(100)
    print("Current applicants shape:", current_df.shape)
    print(f'current_df: {current_df}')

    # choose_supplier_for_applicant needs a trained applicant model; calling it
    # without one fails, so train it once on the past data before the loop.
    applicant_model = env.train_applicant_model(past_df)

    # Get modified features for all current applicants
    feature_cols = [f"feature_{i}" for i in range(env.n_features)]
    modified_features = []
    original_features = current_df[feature_cols].values

    for idx in range(len(current_df)):
        student_features = current_df.iloc[idx][feature_cols].values
        desired_faculty = current_df.iloc[idx]['desired_faculty']

        _, modified_student_features = env.choose_supplier_for_applicant(
            student_features,
            desired_faculty,
            applicant_model
        )
        modified_features.append(modified_student_features)

    modified_features = np.array(modified_features)

    # Train university model
    trained_model = env.train_university_model(past_df)

    # Make predictions using trained model
    chosen_faculties = env.assign_applicants_to_faculties(
        trained_model,
        modified_features
    )
    # Calculate percentage of students accepted into their desired faculty
    desired_faculties = current_df['desired_faculty'].values
    matches = (chosen_faculties == desired_faculties)
    acceptance_rate = (np.sum(matches) / len(desired_faculties)) * 100

    # Calculate final grades using original features: suppliers change what
    # the university sees, not the grade computed here.
    final_grades = env.recommend(original_features, chosen_faculties)
    mean_grade = np.mean(final_grades)

    # Print results
    print("\nResults:")
    print(f"Mean grade across all applicants: {mean_grade:.2f}")
    print(f"\nPercentage of students accepted to desired faculty: {acceptance_rate:.2f}%")

    # Print detailed results for first 5 applicants
    print("\nDetailed results for first 5 applicants:")
    for i in range(min(5, len(current_df))):
        desired_faculty = current_df.iloc[i]['desired_faculty']
        print(f"\nApplicant {i}:")
        print(f"Desired faculty: {desired_faculty}")
        print(f"Assigned faculty: {chosen_faculties[i]}")
        print(f"Final grade: {final_grades[i]:.2f}")

In [6]:
run_example()

Past applicants shape: (1000, 12)
Current applicants shape: (100, 11)
current_df:      feature_0   feature_1  feature_2   feature_3   feature_4   feature_5  \
0    42.130331   82.896435  87.526611   40.000000   84.548278   69.425058   
1    70.621951   65.401288  61.827161   98.653011   51.783437   84.269128   
2   100.000000   89.435155  40.000000   55.111455  100.000000   52.495976   
3    53.715615   76.190377  65.781734   88.337941   50.871614   45.016538   
4    40.000000  100.000000  57.220875   89.083780   82.577927   49.563704   
..         ...         ...        ...         ...         ...         ...   
95   40.000000   69.810899  96.368410  100.000000   61.915276   93.987424   
96   40.000000   40.000000  49.496010   45.257607   80.233597   57.479468   
97   72.680221   89.568123  61.476285   80.225664  100.000000  100.000000   
98   69.804964  100.000000  51.712177   63.932529   40.000000   54.230594   
99   85.132963   40.000000  62.213696   40.000000   89.345818   40.0000

In [7]:
def calculate_desired_faculty_stats(assigned_faculties, desired_faculties):
    """Count how many students were assigned the faculty they wanted.

    Args:
        assigned_faculties: Sequence of assigned faculty indices.
        desired_faculties: Sequence of desired faculty indices, aligned
            element-wise with ``assigned_faculties``.

    Returns:
        Tuple ``(matches, percentage)``: the number of matching positions and
        that number as a percentage of ``len(desired_faculties)``. Empty input
        yields ``(0, 0.0)`` instead of dividing by zero.
    """
    total_students = len(desired_faculties)
    if total_students == 0:
        return 0, 0.0

    matches = int(sum(
        assigned == desired
        for assigned, desired in zip(assigned_faculties, desired_faculties)
    ))
    percentage = (matches / total_students) * 100
    return matches, percentage

In [8]:
def run_multi_iteration_example():
    """Run the multi-stage simulation and print the outcome of the gaming round.

    Stages:
        * Iteration -1: the university model is trained on past applicants.
        * Iteration 0: a first cohort is assigned from its unmodified features;
          the resulting (features, faculty, grade) rows become the data the
          applicants learn from.
        * Iteration 1: a second cohort trains an applicant model on the
          iteration-0 data, each member picks a supplier, and the university
          assigns faculties from the modified features. Grades are computed
          from that cohort's own original features.
    """
    # Create environment
    env = UniversityEnvironment()
    feature_cols = [f"feature_{i}" for i in range(env.n_features)]

    # Iteration -1: Initial University Training
    print("\n=== Iteration -1: Initial University Training ===")
    past_df = env.generate_past_applicants(1000)
    trained_model = env.train_university_model(past_df)

    # Generate the cohort used in iteration 0
    iteration0_applicants_df = env.generate_current_applicants(1000)
    iteration0_features = iteration0_applicants_df[feature_cols].values

    # Iteration 0: Pure Assignment
    print("\n=== Iteration 0: Pure Assignment ===")
    # Assign faculties using original features
    iteration0_faculties = env.assign_applicants_to_faculties(
        trained_model,
        iteration0_features
    )

    # Get real grades for these assignments
    iteration0_grades = env.recommend(iteration0_features, iteration0_faculties)

    # Create training data for students from iteration 0
    iteration0_df = pd.DataFrame(iteration0_features, columns=feature_cols)
    iteration0_df['assigned_faculty'] = iteration0_faculties
    iteration0_df['final_grade'] = iteration0_grades

    # Iteration 1: Student Learning
    print("\n=== Iteration 1: Student Learning ===")
    iteration1_applicants_df = env.generate_current_applicants(1000)
    iteration1_features = iteration1_applicants_df[feature_cols].values
    modified_features = []

    # Students learn from the iteration-0 outcomes instead of past_df
    applicant_model = env.train_applicant_model(iteration0_df)

    for idx in range(len(iteration1_applicants_df)):
        student_features = iteration1_applicants_df.iloc[idx][feature_cols].values
        desired_faculty = iteration1_applicants_df.iloc[idx]['desired_faculty']

        _, modified_student_features = env.choose_supplier_for_applicant(
            student_features,
            desired_faculty,
            applicant_model
        )
        modified_features.append(modified_student_features)

    modified_features = np.array(modified_features)

    # Get final assignments using modified features
    final_faculties = env.assign_applicants_to_faculties(
        trained_model,
        modified_features
    )

    # Grades must come from the ORIGINAL features of the iteration-1 cohort
    # (the students who were just assigned), not from the iteration-0 cohort.
    final_grades = env.recommend(iteration1_features, final_faculties)

    desired_faculties = iteration1_applicants_df['desired_faculty'].values

    # Calculate stats for the gaming round
    final_matches, final_percentage = calculate_desired_faculty_stats(final_faculties, desired_faculties)

    print("\nIteration 1 (With Gaming):")
    print(f"Mean grade: {np.mean(final_grades):.2f}")
    print(f"Faculty distribution: {np.bincount(final_faculties)}")
    print(f"Students who got desired faculty: {final_matches} ({final_percentage:.1f}%)")
    # Print detailed results for first 5 applicants.
    # NOTE: index i refers to different people in the two cohorts, so the
    # "Iteration 0" lines are not a before/after view of the same applicant.
    print("\nDetailed results for first 5 applicants:")
    for i in range(5):
        desired_faculty = iteration1_applicants_df.iloc[i]['desired_faculty']
        print(f"\nApplicant {i}:")
        print(f"Desired faculty: {desired_faculty}")
        print(f"Iteration 0 faculty: {iteration0_faculties[i]}")
        print(f"Iteration 0 grade: {iteration0_grades[i]:.2f}")
        print(f"Final faculty: {final_faculties[i]}")
        print(f"Final grade: {final_grades[i]:.2f}")

In [9]:
run_multi_iteration_example()


=== Iteration -1: Initial University Training ===
loss: 4415.513671875 at epoch 0
loss: 2951.92236328125 at epoch 0
loss: 1845.8995361328125 at epoch 0
loss: 961.4121704101562 at epoch 0
loss: 361.37567138671875 at epoch 0
loss: 125.92678833007812 at epoch 0
loss: 336.9335632324219 at epoch 0
loss: 655.2412719726562 at epoch 0
loss: 732.822265625 at epoch 1
loss: 646.0856323242188 at epoch 1
loss: 421.03765869140625 at epoch 1
loss: 173.06597900390625 at epoch 1
loss: 117.80741882324219 at epoch 1
loss: 84.39306640625 at epoch 1
loss: 136.38082885742188 at epoch 1
loss: 193.75717163085938 at epoch 1
loss: 255.619384765625 at epoch 2
loss: 253.7826385498047 at epoch 2
loss: 254.40704345703125 at epoch 2
loss: 229.77069091796875 at epoch 2
loss: 167.79165649414062 at epoch 2
loss: 112.71031188964844 at epoch 2
loss: 69.82736206054688 at epoch 2
loss: 44.206748962402344 at epoch 2
loss: 37.600955963134766 at epoch 3
loss: 54.909915924072266 at epoch 3
loss: 81.50520324707031 at epoch 3
l