In [5]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from typing import List, Tuple, Dict
from dataclasses import dataclass
import random
from torch.utils.data import Dataset, DataLoader

In [6]:

class UniversityMLP(nn.Module):
    """Two-layer perceptron that scores an applicant for every faculty.

    The network maps a feature vector of length ``n_features`` to
    ``n_faculties`` raw (unnormalised) outputs, one per faculty.
    """

    def __init__(self, n_features: int, n_faculties: int):
        super().__init__()
        hidden_units = 32
        # Layers are created in input-to-output order and wrapped in a
        # Sequential, so parameters are registered as network.0 / network.2.
        layers = [
            nn.Linear(n_features, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, n_faculties),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, x):
        """Return one raw score per faculty for each input row."""
        scores = self.network(x)
        return scores

class ApplicantMLP(nn.Module):
    """Two-layer perceptron returning a probability distribution over faculties.

    The final layer is a softmax over dimension 1, so the input must be a
    batch of shape (n_rows, n_features) and each output row sums to 1.
    """

    def __init__(self, n_features: int, n_faculties: int):
        super().__init__()
        hidden_units = 32
        stages = [
            nn.Linear(n_features, hidden_units),
            nn.ReLU(),
            nn.Linear(hidden_units, n_faculties),
            nn.Softmax(dim=1),  # normalise across faculties within each row
        ]
        self.network = nn.Sequential(*stages)

    def forward(self, x):
        """Return per-row faculty probabilities for a 2-D batch ``x``."""
        probabilities = self.network(x)
        return probabilities

In [7]:
@dataclass
class FacultyParams:
    """Parameters describing a single faculty."""
    name: str
    utility_vector: np.ndarray  # Hidden weights that turn student features into a grade
    # Number of available spots. Annotated as float because the environment
    # stores ``np.inf`` here to mean "unlimited"; that is also the default.
    capacity: float = np.inf

@dataclass
class SupplierParams:
    """Description of one preparation supplier."""
    name: str
    # Per-feature offset that this supplier adds to a student's feature vector.
    diff_vector: np.ndarray

class UniversityEnvironment:
    """Simulated admissions setting with faculties, preparation suppliers and applicants.

    Every faculty owns a hidden utility vector; the grade an applicant obtains at a
    faculty is the dot product of the applicant's features with that vector plus
    uniform noise. Suppliers shift an applicant's reported features by a fixed offset.

    All randomness comes from the global NumPy and PyTorch generators, so call
    ``np.random.seed`` / ``torch.manual_seed`` beforehand for reproducible runs.
    """

    # Valid range of every applicant feature; supplier-modified features are clipped to it.
    FEATURE_MIN = 40
    FEATURE_MAX = 100
    # Offsets a supplier applies to three distinct, randomly chosen features.
    SUPPLIER_EFFECTS = (15, 5, -5)

    def __init__(
        self,
        n_features: int = 5,  # Number of student features (e.g., math, english, etc.)
        n_faculties: int = 5,  # Number of different faculties
        n_suppliers: int = 20,  # Number of preparation suppliers
        noise_range: Tuple[float, float] = (0,0)  # Range for uniform noise
    ):
        """Create faculties and suppliers with randomly drawn parameters.

        Args:
            n_features: Length of each applicant's feature vector. Must be at least
                ``len(SUPPLIER_EFFECTS)`` because each supplier affects that many
                distinct features.
            n_faculties: Number of faculties.
            n_suppliers: Number of preparation suppliers.
            noise_range: ``(low, high)`` bounds of the uniform noise added to grades.
        """
        self.n_features = n_features
        self.n_faculties = n_faculties
        self.n_suppliers = n_suppliers
        self.noise_range = noise_range

        # Faculties get utility vectors whose entries sum to 1; capacity is unlimited.
        self.faculties = [
            FacultyParams(
                name=f"faculty_{i}",
                utility_vector=self._create_normalized_vector(n_features),
                capacity=np.inf
            )
            for i in range(n_faculties)
        ]

        # Suppliers get a sparse offset vector (see _create_supplier_diff_vector).
        self.suppliers = [
            SupplierParams(
                name=f"Supplier_{i}",
                diff_vector=self._create_supplier_diff_vector(n_features),
            )
            for i in range(n_suppliers)
        ]

        self.past_applicants_df = None
        self.current_applicants_df = None

    # ------------------------------------------------------------------ helpers

    def _feature_columns(self) -> List[str]:
        """Return the DataFrame column names used for the feature vector."""
        return [f"feature_{i}" for i in range(self.n_features)]

    @staticmethod
    def _to_float_tensor(values) -> torch.Tensor:
        """Copy array-like ``values`` into a float32 tensor."""
        # np.array always copies, so the tensor never aliases caller-owned memory.
        return torch.from_numpy(np.array(values, dtype=np.float32))

    def _create_supplier_diff_vector(self, size: int) -> np.ndarray:
        """Build an offset vector with SUPPLIER_EFFECTS placed on distinct random features."""
        affected = np.random.choice(size, size=len(self.SUPPLIER_EFFECTS), replace=False)
        diff_vector = np.zeros(size, dtype=int)
        diff_vector[affected] = self.SUPPLIER_EFFECTS
        return diff_vector

    def _create_normalized_vector(self, size: int) -> np.ndarray:
        """
        Create a random non-negative vector of the given size whose entries sum to 1.

        Most entries start small (uniform in [0.05, 0.2]); ``max(1, size // 3)``
        randomly chosen entries are replaced by larger values (uniform in
        [0.4, 0.8]) before the vector is divided by its sum.
        """
        vector = np.random.uniform(0.05, 0.2, size)  # Base small values

        high_value_indices = np.random.choice(size, size=max(1, size // 3), replace=False)
        vector[high_value_indices] = np.random.uniform(0.4, 0.8, size=len(high_value_indices))

        # L1 normalisation: keeps relative differences, makes the entries sum to 1.
        return vector / np.sum(vector)

    def _generate_truncated_normal_features(self, n_samples: int) -> np.ndarray:
        """
        Draw an (n_samples, n_features) matrix of applicant features.

        Despite the historical name, values are drawn from a uniform
        distribution on [FEATURE_MIN, FEATURE_MAX), not a truncated normal.
        """
        return np.random.uniform(
            self.FEATURE_MIN, self.FEATURE_MAX, (n_samples, self.n_features)
        )

    # --------------------------------------------------------- data generation

    def generate_past_applicants(
        self,
        n_applicants: int = 1000
    ) -> pd.DataFrame:
        """Generate past applicants with a random faculty assignment and the resulting grade.

        The frame has the feature columns plus ``assigned_faculty`` and
        ``final_grade``. It is also stored in ``self.past_applicants_df``.
        """
        features = self._generate_truncated_normal_features(n_applicants)
        df = pd.DataFrame(features, columns=self._feature_columns())

        assigned = np.random.randint(0, self.n_faculties, n_applicants)
        df['assigned_faculty'] = assigned

        # Grade is computed only for the faculty the applicant was assigned to.
        df['final_grade'] = self.recommend(features, assigned)

        self.past_applicants_df = df
        return df

    def generate_current_applicants(
        self,
        n_applicants: int = 100
    ) -> pd.DataFrame:
        """Generate current applicants with a uniformly random ``desired_faculty``.

        The frame is also stored in ``self.current_applicants_df``.
        """
        features = self._generate_truncated_normal_features(n_applicants)
        df = pd.DataFrame(features, columns=self._feature_columns())

        df['desired_faculty'] = np.random.randint(0, self.n_faculties, n_applicants)

        self.current_applicants_df = df
        return df

    # ---------------------------------------------------------- reconstruction

    def reconstruct_original_features(
        self,
        modified_features: np.ndarray,
        desired_faculties: np.ndarray
    ) -> np.ndarray:
        """
        Reconstruct approximate original features using group-wise mean vectors.

        For each desired-faculty group, every feature is divided by the ratio
        between the group's mean and the global mean of that feature.

        Args:
            modified_features: Modified feature vectors of shape (n_students, n_features)
            desired_faculties: Array of desired faculty indices for each student

        Returns:
            Float array of reconstructed feature vectors. Rows whose desired
            faculty is not in ``range(n_faculties)`` are returned unchanged.
        """
        source = np.asarray(modified_features, dtype=float)
        desired_faculties = np.asarray(desired_faculties)
        reconstructed_features = source.copy()

        # Nothing to average over; avoids NaN means and runtime warnings.
        if source.shape[0] == 0:
            return reconstructed_features

        global_mean = np.mean(source, axis=0)

        for faculty in range(self.n_faculties):
            faculty_mask = desired_faculties == faculty
            if not np.any(faculty_mask):
                continue

            faculty_mean = np.mean(source[faculty_mask], axis=0)

            # Ratio is left at 1 wherever either mean is zero, so the division
            # below can never produce inf/NaN.
            proportion_vector = np.ones_like(global_mean)
            usable = (global_mean != 0) & (faculty_mean != 0)
            proportion_vector[usable] = faculty_mean[usable] / global_mean[usable]

            reconstructed_features[faculty_mask] = source[faculty_mask] / proportion_vector

        return reconstructed_features

    def assign_applicants_to_faculties_with_reconstruction(
        self,
        model: UniversityMLP,
        modified_features: np.ndarray,
        desired_faculties: np.ndarray
    ) -> np.ndarray:
        """
        Assign applicants to faculties using reconstructed original features.

        Args:
            model: Trained UniversityMLP model
            modified_features: Modified features of current applicants
            desired_faculties: Array of desired faculty indices

        Returns:
            Array of assigned faculty indices
        """
        reconstructed_features = self.reconstruct_original_features(modified_features, desired_faculties)
        return self.assign_applicants_to_faculties(model, reconstructed_features)

    # ---------------------------------------------------------- applicant side

    def train_applicant_model(
        self,
        past_data: pd.DataFrame = None,
        n_epochs: int = 500,
        learning_rate: float = 0.001,
        verbose: bool = True
    ) -> ApplicantMLP:
        """Train the applicants' model to predict the assigned faculty from features.

        Args:
            past_data: Frame with the feature columns and ``assigned_faculty``.
                If None, uses ``self.past_applicants_df``.
            n_epochs: Number of full-batch optimisation steps.
            learning_rate: Adam learning rate.
            verbose: Print the loss at every epoch.

        Returns:
            The trained ApplicantMLP.

        Raises:
            ValueError: If no past data is available.
        """
        if past_data is None:
            past_data = self.past_applicants_df

        if past_data is None:
            raise ValueError("No past data available. Generate past applicants first.")

        X_train = self._to_float_tensor(past_data[self._feature_columns()].values)
        y_train = torch.from_numpy(
            np.array(past_data['assigned_faculty'].values, dtype=np.int64)
        )

        model = ApplicantMLP(self.n_features, self.n_faculties)
        # ApplicantMLP already ends in a softmax. CrossEntropyLoss expects raw
        # logits and would apply a second softmax, so use NLLLoss on the log of
        # the predicted probabilities instead.
        criterion = nn.NLLLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        model.train()

        for epoch in range(n_epochs):
            optimizer.zero_grad()
            probabilities = model(X_train)
            # Clamp keeps log() finite when a probability underflows to 0.
            loss = criterion(torch.log(probabilities.clamp_min(1e-12)), y_train)
            if verbose:
                print(f'loss: {loss.item():.6f} at epoch {epoch} at applicants training')
            loss.backward()
            optimizer.step()

        return model

    def choose_supplier_for_applicant(
        self,
        applicant_features: np.ndarray,
        desired_faculty: int,
        applicant_model: ApplicantMLP = None
    ) -> Tuple[int, np.ndarray]:
        """
        Choose the supplier that maximises the predicted probability of the desired faculty.

        Every supplier's offset is added to the applicant's features, the result is
        clipped to [FEATURE_MIN, FEATURE_MAX] and scored by ``applicant_model``.
        Ties go to the supplier with the lowest index.

        Args:
            applicant_features: The current features of the applicant (1-D)
            desired_faculty: The faculty index the applicant wants to get into
            applicant_model: Trained ApplicantMLP (see train_applicant_model)

        Returns:
            Tuple of (chosen_supplier_idx, modified_features). If there are no
            suppliers, or no supplier yields a usable probability, returns
            ``(-1, applicant_features)``.

        Raises:
            ValueError: If ``applicant_model`` is None.
        """
        if applicant_model is None:
            raise ValueError(
                "applicant_model is required. Train one with train_applicant_model first."
            )

        if not self.suppliers:
            return (-1, applicant_features)

        applicant_model.eval()

        base_features = self._to_float_tensor(applicant_features).unsqueeze(0)
        supplier_offsets = self._to_float_tensor(
            np.stack([supplier.diff_vector for supplier in self.suppliers])
        )

        with torch.no_grad():
            # One row per supplier: (n_suppliers, n_features), scored in one pass.
            candidates = torch.clamp(
                base_features + supplier_offsets, self.FEATURE_MIN, self.FEATURE_MAX
            )
            probabilities = applicant_model(candidates)
            desired_probabilities = probabilities[:, int(desired_faculty)]

            # NaN scores can never win; if every score is NaN no supplier is chosen.
            usable = ~torch.isnan(desired_probabilities)
            if not bool(usable.any()):
                return (-1, applicant_features)
            scores = torch.where(
                usable, desired_probabilities, torch.full_like(desired_probabilities, -1.0)
            )
            # argmax returns the first maximal index, i.e. lowest supplier index on ties.
            best_supplier_idx = int(torch.argmax(scores))
            best_modified_features = candidates[best_supplier_idx].clone().numpy()

        return (best_supplier_idx, best_modified_features)

    def assign_applicants_to_faculties_fully_exposed(
        self,
        model: UniversityMLP,
        current_applicants_features: np.ndarray
    ) -> Tuple[np.ndarray, torch.Tensor]:
        """
        Predict grades and pick the best faculty, returning the predictions as well.

        Args:
            model: Trained UniversityMLP model
            current_applicants_features: Either one feature vector (n_features,)
                or a batch (n_applicants x n_features)

        Returns:
            Tuple of (chosen_faculties, predicted_grades)
            - chosen_faculties: Index of the highest predicted grade per applicant
              (0-d array for a single vector, 1-D array for a batch)
            - predicted_grades: Tensor of model outputs, one column per faculty
        """
        model.eval()
        with torch.no_grad():
            current_features = self._to_float_tensor(current_applicants_features)
            predicted_grades = model(current_features)

            # Reduce over the faculty axis only; without ``dim`` a batch would be
            # flattened into a single index.
            chosen_faculties = torch.argmax(predicted_grades, dim=-1).numpy()

        return chosen_faculties, predicted_grades

    def choose_supplier_for_applicant_fully_exposed(
        self,
        applicant_features: np.ndarray,
        desired_faculty: int,
        trained_model: UniversityMLP
    ) -> Tuple[int, np.ndarray]:
        """
        Choose a supplier for an applicant who can query the university's model directly.

        Returns the first supplier whose modification makes the **desired faculty**
        the model's top-ranked faculty for this applicant.

        Args:
            applicant_features: The current features of the applicant (1-D)
            desired_faculty: The faculty index the applicant wants to get into
            trained_model: The university's trained UniversityMLP

        Returns:
            Tuple of (chosen_supplier_idx, modified_features). Returns
            ``(-1, applicant_features)`` when the desired faculty is already
            top-ranked or when no supplier achieves it.
        """
        chosen_faculty, _ = self.assign_applicants_to_faculties_fully_exposed(
            trained_model,
            applicant_features
        )

        # If the desired faculty is already the best, no modification is needed.
        if chosen_faculty == desired_faculty:
            return -1, applicant_features

        if not self.suppliers:
            return -1, applicant_features

        # Evaluate all suppliers in one batch: row i is the applicant after supplier i.
        supplier_offsets = np.stack([supplier.diff_vector for supplier in self.suppliers])
        candidates = np.clip(
            np.asarray(applicant_features, dtype=float) + supplier_offsets,
            self.FEATURE_MIN,
            self.FEATURE_MAX
        )
        chosen_per_supplier, _ = self.assign_applicants_to_faculties_fully_exposed(
            trained_model,
            candidates
        )

        successful = np.flatnonzero(chosen_per_supplier == desired_faculty)
        if successful.size > 0:
            first_supplier = int(successful[0])
            return first_supplier, candidates[first_supplier]

        # If no supplier achieves the goal, return the original features
        return -1, applicant_features

    # --------------------------------------------------------- university side

    def recommend(
        self,
        student_features: np.ndarray,
        recommended_faculties: np.ndarray
    ) -> np.ndarray:
        """Calculate final grades for students given their features and recommended faculties

        Args:
            student_features: Features matrix of shape (n_students, n_features)
            recommended_faculties: Array of faculty indices of shape (n_students,)

        Returns:
            Array of final grades of shape (n_students,)
        """
        student_features = np.asarray(student_features, dtype=float)
        faculty_indices = np.asarray(recommended_faculties, dtype=int)

        # Row i of the gathered matrix is the utility vector of student i's faculty.
        utility_matrix = np.array([faculty.utility_vector for faculty in self.faculties])
        faculty_vectors = utility_matrix[faculty_indices]

        # Row-wise dot product between features and utility vectors.
        base_grades = np.sum(student_features * faculty_vectors, axis=1)

        noise = np.random.uniform(*self.noise_range, size=len(student_features))

        return base_grades + noise

    def train_university_model(
        self,
        past_data: pd.DataFrame = None,
        n_epochs: int = 100,
        batch_size: int = 128,
        learning_rate: float = 0.01,
        verbose: bool = True
    ) -> UniversityMLP:
        """
        Train university model on past data.

        The loss is the mean squared error between ``final_grade`` and the
        model output for the faculty each applicant was actually assigned to.

        Args:
            past_data: Frame with the feature columns, ``assigned_faculty`` and
                ``final_grade``. If None, uses self.past_applicants_df
            n_epochs: Number of passes over the data.
            batch_size: Mini-batch size.
            learning_rate: Adam learning rate.
            verbose: Print the loss of every mini-batch.

        Returns:
            Trained UniversityMLP model

        Raises:
            ValueError: If no past data is available.
        """
        if past_data is None:
            past_data = self.past_applicants_df

        if past_data is None:
            raise ValueError("No past data available. Generate past applicants first.")

        # Convert once up front instead of rebuilding tensors for every batch.
        X_train = self._to_float_tensor(past_data[self._feature_columns()].values)
        y_train = self._to_float_tensor(past_data['final_grade'].values)
        assigned_train = torch.from_numpy(
            np.array(past_data['assigned_faculty'].values, dtype=np.int64)
        )

        model = UniversityMLP(self.n_features, self.n_faculties)
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

        # Custom loss function that only considers the assigned faculty's grade
        def custom_loss(predictions, targets, assigned_faculties):
            rows = torch.arange(predictions.size(0))
            predicted_assigned_grades = predictions[rows, assigned_faculties]
            return torch.mean((predicted_assigned_grades - targets) ** 2)

        model.train()

        for epoch in range(n_epochs):
            # Fresh shuffle every epoch.
            permutation = torch.randperm(len(X_train))
            for start in range(0, len(X_train), batch_size):
                indices = permutation[start:start + batch_size]

                optimizer.zero_grad()
                predictions = model(X_train[indices])
                loss = custom_loss(predictions, y_train[indices], assigned_train[indices])
                if verbose:
                    print(f'loss: {loss} at epoch {epoch}')
                loss.backward()
                optimizer.step()

        return model

    def assign_applicants_to_faculties(
        self,
        model: UniversityMLP,
        current_applicants_features: np.ndarray
    ) -> np.ndarray:
        """
        Use trained model to make faculty recommendations for current applicants.

        Args:
            model: Trained UniversityMLP model
            current_applicants_features: Features of current applicants (n_applicants x n_features)

        Returns:
            Array with the faculty index of the highest predicted grade for each applicant
        """
        chosen_faculties, _ = self.assign_applicants_to_faculties_fully_exposed(
            model,
            current_applicants_features
        )
        return chosen_faculties



In [8]:
def run_example():
    """Run a single round: applicants pick suppliers, the university assigns faculties.

    Applicants train their own model on the past data, use it to pick the
    supplier that maximises their predicted chance of the desired faculty, and
    the university then assigns faculties from the modified features. Grades
    are computed from the applicants' unmodified features.
    """
    # Create environment
    env = UniversityEnvironment()

    # Generate past applicants
    past_df = env.generate_past_applicants(1000)
    print("Past applicants shape:", past_df.shape)

    # Generate current applicants
    current_df = env.generate_current_applicants(100)
    print("Current applicants shape:", current_df.shape)
    print(f'current_df: {current_df}')

    # choose_supplier_for_applicant needs a trained applicant model; calling it
    # without one fails, so train it once on the past data.
    applicant_model = env.train_applicant_model(past_df)

    # Get modified features for all current applicants
    feature_cols = [f"feature_{i}" for i in range(env.n_features)]
    modified_features = []
    original_features = current_df[feature_cols].values

    for idx in range(len(current_df)):
        student_features = current_df.iloc[idx][feature_cols].values
        desired_faculty = current_df.iloc[idx]['desired_faculty']

        _, modified_student_features = env.choose_supplier_for_applicant(
            student_features,
            desired_faculty,
            applicant_model
        )
        modified_features.append(modified_student_features)

    modified_features = np.array(modified_features)

    # Train university model
    trained_model = env.train_university_model(past_df)

    # Make predictions using trained model
    chosen_faculties = env.assign_applicants_to_faculties(
        trained_model,
        modified_features
    )
    # Calculate percentage of students accepted into their desired faculty
    desired_faculties = current_df['desired_faculty'].values
    matches = (chosen_faculties == desired_faculties)
    acceptance_rate = (np.sum(matches) / len(desired_faculties)) * 100

    # Calculate final grades using original features
    final_grades = env.recommend(original_features, chosen_faculties)
    mean_grade = np.mean(final_grades)

    # Print results
    print("\nResults:")
    print(f"Mean grade across all applicants: {mean_grade:.2f}")
    print(f"\nPercentage of students accepted to desired faculty: {acceptance_rate:.2f}%")

    # Print detailed results for the first applicants (at most 5)
    print("\nDetailed results for first 5 applicants:")
    for i in range(min(5, len(current_df))):
        desired_faculty = current_df.iloc[i]['desired_faculty']
        print(f"\nApplicant {i}:")
        print(f"Desired faculty: {desired_faculty}")
        print(f"Assigned faculty: {chosen_faculties[i]}")
        print(f"Final grade: {final_grades[i]:.2f}")

In [9]:
def calculate_desired_faculty_stats(assigned_faculties, desired_faculties):
    """Count how many students were assigned to the faculty they wanted.

    Args:
        assigned_faculties: Sequence of assigned faculty indices.
        desired_faculties: Sequence of desired faculty indices, aligned with
            ``assigned_faculties``.

    Returns:
        Tuple ``(matches, percentage)``: ``matches`` is a plain int and
        ``percentage`` is ``matches`` relative to ``len(desired_faculties)``,
        in percent. Empty input yields ``(0, 0.0)``.
    """
    total_students = len(desired_faculties)
    # No students: report zero instead of dividing by zero.
    if total_students == 0:
        return 0, 0.0

    # bool()/int() normalise numpy scalars so the result is a plain Python int.
    matches = int(sum(
        bool(assigned == desired)
        for assigned, desired in zip(assigned_faculties, desired_faculties)
    ))
    percentage = (matches / total_students) * 100
    return matches, percentage

In [10]:
def run_multi_iteration_example():
    """Run the three-stage experiment and print assignment/grade statistics.

    Stages:
        -1. The university trains its model on randomly assigned past applicants.
         0. A first cohort is assigned with unmodified features; the outcome
            becomes the applicants' training data.
         1. A second cohort games the system through suppliers. Four assignment
            scenarios are compared for this cohort, in this print order:
            unmodified features, supplier chosen with knowledge of own features,
            supplier chosen without that knowledge, and modified features passed
            through the university's reconstruction step.

    Grades are always computed from the second cohort's own unmodified features.

    Returns:
        Tuple ``(iteration1_applicants_df, feature_cols, env, trained_model,
        original_features)`` where ``original_features`` are the unmodified
        features of the applicants in ``iteration1_applicants_df``.
    """
    # Valid feature range used by the environment when clipping modified features.
    feature_min, feature_max = 40, 100

    # Create environment
    env = UniversityEnvironment()
    feature_cols = [f"feature_{i}" for i in range(env.n_features)]

    # Iteration -1: Initial University Training
    print("\n=== Iteration -1: Initial University Training ===")
    past_df = env.generate_past_applicants(10000)
    trained_model = env.train_university_model(past_df)

    # Generate the cohort used in iteration 0
    iteration0_applicants_df = env.generate_current_applicants(10000)
    iteration0_features = iteration0_applicants_df[feature_cols].values

    # Iteration 0: Pure Assignment
    print("\n=== Iteration 0: Pure Assignment ===")
    iteration0_faculties = env.assign_applicants_to_faculties(
        trained_model,
        iteration0_features
    )

    # Get real grades for these assignments
    iteration0_grades = env.recommend(iteration0_features, iteration0_faculties)

    # Training data for the applicants' model comes from iteration 0 outcomes
    iteration0_df = pd.DataFrame(iteration0_features, columns=feature_cols)
    iteration0_df['assigned_faculty'] = iteration0_faculties
    iteration0_df['final_grade'] = iteration0_grades

    # Iteration 1: Student Learning
    print("\n=== Iteration 1: Student Learning ===")
    iteration1_applicants_df = env.generate_current_applicants(10000)
    # Unmodified features of THIS cohort. Every baseline and every grade below
    # must use these; the iteration-0 cohort is a different set of students.
    original_features = iteration1_applicants_df[feature_cols].values
    desired_faculties = iteration1_applicants_df['desired_faculty'].values

    applicant_model = env.train_applicant_model(iteration0_df)

    # Without knowledge of their own features, applicants evaluate suppliers at
    # the midpoint of the valid range. The choice then depends only on the
    # desired faculty, so it is computed once per faculty.
    reference_features = np.full(env.n_features, (feature_min + feature_max) / 2)
    uninformed_supplier_by_faculty = {}
    for faculty in np.unique(desired_faculties):
        supplier_idx, _ = env.choose_supplier_for_applicant(
            reference_features,
            faculty,
            applicant_model
        )
        uninformed_supplier_by_faculty[int(faculty)] = supplier_idx

    modified_features_with_features_knowledge = []
    modified_features_without_features_knowledge = []

    for student_features, desired_faculty in zip(original_features, desired_faculties):
        # Students learn from iteration 0 data and know their own features
        _, modified_student_features = env.choose_supplier_for_applicant(
            student_features,
            desired_faculty,
            applicant_model
        )
        modified_features_with_features_knowledge.append(modified_student_features)

        # Apply the uninformed supplier's offset to the student's real
        # features, clipped to the valid range (-1 means no supplier chosen).
        supplier_idx = uninformed_supplier_by_faculty[int(desired_faculty)]
        if supplier_idx == -1:
            uninformed_features = student_features
        else:
            uninformed_features = np.clip(
                student_features + env.suppliers[supplier_idx].diff_vector,
                feature_min,
                feature_max
            )
        modified_features_without_features_knowledge.append(uninformed_features)

    modified_features_with_features_knowledge = np.array(modified_features_with_features_knowledge)
    modified_features_without_features_knowledge = np.array(modified_features_without_features_knowledge)

    # Get final assignments for each scenario
    final_faculties_modified = env.assign_applicants_to_faculties(
        trained_model,
        modified_features_with_features_knowledge
    )

    final_faculties_modified_without_features_knowledge = env.assign_applicants_to_faculties(
        trained_model,
        modified_features_without_features_knowledge
    )

    final_faculties_original = env.assign_applicants_to_faculties(
        trained_model,
        original_features
    )

    final_faculties_with_reconstruction = env.assign_applicants_to_faculties_with_reconstruction(
        trained_model,
        modified_features_with_features_knowledge,
        desired_faculties
    )

    # Scenarios in print order; grades always use the cohort's unmodified features
    scenario_assignments = [
        final_faculties_original,
        final_faculties_modified,
        final_faculties_modified_without_features_knowledge,
        final_faculties_with_reconstruction,
    ]
    scenario_grades = [
        env.recommend(original_features, assigned) for assigned in scenario_assignments
    ]
    scenario_stats = [
        calculate_desired_faculty_stats(assigned, desired_faculties)
        for assigned in scenario_assignments
    ]

    for assigned, grades, (matches, percentage) in zip(
        scenario_assignments, scenario_grades, scenario_stats
    ):
        print(f"Mean grade: {np.mean(grades):.2f}")
        print(f"Faculty distribution: {np.bincount(assigned)}")
        print(f"Students who got desired faculty: {matches} ({percentage:.1f}%)")

    return iteration1_applicants_df, feature_cols, env, trained_model, original_features

In [11]:
# Run the multi-iteration experiment and keep what it returns (applicant frame,
# feature column names, environment, trained university model, feature matrix)
# as notebook-level variables.
iteration1_applicants_df, feature_cols, env, trained_model, original_features = run_multi_iteration_example()


=== Iteration -1: Initial University Training ===
loss: 6677.18115234375 at epoch 0
loss: 5362.51708984375 at epoch 0
loss: 4223.51611328125 at epoch 0
loss: 3217.74169921875 at epoch 0
loss: 2173.2763671875 at epoch 0
loss: 1507.64453125 at epoch 0
loss: 1235.080322265625 at epoch 0
loss: 829.1347045898438 at epoch 0
loss: 339.57012939453125 at epoch 0
loss: 290.9095458984375 at epoch 0
loss: 245.6690216064453 at epoch 0
loss: 405.1565856933594 at epoch 0
loss: 400.2978210449219 at epoch 0
loss: 540.2223510742188 at epoch 0
loss: 515.9896240234375 at epoch 0
loss: 504.20977783203125 at epoch 0
loss: 407.9283752441406 at epoch 0
loss: 362.1174621582031 at epoch 0
loss: 221.50543212890625 at epoch 0
loss: 179.27757263183594 at epoch 0
loss: 146.95809936523438 at epoch 0
loss: 153.83343505859375 at epoch 0
loss: 99.03187561035156 at epoch 0
loss: 147.766845703125 at epoch 0
loss: 211.29165649414062 at epoch 0
loss: 197.09469604492188 at epoch 0
loss: 174.565185546875 at epoch 0
loss: 18

In [15]:
# Inspect the faculties of a freshly sampled environment under its own name.
# Rebinding `env` here would replace the environment that `trained_model` was
# trained against, and later cells pass `env` and `trained_model` together.
inspection_env = UniversityEnvironment()
inspection_env.faculties

[FacultyParams(name='faculty_0', utility_vector=array([0.60395675, 0.04274824, 0.04624596, 0.14279328, 0.16425577]), capacity=inf),
 FacultyParams(name='faculty_1', utility_vector=array([0.06180626, 0.08699381, 0.17291099, 0.1807113 , 0.49757764]), capacity=inf),
 FacultyParams(name='faculty_2', utility_vector=array([0.14590588, 0.49100168, 0.14611896, 0.09234681, 0.12462667]), capacity=inf),
 FacultyParams(name='faculty_3', utility_vector=array([0.17122034, 0.1763869 , 0.08021373, 0.06397304, 0.50820599]), capacity=inf),
 FacultyParams(name='faculty_4', utility_vector=array([0.10299091, 0.15086165, 0.10950118, 0.52562006, 0.1110262 ]), capacity=inf)]

In [16]:
def fully_exposed_example(iteration1_applicants_df, feature_cols, env, trained_model, original_features):
    """Compare assignments when applicants can query the university's model directly.

    Each applicant picks a supplier via
    ``env.choose_supplier_for_applicant_fully_exposed``; the resulting
    assignments are compared with those obtained from ``original_features``.
    Grades for both scenarios are computed from ``original_features``.

    Args:
        iteration1_applicants_df: Applicants frame with the feature columns and
            ``desired_faculty``.
        feature_cols: Names of the feature columns.
        env: The environment providing suppliers, assignment and grading.
        trained_model: The university's trained model.
        original_features: Feature matrix used for the baseline assignment and
            for grading — presumably the unmodified features of the same
            applicants; verify against the caller.
    """
    gamed_features = []
    print("Starting")
    n_applicants = len(iteration1_applicants_df)
    for idx in range(n_applicants):
        applicant_row = iteration1_applicants_df.iloc[idx]

        # The applicant queries the university's own model to pick a supplier.
        supp_id, gamed_row = env.choose_supplier_for_applicant_fully_exposed(
            applicant_row[feature_cols].values,
            applicant_row['desired_faculty'],
            trained_model,
        )
        gamed_features.append(gamed_row)
        print(f'student {idx} - choose supplier {supp_id}')

    gamed_features = np.array(gamed_features)

    # Assignments from gamed features first, then the baseline.
    faculties_gamed = env.assign_applicants_to_faculties(trained_model, gamed_features)
    faculties_baseline = env.assign_applicants_to_faculties(trained_model, original_features)

    # Grades are always computed from the features passed as original_features.
    grades_baseline = env.recommend(original_features, faculties_baseline)
    grades_gamed = env.recommend(original_features, faculties_gamed)

    wanted = iteration1_applicants_df['desired_faculty'].values

    baseline_matches, baseline_pct = calculate_desired_faculty_stats(faculties_baseline, wanted)
    gamed_matches, gamed_pct = calculate_desired_faculty_stats(faculties_gamed, wanted)

    # Baseline block first, gamed block second.
    print(f"Mean grade: {np.mean(grades_baseline):.2f}")
    print(f"Faculty distribution: {np.bincount(faculties_baseline)}")
    print(f"Students who got desired faculty: {baseline_matches} ({baseline_pct:.1f}%)")
    print(f"Mean grade: {np.mean(grades_gamed):.2f}")
    print(f"Faculty distribution: {np.bincount(faculties_gamed)}")
    print(f"Students who got desired faculty: {gamed_matches} ({gamed_pct:.1f}%)")

In [14]:
#fully_exposed_example(iteration1_applicants_df, feature_cols, env, trained_model, original_features)