In [49]:
import pandas as pd

from torch.utils.data import Dataset, DataLoader
import torch


import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDRegressor
from sklearn.preprocessing import StandardScaler
import os
import random
import seaborn as sns
import pandas as pd
import time
import copy
import torch
import torch.nn as nn
from torch.utils.data.dataset import random_split
import itertools
import torch.optim as optim
import math

In [50]:
df = pd.read_csv(f"LM_PMPNN_GRID_Functions_011/all_scores.csv")

# Ensure the index is set correctly if it's not already
df['index'] = df['index'].astype(int)
df.set_index('index', inplace=True)

df['parent_index'] = pd.to_numeric(df['parent_index'], errors='coerce').fillna(-1).astype(int)

# Convert design_method to a binary variable
df['action'] = (df['design_method'] == 'ProteinMPNN').astype(int)

# Function to get parent scores
def get_parent_scores(row):
    # Check if the parent_index is -1, indicating a "Parent" entry
    if row['parent_index'] == -1:
        # Return the row's own scores, or alternatively, return default scores
        return pd.Series([row['interface_score'], row['total_score'], row['catalytic_score']])
    
    # If parent_index is valid (not -1), proceed to find the parent row
    parent_row = df.loc[row['parent_index']]
    return pd.Series([parent_row['interface_score'], parent_row['total_score'], parent_row['catalytic_score']])

score_columns = ['interface_score', 'total_score', 'catalytic_score']
scaler = StandardScaler()
df[score_columns] = scaler.fit_transform(df[score_columns])

# Apply the function to get parent scores
df[['parent_interface_score', 'parent_total_score', 'parent_catalytic_score']] = df.apply(get_parent_scores, axis=1)

# Calculate the reward as the difference between the child's and its parent's scores
df['reward'] = (df['parent_interface_score'] - df['interface_score']) + \
               (df['parent_total_score'] - df['total_score']) + \
               (df['parent_catalytic_score'] - df['catalytic_score'])

# Select relevant columns for the dataset
df = df[['parent_interface_score', 'parent_total_score', 'parent_catalytic_score', 'action', 'reward']]

df = df.dropna()



In [58]:
class AIzymesDataset(Dataset):
    def __init__(self, dataframe):
        self.dataframe = dataframe

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        row = self.dataframe.iloc[idx]
        state = torch.tensor([row['parent_interface_score'], row['parent_total_score'], row['parent_catalytic_score']], dtype=torch.float)
        action = torch.tensor(row['action'], dtype=torch.long)
        reward = torch.tensor(row['reward'], dtype=torch.float)
        return state, action, reward

In [59]:
# Create an instance of the Dataset
dataset = AIzymesDataset(df)

dataset_size = len(dataset)
val_size = int(dataset_size * 0.2)  # 20% of the data for validation
train_size = dataset_size - val_size

train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

In [60]:
class NeuralBanditModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        
        super(NeuralBanditModel, self).__init__()
        
        hidden_size = 2 ** (input_size) - 1
        #hidden_size = math.factorial(input_size)
        
        self.fc1  = nn.Linear(input_size, hidden_size)
        self.fc2  = nn.Linear(hidden_size, hidden_size)
        self.fc3  = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
            
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)
        return x

class NeuralBanditAgent:
    def __init__(self, n_actions, n_features, hidden_size=3, epsilon_0=0.9, epsilon_1=0.1, epsilon_decay=0.995, lr=0.01):
        self.n_actions = n_actions
        self.model = NeuralBanditModel(n_features, hidden_size, n_actions)
        self.optimizer = optim.Adam(self.model.parameters(), lr=lr)
        self.action_counts = np.zeros(n_actions, dtype=int)
        self.rewards = []
        self.states = []
        self.norm_rewards = []
        self.norm_states = []
        self.actions_taken = []
        self.loss = []
        self.predecessors = []
        self.normalization    = {
            'states_mean': 0,
            'states_std': 0,
            'rewards_mean': 0,
            'rewards_std': 0
        }  
        self.epsilon_0 = epsilon_0
        self.epsilon_1 = epsilon_1
        self.epsilon_decay = epsilon_decay
        self.fitting_times = []
        self.RMSE_weights = []
        self.initial_model_state = copy.deepcopy([param.data.numpy() for param in self.model.parameters()])
        self.initial_model_state = np.concatenate([arr.flatten() for arr in self.initial_model_state])

    def select_action(self, state):
        if random.random() < self.epsilon_0 + self.epsilon_1:
            return random.randint(0, self.n_actions - 1)  # Random action
        else:
            state_tensor = torch.FloatTensor(state)
            predicted_rewards = self.model(state_tensor)
            return torch.argmax(predicted_rewards).item()
    
    def set_normalization_parameters(self, dataloader):
        """
        Calculate and set normalization parameters (mean and std) for states and rewards
        based on the data provided by the dataloader.

        Parameters:
        - dataloader: DataLoader providing batches of (states, actions, rewards).
        """
        state_sums = None
        reward_sum = 0.0
        state_squares_sum = None
        reward_square_sum = 0.0
        num_samples = 0

        for states, _, rewards in dataloader:
            if state_sums is None:
                state_sums = torch.zeros(states.shape[1])
                state_squares_sum = torch.zeros(states.shape[1])
            
            state_sums += states.sum(dim=0)
            state_squares_sum += (states ** 2).sum(dim=0)
            reward_sum += rewards.sum()
            reward_square_sum += (rewards ** 2).sum()
            num_samples += states.shape[0]

        # Calculate mean
        state_means = state_sums / num_samples
        reward_mean = reward_sum / num_samples

        # Calculate std
        state_stds = torch.sqrt(state_squares_sum / num_samples - state_means ** 2)
        reward_std = torch.sqrt(reward_square_sum / num_samples - reward_mean ** 2)


        # Set normalization parameters
        self.normalization = {
            'states_mean': state_means,
            'states_std': state_stds,
            'rewards_mean': reward_mean,
            'rewards_std': reward_std
        }

    def normalize(self, states, rewards):
        """
        Normalize states and rewards for a batch of data.

        Parameters:
        - states: Tensor, batch of states.
        - rewards: Tensor, batch of rewards.

        Returns:
        - norm_states: Tensor, normalized states.
        - norm_rewards: Tensor, normalized rewards.
        """

        norm_states = (states - torch.FloatTensor(self.normalization['states_mean'])) / torch.FloatTensor(self.normalization['states_std'])

        norm_rewards = (rewards - self.normalization['rewards_mean']) / self.normalization['rewards_std']

        return norm_states, norm_rewards

    def reinitialize_weights(self, model):
        #Reinitializes the weights of a given model.
        if isinstance(model, nn.Linear):
            model.reset_parameters()
        elif hasattr(model, 'children'):
            for child in model.children():
                self.reinitialize_weights(child)
        
    def update_model(self, states, actions, rewards):
        
        states = torch.FloatTensor(states)
        actions = torch.LongTensor(actions)
        rewards = torch.FloatTensor(rewards)
        #self.epsilon_0 *= self.epsilon_decay
        
        start_time = time.time()  # Start time


        norm_states, norm_rewards = self.normalize(states, rewards)

        # Predict rewards for all actions given the batch of states
        predicted_rewards = self.model(norm_states)

        # Calculate loss for the actions taken
        # This assumes a model output shape of [batch_size, n_actions] and that actions are indices
        criterion = nn.MSELoss()
        action_indices = actions.unsqueeze(1)  # Add an extra dimension to index predicted_rewards
        predicted_rewards_for_actions = predicted_rewards.gather(1, action_indices).squeeze()
        loss = criterion(predicted_rewards_for_actions, norm_rewards)

        # Backpropagation
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        end_time = time.time()
        self.fitting_times.append(end_time - start_time)
        
        
    def plot(self):

        interface_scores = [i[0] for i in self.norm_states]
        total_scores     = [i[1] for i in self.norm_states]
                                
        # Creating the 2x2 plot
        fig, axs = plt.subplots(2, 3, figsize=(10, 6))

        # Reward and Loss
        cumulative_rewards = np.cumsum(self.rewards[1:])
        cumulative_loss    = np.cumsum(self.loss[1:])
        cumulative_rewards /= np.amax(cumulative_rewards)
        cumulative_loss    /= np.amax(cumulative_loss)
        
        axs[0, 0].plot(range(len(cumulative_loss)), cumulative_loss, label='Cumulative Loss')
        axs[0, 0].plot(range(len(cumulative_rewards)), cumulative_rewards, label='Cumulative Rewards')
        axs[0, 0].set_xlabel('Episodes')
        axs[0, 0].set_ylabel('Reward or Loss')
        axs[0, 0].set_title('Reward or Loss Over Episodes')
        axs[0, 0].set_xlim(0, len(cumulative_loss))
        axs[0, 0].set_ylim(0,1)
        axs[0, 0].legend()

        # All three scores vs. episodes
        axs[0, 1].plot(range(len(total_scores)), total_scores, label='Total Score')
        axs[0, 1].plot(range(len(interface_scores)), interface_scores, label='Interface Score')
        axs[0, 1].set_xlabel('Episodes')
        axs[0, 1].set_ylabel('Scores')
        axs[0, 1].set_title('Scores Over Episodes')
        axs[0, 1].set_xlim(0, len(total_scores))
        axs[0, 1].set_ylim(ymin=0)
        axs[0, 1].legend()

        axs[0, 2].plot(range(len(self.fitting_times)), self.fitting_times)
        axs[0, 2].set_title("Fitting Times")
        axs[0, 2].set_ylabel("Time (seconds)")
        axs[0, 2].set_xlim(0, len(self.fitting_times))
        axs[0, 2].set_ylim(ymin=0)
    
        # Scatter plot of Interface Score vs. Total Score
        scatter = axs[1, 0].scatter(total_scores, interface_scores, c=self.actions_taken, cmap='viridis')
        lim = np.amax(total_scores + interface_scores)
        axs[1, 0].set_xlim(0,lim)
        axs[1, 0].set_ylim(0,lim)
        axs[1, 0].plot(axs[1, 0].get_xlim(), axs[1, 0].get_ylim(), ls="--", c=".3")
        axs[1, 0].set_xlabel('Total Score')
        axs[1, 0].set_ylabel('Interface Score')
        axs[1, 0].set_title('Interface Score vs. Total Score')

        # change of model over optimization
        axs[1, 1].plot(range(len(self.RMSE_weights)), self.RMSE_weights)
        axs[1, 1].set_xlim(0, len(self.RMSE_weights))
        #axs[1, 1].set_yscale("log")  
        axs[1, 1].set_xlabel('Episodes')
        axs[1, 1].set_ylabel('RMSE')
        axs[1, 1].set_title('RMSE of Weights compared to initial model')

        percent = float(sum(self.actions_taken))/len(self.actions_taken)
        axs[1, 2].pie([1-percent,percent], labels=["0","1"], autopct='%.0f%%')
        plt.tight_layout()
        plt.show()
        
    def save_model(self, file_name="./"):
        for i, (model, optimizer) in enumerate(zip(self.models, self.optimizers)):
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, f'{file_name}_model_{i}.pt')
        data = {
        'action_counts': self.action_counts,
        'rewards': self.rewards,
        'states': self.states,
        'norm_rewards': self.norm_rewards,
        'norm_states': self.norm_states,
        'actions_taken': self.actions_taken,
        'loss': self.loss,
        'predecessors': self.predecessors,
        'normalization': self.normalization,
        'fitting_times': self.fitting_times
        }
        with open(f'{file_name}_agent_variables.pkl', 'wb') as f:
            pickle.dump(data, f)    
        print(f"Saved model")
                
    def load_model(self, file_name="./"):
        for i, (model, optimizer) in enumerate(zip(self.models, self.optimizers)):
            file_path = f'{file_name}_model_{i}.pt'
            if os.path.isfile(file_path):
                checkpoint = torch.load(file_path)
                model.load_state_dict(checkpoint['model_state_dict'])
                optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            else:
                print(f"No saved model found")
                return
        file_path = f'{file_name}_agent_variables.pkl'
        if os.path.isfile(file_path):
            with open(file_path, 'rb') as f:
                data = pickle.load(f)
                self.action_counts = data['action_counts']
                self.rewards = data['rewards']
                self.states = data['states']
                self.norm_rewards = data['norm_rewards']
                self.norm_states = data['norm_states']
                self.actions_taken = data['actions_taken']
                self.loss = data['loss']
                self.predecessors = data['predecessors']
                self.normalization = data['normalization']
                self.fitting_times = data['fitting_times']
            
    def delete_model(self, file_name="./"):
        for i in range(self.n_actions):
            file_path = f'{file_name}_model_{i}.pt'
            if os.path.isfile(file_path):
                os.remove(file_path)
        file_path = f'{file_name}_agent_variables.pkl'
        if os.path.isfile(file_path):
            os.remove(file_path)
        print(f"Deleted models")
        
    def save_weights_biases(self, file_name="./"):
        torch.save(self.model.state_dict(), f'{file_name}model_weights.pth')
        
    def load_weights_biases(self, file_name="./"):
        if os.path.isfile(f'{file_name}model_weights.pth'):
            self.model.load_state_dict(torch.load(f'{file_name}model_weights.pth'))

In [64]:
n_actions = 2
n_features = 3
agent = NeuralBanditAgent(n_actions, n_features, hidden_size=6, lr=0.05, epsilon_decay=0.995)
agent.set_normalization_parameters(train_loader)

num_epochs = 30
evaluate_model(agent, val_loader)
for epoch in range(num_epochs):
    for states, actions, rewards in train_loader:
        agent.update_model(states, actions, rewards)
    evaluate_model(agent, val_loader)

  action = torch.tensor(row['action'], dtype=torch.long)


Validation Loss: 1.0073
Validation Loss: 0.9404
Validation Loss: 0.9250
Validation Loss: 0.9277
Validation Loss: 0.9265
Validation Loss: 0.9249
Validation Loss: 0.9259
Validation Loss: 0.9317
Validation Loss: 0.9256
Validation Loss: 0.9290
Validation Loss: 0.9243
Validation Loss: 0.9285
Validation Loss: 0.9286
Validation Loss: 0.9269
Validation Loss: 0.9276
Validation Loss: 0.9393
Validation Loss: 0.9323
Validation Loss: 0.9310
Validation Loss: 0.9408
Validation Loss: 0.9344
Validation Loss: 0.9306
Validation Loss: 0.9288
Validation Loss: 0.9276
Validation Loss: 0.9254
Validation Loss: 0.9302
Validation Loss: 0.9277
Validation Loss: 0.9264
Validation Loss: 0.9316
Validation Loss: 0.9277
Validation Loss: 0.9273
Validation Loss: 0.9294


In [None]:
def evaluate_model(agent, val_loader):
    agent.model.eval()  # Set the model to evaluation mode
    total_loss = 0
    with torch.no_grad():  # No need to track gradients for validation
        for states, actions, rewards in val_loader:
            states, rewards = agent.normalize(states, rewards)
            predicted_rewards = agent.model(states)
            criterion = nn.MSELoss()
            action_indices = actions.unsqueeze(1)  # Add an extra dimension to index predicted_rewards
            predicted_rewards_for_actions = predicted_rewards.gather(1, action_indices).squeeze()
            loss = criterion(predicted_rewards_for_actions, rewards)
            total_loss += loss.item()
    
    avg_loss = total_loss / len(val_loader)
    print(f'Validation Loss: {avg_loss:.4f}')

# Call the evaluate function with your model and the validation DataLoader
#evaluate_model(agent, val_loader)

