# Imports

In [6]:
import os
import pickle

import matplotlib.pyplot as plt
import pandas as pd
from scipy.spatial.distance import cosine
import torch
from tqdm import tqdm

In [7]:
# Run on the GPU when PyTorch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    training_device = torch.device("cuda")
else:
    training_device = torch.device("cpu")
# `device` is the alias the training/evaluation code below refers to.
device = training_device

In [28]:
# Load the training table from the CSV on disk.
df = pd.read_csv("Data/cf_train_no_noise.csv")
# Maps each raw target value to its integer class index.
encoding_map = {0: 0, 0.25: 1, 0.5: 2, 0.75: 3, 1: 4}


def encode_to_classes(value):
    """Return the class index for a raw target value.

    Args:
        value: Raw target value; expected to be one of the keys of
            ``encoding_map``.

    Returns:
        The mapped class index, or ``None`` when ``value`` is not a key of
        ``encoding_map`` (including NaN and unhashable values).
    """
    try:
        # A direct hash lookup replaces the linear scan over the items;
        # numerically equal ints/floats (1 and 1.0) hash to the same key.
        return encoding_map.get(value)
    except TypeError:
        # Unhashable values can never equal a numeric key.
        return None

# Overwrite the raw target column in place with its integer class labels.
df["target_10_val"] = df["target_10_val"].apply(encode_to_classes)

# Custom Dataloader

In [20]:
class CustomDataset:
    """Serve contiguous row ranges of a dataframe as PyTorch DataLoaders.

    The feature matrix ``X`` is the dataframe without the columns listed in
    ``columns_to_drop``; the labels ``y`` are the ``target_10_val`` column.
    ``device`` is only stored on the instance; batches are not moved to it
    by this class.
    """

    def __init__(self, dataframe, batch_size, device=training_device, shuffle=False):
        self.shuffle = shuffle
        self.device = device
        self.batch_size = batch_size
        self.df = dataframe
        # Bookkeeping and target columns that must not be fed to the model.
        self.columns_to_drop = ['row_num','day','era','target_10_val','target_5_val','sigma','day_no']
        self.X = self.df.drop(columns=self.columns_to_drop)
        self.y = self.df['target_10_val']

    def generate_batches_with_labels(self, start_idx, end_idx):
        """Return a DataLoader over the rows at positions ``[start_idx, end_idx)``.

        Each batch is a ``(features, labels)`` pair; batch size and shuffling
        come from the values given to the constructor.
        """
        row_window = slice(start_idx, end_idx)
        features = torch.tensor(self.X.iloc[row_window].values)
        targets = torch.tensor(self.y.iloc[row_window].values)
        return torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(features, targets),
            batch_size=self.batch_size,
            shuffle=self.shuffle,
        )
    
        

In [22]:
# Shared dataset wrapper used by the cells below: batches of 64 rows, no shuffling.
customDS = CustomDataset(df, batch_size=64, shuffle=False)

# Model

In [4]:
class MyMLP(torch.nn.Module):
    """Fully connected network that outputs log-probabilities.

    ``layers`` lists the layer widths, e.g. ``[24, 64, 32, 5]``. Every linear
    layer except the last is followed by ``activation`` and, when ``dropout``
    is non-zero, a ``Dropout(dropout)`` module. The result of the final
    linear layer goes through ``LogSoftmax`` over dimension 1.
    """

    def __init__(self, layers, activation=torch.nn.ReLU(), dropout=0.5):
        super().__init__()
        final_idx = len(layers) - 2
        stack = []
        for idx, (n_in, n_out) in enumerate(zip(layers[:-1], layers[1:])):
            stack.append(torch.nn.Linear(n_in, n_out))
            if idx == final_idx:
                # The output layer gets neither activation nor dropout.
                continue
            stack.append(activation)
            if dropout != 0:
                stack.append(torch.nn.Dropout(dropout))
        self.layers = torch.nn.Sequential(*stack)

        self.softmax = torch.nn.LogSoftmax(dim=1)

    def forward(self, X):
        """Return the log-softmax of the stacked layers applied to ``X``."""
        return self.softmax(self.layers(X))

In [26]:
def _run_epoch(model, criterion, loader, target_device, optimizer=None):
    """Make one pass over ``loader`` and return ``(mean_batch_loss, accuracy)``.

    Weights are updated only when ``optimizer`` is given. The caller is
    responsible for ``model.train()`` / ``model.eval()`` and for wrapping
    evaluation passes in ``torch.no_grad()``.
    """
    loss_sum = 0.0
    n_batches = n_correct = n_seen = 0
    for inputs, labels in loader:
        inputs = inputs.to(target_device).to(torch.float32)
        labels = labels.to(target_device).to(torch.long)

        if optimizer is not None:
            optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        if optimizer is not None:
            loss.backward()
            optimizer.step()

        loss_sum += loss.item()
        n_batches += 1
        _, predicted = torch.max(outputs, 1)
        n_seen += labels.size(0)
        n_correct += (predicted == labels).sum().item()

    # max(..., 1) keeps an empty loader from raising ZeroDivisionError.
    return loss_sum / max(n_batches, 1), n_correct / max(n_seen, 1)


def _plot_history(history):
    """Plot loss and accuracy curves; validation curves only when recorded."""
    epochs = range(1, len(history["train_loss"]) + 1)
    panels = (("Loss", "train_loss", "val_loss"), ("Accuracy", "train_acc", "val_acc"))
    plt.figure(figsize=(10, 5))
    for position, (quantity, train_key, val_key) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(epochs, history[train_key], label='Train')
        # Without a validation loader these lists are empty; plotting them
        # against `epochs` would raise a length-mismatch ValueError.
        if history[val_key]:
            plt.plot(epochs, history[val_key], label='Validation')
        plt.xlabel('Epochs')
        plt.ylabel(quantity)
        plt.title(f'Training and Validation {quantity}')
        plt.legend()
    plt.show()


def train_model(model, criterion, optimizer, train_loader, val_loader, num_epochs=10, verbose=True, plot=True):
    """Train ``model`` and optionally evaluate it after every epoch.

    Args:
        model: Module to train; batches are moved to the device of its first
            parameter (CPU if it has no parameters).
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of validation batches, or ``None`` to skip
            validation entirely.
        num_epochs: Number of passes over ``train_loader``.
        verbose: Print a one-line summary after each epoch.
        plot: Draw the loss/accuracy curves with matplotlib when done.

    Returns:
        dict with per-epoch lists under ``"train_loss"``, ``"train_acc"``,
        ``"val_loss"`` and ``"val_acc"``; the validation lists are empty
        when ``val_loader`` is ``None``.
    """
    # Inputs must live where the model lives, so take the device from the
    # model itself rather than from a notebook-level global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    history = {"train_loss": [], "train_acc": [], "val_loss": [], "val_acc": []}
    for epoch in range(num_epochs):
        model.train()
        train_loss, train_accuracy = _run_epoch(
            model, criterion, train_loader, target_device, optimizer
        )
        history["train_loss"].append(train_loss)
        history["train_acc"].append(train_accuracy)

        # The model is switched to eval mode after every epoch, whether or
        # not a validation pass follows.
        model.eval()
        summary = (f'Epoch [{epoch+1}/{num_epochs}], '
                   f'Train Loss: {train_loss:.4f}, Train Acc: {train_accuracy:.4f}')

        if val_loader is not None:
            with torch.no_grad():
                val_loss, val_accuracy = _run_epoch(
                    model, criterion, val_loader, target_device
                )
            history["val_loss"].append(val_loss)
            history["val_acc"].append(val_accuracy)
            summary += f', Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.4f}'

        if verbose:
            print(summary)

    if plot:
        _plot_history(history)
    return history

In [None]:
# Baseline network with layer widths 24 -> 64 -> 32 -> 12 and no dropout.
Model = MyMLP(layers=[24, 64, 32, 12], dropout=0).to(device)
print(Model)
optimizer = torch.optim.Adam(Model.parameters(), lr=0.01)
# NOTE(review): the Noise_0_era_* loaders are not defined in the visible part
# of this notebook; confirm they exist before running this cell.
train_model(
    Model,
    torch.nn.NLLLoss(),
    optimizer,
    Noise_0_era_train_loader,
    Noise_0_era_test_loader,
    num_epochs=200,
)

In [12]:
# Row count of the training dataframe (the notebook's output follows below).
len(df)

62400

# Train Experts

In [24]:
def train_experts(no_of_experts):
    """Train one MLP per contiguous slice of ``df`` and save each to disk.

    The rows of ``df`` are split into ``no_of_experts`` consecutive chunks of
    ``len(df) // no_of_experts`` rows; the last chunk also receives the
    remainder. Each expert is trained for 200 epochs without validation and
    saved as ``Experts/expert_no_<k>.pt`` with ``k`` starting at 1.

    Args:
        no_of_experts: Number of experts (and data chunks) to train.

    Raises:
        ValueError: If ``no_of_experts`` is not between 1 and ``len(df)``;
            outside that range the chunk size would be undefined or zero.
    """
    total_rows = len(df)
    if not 1 <= no_of_experts <= total_rows:
        raise ValueError(
            f"no_of_experts must be between 1 and {total_rows}, got {no_of_experts}"
        )

    rows_per_expert = total_rows // no_of_experts
    starts = [i * rows_per_expert for i in range(no_of_experts)]
    # Each chunk ends where the next begins; the last one runs to the end of df.
    ends = starts[1:] + [total_rows]

    # Create the output folder up front so a missing directory cannot throw
    # away a finished training run at save time.
    os.makedirs("Experts", exist_ok=True)

    for expert_no, (start_idx, end_idx) in enumerate(zip(starts, ends), start=1):
        train_loader = customDS.generate_batches_with_labels(start_idx, end_idx)
        Model = MyMLP(layers=[24, 64, 32, 5], dropout=0).to(device)
        optimizer = torch.optim.Adam(Model.parameters(), lr=0.001)
        train_model(Model, torch.nn.NLLLoss(), optimizer, train_loader, None, num_epochs=200)
        PATH = "Experts/expert_no_" + str(expert_no) + ".pt"
        # Saves the whole module object (not just a state_dict).
        torch.save(Model, PATH)
    print("EVERYTHING COMPLETED SUCCESSFULLY")

In [None]:
# We use 6 experts to avoid cases where all of them vote for one class, so
# that we get at least some majority.
train_experts(no_of_experts=6)

In [None]:
def winnow(no_of_experts, expert_path_format="no_noise_expert_{}.pt"):
    """Learn expert weights with multiplicative promote/demote updates.

    Every sample of ``df`` is classified by a weighted vote of the experts.
    Only when the vote is wrong are the experts updated: those that predicted
    the true class get their weight doubled, all others get it halved.

    Args:
        no_of_experts: Number of saved experts to load (numbered from 1).
        expert_path_format: ``str.format`` template for the expert files,
            filled with the expert number.
            NOTE(review): ``train_experts`` in this notebook saves to
            ``"Experts/expert_no_<k>.pt"``; pass
            ``"Experts/expert_no_{}.pt"`` to load those files directly.

    Returns:
        The list of final weights, one per expert. It is also pickled to
        ``weights/weights_no_noise.pkl``.
    """
    n_classes = 5
    experts = []
    for i in range(no_of_experts):
        # SECURITY: this unpickles a full model object, which can execute
        # arbitrary code -- only load expert files you produced yourself.
        # weights_only=False is spelled out because newer PyTorch releases
        # default to True, which rejects whole-module pickles.
        model = torch.load(
            expert_path_format.format(i + 1),
            map_location=device,
            weights_only=False,
        )
        model.eval()
        experts.append(model)
    weights = [1] * no_of_experts
    # balance[j] is the net number of promotions of expert j, so its weight
    # is exactly 2 ** balance[j] without any float under/overflow.
    balance = [0] * no_of_experts
    increase_count = 0
    reduce_count = 0

    # Run the update rule over the whole dataset.
    train_loader = customDS.generate_batches_with_labels(0, len(df))
    for inputs, labels in tqdm(train_loader):
        # The experts are fixed, so their predictions do not depend on the
        # weights: evaluate each expert once per batch, without autograd.
        batch = inputs.to(device).to(torch.float32)
        with torch.no_grad():
            expert_preds = [torch.max(model(batch), 1)[1].tolist() for model in experts]

        for i, label in enumerate(labels):
            correct_result = int(label)

            # Vote with the real weight 2**balance, scaled by the largest
            # one so every term lies in (0, 1]. Summing the raw exponents
            # instead would let demoted experts subtract from their own
            # class and let a class nobody chose win the vote.
            top = max(balance, default=0)
            predictions = [0.0] * n_classes
            for j, preds in enumerate(expert_preds):
                predictions[preds[i]] += 2.0 ** (balance[j] - top)
            result_pred = predictions.index(max(predictions))

            # Weights change only when the ensemble prediction is wrong.
            if result_pred != correct_result:
                for j, preds in enumerate(expert_preds):
                    if preds[i] == correct_result:
                        weights[j] = weights[j] * 2
                        balance[j] += 1
                        increase_count += 1
                    else:
                        weights[j] = weights[j] / 2
                        balance[j] -= 1
                        reduce_count += 1
        print("After Batch",weights)
        print("After Batch balance",balance)
        print("increased",increase_count,"Reduced",reduce_count)

    # Make sure the output folder exists before writing the result.
    os.makedirs("weights", exist_ok=True)
    with open('weights/weights_no_noise.pkl', 'wb') as f:
        pickle.dump(weights, f)
    return weights

In [None]:
# Learn the per-expert weights for the 6 saved experts.
weights = winnow(no_of_experts=6)

In [None]:
def weighted_majority(no_of_experts, expert_path_format="no_noise_expert_{}.pt"):
    """Learn expert weights with demotion-only (halving) updates.

    Every sample of ``df`` is classified by a weighted vote of the experts.
    Only when the vote is wrong are the experts updated: each expert that
    predicted a wrong class has its weight halved; correct experts are left
    unchanged.

    Args:
        no_of_experts: Number of saved experts to load (numbered from 1).
        expert_path_format: ``str.format`` template for the expert files,
            filled with the expert number.
            NOTE(review): ``train_experts`` in this notebook saves to
            ``"Experts/expert_no_<k>.pt"``; pass
            ``"Experts/expert_no_{}.pt"`` to load those files directly.

    Returns:
        The list ``balance`` with one integer per expert: minus the number of
        times it was halved, i.e. the base-2 exponent of its weight. It is
        also pickled to ``weights/weighted_majority_in_power_no_noise.pkl``.
    """
    n_classes = 5
    experts = []
    for i in range(no_of_experts):
        # SECURITY: this unpickles a full model object, which can execute
        # arbitrary code -- only load expert files you produced yourself.
        # weights_only=False is spelled out because newer PyTorch releases
        # default to True, which rejects whole-module pickles.
        model = torch.load(
            expert_path_format.format(i + 1),
            map_location=device,
            weights_only=False,
        )
        model.eval()
        experts.append(model)
    weights = [1] * no_of_experts
    # balance[j] is minus the number of halvings of expert j, so its weight
    # is exactly 2 ** balance[j] without any float underflow.
    balance = [0] * no_of_experts
    increase_count = 0  # never incremented here; kept for the progress print
    reduce_count = 0

    # Run the update rule over the whole dataset.
    train_loader = customDS.generate_batches_with_labels(0, len(df))
    for inputs, labels in tqdm(train_loader):
        # The experts are fixed, so their predictions do not depend on the
        # weights: evaluate each expert once per batch, without autograd.
        batch = inputs.to(device).to(torch.float32)
        with torch.no_grad():
            expert_preds = [torch.max(model(batch), 1)[1].tolist() for model in experts]

        for i, label in enumerate(labels):
            correct_result = int(label)

            # Vote with the real weight 2**balance, scaled by the largest
            # one so every term lies in (0, 1]. Summing the raw (never
            # positive) exponents instead would favour classes that no
            # expert voted for.
            top = max(balance, default=0)
            predictions = [0.0] * n_classes
            for j, preds in enumerate(expert_preds):
                predictions[preds[i]] += 2.0 ** (balance[j] - top)
            result_pred = predictions.index(max(predictions))

            # Weights change only when the ensemble prediction is wrong,
            # and only for the experts that were wrong themselves.
            if result_pred != correct_result:
                for j, preds in enumerate(expert_preds):
                    if preds[i] != correct_result:
                        weights[j] = weights[j] / 2
                        balance[j] -= 1
                        reduce_count += 1
        print("After Batch",weights)
        print("After Batch balance",balance)
        print("increased",increase_count,"Reduced",reduce_count)

    # Make sure the output folder exists before writing the result.
    os.makedirs("weights", exist_ok=True)
    with open('weights/weighted_majority_in_power_no_noise.pkl', 'wb') as f:
        pickle.dump(balance, f)
    return balance

In [None]:
# Rebinds `weights` to the per-expert balance list returned by weighted_majority.
weights = weighted_majority(no_of_experts=6)