In [26]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils import prune
from sklearn.model_selection import KFold
import pandas as pd
from sentence_transformers import SentenceTransformer
import numpy as np
import tqdm

## Nova Implementação do Paper

In [48]:
# Nova implementação do Paper
class NeuralNet(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(NeuralNet, self).__init__()
        self.fc1 = nn.Linear(input_dim, 256)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(256, output_dim)
    
    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

def train_network(model, criterion, optimizer, X_train, Y_train, epochs=100):
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, Y_train)
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.4f}")
    
    with torch.no_grad():
        model.eval()
        predictions = model(X_train)
        ss_total = ((Y_train - Y_train.mean()) ** 2).sum()
        ss_residual = ((Y_train - predictions) ** 2).sum()
        r2 = 1 - (ss_residual / ss_total)
        mse = torch.mean((Y_train - predictions) ** 2)
        print(f"Final Training R^2: {r2.item():.4f}")
        print(f"Final Training MSE: {mse.item():.4f}")

def determine_lambda(X, Y, model, criterion, k=5):
    kfold = KFold(n_splits=k)
    lambdas = [0.01, 0.1, 1, 10]
    best_lambda = None
    best_score = float('inf')
    
    for lmbda in lambdas:
        print("\n",lmbda)
        scores = []
        for train_idx, val_idx in kfold.split(X):
            model_copy = NeuralNet(X.size(1), Y.size(1))
            optimizer = optim.Adam(model_copy.parameters())
            
            X_train, X_val = X[train_idx], X[val_idx]
            Y_train, Y_val = Y[train_idx], Y[val_idx]
            
            train_network(model_copy, criterion, optimizer, X_train, Y_train)
            val_outputs = model_copy(X_val)
            val_loss = criterion(val_outputs, Y_val).item()
            scores.append(val_loss + lmbda * torch.norm(model_copy.fc1.weight, p=1).item())
        
        avg_score = sum(scores) / len(scores)
        if avg_score < best_score:
            best_score = avg_score
            best_lambda = lmbda
    
    return best_lambda

def prune_model(model, lambda_value):
    parameters_to_prune = [(model.fc1, 'weight'), (model.fc2, 'weight')]
    prune.global_unstructured(
        parameters_to_prune,
        pruning_method=prune.L1Unstructured,
        amount=lambda_value,
    )
    return model

def iterative_pruning(X, Y, input_dim, output_dim, termination_condition, iterations):
    X, Y = torch.tensor(X, dtype=torch.float32), torch.tensor(Y, dtype=torch.float32)
    model = NeuralNet(input_dim, output_dim)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters())
    i = 0
    
    while True:
        print("Iteration ", i)
        # Step 1: Train or retrain the neural network
        train_network(model, criterion, optimizer, X, Y)
        
        # Step 2: Introduce magnitude parameter β and prune
        lambda_value = determine_lambda(X, Y, model, criterion)
        model = prune_model(model, lambda_value)
        
        # Step 3: Check termination condition
        remaining_weights = sum(torch.sum(torch.abs(p) > 0).item() for p in model.parameters())
        if termination_condition(remaining_weights) or i==iterations:
            break
        
        # Step 4: Prune variables (features) with β_i = 0
        non_zero_features = (model.fc1.weight.abs().sum(dim=0) > 0).nonzero(as_tuple=True)[0]
        X = X[:, non_zero_features]
        model = NeuralNet(X.size(1), output_dim)

        i+=1
    
    return model

def termination_condition(remaining_weights, threshold=50000):
    print(remaining_weights)
    return remaining_weights < threshold

## Código Base da APS 2

In [28]:
# APS 2 code

import torch.nn as nn
import torch

class AutoEncoder(nn.Module):
    """
    AutoEncoder class.

    """    
    def __init__(self, input_dim, hidden_dim):
        super(AutoEncoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32)
        )
        self.decoder = nn.Sequential(
            nn.Linear(32, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
            nn.ReLU(),
            nn.Linear(128, input_dim)
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded
    
    def encode(self, x):
        """
        Encode the input data to the latent space.
        
        Args:
            x: Input tensor to encode.
        
        Returns:
            Encoded representation of the input.
        """
        with torch.no_grad():
            return self.encoder(x)

def fine_tuning(model, embeddings):
    """
    Fine-tune the model using the embeddings.

    Args:
        model: The autoencoder model.
        embeddings: The embeddings to fine-tune the model.

    Returns:
        tuned_embeddings : The tuned embeddings.
    """    
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    loss_fn = nn.MSELoss()
    epochs = 100

    for epoch in range(epochs):
        optimizer.zero_grad()
        output = model(embeddings)
        loss = loss_fn(output, embeddings)
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {loss.item():.4f}")

    with torch.no_grad():
        reconstructed = model(embeddings)
        total_variance = torch.sum((embeddings - torch.mean(embeddings, dim=0)) ** 2)
        residual_variance = torch.sum((embeddings - reconstructed) ** 2)
        r2_score = 1 - (residual_variance / total_variance)
        mse = torch.mean((embeddings - reconstructed) ** 2)

        tuned_embeddings = model.encode(embeddings)

    print("R2: ", r2_score.item())
    print("MSE: ", mse.item())

    return tuned_embeddings

## Carregando dados do Paper

In [50]:
df = pd.read_csv('pumadyn-32nm\puma32H.data', sep=',', header=None)

x, y = df.iloc[:, :-1], df.iloc[:, -1]

display(df.head())

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,23,24,25,26,27,28,29,30,31,32
0,0.73646,-1.761829,1.590594,-0.268853,0.572145,-1.941886,0.727704,1.869884,0.224501,-1.442215,...,2.220872,2.440442,0.510303,1.157391,0.265448,1.141465,0.356314,0.568853,1.954376,0.052627
1,-0.389711,0.342256,-1.522463,0.237098,-1.771509,-0.885488,-0.679111,-0.410219,-0.331288,2.092878,...,1.580327,0.809171,2.235141,1.517466,1.795334,0.929355,1.663727,0.754457,0.650492,0.001308
2,-0.269351,1.622452,-2.047811,1.720603,-1.749964,-1.618348,0.327188,-0.317671,0.046938,1.911881,...,1.723625,1.718983,2.438604,1.113059,2.18027,1.794781,0.627965,0.961728,1.258398,0.003834
3,0.25684,0.16504,-1.776401,1.723357,2.117348,-1.692605,1.077334,1.761624,-0.333281,1.57386,...,1.369922,1.528851,1.901706,2.222391,0.968513,0.546513,2.23634,1.412382,1.898374,-0.00201
4,0.96827,1.834561,0.299747,0.308144,0.064617,2.174855,-1.213028,-1.563548,-2.27727,-0.059009,...,1.171433,2.273871,2.199479,0.6464,0.302676,1.323917,2.324899,0.33422,0.491228,0.015778


In [51]:
x_embeddings = torch.tensor(x.values).float()
y_embeddings = torch.tensor(y.values).float()
y_embeddings = y_embeddings.view(-1, 1)

In [52]:
print(x_embeddings.shape, y_embeddings.shape)

torch.Size([4499, 32]) torch.Size([4499, 1])


In [53]:
final_model = iterative_pruning(x_embeddings, y_embeddings, input_dim=x_embeddings.shape[1], output_dim=x_embeddings.shape[1], termination_condition=lambda w: termination_condition(w), iterations=3)


  X, Y = torch.tensor(X, dtype=torch.float32), torch.tensor(Y, dtype=torch.float32)
  return F.mse_loss(input, target, reduction=self.reduction)


Iteration  0
Epoch 1/100, Loss: 16.5935
Epoch 2/100, Loss: 11.9619
Epoch 3/100, Loss: 8.8392
Epoch 4/100, Loss: 6.7698
Epoch 5/100, Loss: 5.3735
Epoch 6/100, Loss: 4.4008
Epoch 7/100, Loss: 3.7011
Epoch 8/100, Loss: 3.1866
Epoch 9/100, Loss: 2.8040
Epoch 10/100, Loss: 2.5149
Epoch 11/100, Loss: 2.2862
Epoch 12/100, Loss: 2.0920
Epoch 13/100, Loss: 1.9172
Epoch 14/100, Loss: 1.7576
Epoch 15/100, Loss: 1.6151
Epoch 16/100, Loss: 1.4920
Epoch 17/100, Loss: 1.3874
Epoch 18/100, Loss: 1.2965
Epoch 19/100, Loss: 1.2122
Epoch 20/100, Loss: 1.1285
Epoch 21/100, Loss: 1.0421
Epoch 22/100, Loss: 0.9536
Epoch 23/100, Loss: 0.8665
Epoch 24/100, Loss: 0.7852
Epoch 25/100, Loss: 0.7131
Epoch 26/100, Loss: 0.6516
Epoch 27/100, Loss: 0.5999
Epoch 28/100, Loss: 0.5563
Epoch 29/100, Loss: 0.5188
Epoch 30/100, Loss: 0.4859
Epoch 31/100, Loss: 0.4566
Epoch 32/100, Loss: 0.4303
Epoch 33/100, Loss: 0.4066
Epoch 34/100, Loss: 0.3852
Epoch 35/100, Loss: 0.3660
Epoch 36/100, Loss: 0.3489
Epoch 37/100, Loss: 0.

## Comparando com APS2

In [54]:
df = pd.read_csv('https://raw.githubusercontent.com/tiagoft/NLP/main/wiki_movie_plots_drama_comedy.csv')

x, y = df["Plot"], df["Genre"]

display(df.head())

Unnamed: 0,Plot,Genre
0,The film is about a family who move to the sub...,comedy
1,Before heading out to a baseball game at a nea...,comedy
2,The plot is that of a black woman going to the...,comedy
3,On a beautiful summer day a father and mother ...,drama
4,A thug accosts a girl as she leaves her workpl...,drama


In [55]:
print("Loading Sentence Transformer model...")
model = SentenceTransformer("all-MiniLM-L6-v2")

x_embeddings = model.encode(x.tolist(), convert_to_tensor=True)
y_embeddings = model.encode(y.tolist(), convert_to_tensor=True)

Loading Sentence Transformer model...


In [56]:
print(x_embeddings.shape, y_embeddings.shape)

torch.Size([10343, 384]) torch.Size([10343, 384])


In [57]:
new_model = iterative_pruning(x_embeddings, y_embeddings, input_dim=x_embeddings.shape[1], output_dim=x_embeddings.shape[1], termination_condition=lambda w: termination_condition(w), iterations=3)

Iteration  0
Epoch 1/100, Loss: 0.0039


  X, Y = torch.tensor(X, dtype=torch.float32), torch.tensor(Y, dtype=torch.float32)


Epoch 2/100, Loss: 0.0033
Epoch 3/100, Loss: 0.0029
Epoch 4/100, Loss: 0.0024
Epoch 5/100, Loss: 0.0020
Epoch 6/100, Loss: 0.0016
Epoch 7/100, Loss: 0.0013
Epoch 8/100, Loss: 0.0011
Epoch 9/100, Loss: 0.0011
Epoch 10/100, Loss: 0.0010
Epoch 11/100, Loss: 0.0010
Epoch 12/100, Loss: 0.0009
Epoch 13/100, Loss: 0.0008
Epoch 14/100, Loss: 0.0007
Epoch 15/100, Loss: 0.0007
Epoch 16/100, Loss: 0.0007
Epoch 17/100, Loss: 0.0007
Epoch 18/100, Loss: 0.0007
Epoch 19/100, Loss: 0.0007
Epoch 20/100, Loss: 0.0007
Epoch 21/100, Loss: 0.0007
Epoch 22/100, Loss: 0.0007
Epoch 23/100, Loss: 0.0006
Epoch 24/100, Loss: 0.0006
Epoch 25/100, Loss: 0.0006
Epoch 26/100, Loss: 0.0006
Epoch 27/100, Loss: 0.0006
Epoch 28/100, Loss: 0.0006
Epoch 29/100, Loss: 0.0006
Epoch 30/100, Loss: 0.0006
Epoch 31/100, Loss: 0.0006
Epoch 32/100, Loss: 0.0006
Epoch 33/100, Loss: 0.0005
Epoch 34/100, Loss: 0.0005
Epoch 35/100, Loss: 0.0005
Epoch 36/100, Loss: 0.0005
Epoch 37/100, Loss: 0.0005
Epoch 38/100, Loss: 0.0005
Epoch 39/

In [58]:
autoencoder = AutoEncoder(input_dim=x_embeddings.shape[1], hidden_dim=128)
tuned_embedding = fine_tuning(autoencoder, x_embeddings)

Epoch 1/100, Loss: 0.0062
Epoch 2/100, Loss: 0.0056
Epoch 3/100, Loss: 0.0050
Epoch 4/100, Loss: 0.0046
Epoch 5/100, Loss: 0.0042
Epoch 6/100, Loss: 0.0038
Epoch 7/100, Loss: 0.0035
Epoch 8/100, Loss: 0.0033
Epoch 9/100, Loss: 0.0031
Epoch 10/100, Loss: 0.0029
Epoch 11/100, Loss: 0.0027
Epoch 12/100, Loss: 0.0026
Epoch 13/100, Loss: 0.0025
Epoch 14/100, Loss: 0.0024
Epoch 15/100, Loss: 0.0024
Epoch 16/100, Loss: 0.0023
Epoch 17/100, Loss: 0.0023
Epoch 18/100, Loss: 0.0022
Epoch 19/100, Loss: 0.0022
Epoch 20/100, Loss: 0.0022
Epoch 21/100, Loss: 0.0021
Epoch 22/100, Loss: 0.0021
Epoch 23/100, Loss: 0.0021
Epoch 24/100, Loss: 0.0021
Epoch 25/100, Loss: 0.0021
Epoch 26/100, Loss: 0.0021
Epoch 27/100, Loss: 0.0021
Epoch 28/100, Loss: 0.0021
Epoch 29/100, Loss: 0.0021
Epoch 30/100, Loss: 0.0021
Epoch 31/100, Loss: 0.0021
Epoch 32/100, Loss: 0.0021
Epoch 33/100, Loss: 0.0021
Epoch 34/100, Loss: 0.0021
Epoch 35/100, Loss: 0.0021
Epoch 36/100, Loss: 0.0020
Epoch 37/100, Loss: 0.0020
Epoch 38/1