In [149]:
ls

[0m[01;34mdb[0m/  [01;34mexamples[0m/  [01;34mfl_main[0m/  README.md  [01;34msetups[0m/


In [3]:
%cd ..

/home/carlos/Documents/8vo/FINAL_SISTEMAS_DISTRIBUIDOS /FL/FINAL/simple-fl


In [150]:
import os
import random
from torch.utils.data import DataLoader, Dataset
import pandas as pd
from typing import Tuple
from examples.Heart_Disease.data_preparation import preprocess_data
import torch.nn as nn
import torch.nn.functional as F
import torch
import pandas as pd
from typing import Dict, List
import numpy as np
import torch
import logging
import torch.optim as optim
from fl_main.agent.client import Client 

In [152]:

class MLP(nn.Module):
    """
    NN Architecture
    """
    def __init__(self,seed=42):
        super(MLP, self).__init__()
        torch.manual_seed(seed)

        self.fc1 = nn.Linear(16, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 1)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = torch.sigmoid(self.fc3(x))
        return x


In [74]:
class TabularDataset(Dataset):
    """Dataset tabular compatible con PyTorch"""
    def __init__(self, dataframe: pd.DataFrame, target_col: str):
        self.X = dataframe.drop(columns=[target_col]).values
        self.y = dataframe[target_col].values

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return (
            torch.tensor(self.X[idx], dtype=torch.float32),
            torch.tensor(self.y[idx], dtype=torch.float32),
        )


class DataManager:
    """Maneja datasets y DataLoaders (Singleton Pattern)"""
    _singleton_dm = None

    @classmethod
    def dm(cls, th: int = 0):
        if not cls._singleton_dm and th > 0:
            cls._singleton_dm = cls(th)
        return cls._singleton_dm

    def __init__(self, cutoff_th: int):
        # Cargar los datos desde tus archivos CSV
        #BASE_DIR = os.path.dirname(os.path.abspath(__file__))
        #train_df = pd.read_csv(os.path.join(BASE_DIR, "train.csv"))
        #val_df = pd.read_csv(os.path.join(BASE_DIR,"val.csv"))
        #test_df = pd.read_csv(os.path.join(BASE_DIR,"test.csv"))
        train_df = pd.read_csv("/home/carlos/Documents/8vo/FINAL_SISTEMAS_DISTRIBUIDOS /FL/FINAL/simple-fl/examples/Heart_Disease/train.csv")
        val_df = pd.read_csv("/home/carlos/Documents/8vo/FINAL_SISTEMAS_DISTRIBUIDOS /FL/FINAL/simple-fl/examples/Heart_Disease/val.csv")
        test_df = pd.read_csv("/home/carlos/Documents/8vo/FINAL_SISTEMAS_DISTRIBUIDOS /FL/FINAL/simple-fl/examples/Heart_Disease/test.csv")


        # Preprocesamiento
        train_df = preprocess_data(train_df)
        val_df = preprocess_data(val_df)
        test_df = preprocess_data(test_df)

        # Crear datasets PyTorch
        trainset = TabularDataset(train_df, target_col="diabetes")
        valset = TabularDataset(val_df, target_col="diabetes")
        testset = TabularDataset(test_df, target_col="diabetes")

        # Crear DataLoaders
        self.trainloader = DataLoader(trainset, batch_size=32, shuffle=True)
        self.valloader = DataLoader(valset, batch_size=32, shuffle=False)
        self.testloader = DataLoader(testset, batch_size=32, shuffle=False)

        self.cutoff_threshold = cutoff_th

    def get_random_batch(self, is_train=True) -> Tuple:
        """Devuelve un batch aleatorio para ver datos"""
        loader = self.trainloader if is_train else self.testloader
        features, labels = next(iter(loader))
        return features, labels


In [9]:
def preprocess_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Preprocesa el dataset tabular:
    - Crea la etiqueta 'diabetes'
    - Escala columnas numéricas
    - Codifica variables categóricas
    - Limpia y devuelve el DataFrame listo para PyTorch
    """
    
    def diabetes_flag(row):
        if row["fasting blood sugar"] == 1 or row["cholesterol"] > 240:
            return 1
        else:
            return 0

    df["diabetes"] = df.apply(diabetes_flag, axis=1)

    scale_cols = ['resting bp s', 'cholesterol','max heart rate']
    for c in scale_cols:
        min_val = df[c].min()
        max_val = df[c].max()
        df[c + "_scaled"] = (df[c] - min_val) / (max_val - min_val)

    df.columns = df.columns.str.strip()
    df = pd.get_dummies(
        df,
        columns=[
            "sex",
            "chest pain type",
            "fasting blood sugar",
            "resting ecg",
            "exercise angina",
            "ST slope",
        ],
        drop_first=True
    )

    df = df.drop(["resting bp s", "cholesterol",'max heart rate'], axis=1)

    df = df.astype(float)

    return df


In [154]:


class Converter:
    _singleton_cvtr = None
    @classmethod
    def covtr(cls):
        if not cls._singleton_cvtr:
            cls._singleton_cvtr = cls()
        return cls._singleton_cvtr
    
    def __init__(self):
        self.order_list=[]

    def convert_nn_to_dict_nparray(self, net) -> Dict[str, np.ndarray]:
        d = {}
        layers = vars(net)['_modules']

        for lname, model in layers.items():
            for i, ws in enumerate(model.parameters()):
                mname = f'{lname}_{i}'
                d[mname] = ws.data.cpu().numpy()
                self.order_list.append(mname)
        return d

    def convert_dict_nparray_to_nn(self, models: Dict[str, np.array]) -> MLP:
        """
        Convierte un diccionario de arrays de NumPy en un modelo CNN (clase Net).
        Se usa para reconstruir el modelo en PyTorch a partir de pesos federados.
        """
        net = MLP()
        layers = vars(net)['_modules']

        # Mantener el orden correcto de las capas
        npa_iter = iter(_order_dict(models, self.order_list))

        # Actualizar cada capa (excepto 'pool', que no tiene parámetros)
        for lname, model in layers.items():
            if lname != 'pool':
                for ws in model.parameters():
                    ws.data = torch.from_numpy(next(npa_iter))

        return net


    def get_model_names(self,net) -> List[str]:

        print("=== Model Layers ===")
        d = self.convert_nn_to_dict_nparray
        print(d.keys())

        return d.keys()

def _order_dict(d:Dict,l:List)->List:
    ordered_vals= list()
    for key in l:
        ordered_vals.append(d[key])
    return ordered_vals

In [112]:
class TrainingMetaData:
    # Número de muestras de entrenamiento usadas por ronda
    num_training_data = 100


def init_models() -> Dict[str, np.ndarray]:
    """
    Retorna el modelo base (sin entrenar) convertido a diccionario numpy.
    """
    input_dim = 17
    net = MLP(input_dim)  # 2 clases para binario
    return Converter.covtr().convert_nn_to_dict_nparray(net)


# -----------------------------------------------------------
# 2️⃣ Entrenamiento local
# -----------------------------------------------------------
def training(models: Dict[str, np.ndarray], init_flag: bool = False) -> Dict[str, np.ndarray]:
    if init_flag:
        DataManager.dm(int(TrainingMetaData.num_training_data / 4))
        return init_models()

    logging.info("--- Entrenamiento local ---")

    # Convertir pesos globales a modelo PyTorch
    net = Converter.covtr().convert_dict_nparray_to_nn(models)

    # Definir pérdida y optimizador
    criterion = nn.BCELoss()
    optimizer = optim.Adam(net.parameters(), lr=0.001)

    # Entrenar localmente
    dm = DataManager.dm()
    for epoch in range(10):
        for X_batch, y_batch in dm.trainloader:
            optimizer.zero_grad()
            outputs = net(X_batch)
            loss = criterion(outputs, y_batch.unsqueeze(1).float())
            loss.backward()
            optimizer.step()

    # Convertir de vuelta a diccionario numpy
    return Converter.covtr().convert_nn_to_dict_nparray(net)


# -----------------------------------------------------------
# 3️⃣ Evaluación de desempeño
# -----------------------------------------------------------
def compute_performance(models: Dict[str, np.ndarray], testdata, is_local: bool) -> float:
    net = Converter.covtr().convert_dict_nparray_to_nn(models)
    net.eval()

    correct = 0
    total = 0
    dm = DataManager.dm()

    with torch.no_grad():
        for X_batch, y_batch in dm.testloader:
            outputs = net(X_batch)
            preds = (outputs > 0.5).int()
            correct += (preds.squeeze() == y_batch).sum().item()
            total += y_batch.size(0)

    acc = float(correct) / total
    mt = "local" if is_local else "global"
    print(f"Accuracy del modelo {mt}: {100 * acc:.2f}%")
    return acc


# -----------------------------------------------------------
# 4️⃣ Criterio de parada
# -----------------------------------------------------------
def judge_termination(training_count: int = 0, gm_arrival_count: int = 0) -> bool:
    return training_count < 5  # Por ejemplo, 5 rondas locales


def prep_test_data():
    return 0



### Modelo red neuronal 

In [11]:
input_dim = 10
model = MLP(input_dim)
x = torch.randn(5, input_dim)

output = model(x)

In [12]:
print("Output shape:", output.shape)
print("Output:", output)

Output shape: torch.Size([5, 1])
Output: tensor([[0.4997],
        [0.4614],
        [0.4945],
        [0.4598],
        [0.5556]], grad_fn=<SigmoidBackward0>)


### Base de datos

In [129]:
data = "/home/carlos/Documents/8vo/FINAL_SISTEMAS_DISTRIBUIDOS /FL/FINAL/simple-fl/examples/Heart_Disease/train.csv"
data = pd.read_csv(data)
df_test = pd.DataFrame(data)


In [130]:
data.head()

Unnamed: 0,age,sex,chest pain type,resting bp s,cholesterol,fasting blood sugar,resting ecg,max heart rate,exercise angina,oldpeak,ST slope,target
0,57,1,4,140,192,0,0,148,0,0.4,2,0
1,54,0,2,140,309,0,1,140,0,0.0,1,0
2,65,0,3,155,269,0,0,148,0,0.8,1,0
3,51,0,4,114,258,1,2,96,0,1.0,1,0
4,40,1,4,110,167,0,2,114,1,2.0,2,1


In [131]:
df_processed = preprocess_data(df_test)

In [132]:
df_processed

Unnamed: 0,age,max heart rate,oldpeak,target,diabetes,resting bp s_scaled,cholesterol_scaled,sex_1,chest pain type_2,chest pain type_3,chest pain type_4,fasting blood sugar_1,resting ecg_1,resting ecg_2,exercise angina_1,ST slope_2,ST slope_3
0,57.0,148.0,0.4,0.0,0.0,0.700,0.340426,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0
1,54.0,140.0,0.0,0.0,1.0,0.700,0.547872,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
2,65.0,148.0,0.8,0.0,1.0,0.775,0.476950,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,51.0,96.0,1.0,0.0,1.0,0.570,0.457447,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0
4,40.0,114.0,2.0,1.0,0.0,0.550,0.296099,1.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
471,64.0,115.0,1.8,1.0,1.0,0.715,0.542553,1.0,0.0,0.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0
472,50.0,139.0,0.7,1.0,1.0,0.725,0.000000,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0
473,53.0,95.0,2.0,1.0,1.0,0.615,0.500000,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0
474,44.0,175.0,0.6,0.0,0.0,0.540,0.250000,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0


In [133]:
df_processed.describe()

Unnamed: 0,age,max heart rate,oldpeak,target,diabetes,resting bp s_scaled,cholesterol_scaled,sex_1,chest pain type_2,chest pain type_3,chest pain type_4,fasting blood sugar_1,resting ecg_1,resting ecg_2,exercise angina_1,ST slope_2,ST slope_3
count,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0,476.0
mean,53.798319,139.987395,0.932983,0.529412,0.567227,0.658088,0.371327,0.722689,0.197479,0.260504,0.493697,0.191176,0.161765,0.252101,0.369748,0.478992,0.069328
std,9.551042,26.491306,1.088226,0.499659,0.495981,0.094716,0.185318,0.448142,0.398516,0.439372,0.500486,0.393641,0.368622,0.434676,0.483244,0.500084,0.254278
min,29.0,60.0,-2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,47.75,120.0,0.0,0.0,0.0,0.6,0.329787,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,54.0,142.0,0.6,1.0,1.0,0.65,0.407801,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,60.0,160.0,1.6,1.0,1.0,0.7,0.48094,1.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,0.0
max,77.0,202.0,6.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


### Convert

In [17]:
imput_dim = 10
model = MLP(imput_dim)

cvtr = Converter.covtr()

In [18]:
model_dict = cvtr.convert_nn_to_dict_nparray(model)
print("Claves del modelo:", model_dict.keys())


Claves del modelo: dict_keys(['fc1_0', 'fc1_1', 'fc2_0', 'fc2_1', 'fc3_0', 'fc3_1'])


In [19]:
for k, v in model_dict.items():
    print(k, v.shape)

fc1_0 (120, 10)
fc1_1 (120,)
fc2_0 (84, 120)
fc2_1 (84,)
fc3_0 (1, 84)
fc3_1 (1,)


In [20]:
reconstructed_model = cvtr.convert_dict_nparray_to_nn(model_dict, input_dim)


In [21]:
reconstructed_model

MLP(
  (fc1): Linear(in_features=10, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=1, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [22]:
x = torch.randn(5, input_dim)
x

tensor([[ 0.2596,  1.3616, -0.7547,  0.1685,  0.8518, -0.6781,  0.5046,  1.0661,
          0.9372,  1.3858],
        [-1.1842, -0.1510, -0.5501, -1.7810, -1.7304,  1.5656,  1.3134,  0.1762,
          0.7282,  1.1501],
        [ 0.1941,  1.0835,  0.8856, -0.9603,  0.4595,  1.0766,  0.9131, -1.1584,
         -0.4800, -0.6443],
        [-0.0356, -1.3631, -0.1664, -0.9805, -0.0441,  1.3100,  0.2736, -1.1119,
         -0.7122,  0.2215],
        [ 0.2200, -0.1538, -1.1678, -0.5418, -2.5410,  0.0270,  0.3078, -0.4964,
          2.1995, -0.3102]])

In [23]:
output = reconstructed_model(x)
output

tensor([[0.4997],
        [0.4614],
        [0.4945],
        [0.4598],
        [0.5556]], grad_fn=<SigmoidBackward0>)

In [24]:
preds = (output > 0.5).int()
print(preds)


tensor([[0],
        [0],
        [0],
        [0],
        [1]], dtype=torch.int32)


### TNN_TRAINING

In [134]:
data

Unnamed: 0,age,sex,chest pain type,resting bp s,cholesterol,fasting blood sugar,resting ecg,max heart rate,exercise angina,oldpeak,ST slope,target,diabetes,resting bp s_scaled,cholesterol_scaled
0,57,1,4,140,192,0,0,148,0,0.4,2,0,0,0.700,0.340426
1,54,0,2,140,309,0,1,140,0,0.0,1,0,1,0.700,0.547872
2,65,0,3,155,269,0,0,148,0,0.8,1,0,1,0.775,0.476950
3,51,0,4,114,258,1,2,96,0,1.0,1,0,1,0.570,0.457447
4,40,1,4,110,167,0,2,114,1,2.0,2,1,0,0.550,0.296099
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
471,64,1,4,143,306,1,1,115,1,1.8,2,1,1,0.715,0.542553
472,50,1,4,145,0,1,0,139,1,0.7,2,1,1,0.725,0.000000
473,53,1,4,123,282,0,0,95,1,2.0,2,1,1,0.615,0.500000
474,44,0,3,108,141,0,0,175,0,0.6,2,0,0,0.540,0.250000


Propósito: convertir un pandas.DataFrame en un dataset que PyTorch

In [135]:
dataset = TabularDataset(data,target_col="target")
print(len(dataset))
print(dataset[0])

476
(tensor([ 57.0000,   1.0000,   4.0000, 140.0000, 192.0000,   0.0000,   0.0000,
        148.0000,   0.0000,   0.4000,   2.0000,   0.0000,   0.7000,   0.3404]), tensor(0.))


Propósito: centraliza la carga de datasets y crea DataLoaders para entrenamiento, validación y prueba.

In [136]:
train_df = pd.read_csv("/home/carlos/Documents/8vo/FINAL_SISTEMAS_DISTRIBUIDOS /FL/FINAL/simple-fl/examples/Heart_Disease/train.csv")
val_df = pd.read_csv("/home/carlos/Documents/8vo/FINAL_SISTEMAS_DISTRIBUIDOS /FL/FINAL/simple-fl/examples/Heart_Disease/val.csv")
test_df = pd.read_csv("/home/carlos/Documents/8vo/FINAL_SISTEMAS_DISTRIBUIDOS /FL/FINAL/simple-fl/examples/Heart_Disease/test.csv")

train_df = preprocess_data(train_df)
trainset = TabularDataset(train_df, target_col="diabetes")

trainloader = DataLoader(trainset, batch_size=32, shuffle=True)


In [47]:
X_batch, y_batch = next(iter(trainloader))


### Binary Clasification

In [116]:
input_dim = 17
net = MLP(input_dim)

In [117]:
print("Número de filas en train_df:", len(train_df))
print("Número de columnas en train_df:", train_df.shape[1])


Número de filas en train_df: 476
Número de columnas en train_df: 17


In [58]:
net

MLP(
  (fc1): Linear(in_features=16, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=1, bias=True)
  (dropout): Dropout(p=0.5, inplace=False)
)

In [118]:
DataManager.dm()

<__main__.DataManager at 0x7f864b3ad070>

In [142]:
dm = DataManager.dm()

In [146]:
range(0,len(dm.testloader))

range(0, 1)

In [148]:
#abrir el data .binfile
data = "/home/carlos/Documents/8vo/FINAL_SISTEMAS_DISTRIBUIDOS /FL/FINAL/simple-fl/db/models/ultimo.binaryfile"

In [165]:
import pickle
import numpy as np

with open(data, "rb") as f:
    data = pickle.load(f)

print("✅ Archivo cargado correctamente")
print("Tipo de objeto:", type(data))

if isinstance(data, dict):
    print("Claves del diccionario:")
    for k in data.keys():
        print(" -", k)
else:
    print("Contenido:", data)


✅ Archivo cargado correctamente
Tipo de objeto: <class 'dict'>
Claves del diccionario:
 - fc1_0
 - fc1_1
 - fc2_0
 - fc2_1
 - fc3_0
 - fc3_1


In [168]:
if "accuracy" in data:
    print("Accuracy:", data["accuracy"])

if "loss" in data:
    print("Loss:", data["loss"])

# Si tiene varias rondas de entrenamiento
for key, value in data.items():
    if isinstance(value, np.ndarray):
        print(key, "→", value.shape)


fc1_0 → (120, 16)
fc1_1 → (120,)
fc2_0 → (84, 120)
fc2_1 → (84,)
fc3_0 → (1, 84)
fc3_1 → (1,)


In [162]:
model = MLP()
model = Converter.covtr().convert_dict_nparray_to_nn(data)


StopIteration: 