In [2]:
import pandas as pd
import torch
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from linear_regressor import LinearKernelRegressor, MSEWithRegularization
from logistic_regression import LogisticRegressor, BCEWithRegularization


# Load and combine training datasets
df1_train = pd.read_csv('/home/srawash@iit.local/federation_sami/federation_models/genomic_regressor/data/client1_data.csv', index_col=0)
df2_train = pd.read_csv('/home/srawash@iit.local/federation_sami/federation_models/genomic_regressor/data/client2_data.csv', index_col=0)
training_df = pd.concat([df1_train, df2_train], axis=0)

# Load and combine validation datasets
df1_val = pd.read_csv('/home/srawash@iit.local/federation_sami/federation_models/genomic_regressor/data/client1_data_val.csv', index_col=0)
df2_val = pd.read_csv('/home/srawash@iit.local/federation_sami/federation_models/genomic_regressor/data/client2_data_val.csv', index_col=0)
validation_df = pd.concat([df1_val, df2_val], axis=0)


In [3]:
training_df = pd.read_csv('/home/srawash@iit.local/federation_sami/ckits/data/training_data_client.csv', index_col=0)
validation_df = pd.read_csv('/home/srawash@iit.local/federation_sami/ckits/data/validation_data_client.csv', index_col=0)

In [6]:

# Process training data
if "pseudo_id" in training_df.columns:
    training_df = training_df.drop('pseudo_id', axis=1)
y_train = training_df['label'].values
X_train = training_df.drop('label', axis=1).values

# Process validation data
if "pseudo_id" in validation_df.columns:
    validation_df = validation_df.drop('pseudo_id', axis=1)
y_val = validation_df['label'].values
X_val = validation_df.drop('label', axis=1).values

# Convert to PyTorch tensors
X_train = torch.FloatTensor(X_train)
y_train = torch.FloatTensor(y_train).reshape(-1, 1)
X_val = torch.FloatTensor(X_val)
y_val = torch.FloatTensor(y_val).reshape(-1, 1)

In [28]:


# Initialize model and optimizer
input_dim = X_train.shape[1]
model = LinearKernelRegressor(input_dim=input_dim)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = MSEWithRegularization(model, lambda_l2=0.1)

# Training loop
num_epochs = 1500
batch_size = 8
n_batches = len(X_train) // batch_size

model.train()
for epoch in range(num_epochs):
    total_loss = 0
    for i in range(n_batches):
        start_idx = i * batch_size
        end_idx = start_idx + batch_size
        
        batch_X = X_train[start_idx:end_idx]
        batch_y = y_train[start_idx:end_idx]
        
        # Forward pass
        outputs = torch.sigmoid(model(batch_X))  # Add sigmoid for logistic regression
        loss = criterion(outputs, batch_y)
        
        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/n_batches:.4f}')

# Evaluation on validation set
model.eval()
with torch.no_grad():
    val_outputs = torch.sigmoid(model(X_val))
    val_predictions = (val_outputs > 0.5).float().numpy()
    
    # Calculate metrics
    accuracy = accuracy_score(y_val, val_predictions)
    precision = precision_score(y_val, val_predictions)
    recall = recall_score(y_val, val_predictions)
    f1 = f1_score(y_val, val_predictions)
    
    print("\nValidation Metrics:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")

Epoch [10/1500], Loss: 1874.0049
Epoch [20/1500], Loss: 1731.8911
Epoch [30/1500], Loss: 1601.5326
Epoch [40/1500], Loss: 1481.6719
Epoch [50/1500], Loss: 1371.2933
Epoch [60/1500], Loss: 1269.5053
Epoch [70/1500], Loss: 1175.5363
Epoch [80/1500], Loss: 1088.7156
Epoch [90/1500], Loss: 1008.4513
Epoch [100/1500], Loss: 934.1940
Epoch [110/1500], Loss: 865.4575
Epoch [120/1500], Loss: 801.8024
Epoch [130/1500], Loss: 742.8306
Epoch [140/1500], Loss: 688.1797
Epoch [150/1500], Loss: 637.5192
Epoch [160/1500], Loss: 590.5470
Epoch [170/1500], Loss: 546.9865
Epoch [180/1500], Loss: 506.5834
Epoch [190/1500], Loss: 469.0982
Epoch [200/1500], Loss: 434.3219
Epoch [210/1500], Loss: 402.0660
Epoch [220/1500], Loss: 372.1345
Epoch [230/1500], Loss: 344.3625
Epoch [240/1500], Loss: 318.6038
Epoch [250/1500], Loss: 294.7053
Epoch [260/1500], Loss: 272.5051
Epoch [270/1500], Loss: 251.9393
Epoch [280/1500], Loss: 232.8670
Epoch [290/1500], Loss: 215.1802
Epoch [300/1500], Loss: 198.7804
Epoch [310

In [29]:
model.weights

Parameter containing:
tensor([[-4.9186e-43],
        [-5.8434e-43],
        [-1.5204e-42],
        ...,
        [ 1.5936e-23],
        [ 3.4080e-12],
        [-5.5912e-43]], requires_grad=True)

In [9]:
# Initialize model and optimizer
input_dim = X_train.shape[1]
model = LogisticRegressor(input_dim=input_dim)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = BCEWithRegularization(model, lambda_l2=0.01)

# Training loop
num_epochs = 1500
batch_size = 8
n_batches = len(X_train) // batch_size

model.train()
for epoch in range(num_epochs):
    total_loss = 0
    for i in range(n_batches):
        start_idx = i * batch_size
        end_idx = start_idx + batch_size
        
        batch_X = X_train[start_idx:end_idx]
        batch_y = y_train[start_idx:end_idx]
        
        # Forward pass
        outputs = model(batch_X)  # No need for extra sigmoid, it's in the model
        loss = criterion(outputs, batch_y)
        
        # Backward pass and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/n_batches:.4f}')

# Evaluation on validation set
model.eval()
with torch.no_grad():
    val_outputs = model(X_val)
    val_predictions = (val_outputs > 0.5).float().numpy()
    
    # Calculate metrics
    accuracy = accuracy_score(y_val, val_predictions)
    precision = precision_score(y_val, val_predictions)
    recall = recall_score(y_val, val_predictions)
    f1 = f1_score(y_val, val_predictions)
    
    print("\nValidation Metrics:")
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")

Epoch [10/1500], Loss: 0.6153
Epoch [20/1500], Loss: 0.5155
Epoch [30/1500], Loss: 0.4362
Epoch [40/1500], Loss: 0.3727
Epoch [50/1500], Loss: 0.3213
Epoch [60/1500], Loss: 0.2794
Epoch [70/1500], Loss: 0.2450
Epoch [80/1500], Loss: 0.2165
Epoch [90/1500], Loss: 0.1927
Epoch [100/1500], Loss: 0.1727
Epoch [110/1500], Loss: 0.1557
Epoch [120/1500], Loss: 0.1413
Epoch [130/1500], Loss: 0.1289
Epoch [140/1500], Loss: 0.1182
Epoch [150/1500], Loss: 0.1089
Epoch [160/1500], Loss: 0.1007
Epoch [170/1500], Loss: 0.0936
Epoch [180/1500], Loss: 0.0873
Epoch [190/1500], Loss: 0.0818
Epoch [200/1500], Loss: 0.0768
Epoch [210/1500], Loss: 0.0724
Epoch [220/1500], Loss: 0.0685
Epoch [230/1500], Loss: 0.0649
Epoch [240/1500], Loss: 0.0617
Epoch [250/1500], Loss: 0.0588
Epoch [260/1500], Loss: 0.0562
Epoch [270/1500], Loss: 0.0539
Epoch [280/1500], Loss: 0.0517
Epoch [290/1500], Loss: 0.0497
Epoch [300/1500], Loss: 0.0479
Epoch [310/1500], Loss: 0.0463
Epoch [320/1500], Loss: 0.0448
Epoch [330/1500],

In [10]:
model.weights

Parameter containing:
tensor([[ 2.9563e-41],
        [-1.3078e-40],
        [-6.9203e-41],
        ...,
        [-3.1228e-40],
        [ 2.5948e-41],
        [ 2.3962e-43]], requires_grad=True)

In [11]:
y_val

tensor([[0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.]])

In [12]:
model.weights

Parameter containing:
tensor([[-4.9186e-43],
        [ 4.1741e-03],
        [-1.5204e-42],
        ...,
        [-1.7426e-33],
        [ 5.2506e-16],
        [-5.5912e-43]], requires_grad=True)

In [13]:
# Assuming the tensor is stored in a variable called 'weights_tensor'
weights = model.weights.detach().numpy().flatten()  # Convert to numpy array and flatten
column_names = [f'col_{i+1}' for i in range(len(weights))]  # Create column names
weights_dict = dict(zip(column_names, weights))

# If you want to print it nicely:
for col, weight in weights_dict.items():
    print(f"{col}: {weight:.6f}")

col_1: -0.000000
col_2: 0.004174
col_3: -0.000000
col_4: 0.000000
col_5: -0.004812
col_6: -0.000000
col_7: 0.000000
col_8: -0.000000
col_9: -0.002675
col_10: -0.000000
col_11: 0.000000
col_12: -0.000000
col_13: -0.009996
col_14: 0.010565
col_15: 0.000000
col_16: -0.000000
col_17: -0.000000
col_18: -0.000000
col_19: 0.000000
col_20: -0.008804
col_21: 0.000000
col_22: -0.000000
col_23: -0.000000
col_24: 0.000000
col_25: -0.000000
col_26: -0.000000
col_27: 0.004236
col_28: -0.000000
col_29: 0.000000
col_30: -0.000000
col_31: -0.000000
col_32: 0.000000
col_33: 0.000000
col_34: -0.000000
col_35: -0.000000
col_36: -0.002694
col_37: -0.000000
col_38: 0.000000
col_39: -0.000000
col_40: -0.000000
col_41: 0.000000
col_42: -0.000000
col_43: -0.000000
col_44: 0.000000
col_45: 0.000000
col_46: 0.000000
col_47: -0.000000
col_48: -0.007235
col_49: 0.000000
col_50: -0.000000
col_51: 0.000000
col_52: 0.000000
col_53: 0.000000
col_54: 0.000000
col_55: 0.000000
col_56: 0.000000
col_57: 0.000000
col_58: -

In [24]:
# Assuming you already have weights_dict
for i in range(100, 131):
    col_name = f'col_{i}'
    if col_name in weights_dict:
        print(f"{col_name}: {weights_dict[col_name]:.6f}")

col_100: -0.087976
col_101: -0.000000
col_102: 0.017052
col_103: 0.033459
col_104: 0.000000
col_105: 0.030361
col_106: -0.028774
col_107: -0.056288
col_108: -0.024931
col_109: 0.012022
col_110: -0.049363
col_111: -0.002585
col_112: -0.039030
col_113: 0.061945
col_114: -0.155136
col_115: 0.161254
col_116: -0.175911
col_117: -0.000000
col_118: -0.091010
col_119: -0.027014
col_120: 0.028355
col_121: 0.033250
col_122: 0.000319
col_123: 0.061628
col_124: -0.016234
col_125: 0.113418
col_126: 0.055467
col_127: 0.024868
col_128: -0.151788
col_129: -0.109707
col_130: -0.033626


In [14]:
import numpy as np
# Assuming weights_tensor is your original tensor
weights = model.weights.detach().numpy().flatten()  # Convert to numpy array and flatten

# Calculate sums for different ranges
sum_0_100 = weights[0:100].sum()
sum_101_120 = weights[100:120].sum()
sum_121_end = weights[120:].sum()

print(f"Sum of weights 1-100: {sum_0_100:.6f}")
print(f"Sum of weights 101-120: {sum_101_120:.6f}")
print(f"Sum of weights 121-end: {sum_121_end:.6f}")

# You can also print the absolute sums if you want to see magnitude regardless of sign
abs_sum_0_100 = np.abs(weights[0:100]).sum()
abs_sum_101_120 = np.abs(weights[100:120]).sum()
abs_sum_121_end = np.abs(weights[120:]).sum()

print("\nAbsolute sums:")

Sum of weights 1-100: 0.125440
Sum of weights 101-120: 0.017722
Sum of weights 121-end: 0.089618

Absolute sums:
