In [123]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import Normalizer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [124]:
# Loading data from a CSV file
data = pd.read_csv('sensitivity_secret_data.csv')
print('Data is loaded')

Data is loaded


In [125]:
# Extract features and target sensitivity from data
features = data.iloc[:, 3:28370]
sensitivity = data.iloc[:, 2].values
cell_line = data.iloc[:, 0]
drug_id = data.iloc[:, 1]
print('Target sensitivity and features are loaded')

Target sensitivity and features are loaded


In [126]:
# Removing columns with empty or NaN values
features = features.dropna(axis=1, how='all')
all_zeros = (data == 0).all()
columns_to_remove = all_zeros[all_zeros].index
features = features.drop(columns=columns_to_remove)

In [127]:
# Fill empty cells with mean value (now this is NumPy Array)
imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
features = imputer.fit_transform(features)

In [128]:
# Standardization
ss = StandardScaler()
features = ss.fit_transform(features)

In [129]:
# Normalization
norm = Normalizer()
features = norm.fit_transform(features)

In [130]:
#Saving preprocessing data (optional)
np.savetxt ("sensitivity_features_preprocessing.csv", features, delimiter=" , ", fmt=" %.3f ")

In [131]:
# Dividing data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(features, sensitivity, test_size=0.2, random_state=42)
print('Dataset is splited')

Dataset is splited


In [None]:
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

In [133]:
# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).reshape(-1, 1)
y_test = torch.tensor(y_test, dtype=torch.float32).reshape(-1, 1)
print('Tensors are ready')

Tensors are ready


In [134]:
# Create DataLoader for training and test sets
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
print('Dataloaders are ready')

Dataloaders are ready


In [135]:
# Creating a neural network
class SensitivityPredictor(nn.Module):
    def __init__(self, input_size):
        super(SensitivityPredictor, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 1)  # In this case, the output will be a single number (sensitivity prediction)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x
    
print('NN is ready')

NN is ready


In [136]:
# Initialize the model and select the device (GPU or CPU)
model = SensitivityPredictor(input_size=24156)
model.to(device)

print(f"{device} is ready")

cuda is ready


In [137]:
# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
print('Loss criterion and optimizer are ready')

Loss criterion and optimizer are ready


In [138]:
# Model training
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels.view(-1, 1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    
    # Display information about the learning process
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}] Loss: {total_loss / len(train_loader)}")

print('\nTraining is finished')

Epoch [10/100] Loss: 0.14316710208853087
Epoch [20/100] Loss: 0.06827719807624817
Epoch [30/100] Loss: 0.035607496990511814
Epoch [40/100] Loss: 0.018515790230594575
Epoch [50/100] Loss: 0.014717162190936505
Epoch [60/100] Loss: 0.012093068131556113
Epoch [70/100] Loss: 0.010930910407720755
Epoch [80/100] Loss: 0.010513006383553148
Epoch [90/100] Loss: 0.011398008562779675
Epoch [100/100] Loss: 0.017419350193813445

Training is finished


In [139]:
# Assessing the accuracy of the model on the test set
model.eval()
true_values = []  # List to store true values
predicted_values = []  # List to store predicted values

with torch.no_grad():
    test_loss = 0.0
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)

        # Adding true and predicted values ​​to lists
        true_values.extend(labels.view(-1).cpu().numpy())
        predicted_values.extend(outputs.view(-1).cpu().numpy())

        test_loss += criterion(outputs, labels.view(-1, 1)).item()

# Calculate RMSE
rmse = np.sqrt(test_loss / len(test_loader))
print(f"Root Mean Square Error (RMSE): {rmse}")

Root Mean Square Error (RMSE): 0.5127344422924256


In [140]:
# Find the maximum and minimum values for true_values and predicted_values
diff_true_values = max(true_values) - min(true_values)
diff_predicted_values = max(predicted_values) - min(predicted_values)

# Print the results
print(f"Difference between the maximum and minimum true_values: {diff_true_values:.3f}")
print(f"Difference between the maximum and minimum predicted_values: {diff_predicted_values:.3f}")

full_length = len(true_values)
your_length = 10 # Enter the value

for i in range(your_length): # Enter full_length or your_length
    print(f"Case_{i + 1}: ({true_values[i]:.3f}, {predicted_values[i]:.3f})")

Difference between the maximum and minimum true_values: 8.452
Difference between the maximum and minimum predicted_values: 6.180
Case_1: (-0.657, -0.286)
Case_2: (0.270, 0.340)
Case_3: (1.194, 0.315)
Case_4: (0.131, 0.577)
Case_5: (0.129, -0.331)
Case_6: (-0.095, -0.142)
Case_7: (0.095, -0.063)
Case_8: (-0.354, -0.309)
Case_9: (0.060, 0.177)
Case_10: (-0.336, -0.353)


In [148]:
# Just another RMSE code
squared_differences = [(a - b) ** 2 for a, b in zip(true_values, predicted_values)]
mean_squared_difference = sum(squared_differences) / len(squared_differences)
rmse = mean_squared_difference ** 0.5

print("RMSE:", rmse)


RMSE: 0.5139206504027659
