<a href="https://colab.research.google.com/github/hadywalied/numerical_mahdy/blob/master/ANN_ISO_DATA01_SAVED.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Install necessary packages using pip.
# Uncomment: !pip install torch wandb numpy pandas matplotlib

# Import required libraries.
import torch
import pandas as pd  # Import the Pandas library for data manipulation.
from torch.utils.data import Dataset
import numpy as np
import math

from torch.utils.data import DataLoader
import torch.nn as nn  # Import PyTorch's neural network module.
from torch.autograd import Variable  # Import PyTorch's autograd module for automatic differentiation.
import matplotlib.pyplot as plt  # Import Matplotlib for creating plots.
%matplotlib inline  # This line is a Jupyter Notebook magic command for inline plotting.


# Print the version of PyTorch (incorrect usage of the torch.version module, corrected below).
print(torch.__version__)

# Note: The following line is commented out because it's a command to clone a GitHub repository, not Python code.
# You can uncomment and run it in your terminal or command prompt if needed.
# ! git clone https://github.com/hadywalied/numerical_mahdy


2.0.1+cu118


In [3]:
import pandas as pd  # Import the Pandas library for data manipulation.

# Define the input file name as a variable for easy modification.
input_file = 'output_results.csv'

# Read the CSV data from the specified input file into a Pandas DataFrame.
# A missing file is re-raised with an actionable message, because every later
# cell that builds a dataset reads this same path.
try:
    csv_data = pd.read_csv(input_file)
except FileNotFoundError as err:
    raise FileNotFoundError(
        f"Could not find {input_file!r}. Upload the CSV (or clone the repository "
        "that contains it) and point `input_file` at its location before "
        "running the rest of the notebook."
    ) from err

# Display the first few rows of the DataFrame to inspect the data.
csv_data.head()

FileNotFoundError: ignored

In [None]:
class MyDataset(Dataset):
    """Map-style dataset over a CSV file, split sequentially into train and test parts.

    The first column of the CSV is skipped, the last two columns are used as
    the targets, and every column in between is used as an input feature.

    The first ``floor(train_test_ratio * n_rows)`` rows form the training split
    and all remaining rows form the test split, so the two splits never share
    a row and together cover the whole file.

    Args:
        file_name: CSV source passed straight to ``pandas.read_csv``.
        train_test_ratio: Fraction of rows, in ``[0, 1]``, assigned to the
            training split.
        test: When ``True`` load the test split; otherwise the training split.

    Raises:
        ValueError: If ``train_test_ratio`` is outside ``[0, 1]``.
    """

    def __init__(self, file_name, train_test_ratio=0.9, test=False):
        if not 0.0 <= train_test_ratio <= 1.0:
            raise ValueError(
                f"train_test_ratio must be in [0, 1], got {train_test_ratio!r}"
            )

        # Read the CSV file into a Pandas DataFrame.
        _df = pd.read_csv(file_name)

        # Single split point shared by both modes. Deriving the test size from
        # (1 - ratio) instead would re-use the leading (training) rows and can
        # also drop a row to floating-point rounding.
        n_train = math.floor(train_test_ratio * len(_df))
        rows = _df.iloc[n_train:] if test else _df.iloc[:n_train]

        # Extract input features (x) and target values (y) from the selected rows.
        x = rows.iloc[:, 1:-2].values
        y = rows.iloc[:, -2:].values

        # Convert the NumPy arrays to PyTorch tensors with float32 data type.
        self.x_data = torch.tensor(x, dtype=torch.float32)
        self.y_data = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        """Return the number of samples (rows) in this split."""
        return len(self.y_data)

    def __getitem__(self, idx):
        """Return the ``(features, targets)`` pair stored at position ``idx``."""
        return self.x_data[idx], self.y_data[idx]

In [None]:
# Build both splits from the same CSV file: test=False selects the training
# split (the class default), test=True selects the test split.
train_dataset, test_dataset = (
    MyDataset(input_file, test=is_test) for is_test in (False, True)
)

# Creating Model Architecture

In [None]:
# torch.manual_seed(1) sets the random seed of the PyTorch library to 1.
# Random numbers produced by PyTorch functions are then generated deterministically:
# the same sequence is produced on every run as long as the seed is not changed.
# This is useful for reproducibility in machine learning experiments.
torch.manual_seed(1)  # Reproducible

In [None]:
# Training hyperparameters.
#
# EPOCH: number of training epochs. An epoch is one complete pass through the
# entire training dataset, so with EPOCH = 100 the model sees the entire
# training dataset 100 times during training.
#
# BATCH_SIZE: batch size used in mini-batch gradient descent. The dataset is
# divided into smaller batches of (up to) 37 samples each, and the model's
# parameters are updated after processing each batch. Mini-batches can speed up
# training and are more memory-efficient than processing the whole dataset at once.
#
# LR: learning rate handed to the optimizer.
EPOCH = 100
BATCH_SIZE = 37
LR = 0.001

In [None]:
# Wrap each dataset in a DataLoader that serves mini-batches of BATCH_SIZE samples.
# shuffle=False keeps the samples in dataset order on every pass.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)

# Same configuration for the test split.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
)


In [None]:
# Pull a single batch from a fresh iterator over the training DataLoader
# to inspect what the model will receive.
batch_iterator = iter(train_loader)
train_features, train_labels = next(batch_iterator)

# Report the dimensions of the feature batch, then of the label batch.
print(f"Feature batch shape: {train_features.size()}")
print(f"Labels batch shape: {train_labels.size()}")


The code below defines a small feed-forward neural network for multi-output regression (the class is named `MultipleLinearRegression`, although its sigmoid hidden layers make the model nonlinear). It has an input layer with 6 features, two hidden layers with 6 neurons each, and an output layer with the specified output dimension. The weights are initialized with the Xavier (Glorot) uniform method and the biases are initialized to zero; the `forward` method defines the forward pass of the network.

In [None]:
import torch
import torch.nn as nn

class MultipleLinearRegression(nn.Module):
    """Feed-forward regressor with two sigmoid hidden layers and a linear output.

    Args:
        input_dim: Number of input features per sample.
        output_dim: Number of values predicted per sample.
        hidden_dim: Number of neurons in each of the two hidden layers.
            Defaults to 6, the width that was previously hard-coded.
    """

    def __init__(self, input_dim=6, output_dim=2, hidden_dim=6):
        super().__init__()
        # Define the layers of the neural network.
        self.hidden1 = nn.Linear(input_dim, hidden_dim)   # First hidden layer.
        self.hidden2 = nn.Linear(hidden_dim, hidden_dim)  # Second hidden layer.
        self.output = nn.Linear(hidden_dim, output_dim)   # Output layer.

        # Xavier-uniform weights and zero biases for every layer. The layers are
        # visited in creation order so the sequence of random draws is the same
        # as initializing them one by one.
        for layer in (self.hidden1, self.hidden2, self.output):
            nn.init.xavier_uniform_(layer.weight)
            nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Run the forward pass and return the raw (un-activated) outputs.

        Args:
            x: Tensor whose last dimension holds ``input_dim`` features.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of ``output_dim``.
        """
        z = torch.sigmoid(self.hidden1(x))  # Sigmoid activation on the first hidden layer.
        z = torch.sigmoid(self.hidden2(z))  # Sigmoid activation on the second hidden layer.
        return self.output(z)               # Output layer with no activation function.


In [None]:
# Instantiate the network for 6 input features and 2 outputs, then dump its
# freshly initialized parameters for inspection.
MLR_model = MultipleLinearRegression(input_dim=6, output_dim=2)
print("The parameters: ", list(MLR_model.parameters()))

In [None]:
# Mean Squared Error (MSE) is the loss criterion for this regression task.
criterion = nn.MSELoss()

# AdamW updates the model's parameters, using the learning rate LR.
optimizer = torch.optim.AdamW(params=MLR_model.parameters(), lr=LR)



In [None]:
# A single hand-written sample with six float features.
x = torch.tensor(
    [8.0, 6.0, 10.0, 1.0, 16.0, 100.0],
    dtype=torch.float32,
)

# Run the sample through the model before any training has happened.
y_pred = MLR_model(x)

# Show the untrained model's prediction.
print(y_pred)

# Model Training

In [None]:
# Per-batch training losses, recorded in the order the batches are processed
t_losses = []

# One outer iteration per epoch, one inner iteration per mini-batch
for epoch in range(EPOCH):
    for i, (inputs, targets) in enumerate(train_loader):
        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass, then loss between predictions and targets
        outputs = MLR_model(inputs)
        loss = criterion(outputs, targets)

        # Record the loss before the parameters are updated
        t_losses.append(loss.item())

        # Backward pass followed by the parameter update
        loss.backward()
        optimizer.step()
        # scheduler.step()  # Optionally adjust the learning rate

        # Only every 10th batch is reported
        if (i + 1) % 10 != 0:
            continue
        print(f'Epoch [{epoch + 1}/{EPOCH}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}')


# Model Testing

In [None]:
# Put the model in evaluation mode before measuring the test loss.
MLR_model.eval()

# Use torch.no_grad() to temporarily disable gradient tracking to save memory
with torch.no_grad():
    # Running sum of batch losses weighted by batch size, and the number of samples seen
    total_loss = 0.0
    n_samples = 0

    # Loop over batches of test data from test_loader
    for inputs, targets in test_loader:
        # Compute model predictions for the current batch
        outputs = MLR_model(inputs)

        # Calculate the loss between model predictions and actual targets
        loss = criterion(outputs, targets)

        # The criterion returns a per-batch mean, so weight it by the batch size;
        # otherwise a short final batch would count as much as a full one.
        batch_size = inputs.size(0)
        total_loss += loss.item() * batch_size
        n_samples += batch_size

    # An empty test set has no meaningful loss; fail with a clear message
    # instead of a ZeroDivisionError.
    if n_samples == 0:
        raise ValueError("test_loader yielded no samples; cannot compute a test loss")

    # Mean test loss over all test samples
    mean_loss = total_loss / n_samples

    # Print the mean test loss
    print(f'Test Loss: {mean_loss:.4f}')

# Plots

In [None]:
# Plot the per-batch training losses stored in t_losses on an explicit figure/axes pair.
fig, ax = plt.subplots()
ax.plot(t_losses)

# Label the axes: each point is the MSE loss of one training batch (one
# optimizer iteration), not a running total.
ax.set_xlabel("Number of Iterations")
ax.set_ylabel("Batch MSE Loss")
ax.set_title("Training loss per batch")

# Display the plot
plt.show()

# Model Validation