In [1]:
# Importing required libraries
import pandas as pd  # For data manipulation and analysis
import numpy as np  # For numerical computations
import torch  # PyTorch library for building and training the neural network
import torch.nn as nn  # For defining neural network modules
from torch.utils.data import Dataset, DataLoader, random_split  # For dataset handling
from sklearn.preprocessing import MinMaxScaler  # For normalizing data
from sklearn.model_selection import train_test_split  # For splitting data into train and test sets
from sklearn.metrics import mean_absolute_error, r2_score  # For evaluating model performance

In [2]:
# # Load the dataset directly from a URL
# url = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/concrete.csv"
# data = pd.read_csv(url)


import pandas as pd

# Load the dataset from the UCI Machine Learning Repository
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/compressive/Concrete_Data.xls"
data = pd.read_excel(url)

# Display the first few rows
print(data.head())


   Cement (component 1)(kg in a m^3 mixture)  \
0                                      540.0   
1                                      540.0   
2                                      332.5   
3                                      332.5   
4                                      198.6   

   Blast Furnace Slag (component 2)(kg in a m^3 mixture)  \
0                                                0.0       
1                                                0.0       
2                                              142.5       
3                                              142.5       
4                                              132.4       

   Fly Ash (component 3)(kg in a m^3 mixture)  \
0                                         0.0   
1                                         0.0   
2                                         0.0   
3                                         0.0   
4                                         0.0   

   Water  (component 4)(kg in a m^3 mixture)  \
0      

In [3]:
# Display the first few rows of the dataset for inspection
print(data.head())

# Define features (inputs) and target (output)
X = data.iloc[:, :-1].values  # Select all columns except the last one as features
y = data.iloc[:, -1].values   # Select the last column as the target (compressive strength)

# Normalize the feature data to a range of [0, 1] using Min-Max Scaling
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Convert the features and target into PyTorch tensors for model training
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)  # Features as float tensors
y_tensor = torch.tensor(y, dtype=torch.float32).unsqueeze(1)  # Target as float tensors (unsqueeze adds a dimension)

# Split the dataset into training and testing sets using an 80-20 split
dataset = torch.utils.data.TensorDataset(X_tensor, y_tensor)  # Combine features and target into a single dataset
train_size = int(0.8 * len(dataset))  # Calculate the size of the training set
test_size = len(dataset) - train_size  # Calculate the size of the testing set
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])  # Perform the split

# Create data loaders for batch processing during training and testing
batch_size = 32  # Number of samples per batch
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)  # Shuffle training data for randomness
test_loader = DataLoader(test_dataset, batch_size=batch_size)  # No need to shuffle test data



   Cement (component 1)(kg in a m^3 mixture)  \
0                                      540.0   
1                                      540.0   
2                                      332.5   
3                                      332.5   
4                                      198.6   

   Blast Furnace Slag (component 2)(kg in a m^3 mixture)  \
0                                                0.0       
1                                                0.0       
2                                              142.5       
3                                              142.5       
4                                              132.4       

   Fly Ash (component 3)(kg in a m^3 mixture)  \
0                                         0.0   
1                                         0.0   
2                                         0.0   
3                                         0.0   
4                                         0.0   

   Water  (component 4)(kg in a m^3 mixture)  \
0      

In [4]:

# Define the neural network model for regression
class RegressionModel(nn.Module):
    def __init__(self, input_dim):
        """
        Initialize the regression model with fully connected layers and ReLU activation.
        Args:
            input_dim: Number of input features
        """
        super(RegressionModel, self).__init__()  # Initialize the parent class
        self.fc = nn.Sequential(  # Define the model architecture
            nn.Linear(input_dim, 64),  # First layer with 64 neurons
            nn.ReLU(),  # Activation function
            nn.Linear(64, 32),  # Second layer with 32 neurons
            nn.ReLU(),  # Activation function
            nn.Linear(32, 1)  # Output layer with a single neuron for regression
        )

    def forward(self, x):
        """
        Forward pass of the model.
        Args:
            x: Input tensor
        Returns:
            Output tensor
        """
        return self.fc(x)  # Pass input through the defined layers


In [6]:

# Initialize the model, loss function, and optimizer
input_dim = X_tensor.shape[1]  # Determine the number of input features
model = RegressionModel(input_dim)  # Create an instance of the model
criterion = nn.MSELoss()  # Mean Squared Error loss for regression
# optimizer = torch.optim.Adam(model.parameters(), lr=0.01)  # Adam optimizer with a learning rate of 0.01
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=1e-4)




In [7]:
# Train the model for a specified number of epochs
num_epochs = 60  # Number of iterations over the entire dataset
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode
    epoch_loss = 0  # Initialize epoch loss
    for inputs, targets in train_loader:  # Loop over each batch of data
        optimizer.zero_grad()  # Clear gradients from the previous step
        outputs = model(inputs)  # Perform a forward pass
        loss = criterion(outputs, targets)  # Calculate the loss
        loss.backward()  # Backpropagate the gradients
        optimizer.step()  # Update the model parameters
        epoch_loss += loss.item()  # Accumulate the loss for the epoch

    # Print the average loss for the epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss/len(train_loader):.4f}")



Epoch [1/60], Loss: 994.6699
Epoch [2/60], Loss: 266.6437
Epoch [3/60], Loss: 188.4630
Epoch [4/60], Loss: 148.6230
Epoch [5/60], Loss: 127.6841
Epoch [6/60], Loss: 119.7294
Epoch [7/60], Loss: 119.2075
Epoch [8/60], Loss: 117.4699
Epoch [9/60], Loss: 115.1683
Epoch [10/60], Loss: 114.0657
Epoch [11/60], Loss: 112.7265
Epoch [12/60], Loss: 112.7121
Epoch [13/60], Loss: 109.4696
Epoch [14/60], Loss: 109.8536
Epoch [15/60], Loss: 107.9361
Epoch [16/60], Loss: 103.9965
Epoch [17/60], Loss: 100.2339
Epoch [18/60], Loss: 96.4367
Epoch [19/60], Loss: 91.9261
Epoch [20/60], Loss: 89.6992
Epoch [21/60], Loss: 83.8279
Epoch [22/60], Loss: 78.5919
Epoch [23/60], Loss: 73.6360
Epoch [24/60], Loss: 69.2679
Epoch [25/60], Loss: 65.1489
Epoch [26/60], Loss: 65.7871
Epoch [27/60], Loss: 62.2794
Epoch [28/60], Loss: 60.5919
Epoch [29/60], Loss: 56.6947
Epoch [30/60], Loss: 54.7425
Epoch [31/60], Loss: 54.1074
Epoch [32/60], Loss: 52.1582
Epoch [33/60], Loss: 52.3118
Epoch [34/60], Loss: 51.9443
Epoch 

In [8]:
# Evaluate the model on the test set
model.eval()  # Set the model to evaluation mode
with torch.no_grad():  # Disable gradient computation for evaluation
    y_true, y_pred = [], []  # Lists to store true and predicted values
    for inputs, targets in test_loader:  # Loop over the test data batches
        outputs = model(inputs)  # Perform a forward pass
        y_true.extend(targets.numpy())  # Collect true values
        y_pred.extend(outputs.numpy())  # Collect predicted values

# Convert the true and predicted values to NumPy arrays for metric calculation
y_true = np.array(y_true).flatten()  # Flatten the array for compatibility
y_pred = np.array(y_pred).flatten()  # Flatten the array for compatibility

# Calculate evaluation metrics
mae = mean_absolute_error(y_true, y_pred)  # Mean Absolute Error
r2 = r2_score(y_true, y_pred)  # R² score to measure goodness of fit

# Print the evaluation metrics
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"R² Score: {r2:.2f}")

# Display some sample predictions for inspection
print("Sample predictions:")
for true, pred in zip(y_true[:10], y_pred[:10]):  # Loop over the first 10 samples
    print(f"True: {true:.2f}, Predicted: {pred:.2f}")

Mean Absolute Error (MAE): 4.89
R² Score: 0.86
Sample predictions:
True: 27.66, Predicted: 33.91
True: 29.93, Predicted: 20.41
True: 56.34, Predicted: 60.94
True: 26.74, Predicted: 34.92
True: 9.01, Predicted: 19.58
True: 50.70, Predicted: 59.17
True: 37.42, Predicted: 40.14
True: 67.70, Predicted: 58.22
True: 31.54, Predicted: 35.83
True: 19.99, Predicted: 24.41


In [10]:
import torch

# Assuming 'model' is your trained PyTorch model
torch.save(model.state_dict(), './model.pth')

print("Model saved successfully as 'model.pth'")


Model saved successfully as 'model.pth'
