In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [3]:
# Load the raw table from "7.csv" (path is relative to the working directory).
# Later cells encode column 0 into an integer target and stratify the splits on it.
data = pd.read_csv("7.csv")

In [4]:
# Add a 1-based integer class id for the label stored in column 0.
# The pandas categorical codes are zero-based, hence the +1.
data["col0_encoded"] = pd.Categorical(data.iloc[:, 0]).codes + 1

In [7]:
# Feature/label views of the full table. Column 0 holds the raw label and the
# last column ("col0_encoded") is its integer encoding, so both are excluded
# from X; slicing with 1: alone would leak the target into the features.
X = data.iloc[:, 1:-1]
y = data.iloc[:, 0]

# 60/20/20 train/validation/test split. Both stages stratify on the encoded
# label (last column) and use a fixed random_state so the split is repeatable.
train_data, temp_data = train_test_split(
    data, test_size=0.4, stratify=data.iloc[:, -1], random_state=42
)
val_data, test_data = train_test_split(
    temp_data, test_size=0.5, stratify=temp_data.iloc[:, -1], random_state=42
)

In [8]:
# Give each split a fresh 0..n-1 index; the original row labels are discarded.
train_data = train_data.reset_index(drop=True)
test_data = test_data.reset_index(drop=True)
val_data = val_data.reset_index(drop=True)

In [9]:
# Features: the three columns immediately before the target column.
X_train, X_val, X_test = (
    split.iloc[:, -4:-1] for split in (train_data, val_data, test_data)
)

# Target: the last column of each split.
y_train, y_val, y_test = (
    split.iloc[:, -1] for split in (train_data, val_data, test_data)
)

In [10]:
# ✅ Standardize the Data
# The scaler learns its statistics from the training features only and is then
# reused unchanged for the validation features.
scaler = StandardScaler().fit(X_train)

# ✅ Training split: scaled features, zero-indexed labels, shuffled batches
X_train_scaled = torch.tensor(scaler.transform(X_train), dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values - 1, dtype=torch.long)
train_dataset = TensorDataset(X_train_scaled, y_train_tensor)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)

# ✅ Validation split: same transform, original order kept
X_val_scaled = torch.tensor(scaler.transform(X_val), dtype=torch.float32)
y_val_tensor = torch.tensor(y_val.values - 1, dtype=torch.long)
val_dataset = TensorDataset(X_val_scaled, y_val_tensor)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=32)

# ✅ Define the Neural Network
class FishClassifier(nn.Module):
    """Fully connected classifier producing one logit per class.

    Layer stack: Linear -> BatchNorm -> ReLU -> Dropout (twice, at
    ``hidden_size``), then Linear -> ReLU at ``hidden_size // 2``, then a
    final Linear to ``num_classes``.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Width of the first two hidden layers; the third hidden
            layer uses ``hidden_size // 2``.
        num_classes: Number of output logits.
        dropout_p: Dropout probability applied after each of the first two
            hidden layers. Defaults to 0.4, the value previously hard-coded.
    """

    def __init__(self, input_size, hidden_size=256, num_classes=9, dropout_p=0.4):
        super().__init__()
        # Attribute names and creation order are kept stable so state_dict keys
        # and seeded weight initialisation do not change.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.bn1 = nn.BatchNorm1d(hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.bn2 = nn.BatchNorm1d(hidden_size)
        self.fc3 = nn.Linear(hidden_size, hidden_size // 2)
        self.fc4 = nn.Linear(hidden_size // 2, num_classes)
        # A single Dropout module is shared by both hidden blocks.
        self.dropout = nn.Dropout(p=dropout_p)

    def forward(self, x):
        """Return raw logits for a batch ``x``; no softmax is applied here."""
        x = F.relu(self.bn1(self.fc1(x)))
        x = self.dropout(x)
        x = F.relu(self.bn2(self.fc2(x)))
        x = self.dropout(x)
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

# ✅ Initialize Model, Loss, Optimizer
# Seed PyTorch's global RNG so weight initialisation, DataLoader shuffling and
# dropout masks are repeatable on a fresh run (42 matches the random_state used
# for the data split).
torch.manual_seed(42)

input_dim = X_train.shape[1]
model = FishClassifier(input_size=input_dim)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.005)
# Halve the learning rate every 30 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.5)

# ✅ Training Loop
num_epochs = 200
train_loss_list = []  # mean training loss per epoch
val_acc_list = []     # validation accuracy per epoch

for epoch in range(num_epochs):
    # Training pass: BatchNorm uses batch statistics and dropout is active.
    model.train()
    total_loss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # The scheduler is stepped once per epoch, after the optimizer updates.
    scheduler.step()
    avg_loss = total_loss / len(train_loader)
    train_loss_list.append(avg_loss)

    # ✅ Validation
    # all_preds / all_targets are rebuilt every epoch, so after the loop they
    # hold the predictions and labels of the final epoch only.
    model.eval()
    correct, total = 0, 0
    all_preds = []
    all_targets = []
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            outputs = model(X_batch)
            _, predicted = torch.max(outputs, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()
            all_preds.extend(predicted.cpu().numpy())
            all_targets.extend(y_batch.cpu().numpy())

    acc = correct / total
    val_acc_list.append(acc)
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}, Validation Accuracy: {acc:.4f}")

# ✅ Save Model
torch.save(model.state_dict(), "fish_classifier.pth")

# ✅ Classification Report
# all_targets / all_preds come from the validation pass of the last epoch.
print("\nClassification Report:")
print(classification_report(all_targets, all_preds, digits=4))

# ✅ Confusion Matrix
cm = confusion_matrix(all_targets, all_preds)
cm_fig, cm_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=cm_ax)
cm_ax.set_title('Confusion Matrix')
cm_ax.set_xlabel('Predicted')
cm_ax.set_ylabel('Actual')
cm_fig.savefig('nn_confusion_matrix.png', dpi=300)
plt.close(cm_fig)
print("Confusion matrix saved as nn_confusion_matrix.png")

# ✅ Metrics
# Precision, recall and F1 are class-support-weighted averages.
acc_score = accuracy_score(all_targets, all_preds)
precision, recall, f1 = (
    metric(all_targets, all_preds, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)
print("\nFinal Metrics:")
print(f"✅ Accuracy:  {acc_score:.4f}")
print(f"✅ Precision: {precision:.4f}")
print(f"✅ Recall:    {recall:.4f}")
print(f"✅ F1 Score:  {f1:.4f}")


# ✅ Plot Accuracy & Loss Over Epochs
def _save_epoch_curve(values, label, color, ylabel, title, filename):
    """Draw one per-epoch series as a line chart and write it to `filename`."""
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(values, label=label, color=color)
    ax.set_xlabel("Epochs")
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend()
    ax.grid(True)
    fig.tight_layout()
    fig.savefig(filename, dpi=300)
    plt.close(fig)


_save_epoch_curve(
    val_acc_list, "Validation Accuracy", 'blue',
    "Accuracy", "Validation Accuracy Over Epochs", "nn_val_accuracy.png",
)
_save_epoch_curve(
    train_loss_list, "Training Loss", 'red',
    "Loss", "Training Loss Over Epochs", "nn_train_loss.png",
)
print("Training and validation plots saved.")

# ⚠️ Optional: Plot Decision Boundary (only if features = 2)
if X_train.shape[1] == 2:
    def plot_decision_boundary(model, X, y, filename='nn_decision_boundary.png'):
        """Save a filled-contour plot of the model's predicted class over a 2-D grid.

        Args:
            model: Module mapping a float32 tensor with two feature columns to
                per-class scores.
            X: 2-D array whose columns 0 and 1 set the plot extent and the
                scatter positions.
            y: Per-point values used to colour the scatter markers.
            filename: Path of the image file to write.
        """
        # Pad the data range by 1 on each side so border points are not clipped.
        x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
        y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
        xx, yy = np.meshgrid(np.linspace(x_min, x_max, 300),
                             np.linspace(y_min, y_max, 300))
        grid = torch.tensor(np.c_[xx.ravel(), yy.ravel()], dtype=torch.float32)

        # Force inference mode for the grid pass: in train mode BatchNorm would
        # use (and update its running stats from) the grid batch and dropout
        # would randomise the predictions. The previous mode is restored after.
        was_training = model.training
        model.eval()
        try:
            with torch.no_grad():
                Z = model(grid)
                _, preds = torch.max(Z, 1)
        finally:
            model.train(was_training)
        Z = preds.reshape(xx.shape).numpy()

        plt.figure(figsize=(10, 8))
        plt.contourf(xx, yy, Z, alpha=0.6, cmap=plt.cm.RdYlBu)
        scatter = plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.RdYlBu, edgecolor='k')
        plt.colorbar(scatter)
        plt.title("Neural Network Decision Boundary")
        plt.savefig(filename, dpi=300)
        plt.close()
        print(f"Decision boundary saved as {filename}")

    plot_decision_boundary(model, X_val_scaled.numpy(), y_val_tensor.numpy())


Epoch [1/200], Loss: 0.4958, Validation Accuracy: 0.9252
Epoch [2/200], Loss: 0.4035, Validation Accuracy: 0.8811
Epoch [3/200], Loss: 0.3549, Validation Accuracy: 0.8897
Epoch [4/200], Loss: 0.3128, Validation Accuracy: 0.8946
Epoch [5/200], Loss: 0.3146, Validation Accuracy: 0.9044
Epoch [6/200], Loss: 0.2911, Validation Accuracy: 0.9130
Epoch [7/200], Loss: 0.2734, Validation Accuracy: 0.8946
Epoch [8/200], Loss: 0.2618, Validation Accuracy: 0.9142
Epoch [9/200], Loss: 0.2517, Validation Accuracy: 0.9118
Epoch [10/200], Loss: 0.2609, Validation Accuracy: 0.9093
Epoch [11/200], Loss: 0.2778, Validation Accuracy: 0.9191
Epoch [12/200], Loss: 0.2399, Validation Accuracy: 0.9179
Epoch [13/200], Loss: 0.2227, Validation Accuracy: 0.9105
Epoch [14/200], Loss: 0.2961, Validation Accuracy: 0.8897
Epoch [15/200], Loss: 0.2402, Validation Accuracy: 0.9203
Epoch [16/200], Loss: 0.2682, Validation Accuracy: 0.8860
Epoch [17/200], Loss: 0.2471, Validation Accuracy: 0.8811
Epoch [18/200], Loss: 0

In [14]:
from sklearn.naive_bayes import GaussianNB

# ✅ Initialize and Train
# Labels are shifted by -1 so they are zero-based, like the neural network's targets.
nb_model = GaussianNB().fit(X_train, y_train - 1)

# ✅ Predict
y_pred_nb = nb_model.predict(X_val)

# ✅ Evaluation
print("🤖 Naive Bayes (scikit-learn) Results:")
print(classification_report(y_val - 1, y_pred_nb, digits=4))

# ✅ Confusion Matrix
nb_fig, nb_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    confusion_matrix(y_val - 1, y_pred_nb),
    annot=True, fmt='d', cmap='Oranges', ax=nb_ax,
)
nb_ax.set_title("Naive Bayes - Confusion Matrix")
nb_ax.set_xlabel("Predicted")
nb_ax.set_ylabel("Actual")
nb_fig.tight_layout()
nb_fig.savefig("naive_bayes_confusion_matrix.png", dpi=300)
plt.close(nb_fig)


🤖 Naive Bayes (scikit-learn) Results:
              precision    recall  f1-score   support

           0     1.0000    1.0000    1.0000        95
           1     1.0000    1.0000    1.0000        83
           2     0.7424    0.5213    0.6125        94
           3     1.0000    1.0000    1.0000        87
           4     1.0000    1.0000    1.0000        95
           5     1.0000    1.0000    1.0000        91
           6     1.0000    1.0000    1.0000        84
           7     0.6371    0.8229    0.7182        96
           8     1.0000    1.0000    1.0000        91

    accuracy                         0.9240       816
   macro avg     0.9311    0.9271    0.9256       816
weighted avg     0.9276    0.9240    0.9222       816

