In [None]:
!pip install tensorflow matplotlib numpy scikit-learn seaborn nltk opencv-python

In [None]:
# Implementing Feedforward neural networks with Keras and TensorFlow
# a. Import the necessary packages
# b. Load the training and testing data (MNIST/CIFAR10)
# c. Define the network architecture using Keras
# d. Train the model using SGD
# e. Evaluate the network
# f. Plot the training loss and accuracy

In [None]:
# a. Import the necessary packages (note: final accuracy should keep improving with more epochs, e.g. 100)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.utils import to_categorical
import warnings
warnings.filterwarnings('ignore')

# Seed NumPy and TensorFlow so repeated runs draw the same random numbers
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# b. Load the training and testing data
print("Loading data...")
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in ('train_data.csv', 'test_data.csv')
)

print(f"Training data shape: {train_data.shape}")
print(f"Testing data shape: {test_data.shape}")

# Split each frame into a feature matrix (every column except 'label') and a label vector
X_train, y_train = train_data.drop('label', axis=1).values, train_data['label'].values
X_test, y_test = test_data.drop('label', axis=1).values, test_data['label'].values

for array_name, array in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
):
    print(f"{array_name} shape: {array.shape}")

# The class inventory is taken from the training labels only
unique_labels = np.unique(y_train)
num_classes = len(unique_labels)
print(f"Unique labels: {unique_labels}")
print(f"Number of classes: {num_classes}")

# Cast to float32 and divide by 255
# (assumes the CSV columns hold 8-bit pixel intensities — TODO confirm against the files)
X_train, X_test = (features.astype('float32') / 255.0 for features in (X_train, X_test))

# One-hot encode the labels into num_classes columns
y_train, y_test = (to_categorical(labels, num_classes) for labels in (y_train, y_test))

print(f"y_train shape after encoding: {y_train.shape}")
print(f"y_test shape after encoding: {y_test.shape}")

# c. Define the network architecture using Keras
def create_model(optimizer='adam', learning_rate=0.001, input_dim=3072, n_classes=None):
    """Build and compile a fully connected softmax classifier.

    The network stacks three ReLU hidden layers (512, 256 and 128 units), each
    followed by dropout (rates 0.3, 0.3 and 0.2), and ends in a softmax layer.

    Args:
        optimizer: Optimizer name, compared case-insensitively. ``'sgd'``
            selects SGD with momentum 0.9; any other value selects Adam.
        learning_rate: Learning rate handed to the chosen optimizer.
        input_dim: Number of features in one flattened sample. Defaults to
            3072, the value that used to be hard-coded here; pass
            ``X_train.shape[1]`` to follow the loaded data instead.
        n_classes: Number of output units. ``None`` (the default) falls back
            to the notebook-level ``num_classes`` variable, as before.

    Returns:
        A compiled ``Sequential`` model that minimises categorical
        cross-entropy and reports accuracy.
    """
    # Resolve the output width lazily so existing two-argument calls keep
    # reading the notebook global exactly as they did before.
    if n_classes is None:
        n_classes = num_classes

    model = Sequential([
        Dense(512, activation='relu', input_shape=(input_dim,)),
        Dropout(0.3),
        Dense(256, activation='relu'),
        Dropout(0.3),
        Dense(128, activation='relu'),
        Dropout(0.2),
        Dense(n_classes, activation='softmax')
    ])

    # Choose optimizer: anything that is not 'sgd' deliberately falls back to Adam
    if optimizer.lower() == 'sgd':
        opt = SGD(learning_rate=learning_rate, momentum=0.9)
    else:
        opt = Adam(learning_rate=learning_rate)

    model.compile(
        optimizer=opt,
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# Build the Adam-trained network and show its layer summary
print("Creating model with Adam optimizer...")
adam_settings = {'optimizer': 'adam', 'learning_rate': 0.001}
model_adam = create_model(**adam_settings)
model_adam.summary()

# Build an identically shaped network for SGD, using a larger learning rate
print("Creating model with SGD optimizer...")
sgd_settings = {'optimizer': 'sgd', 'learning_rate': 0.01}
model_sgd = create_model(**sgd_settings)

# d. Train the model using SGD/Adam optimizer
print("Training models...")

# Training parameters
batch_size = 32
epochs = 10

# Hold out a shuffled slice of the training data for validation.
# The validation loss drives early stopping, weight restoration and the learning-rate
# schedule, so it must not come from the test set: otherwise the test metrics reported
# later are tuned on the very data they are measured on.
val_fraction = 0.1
holdout_order = np.random.default_rng(42).permutation(len(X_train))
n_val = max(1, int(len(X_train) * val_fraction))
val_idx, fit_idx = holdout_order[:n_val], holdout_order[n_val:]
X_val, y_val = X_train[val_idx], y_train[val_idx]
X_fit, y_fit = X_train[fit_idx], y_train[fit_idx]


def make_callbacks(total_epochs):
    """Return a fresh [EarlyStopping, ReduceLROnPlateau] pair for one training run.

    Both patience values are derived from ``total_epochs`` so they stay below the
    epoch budget; a patience at or above the number of epochs can never trigger.
    The learning-rate patience is kept shorter than the early-stopping patience so
    the rate is reduced before training is abandoned. A new pair is built for every
    call so that no callback object is shared between models.
    """
    stop_patience = max(2, total_epochs // 3)
    lr_patience = max(1, stop_patience // 2)
    return [
        keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=stop_patience,
            restore_best_weights=True
        ),
        keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=lr_patience,
            min_lr=1e-7
        )
    ]


# Train with Adam optimizer
print("Training with Adam optimizer...")
history_adam = model_adam.fit(
    X_fit, y_fit,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val),
    callbacks=make_callbacks(epochs),
    verbose=1
)

# Train with SGD optimizer
print("Training with SGD optimizer...")
history_sgd = model_sgd.fit(
    X_fit, y_fit,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val),
    callbacks=make_callbacks(epochs),
    verbose=1
)

# e. Evaluate the network
print("Evaluating models...")

# Score the Adam-trained model on the test split
test_loss_adam, test_accuracy_adam = model_adam.evaluate(X_test, y_test, verbose=0)
print(f"Adam Optimizer - Test Loss: {test_loss_adam:.4f}, Test Accuracy: {test_accuracy_adam:.4f}")

# Score the SGD-trained model on the same split
test_loss_sgd, test_accuracy_sgd = model_sgd.evaluate(X_test, y_test, verbose=0)
print(f"SGD Optimizer - Test Loss: {test_loss_sgd:.4f}, Test Accuracy: {test_accuracy_sgd:.4f}")

# Turn softmax outputs and one-hot targets back into integer class indices
y_pred_adam = model_adam.predict(X_test)
y_pred_classes_adam = y_pred_adam.argmax(axis=1)
y_true_classes = y_test.argmax(axis=1)

# Per-class precision / recall / F1 for the Adam model
print("\nClassification Report (Adam Optimizer):")
adam_report = classification_report(y_true_classes, y_pred_classes_adam)
print(adam_report)

# Confusion matrix heatmap: rows are true labels, columns are predicted labels
cm = confusion_matrix(y_true_classes, y_pred_classes_adam)
cm_fig, cm_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=cm_ax)
cm_ax.set_title('Confusion Matrix - Adam Optimizer')
cm_ax.set_ylabel('True Label')
cm_ax.set_xlabel('Predicted Label')
plt.show()

# f. Plot the training loss and accuracy
def plot_training_history(history_adam, history_sgd):
    """Draw a 2x2 grid of training curves and show it.

    The top row is the Adam run and the bottom row is the SGD run; the left
    column plots loss and the right column plots accuracy. Every panel
    overlays the training series with its ``val_``-prefixed counterpart.

    Args:
        history_adam: Object whose ``.history`` dict holds the 'loss',
            'val_loss', 'accuracy' and 'val_accuracy' series for Adam.
        history_sgd: Same structure for the SGD run.
    """
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    runs = (('Adam', history_adam), ('SGD', history_sgd))
    panels = (('loss', 'Loss'), ('accuracy', 'Accuracy'))

    for row, (optimizer_name, run) in enumerate(runs):
        for col, (metric_key, metric_label) in enumerate(panels):
            panel = axes[row, col]
            panel.plot(run.history[metric_key], label=f'Training {metric_label}')
            panel.plot(run.history[f'val_{metric_key}'], label=f'Validation {metric_label}')
            panel.set_title(f'{optimizer_name} Optimizer - {metric_label}')
            panel.set_xlabel('Epoch')
            panel.set_ylabel(metric_label)
            panel.legend()
            panel.grid(True)

    plt.tight_layout()
    plt.show()

# Plot training history
plot_training_history(history_adam, history_sgd)

# Compare final performance
optimizers = ['Adam', 'SGD']
test_accuracies = [test_accuracy_adam, test_accuracy_sgd]
test_losses = [test_loss_adam, test_loss_sgd]

# Bar positions: one slot per optimizer, with two bars side by side in each slot
x = np.arange(len(optimizers))
width = 0.35

# plt.subplots() is the only figure created here; calling plt.figure() first as well
# would leave behind an empty extra figure in the cell output.
fig, ax = plt.subplots(figsize=(10, 6))
rects1 = ax.bar(x - width/2, test_accuracies, width, label='Accuracy', color='skyblue')
rects2 = ax.bar(x + width/2, test_losses, width, label='Loss', color='lightcoral')

ax.set_xlabel('Optimizer')
ax.set_ylabel('Score')
ax.set_title('Model Performance Comparison')
ax.set_xticks(x)
ax.set_xticklabels(optimizers)
ax.legend()

# Add value labels on bars
def autolabel(rects):
    """Write each bar's height above it on the notebook-level ``ax``.

    Args:
        rects: Iterable of bar patches exposing ``get_height``, ``get_x``
            and ``get_width``. The label is the height formatted to four
            decimals, centred on the bar and offset 3 points upward.
    """
    for bar in rects:
        bar_top = bar.get_height()
        bar_centre = bar.get_x() + bar.get_width() / 2
        ax.annotate(
            f'{bar_top:.4f}',
            xy=(bar_centre, bar_top),
            xytext=(0, 3),
            textcoords="offset points",
            ha='center',
            va='bottom',
        )

# Label both bar groups with their values, then render the comparison chart
for bar_group in (rects1, rects2):
    autolabel(bar_group)

plt.tight_layout()
plt.show()

# Print final comparison
banner = "=" * 50
print("\n" + banner)
print("FINAL MODEL COMPARISON")
print(banner)

run_summaries = (
    ("Adam", test_accuracy_adam, test_loss_adam, history_adam),
    ("SGD", test_accuracy_sgd, test_loss_sgd, history_sgd),
)
for position, (run_name, run_accuracy, run_loss, run_history) in enumerate(run_summaries):
    # Every section after the first is separated from the previous one by a blank line
    lead = "\n" if position else ""
    print(f"{lead}{run_name} Optimizer:")
    print(f"  - Final Test Accuracy: {run_accuracy:.4f}")
    print(f"  - Final Test Loss: {run_loss:.4f}")
    print(f"  - Training Epochs: {len(run_history.history['loss'])}")

# Pick the model with the strictly higher test accuracy; a tie goes to SGD
adam_wins = test_accuracy_adam > test_accuracy_sgd
best_model, best_optimizer, best_accuracy = (
    (model_adam, "Adam", test_accuracy_adam)
    if adam_wins
    else (model_sgd, "SGD", test_accuracy_sgd)
)

print(f"\nBest model: {best_optimizer} Optimizer with accuracy: {best_accuracy:.4f}")

# Persist the winning model to disk
best_model.save('best_cifar10_model.h5')
print("Best model saved as 'best_cifar10_model.h5'")

In [None]:
# a. Import the necessary packages (the dataset is loaded from local CSV files rather than through Keras)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# b. Load the training and testing data from CSV files
def load_mnist_from_csv(train_file, test_file):
    """
    Read the training and test splits from two CSV files.

    In each file the first column is taken as the label and every remaining
    column as a pixel value (784 per row for standard MNIST; the width is not
    checked here). The loaded array shapes are printed as a sanity check.

    Args:
        train_file: Path of the training CSV.
        test_file: Path of the test CSV.

    Returns:
        ((x_train, y_train), (x_test, y_test)) as NumPy arrays.
    """
    print("Loading training data...")
    train_frame = pd.read_csv(train_file)
    print("Loading test data...")
    test_frame = pd.read_csv(test_file)

    def _labels_and_pixels(frame):
        # Column 0 holds the label; columns 1.. hold the flattened image
        return frame.iloc[:, 0].values, frame.iloc[:, 1:].values

    y_train, x_train = _labels_and_pixels(train_frame)
    y_test, x_test = _labels_and_pixels(test_frame)

    shape_report = (
        ("Training data", x_train),
        ("Training labels", y_train),
        ("Test data", x_test),
        ("Test labels", y_test),
    )
    for description, array in shape_report:
        print(f"{description} shape: {array.shape}")

    return (x_train, y_train), (x_test, y_test)

# Read both splits from the local CSV files
train_split, test_split = load_mnist_from_csv('mnist_train.csv', 'mnist_test.csv')
x_train, y_train = train_split
x_test, y_test = test_split

# Scale pixel values into [0, 1]; rows are left flat exactly as read from the CSV, so no reshape is done
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0

print(f"\nAfter normalization:")
for split_name, pixels in (("Training", x_train), ("Test", x_test)):
    print(f"{split_name} data range: [{pixels.min():.3f}, {pixels.max():.3f}]")

# One-hot encode the integer labels into 10 columns
y_train_categorical, y_test_categorical = (
    keras.utils.to_categorical(labels, 10) for labels in (y_train, y_test)
)

print(f"\nAfter one-hot encoding:")
print(f"Training labels shape: {y_train_categorical.shape}")
print(f"Test labels shape: {y_test_categorical.shape}")

# Quick look at the raw (integer) training labels
first_labels = y_train[:10]
print(f"\nSample labels from training set: {first_labels}")
print(f"Unique labels in training set: {np.unique(y_train)}")
print(f"Label distribution in training set: {np.bincount(y_train)}")

# c. Define the network architecture using Keras
# Three ReLU hidden layers (512 -> 256 -> 128), each followed by 20% dropout,
# then a 10-way softmax output. The first layer declares the flat 784-value input.
model = keras.Sequential()
model.add(layers.Dense(512, activation='relu', input_shape=(784,)))
model.add(layers.Dropout(0.2))
for hidden_units in (256, 128):
    model.add(layers.Dense(hidden_units, activation='relu'))
    model.add(layers.Dropout(0.2))
model.add(layers.Dense(10, activation='softmax'))

# Print model summary
print("\nModel Architecture:")
model.summary()

# Compile with plain SGD, categorical cross-entropy loss and an accuracy metric
sgd_optimizer = keras.optimizers.SGD(learning_rate=0.01)
model.compile(optimizer=sgd_optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

# d. Train the model using SGD with 11 epochs
print("\nStarting training...")
fit_options = dict(
    batch_size=128,
    epochs=11,
    validation_data=(x_test, y_test_categorical),  # test split is also used for per-epoch monitoring
    verbose=1,
)
history = model.fit(x_train, y_train_categorical, **fit_options)

# e. Evaluate the network
final_scores = model.evaluate(x_test, y_test_categorical, verbose=0)
test_loss, test_accuracy = final_scores
print(f"\nTest Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

# f. Plot the training loss and accuracy
# The curves get their own figure. Drawing the 3x4 image grid on the same figure
# would place it on top of (or replace) the loss and accuracy panels.
curves_fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Training & validation loss
loss_ax.plot(history.history['loss'], label='Training Loss', linewidth=2)
loss_ax.plot(history.history['val_loss'], label='Validation Loss', linewidth=2)
loss_ax.set_title('Model Loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
loss_ax.grid(True, alpha=0.3)

# Training & validation accuracy
acc_ax.plot(history.history['accuracy'], label='Training Accuracy', linewidth=2)
acc_ax.plot(history.history['val_accuracy'], label='Validation Accuracy', linewidth=2)
acc_ax.set_title('Model Accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()
acc_ax.grid(True, alpha=0.3)

curves_fig.tight_layout()
plt.show()

# Additional: Make some predictions and display sample results
# Get predictions for test set
predictions = model.predict(x_test, verbose=0)
predicted_classes = np.argmax(predictions, axis=1)
true_classes = y_test  # Use original labels, not one-hot

# Show up to the first 12 test images with their true and predicted labels, in a separate figure
num_samples_to_show = min(12, len(x_test))
samples_fig, sample_axes = plt.subplots(3, 4, figsize=(12, 6))
for i, sample_ax in enumerate(sample_axes.ravel()):
    sample_ax.axis('off')
    if i >= num_samples_to_show:
        # Fewer than 12 test rows: leave the remaining grid cells blank
        continue
    # Reshape the flat pixel row to 28x28 for display
    sample_ax.imshow(x_test[i].reshape(28, 28), cmap='gray')
    sample_ax.set_title(f'True: {true_classes[i]}\nPred: {predicted_classes[i]}', fontsize=8)

samples_fig.suptitle('Sample Predictions on Test Set', fontsize=14, fontweight='bold')
samples_fig.tight_layout()
plt.show()

# Confusion matrix heatmap: rows are true labels, columns are predicted labels
cm = confusion_matrix(true_classes, predicted_classes)
matrix_fig, matrix_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, ax=matrix_ax)
matrix_ax.set_title('Confusion Matrix', fontsize=14, fontweight='bold')
matrix_ax.set_xlabel('Predicted Label')
matrix_ax.set_ylabel('True Label')
matrix_fig.tight_layout()
plt.show()

# Per-class precision / recall / F1, framed by a banner
report_rule = "=" * 50
print("\n" + report_rule)
print("CLASSIFICATION REPORT")
print(report_rule)
print(classification_report(true_classes, predicted_classes, digits=4))

# Additional: Display some misclassified examples
is_wrong = predicted_classes != true_classes
misclassified_indices = np.where(is_wrong)[0]
num_misclassified = len(misclassified_indices)

if num_misclassified == 0:
    print("\nNo misclassified samples! Perfect accuracy!")
else:
    error_rate_pct = num_misclassified / len(true_classes) * 100
    print(f"\nNumber of misclassified samples: {num_misclassified}")
    print(f"Error rate: {error_rate_pct:.2f}%")

    # Draw at most 12 of the wrong predictions on a 3x4 grid
    plt.figure(figsize=(12, 6))
    num_misclassified_to_show = min(12, num_misclassified)

    for slot, idx in enumerate(misclassified_indices[:num_misclassified_to_show], start=1):
        plt.subplot(3, 4, slot)
        plt.imshow(x_test[idx].reshape(28, 28), cmap='gray')
        # Confidence is the highest softmax probability, i.e. that of the predicted class
        confidence = predictions[idx].max()
        plt.title(
            f'True: {true_classes[idx]}\nPred: {predicted_classes[idx]}\nConf: {confidence:.3f}',
            fontsize=8,
            color='red',
        )
        plt.axis('off')

    plt.suptitle('Misclassified Examples', fontsize=14, fontweight='bold', color='red')
    plt.tight_layout()
    plt.show()

# Persist the trained model to disk
model.save('mnist_feedforward_model.h5')
print(f"\nModel saved as 'mnist_feedforward_model.h5'")

# Final summary: headline metrics, dataset sizes and parameter count between banner rules
summary_rule = "=" * 60
summary_lines = [
    "\n" + summary_rule,
    "TRAINING SUMMARY",
    summary_rule,
    f"Final Test Accuracy: {test_accuracy:.4f}",
    f"Final Test Loss: {test_loss:.4f}",
    f"Training Samples: {len(x_train):,}",
    f"Test Samples: {len(x_test):,}",
    f"Model Parameters: {model.count_params():,}",
    summary_rule,
]
print(*summary_lines, sep="\n")