# Digit Recognition 

### Step 1: Import Required Libraries
First, we'll import the necessary libraries for data manipulation, visualization, and building the neural network.

In [None]:
# Importing necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

# Seed both NumPy's and TensorFlow's global random number generators with
# the same fixed value so repeated runs are comparable
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)


### Step 2: Load and Explore the Dataset
We'll load the dataset from the provided CSV file and explore its structure.

In [None]:
# Read the training CSV into a DataFrame
train_csv = 'train.csv'
data = pd.read_csv(train_csv)

# Preview the leading rows (rendered as the cell's output)
data.head()


### Step 3: Preprocess the Data
We need to separate the features and labels, normalize the pixel values, reshape the data for the CNN, one-hot encode the labels, and split the data into training and validation sets.

In [None]:
# Separate features and labels.
# The pixel matrix is cast to float32 up front: dividing an integer array by
# 255.0 would otherwise produce float64, doubling memory use for data that
# Keras computes on in float32 by default.
labels = data['label'].values
X = data.drop('label', axis=1).values.astype('float32')

# Normalize the pixel values (assumes 8-bit intensities in the 0-255 range)
X /= 255.0

# Reshape the data to fit the CNN input requirements (28x28 pixels and 1 color channel)
X = X.reshape(-1, 28, 28, 1)

# One-hot encode the labels; `lb` is kept so predictions can be mapped back
# to digit classes with lb.inverse_transform
lb = LabelBinarizer()
y = lb.fit_transform(labels)

# Split the data into training and validation sets.
# Stratifying on the integer labels keeps the per-digit proportions the same
# in both sets, so validation metrics are not skewed by an uneven draw.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)


### Step 4: Build the CNN Model
We'll define a simple CNN architecture for our digit recognition task.

In [None]:
# Build the CNN model: two convolution/pooling stages followed by a dense
# classifier head with a 10-way softmax output.
model = Sequential([
    # Declare the input shape with an explicit Input object rather than the
    # `input_shape=` layer argument, which current Keras discourages for
    # Sequential models (Keras 3 emits a warning for it).
    tf.keras.Input(shape=(28, 28, 1)),

    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

# Compile the model; categorical cross-entropy matches the one-hot encoded
# labels and the softmax output layer
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Display the model's architecture
model.summary()


### Step 5: Train the Model
We'll train the CNN model on the training data and validate it on the validation data, using early stopping to prevent overfitting.

In [None]:
# Training hyperparameters
epochs = 50
batch_size = 64

# Stop once the validation loss has gone 3 epochs without improving, and
# roll the model back to the best weights seen
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Fit on the training split, validating on the held-out split after each epoch
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    batch_size=batch_size,
    epochs=epochs,
    callbacks=[early_stopping],
)


### Step 6: Evaluate the Model
Next, we'll evaluate the model's performance on the validation set and visualize the training history.

In [None]:
# Score the trained model on the validation split and report both metrics
val_loss, val_accuracy = model.evaluate(X_val, y_val)
print(f'Validation Loss: {val_loss}')
print(f'Validation Accuracy: {val_accuracy}')

# Draw the per-epoch training curves side by side:
# accuracy in the left panel, loss in the right panel
plt.figure(figsize=(12, 4))

# (training history key, validation history key, panel title, y-axis label)
panels = (
    ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model loss', 'Loss'),
)
for position, (train_key, val_key, title, y_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(title)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')

plt.show()


### Step 7: Save the Model
Save the trained model for future use.

In [None]:
# Persist the trained model to disk in the native Keras (.keras) format
model_path = 'mnist_cnn_model.keras'
model.save(model_path)