In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the competition's training and test CSVs in one pass; the generator keeps
# the path variable out of the notebook namespace.
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/digit-recognizer/train.csv',
        '/kaggle/input/digit-recognizer/test.csv',
    )
)

# Summary statistics of the training frame (rendered as the cell output).
train_data.describe()


In [None]:
# Preview the first training sample: every column after the first is reshaped
# into a 28x28 grid and drawn in grayscale.
first_image = train_data.iloc[0, 1:].values.reshape(28, 28)
plt.imshow(first_image, cmap='gray')
# Use an f-string for the title. The original passed a set literal
# ({value}), which matplotlib stringifies as a set repr instead of the value.
# Column 0 is presumably the digit label (the later cells read it as 'label').
plt.title(f'Label: {train_data.iloc[0, 0]}')
plt.show()


In [None]:
# Targets come from the 'label' column; features are every column after the first.
y_train = train_data['label'].to_numpy()

# Divide the raw pixel values by 255.0 (maps them to [0, 1] assuming 8-bit
# intensities -- TODO confirm against the dataset description).
X_train = train_data.iloc[:, 1:].to_numpy() / 255.0

# The test frame is used whole (no label column is dropped), with the same scaling.
X_test = test_data.to_numpy() / 255.0

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled data as a validation split; random_state=42 fixes
# the shuffle so the split is the same on every run. In the visible cells these
# arrays are used by the logistic-regression models only.
X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(X_train, y_train, test_size=0.2, random_state=42)


In [None]:
from tensorflow.keras.utils import to_categorical

# One-hot encode the integer labels into 10 classes.
# NOTE(review): no visible cell reads y_train_cnn -- the CNN below is compiled with
# sparse_categorical_crossentropy and fitted on the integer y_train. Confirm whether
# this array is still needed.
y_train_cnn = to_categorical(y_train, num_classes=10)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Baseline 1: L2-regularised softmax regression trained with L-BFGS.
# C=0.1 applies stronger regularisation than scikit-learn's default of 1.0.
# `multi_class='multinomial'` is intentionally omitted: it is deprecated since
# scikit-learn 1.5 (emits a FutureWarning and is scheduled for removal), and with
# the lbfgs solver on more than two classes the multinomial loss is already what
# the default selects, so the fitted model is unchanged.
logistic_model_lbfgs = LogisticRegression(
    max_iter=1000,
    solver='lbfgs',
    C=0.1,
)
logistic_model_lbfgs.fit(X_train_split, y_train_split)


In [None]:
# Score the L-BFGS model on the held-out validation split.
# Note: y_val_pred / val_accuracy are reused by the SAGA evaluation cell further
# down, so their values depend on which of the two cells ran last.
y_val_pred = logistic_model_lbfgs.predict(X_val_split)
val_accuracy = accuracy_score(y_true=y_val_split, y_pred=y_val_pred)

# Bare expression so the notebook displays the score.
val_accuracy

In [None]:
# Baseline 2: the same softmax regression (default C) trained with the SAGA solver.
# - random_state is pinned because SAGA shuffles the data; without a seed the
#   fitted coefficients (and the reported accuracy) change from run to run.
#   42 matches the seed used for train_test_split above.
# - `multi_class='multinomial'` is intentionally omitted: it is deprecated since
#   scikit-learn 1.5 and the multinomial loss is already the default behaviour
#   for this solver on more than two classes.
logistic_model_saga = LogisticRegression(
    max_iter=1000,
    solver='saga',
    random_state=42,
)
logistic_model_saga.fit(X_train_split, y_train_split)

In [None]:
# Score the SAGA model on the same held-out validation split.
# This overwrites the y_val_pred / val_accuracy values left by the L-BFGS cell.
y_val_pred = logistic_model_saga.predict(X_val_split)
val_accuracy = accuracy_score(y_true=y_val_split, y_pred=y_val_pred)

# Bare expression so the notebook displays the score.
val_accuracy

In [None]:
# Give both feature matrices the (samples, 28, 28, 1) layout the CNN is built for:
# -1 lets NumPy infer the sample count, the trailing 1 is a single channel axis.
X_train_cnn, X_test_cnn = (
    flat_pixels.reshape(-1, 28, 28, 1)
    for flat_pixels in (X_train, X_test)
)

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Seed Python's `random`, NumPy and TensorFlow in one call so weight
# initialisation and batch shuffling repeat across Restart & Run All.
# (Some GPU kernels can still introduce small run-to-run differences.)
tf.keras.utils.set_random_seed(42)

# Small CNN: two conv + max-pool stages, then a dense head with a 10-way softmax.
model = Sequential([
    # Declare the input with an explicit Input object rather than `input_shape=`
    # on the first layer; Keras 3 warns about the latter for Sequential models.
    tf.keras.Input(shape=(28, 28, 1)),

    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Flatten(),

    Dense(128, activation='relu'),

    # One output unit per digit class.
    Dense(10, activation='softmax')
])

# The sparse loss takes integer class labels directly, so no one-hot encoding
# of the targets is required.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.summary()


In [None]:
# Train for 10 epochs in mini-batches of 32, letting Keras hold out 20% of the
# supplied arrays for per-epoch validation. The returned History object feeds
# the accuracy/loss curves plotted later.
history = model.fit(
    x=X_train_cnn,
    y=y_train,
    epochs=10,
    batch_size=32,
    validation_split=0.2,
)


In [None]:
# model.fit above was called with validation_split=0.2, and Keras takes that
# hold-out from the *last* 20% of the arrays it is given (before shuffling).
# Evaluate on that same tail so val_loss / val_acc are genuine validation
# metrics. The original evaluated on all of X_train_cnn, which mostly measures
# accuracy on samples the model was trained on and overstates performance.
val_start = int(len(X_train_cnn) * (1 - 0.2))  # must mirror validation_split in model.fit
val_loss, val_acc = model.evaluate(X_train_cnn[val_start:], y_train[val_start:])
val_acc

In [None]:
import matplotlib.pyplot as plt

def _plot_history_curve(metric, pretty_name):
    """Plot the training and validation series of one metric from `history`.

    metric      -- key in history.history ('val_' + metric is the validation key)
    pretty_name -- human-readable metric name used in the title, y-label and legend
    """
    plt.plot(history.history[metric], label=f'Training {pretty_name}')
    plt.plot(history.history['val_' + metric], label=f'Validation {pretty_name}')
    plt.title(f'Model {pretty_name}')
    plt.ylabel(pretty_name)
    plt.xlabel('Epoch')
    plt.legend()
    plt.show()


# One figure per metric, accuracy first and then loss.
_plot_history_curve('accuracy', 'Accuracy')
_plot_history_curve('loss', 'Loss')


In [None]:
# Per-class scores for every test image; the arg-max over the class axis is the
# predicted digit.
test_predictions = model.predict(X_test_cnn)
test_labels = test_predictions.argmax(axis=1)

# Two-column submission frame with ImageId numbered from 1, written without the
# pandas index column.
submission = pd.DataFrame({
    'ImageId': range(1, len(test_labels) + 1),
    'Label': test_labels,
})
submission.to_csv('/kaggle/working/submission.csv', index=False)