<a href="https://colab.research.google.com/github/hdorazi/english-phonetics/blob/main/class-2024-fall_1129.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required libraries
!pip install tensorflow gradio

# Import necessary libraries
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
import numpy as np
import gradio as gr
from PIL import Image

# Fetch MNIST. x_* hold the digit images, y_* the label number of each image.
# The test split is held back to check how well the model trained: like
# setting aside 20 points and then checking that the fitted model
# (i.e. y = ax + b) lies close to those held-out points.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Scale pixel values into [0, 1]
x_train, x_test = x_train / 255.0, x_test / 255.0

# Append a trailing channel axis (grayscale -> a single channel).
# Original note: the count depends on the dimensionality of the input node.
x_train = np.expand_dims(x_train, axis=-1)
x_test = np.expand_dims(x_test, axis=-1)

# Build the CNN model
model = models.Sequential([
    # Declare the input explicitly (28x28 images with one grayscale channel).
    # Passing `input_shape=` to the first layer of a Sequential model is
    # deprecated in Keras 3 and emits a UserWarning at construction time.
    layers.Input(shape=(28, 28, 1)),
    # Convolutional feature extractor: two conv + max-pool stages
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    # DNN (fully connected classifier head)
    layers.Dense(64, activation='relu'),
    # softmax: the 10 outputs sum to 1, so they can be read as probabilities
    layers.Dense(10, activation='softmax')
])

# Configure training: Adam optimizer, cross-entropy loss on integer labels,
# and accuracy as the reported metric
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit the model. epochs=5: training continues until the data has been
# passed through 5 times. The test split is supplied as validation data.
model.fit(
    x_train,
    y_train,
    epochs=5,
    validation_data=(x_test, y_test),
)

# Score the trained model on the test split and report its accuracy
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
print(f"Test accuracy: {test_acc:.4f}")

# Lookup table: predicted class index (0-9) -> English word for that digit.
# enumerate() numbers the words from 0, so each word's position is its key.
digit_to_word = dict(enumerate((
    "zero", "one", "two", "three", "four",
    "five", "six", "seven", "eight", "nine",
)))

# Define the Gradio interface function
def classify_digit(data):
    """Classify a Sketchpad drawing and return the digit as an English word.

    Args:
        data: The value Gradio passes from the Sketchpad input. It is read
            as a mapping whose "composite" entry holds the drawing. The
            strokes are taken from the 4th (alpha) channel, so the composite
            is assumed to be an RGBA image array -- TODO confirm against the
            Sketchpad configuration in use.

    Returns:
        "<word> (<confidence>)" on success, a prompt asking for a drawing
        when nothing was drawn, or "Error: <message>" if processing failed.
    """
    empty_message = "Please draw a digit first."

    # Nothing submitted at all: ask for a drawing instead of surfacing a
    # confusing "'NoneType' object is not subscriptable" error.
    if data is None:
        return empty_message

    try:
        # Extract composite image from Gradio Sketchpad data
        composite = data["composite"]
        if composite is None:
            return empty_message
        composite_array = np.array(composite)

        # The alpha channel is used as the ink mask, so it must exist.
        if composite_array.ndim != 3 or composite_array.shape[2] < 4:
            return "Error: expected an RGBA drawing with an alpha channel"

        # A fully transparent canvas has no strokes; predicting on it would
        # only report a meaningless digit with a made-up confidence.
        if not composite_array[:, :, 3].any():
            return empty_message

        image = Image.fromarray(composite_array)
        image = image.resize((28, 28))  # Resize to match MNIST format
        image_array = np.array(image) / 255.0  # Scale to [0, 1] like the training data
        alpha = image_array[:, :, 3]

        # Make a prediction (verbose=0: no progress bar for a single sample)
        prediction = model.predict(alpha.reshape(1, 28, 28, 1), verbose=0).flatten()
        # Cast to a plain int so the dict lookup does not rely on NumPy
        # integer hashing/equality.
        class_idx = int(np.argmax(prediction))
        confidence = prediction[class_idx]

        # Return the word label and confidence
        return f"{digit_to_word[class_idx]} ({confidence:.2f})"
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the app.
        return f"Error: {str(e)}"



# Assemble the Gradio UI: a sketchpad to draw on as the input, with the
# string returned by classify_digit shown as plain text output
demo = gr.Interface(
    title="Real-time Digit Classifier",
    description="Draw a digit (0-9) on the canvas and see the prediction in words.",
    fn=classify_digit,
    inputs=gr.Sketchpad(label="Draw a digit"),
    outputs="text",
)

# Start the app with debug mode enabled
demo.launch(debug=True)


Collecting gradio
  Downloading gradio-5.7.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.5-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.5.0 (from gradio)
  Downloading gradio_client-1.5.0-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart==0.0.12 (from gradio)
  Downloading python_multipart-0.0.12-py3-none-any.whl.metadata (1.9 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.8.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metad

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m64s[0m 33ms/step - accuracy: 0.8942 - loss: 0.3331 - val_accuracy: 0.9835 - val_loss: 0.0523
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 31ms/step - accuracy: 0.9853 - loss: 0.0479 - val_accuracy: 0.9857 - val_loss: 0.0465
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 33ms/step - accuracy: 0.9895 - loss: 0.0325 - val_accuracy: 0.9894 - val_loss: 0.0295
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m78s[0m 31ms/step - accuracy: 0.9929 - loss: 0.0231 - val_accuracy: 0.9897 - val_loss: 0.0309
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 34ms/step - accuracy: 0.9954 - loss: 0.0150 - val_accuracy: 0.9919 - val_loss: 0.0267
313/313 - 2s - 8ms/step - accuracy: 0.9919 - loss: 0.0267
Test accuracy: 0.9919
Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True`

Created dataset file at: .gradio/flagged/dataset1.csv
