In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
import tensorflow as tf
from tensorflow.keras import layers, models

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Assuming your dataset is in a CSV file named 'emnist-balanced-train.csv'
csv_file_path = 'emnist-balanced-train.csv'

In [44]:
# Load the CSV file into a Pandas DataFrame
df = pd.read_csv(csv_file_path, header=None)


In [4]:
# Shuffle the dataset
df = shuffle(df)

In [5]:
# Extract features (X) and labels (y)
X = df.iloc[:, 1:].values  # Assuming pixel values start from the second column
y = df.iloc[:, 0].values   # Assuming labels are in the first column


In [6]:
# Reshape the features to (num_samples, image_height, image_width, num_channels)
image_height = 28
image_width = 28
X = X.reshape(-1, image_height, image_width, 1)


In [7]:
# Convert labels to numerical values using LabelEncoder
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [8]:
# Calculate the number of classes
num_classes = len(np.unique(y))

In [9]:
# Normalize pixel values to the range [0, 1]
X = X / 255.0

In [10]:
# Build the CNN model
model = models.Sequential()

In [11]:
# First convolutional block
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(image_height, image_width, 1)))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.25))


In [12]:
# Second convolutional block
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.25))


In [13]:
# Third convolutional block
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.25))


In [14]:
# Global Average Pooling
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dropout(0.25))

In [15]:
# Dense layers
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dropout(0.5))


In [16]:
# Output layer
model.add(layers.Dense(num_classes, activation='softmax'))

In [17]:
# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [18]:
# Display the model summary
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 26, 26, 32)        320       
                                                                 
 batch_normalization (Batch  (None, 26, 26, 32)        128       
 Normalization)                                                  
                                                                 
 max_pooling2d (MaxPooling2  (None, 13, 13, 32)        0         
 D)                                                              
                                                                 
 dropout (Dropout)           (None, 13, 13, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 11, 11, 64)        18496     
                                                                 
 batch_normalization_1 (Bat  (None, 11, 11, 64)        2

In [19]:
# Train the model
history = model.fit(X, y, epochs=20, validation_split=0.2, batch_size=64)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [25]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

NameError: name 'X_test' is not defined

In [27]:
# Save the entire model to a HDF5 file
model.save('medicine_prediction.h5')


In [42]:
import pandas as pd
from sklearn.utils import shuffle
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models

In [65]:
csv_test_file_path = '/Users/andrewasher/Education/Medicine_Prescription/emnist-balanced-test.csv'


In [66]:
df_test = pd.read_csv(csv_test_file_path)

In [67]:

# Shuffle the test dataset
df_test = shuffle(df_test)


In [68]:
# Extract features (X_test) and labels (y_test)
X_test = df_test.iloc[:, 1:].values  # Assuming pixel values start from the second column
y_test = df_test.iloc[:, 0].values   # Assuming labels are in the first column

In [69]:
# Reshape the features to (num_samples, image_height, image_width, num_channels)
X_test = X_test.reshape(-1, image_height, image_width, 1)

In [70]:
# Convert labels to numerical values using LabelEncoder
y_test = label_encoder.transform(y_test)

# Normalize pixel values to the range [0, 1]
X_test = X_test / 255.0


In [71]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

Test Accuracy: 84.80%


In [98]:
from tensorflow.keras.models import load_model
import cv2
import numpy as np


In [99]:
# Load your trained character recognition model
model_path = 'medicine_prediction.h5'  # Replace with the path to your saved model file
model = load_model(model_path)

In [120]:
def process_image(image_path):
    # Step 1: Loading and Resizing
    image = cv2.imread(image_path)
    image = cv2.resize(image, (28, 28))  # Resize to match the expected input dimensions of the model

    # Step 2: Grayscale Conversion
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Step 3: Image Processing
    processed_image = preprocess_image(gray_image)

    # Step 4: Thresholding
    _, thresholded_image = cv2.threshold(processed_image, 128, 255, cv2.THRESH_BINARY)

    # Step 5: Contour Detection
    contours, _ = cv2.findContours(thresholded_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Step 6: Cropping & Character Recognition
    recognized_text = recognize_characters(gray_image, contours)

    return recognized_text


In [112]:
# Function for image preprocessing
def preprocess_image(image):
    # Convert to grayscale if the input image is in color
    if len(image.shape) == 3:
        image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Apply Gaussian Blur to reduce noise
    image = cv2.GaussianBlur(image, (5, 5), 0)

    # Apply adaptive thresholding to improve contrast
    _, image = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Perform morphological operations (optional)
    kernel = np.ones((5, 5), np.uint8)
    image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel)

    # Resize the image to a consistent size
    image = cv2.resize(image, (300, 300))

    return image


In [113]:
# Function for character recognition
def recognize_characters(image, contours):
    recognized_text = ""

    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        character_image = image[y:y+h, x:x+w]

        # Apply your character recognition model here (similar to the CNN model training)
        character_prediction = np.argmax(model.predict(np.expand_dims(character_image, axis=0)))

        # Append the recognized character to the result
        recognized_text += get_character_from_prediction(character_prediction)

    return recognized_text

In [114]:
# Helper function to get the recognized character from the model prediction
def get_character_from_prediction(prediction):
    # Assuming predictions are integers corresponding to character ASCII codes
    recognized_character = str(chr(prediction + 65))  # Assumes predictions are integers (A-Z)
    
    # You might need to adjust this logic based on your actual output
    return recognized_character


In [132]:
# Example usage:
image_path = '/Users/andrewasher/Education/Medicine_Prescription/image.jpeg'  # Replace with the path to your input image
result = process_image(image_path)
print(result)

L
