In [1]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dense, Flatten, Dropout, BatchNormalization

In [2]:
# Set data and label paths
data_path = 'C:\\Users\\zigya\\OneDrive\\Desktop\\Invoice-2\\data\\images'
label_path = 'C:\\Users\\zigya\\OneDrive\\Desktop\\Invoice-2\\data\\labels'




In [3]:
# Load images and labels
images = []
labels =[]
for filename in os.listdir(data_path):
    img = cv2.imread(os.path.join(data_path, filename))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = cv2.resize(img, (256, 256))
    images.append(img)
    label_file = os.path.join(label_path, os.path.splitext(filename)[0] + '.xml')
    with open(label_file, 'r') as f:
        label_data = f.read()
        label_data = label_data.strip()
        labels.append(label_data)

In [4]:
# Convert labels to one-hot encoding
label_classes = sorted(list(set(labels)))
label_to_idx = {label: i for i, label in enumerate(label_classes)}
idx_to_label = {i: label for i, label in enumerate(label_classes)}
labels = [label_to_idx[label] for label in labels]
labels = to_categorical(labels, num_classes=len(label_classes))

In [22]:
import json

with open('idx_to_label.json', 'w') as f:
    json.dump(idx_to_label, f)


In [26]:
print(labels)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221,

In [6]:
len(labels)

234

In [7]:
# Split data into training, validation, and test sets
X_train, X_val, y_train, y_val = train_test_split(images, labels, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_val, y_val, test_size=0.5, random_state=42)

In [8]:
# Augment data
datagen_train = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True,
    fill_mode='nearest')

datagen_val = ImageDataGenerator()

In [9]:
# Convert data
X_train = np.array(X_train)
X_val = np.array(X_val)
X_test = np.array(X_test)

In [10]:
# Reshape data for Conv2D layer
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_val = X_val.reshape(X_val.shape[0], X_val.shape[1], X_val.shape[2], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

In [11]:
# Build model
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(256, 256, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(BatchNormalization())
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2)))
model.add(BatchNormalization())
model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2)))
model.add(BatchNormalization())
model.add(Conv2D(256, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2)))
model.add(BatchNormalization())
model.add(Conv2D(512, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2)))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(label_classes), activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [12]:
# Define callbacks
from keras.callbacks import EarlyStopping, ModelCheckpoint

early_stop = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='min')
model_checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', mode='min', save_best_only=True, verbose=1)

In [13]:
# Train model
batch_size = 32
epochs = 50
history = model.fit(datagen_train.flow(X_train, y_train, batch_size=batch_size), 
                    epochs=epochs, 
                    batch_size=batch_size, 
                    validation_data=datagen_val.flow(X_val, y_val, batch_size=batch_size), 
                    callbacks=[early_stop, model_checkpoint])
# history = model.fit(datagen_train.flow(X_train, y_train, batch_size=batch_size),
# validation_data=datagen_val.flow(X_val, y_val, batch_size=batch_size),
# epochs=epochs)

Epoch 1/50
Epoch 1: val_loss improved from inf to 154.33902, saving model to best_model.h5
Epoch 2/50
Epoch 2: val_loss did not improve from 154.33902
Epoch 3/50
Epoch 3: val_loss did not improve from 154.33902
Epoch 4/50
Epoch 4: val_loss did not improve from 154.33902
Epoch 5/50
Epoch 5: val_loss did not improve from 154.33902
Epoch 6/50
Epoch 6: val_loss did not improve from 154.33902
Epoch 6: early stopping


In [14]:
# Evaluate model
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print('Test Accuracy:', test_acc)

1/1 - 2s - loss: 429.9192 - accuracy: 0.0000e+00 - 2s/epoch - 2s/step
Test Accuracy: 0.0


In [15]:
model.save('test_model.h5')

In [5]:
from tensorflow.keras.models import load_model

loaded_model = load_model('test_model.h5')


In [6]:
import cv2

invoice_image = cv2.imread('test images/invoice-254.jpg')


In [7]:
# Preprocess the image
img_gray = cv2.cvtColor(invoice_image, cv2.COLOR_BGR2GRAY)
img_resized = cv2.resize(img_gray, (256, 256))
img_reshaped = img_resized.reshape(1, 256, 256, 1)

In [8]:
# Get the model's predictions for the input image
pred_probs = loaded_model.predict(img_reshaped)[0]

# Get the predicted class label
pred_class = np.argmax(pred_probs)

# Map the predicted class label back to the original class name
pred_label = idx_to_label[pred_class]




In [9]:
pred_label

'<annotation>\n\t<folder>output</folder>\n\t<filename>invoice-101.jpg</filename>\n\t<path>C:\\Users\\zigya\\OneDrive\\Desktop\\Label_Img Text Detect\\data\\images\\output\\invoice-101.jpg</path>\n\t<source>\n\t\t<database>Unknown</database>\n\t</source>\n\t<size>\n\t\t<width>1654</width>\n\t\t<height>2339</height>\n\t\t<depth>3</depth>\n\t</size>\n\t<segmented>0</segmented>\n\t<object>\n\t\t<name>Company Name</name>\n\t\t<pose>Unspecified</pose>\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>125</xmin>\n\t\t\t<ymin>3</ymin>\n\t\t\t<xmax>613</xmax>\n\t\t\t<ymax>106</ymax>\n\t\t</bndbox>\n\t</object>\n\t<object>\n\t\t<name>Name</name>\n\t\t<pose>Unspecified</pose>\n\t\t<truncated>0</truncated>\n\t\t<difficult>0</difficult>\n\t\t<bndbox>\n\t\t\t<xmin>1320</xmin>\n\t\t\t<ymin>643</ymin>\n\t\t\t<xmax>1555</xmax>\n\t\t\t<ymax>683</ymax>\n\t\t</bndbox>\n\t</object>\n\t<object>\n\t\t<name>Address</name>\n\t\t<pose>Unspecified</pose>\n\t\t<truncated>0</tr

In [10]:
import pytesseract
import xml.etree.ElementTree as ET
def get_annotation_info(xml_file_path, img_path):
    #function to get the annotation details from the XML file and extract the text using OCR
    
    root = ET.fromstring(xml_file_path)
    
    annotations = [] #list to store the annotations
    img = cv2.imread(img_path) #reading the input image
    
    norm_img = np.zeros((img.shape[0], img.shape[1]))
    img = cv2.normalize(img, norm_img, 0, 255, cv2.NORM_MINMAX)
    
    # noise = cv2.fastNlMeansDenoisingColored(img, None, 10, 10, 7, 15)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) #converting the image to grayscale
    thresh = cv2.threshold(gray, 64, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1] #applying threshold to the image
    for obj in root.findall('object'):
        # Extract the label name
        annotation = {}
        annotation['label'] = obj.find('name').text

        # Extract the bounding box coordinates
        bbox = obj.find('bndbox')
        xmin = int(bbox.find('xmin').text)
        ymin = int(bbox.find('ymin').text)
        xmax = int(bbox.find('xmax').text)
        ymax = int(bbox.find('ymax').text)
        cropped_img = thresh[ymin:ymax, xmin:xmax] #cropping the image based on the bounding box coordinates
        annotation['text'] = pytesseract.image_to_string(cropped_img) #using OCR to extract the text from the cropped image
        annotations.append(annotation) #adding the annotation to the list
        
        
    return annotations #returning the list of annotations
    

In [11]:
annotations = get_annotation_info(pred_label , 'test images/invoice-254.jpg')

invoice_data = {}

# Loop through the annotations and add the information to the dictionary
for annotation in annotations:
    if(annotation['text'] == ""): 
        continue
    else:
        label = annotation['label']
        text = annotation['text']
        invoice_data[label] = text

In [12]:
invoice_data

{'Company Name': 'amazon.in\n',
 'Name': 'ANANT BHAT\n',
 'Address': '-aridabad, Gali number 2 sector 11\nFARIDABAD, HARYANA, 121006\nIN\n',
 'Item Description': 'NIVEA Men Face Wash, Oil Control for 12hr Oil Control with 10x\nVitamin C Effect, 150 mi | BOOX9UOCE! ( BOOXSUOCEI )\nHSN:33041000\n\nTAL:\n'}

In [13]:
import csv
import os

# Path of the folder to save the file in
folder_path = 'C:\\Users\\zigya\\OneDrive\\Desktop\\Invoice-2\\static\\uploaded_files\\'

# Path of the file to save
file = os.path.join(folder_path, 'output_2.csv')

def create_csv(invoice_data, file_path):
    with open(file_path ,'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['Label', 'Text'])
        for key, value in invoice_data.items():
            writer.writerow([key, value])


In [14]:
csv_data = create_csv(invoice_data, file)

In [17]:
import json
# Convert the dictionary to a formatted JSON string with newlines and indentation
invoice_data_str = json.dumps(invoice_data, indent=4)

# Add a newline after each opening curly brace and comma in the JSON string
invoice_data_str = invoice_data_str.replace("{", "{\n").replace(",", ",\n")

# Print the formatted JSON string
print(invoice_data_str)

{

    "Company Name": "amazon.in\n",

    "Name": "ANANT BHAT\n",

    "Address": "-aridabad,
 Gali number 2 sector 11\nFARIDABAD,
 HARYANA,
 121006\nIN\n",

    "Item Description": "NIVEA Men Face Wash,
 Oil Control for 12hr Oil Control with 10x\nVitamin C Effect,
 150 mi | BOOX9UOCE! ( BOOXSUOCEI )\nHSN:33041000\n\nTAL:\n"
}


In [17]:
from datetime import datetime
# Path of the folder to save the file in
folder_path = 'C:\\Users\\zigya\\OneDrive\\Desktop\\Invoice-2\\'

# Path of the file to save
filename = datetime.now().strftime("%Y%m%d_%H%M%S_") + 'invoice_data'
file_path = os.path.join(folder_path, filename + '.csv')

# Write the data to the CSV file
with open(file_path, 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    for key, value in invoice_data.items():
        writer.writerow([key, value.strip()])