In [42]:
import numpy as np
import os
import argparse
import cv2
import imutils

from matplotlib import pyplot as plt
from collections import Counter

## Loading

In [10]:
def load_images_from_folder(folder):
    """Load every decodable image file in ``folder`` as a grayscale array.

    Directory entries that are not regular files are ignored, and so are
    files that OpenCV cannot decode (``cv2.imread`` returns ``None`` for
    those instead of raising).

    Args:
        folder: Path of the directory to scan (not recursive).

    Returns:
        A tuple ``(images, dimensions)`` of two parallel lists: the grayscale
        images and their ``(height, width)`` shapes.
    """
    images = []
    dimensions = []
    for filename in os.listdir(folder):
        img_path = os.path.join(folder, filename)
        if not os.path.isfile(img_path):
            continue
        img = cv2.imread(img_path)
        # The None check has to come before the colour conversion:
        # cv2.cvtColor raises when it is handed None.
        if img is None:
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        images.append(img)
        dimensions.append(img.shape[:2])  # (height, width)
    return images, dimensions

In [11]:
# Input directory (relative path) scanned by the loader.
folder = "Train"
# Parallel lists: grayscale images and their (height, width) shapes.
images, dimensions = load_images_from_folder(folder)

## Resizing

In [13]:
def analyze_image_dimensions(dimensions):
    """Summarise a collection of image sizes.

    Args:
        dimensions: Iterable of indexable pairs whose first item is a height
            and whose second item is a width.

    Returns:
        A dict with the mean (``average_*``), median (``median_*``) and the
        three most frequent values with their counts (``most_common_*``),
        each reported separately for ``height`` and ``width``.
    """
    def _summarise(values):
        # (mean, median, top-3 (value, count) pairs) for one axis.
        return np.mean(values), np.median(values), Counter(values).most_common(3)

    heights, widths = [], []
    for dim in dimensions:
        heights.append(dim[0])
        widths.append(dim[1])

    mean_h, median_h, top_h = _summarise(heights)
    mean_w, median_w, top_w = _summarise(widths)

    return dict(
        average_height=mean_h,
        average_width=mean_w,
        median_height=median_h,
        median_width=median_w,
        most_common_heights=top_h,
        most_common_widths=top_w,
    )

stats = analyze_image_dimensions(dimensions)

# Means are shown with two decimals; medians use their default formatting.
summary_lines = [
    "Image Dimension Statistics:",
    f"Average Height: {stats['average_height']:.2f}",
    f"Average Width: {stats['average_width']:.2f}",
    f"Median Height: {stats['median_height']}",
    f"Median Width: {stats['median_width']}",
]
print("\n".join(summary_lines))
print("Top 3 Most Common Heights:", stats['most_common_heights'])
print("Top 3 Most Common Widths:", stats['most_common_widths'])

Image Dimension Statistics:
Average Height: 859.32
Average Width: 747.52
Median Height: 944.5
Median Width: 810.0
Top 3 Most Common Heights: [(961, 14), (962, 13), (965, 9)]
Top 3 Most Common Widths: [(813, 22), (819, 14), (816, 11)]


In [14]:
def resize_images(images, target_size=(961, 813)):
    """Resize every image to one common size.

    Args:
        images: Iterable of image arrays (rows first, then columns).
        target_size: ``(height, width)`` of the output images.

    Returns:
        A new list with the resized images, in input order.
    """
    resized_images = []
    for image in images:
        source_shape = image.shape
        # Cubic interpolation as soon as either side has to grow,
        # bilinear when the image only shrinks (or keeps its size).
        must_grow = source_shape[0] < target_size[0] or source_shape[1] < target_size[1]
        interpolation = cv2.INTER_CUBIC if must_grow else cv2.INTER_LINEAR

        # cv2.resize takes the output size as (width, height).
        output_size = (target_size[1], target_size[0])
        resized_images.append(cv2.resize(image, output_size, interpolation=interpolation))
    return resized_images

# Bring all loaded images to resize_images' default target size.
resized_train = resize_images(images)

## Color

In [None]:
def extract_blue_channel(images, output_dir="train_blue_channel"):
    """Save the first (blue) channel of each image as ``<output_dir>/<index>.jpg``.

    Args:
        images: Iterable of arrays shaped ``(rows, cols, channels)`` with at
            least three channels, blue first (the channel order produced by
            ``cv2.imread``).
        output_dir: Directory the files are written to; created if missing.

    Raises:
        ValueError: If an image has no channel axis or fewer than three
            channels (for example a grayscale image).
        OSError: If OpenCV reports that a file could not be written.
    """
    # cv2.imwrite does not create missing directories; it just returns False.
    os.makedirs(output_dir, exist_ok=True)
    for i, image in enumerate(images):
        if image.ndim != 3 or image.shape[2] < 3:
            raise ValueError(
                f"image {i} has shape {image.shape}; expected a colour image "
                "with at least 3 channels"
            )
        # Channel 0 is blue in BGR order; copy it so OpenCV gets contiguous data.
        blue = np.ascontiguousarray(image[:, :, 0])
        out_path = os.path.join(output_dir, f"{i}.jpg")
        if not cv2.imwrite(out_path, blue):
            raise OSError(f"could not write {out_path}")

# NOTE(review): `images` as loaded by load_images_from_folder is already
# converted to single-channel grayscale, so there is no blue channel left and
# this call raises ValueError for non-empty input. Pass images that still
# carry their BGR channels instead.
extract_blue_channel(images)

## Enhancing Image (Applying CLAHE)

In [16]:
def enhance_text_image(images, output_dir="train_enhanced2"):
    """Apply CLAHE to each image and save it as ``<output_dir>/<index>.jpg``.

    Args:
        images: Sequence of image arrays accepted by ``cv2.CLAHE.apply``
            (presumably single-channel 8-bit, as produced by the grayscale
            loader -- confirm for other inputs).
        output_dir: Directory the files are written to; created if missing.

    Raises:
        OSError: If OpenCV reports that a file could not be written.
    """
    if len(images) == 0:
        return  # nothing to enhance, nothing to create

    # cv2.imwrite does not create missing directories; it just returns False.
    os.makedirs(output_dir, exist_ok=True)

    # The CLAHE settings do not depend on the image, so build the operator once.
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(4, 4))
    for i, image in enumerate(images):
        enhanced_image = clahe.apply(image)
        out_path = os.path.join(output_dir, f"{i}.jpg")
        if not cv2.imwrite(out_path, enhanced_image):
            raise OSError(f"could not write {out_path}")

# NOTE(review): the enhanced images are written to "train_enhanced2", while the
# later registration/rotation cells read "train_enhanced/train_enhanced" --
# confirm which directory is the intended hand-off between the stages.
enhance_text_image(resized_train)

# Image Registration

In [41]:
import cv2 
import numpy as np 
import os

directory = "train_enhanced/train_enhanced"

# Sorted so that the reference image (the first entry) is the same on every
# run; os.listdir makes no promise about ordering.
file_list = sorted(os.listdir(directory))

orb_detector = cv2.ORB_create(5000)
# Hamming distance fits ORB's binary descriptors; crossCheck keeps only
# matches that are each other's best candidate.
matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)

MIN_MATCHES = 4      # cv2.findHomography needs at least four point pairs
KEEP_FRACTION = 0.9  # share of the best matches that is kept

if file_list:
    # The first file is the fixed reference that every other image is aligned
    # to. Load it and detect its features once, and never overwrite it.
    reference_path = os.path.join(directory, file_list[0])
    reference_img = cv2.imread(reference_path)
    if reference_img is None:
        raise FileNotFoundError(f"Could not read reference image {reference_path}")
    height, width = reference_img.shape[:2]
    kp_ref, d_ref = orb_detector.detectAndCompute(reference_img, None)
    if d_ref is None:
        raise ValueError(f"No ORB features found in reference image {reference_path}")

    for i in range(1, len(file_list)):
        target_path = os.path.join(directory, file_list[i])
        moving_img = cv2.imread(target_path)  # image to be aligned
        if moving_img is None:
            print(f"Skipping {target_path}: not a readable image")
            continue

        kp_mov, d_mov = orb_detector.detectAndCompute(moving_img, None)
        if d_mov is None:
            print(f"Skipping {target_path}: no ORB features found")
            continue

        # Query = image being aligned, train = reference, so the homography
        # maps the moving image into the reference frame.
        matches = sorted(matcher.match(d_mov, d_ref), key=lambda m: m.distance)
        matches = matches[:int(len(matches) * KEEP_FRACTION)]
        if len(matches) < MIN_MATCHES:
            print(f"Skipping {target_path}: only {len(matches)} matches")
            continue

        p_mov = np.array([kp_mov[m.queryIdx].pt for m in matches])
        p_ref = np.array([kp_ref[m.trainIdx].pt for m in matches])

        homography, mask = cv2.findHomography(p_mov, p_ref, cv2.RANSAC)
        if homography is None:
            print(f"Skipping {target_path}: homography estimation failed")
            continue

        aligned_img = cv2.warpPerspective(moving_img, homography, (width, height))

        # NOTE: this overwrites the input file, so the cell is not idempotent;
        # a re-run aligns the already aligned images again.
        cv2.imwrite(target_path, aligned_img)



# Rotation

In [12]:
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

In [19]:
from PIL import Image
import os

dir_path = 'train_enhanced/train_enhanced'

# Only files with a lower-case ".jpg" suffix are inspected.
jpg_files = [f for f in os.listdir(dir_path) if f.endswith('.jpg')]

for file in jpg_files:
    # Image.open keeps the underlying file open; the context manager closes it
    # once the header information has been read.
    with Image.open(os.path.join(dir_path, file)) as img:
        width, height = img.size  # PIL reports (width, height)
        num_channels = len(img.getbands())
    print(f'Gambar {file}: Dimensi = {width} x {height}, Saluran Warna = {num_channels}')

Gambar 0.jpg: Dimensi = 813 x 961, Saluran Warna = 1
Gambar 1.jpg: Dimensi = 813 x 961, Saluran Warna = 1
Gambar 10.jpg: Dimensi = 813 x 961, Saluran Warna = 1
Gambar 100.jpg: Dimensi = 813 x 961, Saluran Warna = 1
Gambar 101.jpg: Dimensi = 813 x 961, Saluran Warna = 1
Gambar 102.jpg: Dimensi = 813 x 961, Saluran Warna = 1
Gambar 103.jpg: Dimensi = 813 x 961, Saluran Warna = 1
Gambar 104.jpg: Dimensi = 813 x 961, Saluran Warna = 1
Gambar 105.jpg: Dimensi = 813 x 961, Saluran Warna = 1
Gambar 106.jpg: Dimensi = 813 x 961, Saluran Warna = 1
Gambar 107.jpg: Dimensi = 813 x 961, Saluran Warna = 1
Gambar 108.jpg: Dimensi = 813 x 961, Saluran Warna = 1
Gambar 109.jpg: Dimensi = 813 x 961, Saluran Warna = 1
Gambar 11.jpg: Dimensi = 813 x 961, Saluran Warna = 1
Gambar 110.jpg: Dimensi = 813 x 961, Saluran Warna = 1
Gambar 111.jpg: Dimensi = 813 x 961, Saluran Warna = 1
Gambar 112.jpg: Dimensi = 813 x 961, Saluran Warna = 1
Gambar 113.jpg: Dimensi = 813 x 961, Saluran Warna = 1
Gambar 114.jpg: 

In [32]:
# Small CNN: two convolution + max-pooling stages, then a dense classifier
# with four softmax outputs.
# NOTE(review): Keras image inputs are ordered (height, width, channels). The
# size listing printed earlier in this notebook reports the images as 813 wide
# by 961 high, so (961, 813, 1) may have been intended -- confirm, and keep it
# in sync with the generator's target_size.
# NOTE(review): the output layer has 4 units, but the data generator reports
# only 1 class in 'train_enhanced' -- check the directory layout.
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(813, 961, 1)),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(4, activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

  super().__init__(


In [34]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator


# Multiply every pixel value by 1/255; no augmentation is configured.
datagen = ImageDataGenerator(rescale=1./255)

# Prepare the training data
train_generator = datagen.flow_from_directory(
    'train_enhanced',  # training data directory
    # NOTE(review): Keras documents target_size as (height, width); the images
    # were reported as 813 wide by 961 high, so this may transpose the aspect
    # ratio -- confirm together with the model's input_shape.
    target_size=(813, 961),  # resize every image to 813x961 pixels
    color_mode='grayscale',
    batch_size=32,
    class_mode='categorical')

Found 500 images belonging to 1 classes.


In [None]:
def correct_image_orientation(img, predicted_rotation=180):
    """Rotate ``img`` by 180 degrees when its predicted rotation is 180.

    Args:
        img: Image array accepted by ``cv2.rotate``.
        predicted_rotation: Rotation, in degrees, the image is believed to
            have. Defaults to 180, a placeholder until an orientation model
            supplies the value, so by default every image is flipped.

    Returns:
        The image rotated by 180 degrees if ``predicted_rotation == 180``,
        otherwise ``img`` itself, unchanged.
    """
    # TODO: use the orientation model to predict `predicted_rotation` per
    # image instead of relying on the default.
    if predicted_rotation == 180:
        img = cv2.rotate(img, cv2.ROTATE_180)
    return img

In [None]:
def load_and_correct_images_from_folder(folder):
    """Load the images in ``folder`` as grayscale and fix their orientation.

    Directory entries that are not regular files are ignored, and so are
    files that OpenCV cannot decode (``cv2.imread`` returns ``None`` for
    those). Each decoded image is passed through
    ``correct_image_orientation``.

    Args:
        folder: Path of the directory to scan (not recursive).

    Returns:
        A tuple ``(images, dimensions)`` of two parallel lists: the corrected
        grayscale images and their ``(height, width)`` shapes.
    """
    images = []
    dimensions = []
    for filename in os.listdir(folder):
        img_path = os.path.join(folder, filename)
        if not os.path.isfile(img_path):
            continue
        img = cv2.imread(img_path)
        # The None check has to come before the colour conversion:
        # cv2.cvtColor raises when it is handed None.
        if img is None:
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        img = correct_image_orientation(img)
        images.append(img)
        dimensions.append(img.shape[:2])  # (height, width)
    return images, dimensions

In [31]:
# import cv2
# import os
# import pytesseract
# from pytesseract import Output
# from scipy import ndimage

# pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# def get_image_resolution(image_path):
#     img = cv2.imread(image_path)
#     height, width, _ = img.shape
#     return width, height

# def rotate_image_if_needed(image_path):
#     try:
#         # Membaca gambar
#         img = cv2.imread(image_path, 0)

#         # Mendeteksi teks dalam gambar
#         d = pytesseract.image_to_osd(img, output_type=Output.DICT)

#         # Mendapatkan orientasi teks
#         rotate_angle = d['rotate']

#         # Jika teks terbaca terbalik, putar gambar 180 derajat
#         if rotate_angle == 180:
#             rotated_img = ndimage.rotate(img, 180)
#             cv2.imwrite(image_path, rotated_img)
#     except pytesseract.TesseractError as e:
#         print(f"Error processing {image_path}: {e}")
#         pass

# # Mendapatkan daftar semua file gambar dalam direktori
# image_files = os.listdir('train_enhanced/train_enhanced')

# # Memproses setiap gambar
# for image_file in image_files:
#     image_path = os.path.join('train_enhanced/train_enhanced', image_file)
    
#     # Mendapatkan resolusi gambar
#     width, height = get_image_resolution(image_path)
    
#     # Melakukan pengecekan resolusi
#     if width < 813 or height < 961:  # Ubah nilai threshold sesuai kebutuhan Anda
#         print(f"Ignoring {image_file} due to low resolution.")
#         continue
    
#     rotate_image_if_needed(image_path)






In [49]:
import cv2
import pytesseract
from pytesseract import Output

# Set the path to the tesseract executable. The TESSERACT_CMD environment
# variable overrides the default Windows install location, so the notebook
# also runs on machines where tesseract lives elsewhere.
pytesseract.pytesseract.tesseract_cmd = os.environ.get(
    "TESSERACT_CMD", r'C:\Program Files\Tesseract-OCR\tesseract.exe'
)

# Function to rotate image
def rotate_image(image):
    """Return ``image`` turned upside down (rotated by 180 degrees)."""
    flipped = cv2.rotate(image, cv2.ROTATE_180)
    return flipped


for i in range(500):
    image_path = f'train_enhanced/train_enhanced/{i}.jpg'  # Update with your path and image format
    image = cv2.imread(image_path)

    # cv2.imread returns None for missing or undecodable files.
    if image is None:
        print(f"No image found at {image_path}")
        continue

    # Get the dimensions of the image
    height, width = image.shape[:2]

    # Perform OCR on the image to get text bounding boxes
    d = pytesseract.image_to_data(image, output_type=Output.DICT)

    # Heuristic: the page counts as upside down as soon as one confidently
    # recognised box has its top-left corner in the top-right quadrant.
    needs_rotation = False
    for conf, x, y in zip(d['conf'], d['left'], d['top']):
        # float() instead of int(): depending on the pytesseract version the
        # confidence may be an int, a float, or a string such as '96.5',
        # and int('96.5') raises ValueError.
        if float(conf) > 60 and x > width / 2 and y < height / 2:
            needs_rotation = True
            break

    # Only rewrite files that actually change, so untouched images are not
    # re-encoded as JPEG on every run.
    # NOTE: rotated files are overwritten in place, so re-running this cell
    # re-evaluates the heuristic on the already rotated images.
    if needs_rotation:
        cv2.imwrite(image_path, rotate_image(image))


No image found at train_enhanced/train_enhanced/500.jpg


In [None]:
def crop_image(image, no):
    """Cut fixed-size crops from the right-hand side of ``image`` and save them.

    Every crop is 230 columns wide and 320 rows tall, with its left edge at
    column ``813 - 230``. The crops start at rows 30, 330 and ``961 - 320``
    and are written to ``working/kanan/<no>_kanan_<k>.jpg``.

    NOTE(review): the constants 813 and 961 match the width and height the
    notebook resizes images to -- presumably ``image`` is expected to have
    exactly that size; confirm against the caller.
    NOTE(review): presumably the ``working/kanan`` directory must already
    exist, since nothing here creates it -- verify.

    Args:
        image: Array indexed as ``[row, column]``.
        no: Identifier placed at the start of each output file name.
    """
    # Crop size in pixels.
    height = 320
    width = 230
    
    # Crop 1: rows 30..350.
    x = 813-230
    y = 30
    cropped_image_1 = image[y:y+height, x:x+width]
    cv2.imwrite(f'working/kanan/{no}_kanan_1.jpg', cropped_image_1) 
    
    # Crop 2: rows 330..650 (overlaps crop 1 by 20 rows).
    x = 813-230
    y = 330
    cropped_image_2 = image[y:y+height, x:x+width]
    cv2.imwrite(f'working/kanan/{no}_kanan_2.jpg', cropped_image_2) 
    
    # Crop 3: rows 641..961 (overlaps crop 2 by 9 rows).
    x = 813-230
    y = 961-320
    cropped_image_3 = image[y:y+height, x:x+width]
    cv2.imwrite(f'working/kanan/{no}_kanan_3.jpg', cropped_image_3) 