In [1]:
import os
import cv2
import numpy as np
import xml.etree.ElementTree as ET
import pytesseract
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Input, Conv2D, Conv2DTranspose, MaxPooling2D, GlobalAveragePooling2D, Dense
from tensorflow.keras.applications import VGG19
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from collections import Counter

KeyboardInterrupt: 

In [None]:
# Global configuration constants
INPUT_SHAPE = (512, 512, 3)  # (height, width, channels): load_data resizes every image to this size
BATCH_SIZE = 32  # mini-batch size passed to train_model
EPOCHS = 50  # number of training epochs passed to train_model
IMAGE_DIR = 'data/processed'  # directory holding the images (matched to annotations by file name)
XML_DIR = 'data/xml'  # directory holding the XML annotation files

In [2]:
def parse_xml(xml_path):
    """Read the labelled bounding boxes from one annotation XML file.

    Every ``<object>`` element directly under the root is expected to carry a
    ``<name>`` and a ``<bndbox>`` with ``xmin``/``ymin``/``xmax``/``ymax``
    children.

    Args:
        xml_path: Path of the XML file to parse.

    Returns:
        dict mapping each object name (whitespace-stripped) to
        ``(xmin, ymin, xmax, ymax)`` as ints. If several objects share a name,
        the last one in the file wins. Objects that lack a name, a bounding
        box, or any of the four coordinates are skipped.

    Raises:
        xml.etree.ElementTree.ParseError: if the file is not well-formed XML.
        ValueError: if a coordinate is present but not numeric.
    """
    root = ET.parse(xml_path).getroot()

    objects = {}
    for obj in root.findall('object'):
        name = (obj.findtext('name') or '').strip()
        bbox = obj.find('bndbox')
        if not name or bbox is None:
            continue

        raw_coords = [bbox.findtext(tag) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        if any(value is None for value in raw_coords):
            continue

        # Some annotation tools write coordinates as floats ("12.0");
        # int() alone rejects those, so go through float() first.
        objects[name] = tuple(int(float(value)) for value in raw_coords)

    return objects

In [None]:
def perform_ocr(img, bbox):
    """Run Tesseract OCR on one rectangular region of an image.

    Args:
        img: Image array indexed as ``img[row, col]``.
        bbox: ``(x1, y1, x2, y2)`` pixel coordinates of the region.

    Returns:
        The recognised text with surrounding whitespace stripped, or ``''``
        when the box does not cover any pixel of the image.
    """
    height, width = img.shape[:2]
    x1, y1, x2, y2 = (int(value) for value in bbox)

    # Clamp to the image bounds: NumPy would interpret negative coordinates as
    # offsets from the far edge and silently select the wrong region.
    x1, x2 = max(0, x1), min(width, x2)
    y1, y2 = max(0, y1), min(height, y2)
    if x2 <= x1 or y2 <= y1:
        return ''

    roi = img[y1:y2, x1:x2]
    return pytesseract.image_to_string(roi).strip()

In [None]:
def detect_vertical_lines(img, bbox, hough_threshold=100, length_margin=20,
                          max_line_gap=20, max_tilt=10):
    """Find near-vertical line segments inside one region of an image.

    The region is cropped, converted to grayscale, edge-detected with Canny and
    passed to the probabilistic Hough transform.

    Args:
        img: Colour image array (converted with ``cv2.COLOR_BGR2GRAY``).
        bbox: ``(x1, y1, x2, y2)`` pixel coordinates of the region to search.
        hough_threshold: Accumulator threshold passed to ``cv2.HoughLinesP``.
        length_margin: A segment must be at least ``crop height - length_margin``
            pixels long to be kept by the Hough transform.
        max_line_gap: ``maxLineGap`` passed to ``cv2.HoughLinesP``.
        max_tilt: A segment counts as vertical when the x distance between its
            end points is strictly below this value.

    Returns:
        Sorted list of unique x positions (plain ints) of the accepted
        segments' first end points. The positions are relative to the left
        edge of the cropped region, not to the full image. Empty when the
        region contains no pixels or no segment qualifies.
    """
    x1, y1, x2, y2 = bbox
    roi = img[y1:y2, x1:x2]
    if roi.size == 0:
        # cv2.cvtColor raises on an empty array; there is nothing to detect anyway
        return []

    gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    # Use the real crop height (the bbox may extend past the image) and keep
    # the minimum length positive for short regions.
    min_length = max(roi.shape[0] - length_margin, 1)
    segments = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=hough_threshold,
                               minLineLength=min_length, maxLineGap=max_line_gap)
    if segments is None:
        return []

    # Each entry is [[sx1, sy1, sx2, sy2]]; separate names avoid shadowing the bbox
    columns = set()
    for sx1, _sy1, sx2, _sy2 in segments[:, 0]:
        if abs(int(sx2) - int(sx1)) < max_tilt:
            columns.add(int(sx1))

    return sorted(columns)

In [None]:

def _find_image_path(image_dir, stem, image_exts):
    """Return the first existing ``stem + ext`` file in image_dir, else the first candidate path."""
    candidates = [os.path.join(image_dir, stem + ext) for ext in image_exts]
    for candidate in candidates:
        if os.path.isfile(candidate):
            return candidate
    # Nothing exists: hand back a path anyway so the caller can report it
    return candidates[0] if candidates else os.path.join(image_dir, stem)


def load_data(image_dir, xml_dir, image_exts=('.JPG', '.jpg', '.jpeg', '.JPEG', '.png', '.PNG')):
    """Load images, build table/column masks and OCR the year/location fields.

    For every ``*.xml`` annotation in ``xml_dir`` the image with the same base
    name is read from ``image_dir``, resized to ``INPUT_SHAPE`` and paired with
    two masks of the same height and width. Annotations whose image cannot be
    read are reported and skipped.

    Args:
        image_dir: Directory containing the images.
        xml_dir: Directory containing the XML annotation files.
        image_exts: Image file extensions to try, in order of preference.

    Returns:
        Tuple ``(images, table_masks, column_masks, years, locations, filenames)``:
        ``images`` is an array of resized RGB images, the two mask arrays are
        float32 with value 1 inside the table box / on detected column lines,
        ``years`` and ``locations`` are lists of OCR strings (``''`` when the
        annotation has no such object) and ``filenames`` lists the annotation
        file names. All six are in the same order.
    """
    images = []
    table_masks = []
    column_masks = []
    years = []
    locations = []
    filenames = []  # annotation file names, parallel to the other lists

    # os.listdir order is arbitrary; sorting makes the sample order (and thus
    # any seeded split performed on the result) reproducible.
    for xml_file in sorted(os.listdir(xml_dir)):
        stem, ext = os.path.splitext(xml_file)
        if ext.lower() != '.xml':
            continue

        xml_path = os.path.join(xml_dir, xml_file)
        img_path = _find_image_path(image_dir, stem, image_exts)

        # Load image
        img = cv2.imread(img_path)
        if img is None:
            print(f"Unable to read image: {img_path}")
            continue

        # Parse XML
        objects = parse_xml(xml_path)

        # Resize image and compute the factors used to rescale bounding boxes
        target_h, target_w = INPUT_SHAPE[0], INPUT_SHAPE[1]
        original_height, original_width = img.shape[:2]
        img_resized = cv2.resize(img, (target_w, target_h))
        scale_x, scale_y = target_w / original_width, target_h / original_height

        # Create masks
        table_mask = np.zeros((target_h, target_w), dtype=np.float32)
        column_mask = np.zeros((target_h, target_w), dtype=np.float32)

        if 'table' in objects:
            table_box = objects['table']
            xmin, ymin = int(table_box[0] * scale_x), int(table_box[1] * scale_y)
            xmax, ymax = int(table_box[2] * scale_x), int(table_box[3] * scale_y)
            table_mask[ymin:ymax, xmin:xmax] = 1

            # detect_vertical_lines searches the cropped table, so its x values
            # are relative to the table's left edge; add that offset back
            # before scaling to the resized image.
            for x in detect_vertical_lines(img, table_box):
                scaled_x = int((table_box[0] + x) * scale_x)
                cv2.line(column_mask, (scaled_x, ymin), (scaled_x, ymax), 1, 2)

        # Extract year and location using OCR
        year_text = perform_ocr(img, objects['year']) if 'year' in objects else ''
        location_text = perform_ocr(img, objects['location']) if 'location' in objects else ''

        # Print OCR results immediately
        print(f"File: {xml_file}")
        print(f"Year (OCR): {year_text}")
        print(f"Location (OCR): {location_text}")
        print("-" * 30)

        images.append(cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB))
        table_masks.append(table_mask)
        column_masks.append(column_mask)
        years.append(year_text)
        locations.append(location_text)
        filenames.append(xml_file)

    return np.array(images), np.array(table_masks), np.array(column_masks), years, locations, filenames

In [None]:
def _build_mask_decoder(encoder, name):
    """Stack the upsampling decoder on top of encoder and return it as a Sequential called name."""
    layers = [encoder]
    for filters in (256, 128, 64, 32):
        layers.append(Conv2DTranspose(filters, 4, strides=2, activation='relu', padding='same'))
        layers.append(Conv2D(filters, 3, activation='relu', padding='same'))
    layers.append(Conv2DTranspose(16, 4, strides=2, activation='relu', padding='same'))
    # Single-channel sigmoid map: one probability per pixel
    layers.append(Conv2D(1, 1, activation='sigmoid'))
    return Sequential(layers, name=name)


def create_tablenet_model(input_shape):
    """Build a two-headed segmentation network on a frozen VGG19 encoder.

    A frozen, ImageNet-initialised VGG19 (without its classifier) followed by
    four 512-filter convolutions forms an encoder shared by both heads. Each
    head applies five stride-2 transposed convolutions (32x upsampling in
    total) and ends in a one-channel sigmoid map.

    The two branches are named ``'table_output'`` and ``'column_output'``.
    The outer model's output names are taken from the layer that produced each
    output tensor (here the branch itself), so these are the names that
    per-output ``loss`` / ``metrics`` / target dictionaries must use.

    NOTE(review): inputs reach VGG19 unchanged; this function applies no
    ``vgg19.preprocess_input`` - confirm whether callers are expected to.

    Args:
        input_shape: ``(height, width, channels)`` of the input images.

    Returns:
        A Keras ``Model`` with outputs ``[table_output, column_output]``.
    """
    base_model = VGG19(input_shape=input_shape, include_top=False, weights='imagenet')
    base_model.trainable = False

    # Encoder shared (same weights) by both heads
    common_model = Sequential([
        base_model,
        Conv2D(512, 1, activation='relu'),
        Conv2D(512, 3, activation='relu', padding='same'),
        Conv2D(512, 3, activation='relu', padding='same'),
        Conv2D(512, 3, activation='relu', padding='same')
    ])

    table_branch = _build_mask_decoder(common_model, 'table_output')
    column_branch = _build_mask_decoder(common_model, 'column_output')

    inputs = Input(shape=input_shape)
    table_output = table_branch(inputs)
    column_output = column_branch(inputs)

    model = Model(inputs=inputs, outputs=[table_output, column_output])
    return model

In [None]:
def train_model(model, X_train, y_train_table, y_train_column, X_val, y_val_table, y_val_column, batch_size, epochs):
    """Compile a two-output model with binary cross-entropy and fit it.

    Args:
        model: Keras model whose outputs are, in order, the table mask and the
            column mask.
        X_train: Training images.
        y_train_table: Training table masks.
        y_train_column: Training column masks.
        X_val: Validation images.
        y_val_table: Validation table masks.
        y_val_column: Validation column masks.
        batch_size: Mini-batch size passed to ``model.fit``.
        epochs: Number of epochs passed to ``model.fit``.

    Returns:
        The ``History`` object returned by ``model.fit``.

    Raises:
        ValueError: if the model does not have exactly two outputs.
    """
    # Key every per-output dict by the names the model actually reports, so
    # compile/fit cannot fail on a mismatch with hard-coded layer names.
    output_names = list(model.output_names)
    if len(output_names) != 2:
        raise ValueError(
            f"Expected a model with two outputs (table, column), "
            f"got {len(output_names)}: {output_names}"
        )
    table_name, column_name = output_names

    model.compile(optimizer=Adam(),
                  loss={table_name: 'binary_crossentropy',
                        column_name: 'binary_crossentropy'},
                  loss_weights={table_name: 1.0, column_name: 1.0},
                  # Spelled out per output so it does not depend on how a flat
                  # metrics list is distributed over multiple outputs
                  metrics={table_name: ['accuracy'], column_name: ['accuracy']})

    history = model.fit(X_train,
                        {table_name: y_train_table,
                         column_name: y_train_column},
                        batch_size=batch_size,
                        epochs=epochs,
                        validation_data=(X_val, {table_name: y_val_table,
                                                 column_name: y_val_column}))

    return history

In [None]:
# Load every annotated image together with its masks and OCR text
images, table_masks, column_masks, years, locations, filenames = load_data(IMAGE_DIR, XML_DIR)

# Fail here with a clear message rather than later inside the split or the fit
assert len(images) > 0, f"No samples loaded from {IMAGE_DIR!r} / {XML_DIR!r}"
assert len(images) == len(table_masks) == len(column_masks) == len(filenames), \
    "load_data returned sequences of different lengths"

In [None]:
# Hold out 20% of the samples; the fixed seed makes the partition repeatable
# for a given sample order
(
    X_train, X_test,
    y_train_table, y_test_table,
    y_train_column, y_test_column,
) = train_test_split(
    images,
    table_masks,
    column_masks,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Build the two-headed network and fit it, validating on the held-out split
model = create_tablenet_model(input_shape=INPUT_SHAPE)
history = train_model(
    model,
    X_train=X_train,
    y_train_table=y_train_table,
    y_train_column=y_train_column,
    X_val=X_test,
    y_val_table=y_test_table,
    y_val_column=y_test_column,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
)

In [None]:
# Recap of the OCR text extracted for every loaded file
print("\nSummary of OCR Results:")
for filename, year, location in zip(filenames, years, locations):
    # One call, newline-separated: prints the same four lines per file
    print(
        f"File: {filename}",
        f"Year: {year}",
        f"Location: {location}",
        "-" * 30,
        sep="\n",
    )

In [None]:
# Tally how often each OCR string occurs, most frequent first
year_counts, location_counts = Counter(years), Counter(locations)

print("\nYear Distribution:")
for year, count in year_counts.most_common():
    print(f"{year}: {count}")

print("\nLocation Distribution:")
for location, count in location_counts.most_common():
    print(f"{location}: {count}")