In [12]:
import os
import cv2
import csv
import shutil
import tkinter as tk
from tkinter import filedialog
import numpy as np
import pytesseract
from PIL import Image
from PIL import ImageTk

# Modify this path according to your system
# Location of the Tesseract executable that pytesseract is told to invoke.
path_to_tesseract = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
pytesseract.pytesseract.tesseract_cmd = path_to_tesseract

# Working folders (machine-specific absolute paths; adjust for your system):
#   origine_folder_path   - receives a copy of every imported image
#   processed_folder_path - receives the annotated image and the raw OCR CSV
#   output_folder_path    - 'processed_csv' sub-folder for the CSV with counts
origine_folder_path = r'C:\Users\anass\OneDrive\Desktop\projet-traitement-image\origine'
processed_folder_path = r'C:\Users\anass\OneDrive\Desktop\projet-traitement-image\processed'
output_folder_path  = os.path.join(processed_folder_path, 'processed_csv')

def create_folders():
    """Make sure the origine, processed and output folders exist.

    The folders are visited in that order. A missing folder is created
    (including intermediate directories); either way the outcome is
    reported on stdout.
    """
    folders = (
        ("Origine", origine_folder_path),
        ("Processed", processed_folder_path),
        ("Output", output_folder_path),
    )
    for label, folder in folders:
        if os.path.exists(folder):
            print(f"{label} folder already exists.")
            continue
        os.makedirs(folder)
        print(f"{label} folder created successfully.")

def detect_outline(image):
    """Locate the largest four-cornered external contour in a BGR image.

    The image is converted to grayscale, blurred and run through Canny edge
    detection. External contours are then examined from largest area to
    smallest, and the first one whose polygonal approximation has exactly
    four vertices wins.

    Args:
        image: BGR image array (it is converted with cv2.COLOR_BGR2GRAY).

    Returns:
        The 4-point approximated contour, or None when no contour
        approximates to a quadrilateral.
    """
    print("Detecting outline...")
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    edges = cv2.Canny(smoothed, 50, 150)
    found, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Lazily approximate contours, biggest first, keeping only quadrilaterals;
    # next() stops at the first match so smaller contours are never processed.
    quadrilaterals = (
        polygon
        for polygon in (
            cv2.approxPolyDP(candidate, 0.02 * cv2.arcLength(candidate, True), True)
            for candidate in sorted(found, key=cv2.contourArea, reverse=True)
        )
        if len(polygon) == 4
    )
    outline_contour = next(quadrilaterals, None)

    print("Outline detected.")
    return outline_contour

def detect_inner_lines(image, outline_contour):
    """Crop the outlined region and mark the quadrilateral shapes found in it.

    The bounding rectangle of ``outline_contour`` is cropped from ``image``,
    thresholded, closed with a tall 1x30 kernel, and every resulting external
    contour with area above 500 whose polygonal approximation has four
    vertices is drawn in green on the crop.

    Args:
        image: BGR image array (it is converted with cv2.COLOR_BGR2GRAY).
        outline_contour: contour delimiting the region of interest, or None.

    Returns:
        The annotated crop as an independent array (``image`` itself is left
        unmodified), or None when ``outline_contour`` is None.
    """
    print("Detecting inner lines...")
    if outline_contour is None:
        print("Outline not found in the image.")
        return None

    x, y, w, h = cv2.boundingRect(outline_contour)
    # Slicing a NumPy array yields a view; copy it so that drawing the
    # contours below does not silently paint on the caller's image.
    outline_region = image[y:y+h, x:x+w].copy()
    gray = cv2.cvtColor(outline_region, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 15, 4)

    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 30))
    detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, vertical_kernel, iterations=2)
    contours, _ = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    for contour in contours:
        # Ignore small specks; only sizeable shapes are candidates.
        if cv2.contourArea(contour) <= 500:
            continue
        peri = cv2.arcLength(contour, True)
        approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
        if len(approx) == 4:
            cv2.drawContours(outline_region, [approx], -1, (0, 255, 0), 2)
    print("Inner lines detected.")
    return outline_region

def extract_table_from_image(image_path):
    """Archive the source image, detect its table outline and save an annotated crop.

    The image is copied into the origine folder, loaded with OpenCV, passed
    through detect_outline() and detect_inner_lines(), and the resulting
    annotated crop is written to the processed folder as
    ``<original name>_processed.jpg``.

    Args:
        image_path: path of the image file to process.

    Returns:
        The path of the saved annotated image, or None when the image cannot
        be read or no outline is found.
    """
    print(f"Extracting table from image: {image_path}")
    create_folders()
    original_file_path = os.path.join(origine_folder_path, os.path.basename(image_path))
    try:
        shutil.copy(image_path, original_file_path)
    except shutil.SameFileError:
        # The user picked the image from the origine folder itself, so the
        # archived copy already exists; carry on with processing.
        print("Original image is already in the origine folder.")

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread does not raise on unreadable or unsupported files; it
        # returns None, which would otherwise crash later inside cvtColor.
        print(f"Could not read image: {image_path}")
        return None

    outline_contour = detect_outline(img)
    inner_lines_image = detect_inner_lines(img, outline_contour)
    if inner_lines_image is None:
        return None

    base_name = os.path.splitext(os.path.basename(image_path))[0]
    processed_image_path = os.path.join(processed_folder_path, base_name + '_processed.jpg')
    cv2.imwrite(processed_image_path, inner_lines_image)
    print(f"Processed image saved to: {processed_image_path}")
    return processed_image_path

def save_to_csv(data, file_path):
    """Write ``data`` to ``file_path`` as CSV, one inner sequence per line.

    Args:
        data: iterable of rows, each row an iterable of cell values.
        file_path: destination file; it is created or overwritten.
    """
    print(f"Saving data to CSV: {file_path}")
    # newline='' lets the csv module control line endings itself.
    with open(file_path, 'w', newline='') as csvfile:
        csv.writer(csvfile).writerows(data)
    print("Data saved to CSV.")

def process_image(file_path):
    """Run the full pipeline on one image: table detection, OCR and CSV export.

    The annotated table crop produced by extract_table_from_image() is
    thresholded, closed with vertical and horizontal kernels, and every
    resulting external region with area above 500 is OCR'd with Tesseract
    (``--psm 6``). Each non-blank OCR line is split on whitespace into one
    CSV row. The rows are saved to ``<original name>.csv`` in the processed
    folder and that file is then handed to process_csv_file().

    Nothing is written when the table crop could not be produced or re-read.

    Args:
        file_path: path of the image file to process.
    """
    print(f"Processing image: {file_path}")
    create_folders()
    processed_image_path = extract_table_from_image(file_path)
    if not processed_image_path:
        return

    img = cv2.imread(processed_image_path)
    if img is None:
        # cv2.imread returns None instead of raising when the file is unreadable.
        print(f"Could not read processed image: {processed_image_path}")
        return
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    thresh = cv2.adaptiveThreshold(gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 15, 4)

    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 30))
    horizontal_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 1))

    vertical_lines = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, vertical_kernel, iterations=2)
    horizontal_lines = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, horizontal_kernel, iterations=2)

    combined_lines = cv2.addWeighted(vertical_lines, 0.5, horizontal_lines, 0.5, 0.0)
    contours, _ = cv2.findContours(combined_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # findContours gives no reading-order guarantee, so sort the candidate
    # regions top-to-bottom, then left-to-right, before running OCR. This
    # keeps the CSV rows in the order they appear on the page.
    regions = sorted(
        (cv2.boundingRect(contour) for contour in contours if cv2.contourArea(contour) > 500),
        key=lambda box: (box[1], box[0]),
    )

    extracted_data = []
    for x, y, w, h in regions:
        table_roi = gray[y:y+h, x:x+w]
        text = pytesseract.image_to_string(table_roi, config='--psm 6')
        # Skip whitespace-only lines (e.g. a trailing form feed in the OCR
        # output): split() would turn them into [] and emit a blank CSV row.
        extracted_data.extend(line.split() for line in text.split('\n') if line.strip())

    csv_file_path = os.path.join(processed_folder_path, os.path.splitext(os.path.basename(file_path))[0] + '.csv')
    save_to_csv(extracted_data, csv_file_path)

    print(f"Data extracted and saved to CSV: {csv_file_path}")
    # Process the CSV file
    process_csv_file(csv_file_path)

def process_csv_file(input_file_path):
    """Copy a CSV into the 'processed_csv' folder with two count columns appended.

    For every non-empty row, the number of 'P' characters and the number of
    'A' characters found in the row's last cell are appended (in that order)
    as two extra cells. Empty rows are written through unchanged. The result
    is saved as ``processed-<input file name>`` inside the 'processed_csv'
    sub-folder of the processed folder, which is created if missing.

    Args:
        input_file_path: path of the CSV file to read.
    """
    output_folder_path = os.path.join(processed_folder_path, 'processed_csv')
    if os.path.exists(output_folder_path):
        print("Processed CSV folder already exists.")
    else:
        os.makedirs(output_folder_path)
        print("Processed CSV folder created successfully.")

    output_file_path = os.path.join(
        output_folder_path,
        "processed-" + os.path.basename(input_file_path),
    )

    # The input is opened first so a missing input never creates an output file.
    with open(input_file_path, 'r') as infile, \
            open(output_file_path, 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        for row in csv.reader(infile):
            if row:
                # Capture the last cell before the row grows.
                final_cell = row[-1]
                row += [str(final_cell.count('P')), str(final_cell.count('A'))]
            writer.writerow(row)
    print(f"Processed CSV file saved to: {output_file_path}")

def import_file():
    """Ask the user for a file through a dialog and process the chosen image.

    When the dialog returns an empty selection, nothing is processed and a
    notice is printed instead.
    """
    print("Importing file...")
    chosen_path = filedialog.askopenfilename()
    if not chosen_path:
        print("No file selected.")
        return
    process_image(chosen_path)
    print("File processed successfully.")

def create_gui():
    """Build the main window and run the Tk event loop.

    The window is sized to a background picture loaded from a fixed path and
    shows a single 'Import File' button wired to import_file(). The call
    blocks in ``mainloop()`` until the window is closed.
    """
    print("Creating GUI...")
    root = tk.Tk()
    root.title("Image Table Extractor")

    backdrop = Image.open(r'C:\Users\anass\Downloads\data-mining-845x321.png')
    backdrop_photo = ImageTk.PhotoImage(backdrop)
    width, height = backdrop.size

    # Size the window to match the background picture exactly.
    root.geometry(f"{width}x{height}")

    backdrop_label = tk.Label(root, image=backdrop_photo)
    # Extra reference stored on the widget -- presumably so the PhotoImage
    # stays alive for as long as the label does.
    backdrop_label.image = backdrop_photo
    backdrop_label.pack(fill="both", expand=True)

    # The button's top-left corner is placed where a nominal 100x30 px
    # button would be centred; the button itself keeps its natural size.
    nominal_width, nominal_height = 100, 30
    import_button = tk.Button(root, text='Import File', command=import_file)
    import_button.place(
        x=(width - nominal_width) // 2,
        y=(height - nominal_height) // 2,
    )

    root.mainloop()

# Start the GUI only when this code runs as the main module, not on import.
if __name__ == "__main__":
    create_gui()


Creating GUI...
Importing file...
Processing image: C:/Users/anass/Downloads/testing script/Dynamic-Attendance-Sheet-Using-Power-Pivot-1.jpg
Origine folder created successfully.
Processed folder created successfully.
Extracting table from image: C:/Users/anass/Downloads/testing script/Dynamic-Attendance-Sheet-Using-Power-Pivot-1.jpg
Origine folder already exists.
Processed folder already exists.
Detecting outline...
Outline detected.
Detecting inner lines...
Inner lines detected.
Processed image saved to: C:\Users\anass\OneDrive\Desktop\projet-traitement-image\processed\Dynamic-Attendance-Sheet-Using-Power-Pivot-1_processed.jpg
Saving data to CSV: C:\Users\anass\OneDrive\Desktop\projet-traitement-image\processed\Dynamic-Attendance-Sheet-Using-Power-Pivot-1.csv
Data saved to CSV.
Data extracted and saved to CSV: C:\Users\anass\OneDrive\Desktop\projet-traitement-image\processed\Dynamic-Attendance-Sheet-Using-Power-Pivot-1.csv
File processed successfully.
