In [3]:
import os
import cv2
import fitz
import numpy as np
from PIL import Image

def preprocess_image(image):
    """Turn a colour page image into a binary mask for contour detection.

    Steps, in order: grayscale conversion, 9x9 Gaussian blur, inverted
    adaptive Gaussian threshold (block size 21, constant 5), a 3x3
    morphological close and finally a single 2x2 erosion.

    Parameters:
    - image: 3-channel image array accepted by ``cv2.cvtColor``.

    Returns:
    - The eroded single-channel binary image.
    """
    # Blur the grayscale page before thresholding.
    smoothed = cv2.GaussianBlur(
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), (9, 9), 0
    )

    binary = cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        21,
        5,
    )

    # Close small gaps so broken strokes of one character join up.
    joined = cv2.morphologyEx(
        binary,
        cv2.MORPH_CLOSE,
        cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3)),
    )

    # A light erosion pulls apart shapes that only touch along thin lines.
    return cv2.erode(
        joined,
        cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2)),
        iterations=1,
    )

def enlarge_image(image, scale_factor=3):
    """Return ``image`` scaled by ``scale_factor`` on both axes (Lanczos-4)."""
    return cv2.resize(
        image,
        (0, 0),  # zero dsize: the output size is derived from fx / fy
        fx=scale_factor,
        fy=scale_factor,
        interpolation=cv2.INTER_LANCZOS4,
    )

def enhance_quality(image):
    """Sharpen ``image`` with a 3x3 kernel, then denoise the result.

    The kernel is -1 everywhere with 9 in the centre; denoising uses
    ``cv2.fastNlMeansDenoisingColored`` with parameters (10, 10, 7, 21).
    """
    sharpen_kernel = np.array([
        [-1, -1, -1],
        [-1,  9, -1],
        [-1, -1, -1],
    ])
    crisp = cv2.filter2D(image, -1, sharpen_kernel)
    return cv2.fastNlMeansDenoisingColored(crisp, None, 10, 10, 7, 21)

def create_mapping(coordinates, aspect_ratio_threshold, is_row=True):
    """Group bounding boxes into numbered bands along one axis.

    The boxes are walked in the order given (callers pre-sort them by the
    relevant coordinate).  Each kept box either opens a new band or widens
    the upper limit of the current one.

    Parameters:
    - coordinates: iterable of ``(page_num, x, y, w, h)`` tuples.
    - aspect_ratio_threshold: boxes exceeding this ratio are ignored
      (``h / w`` for rows; ``w / h`` together with ``w > 14 and h < 8``
      for columns).
    - is_row: band along y using heights when True, along x using widths
      when False.

    Returns:
    - List of ``(number, lower_limit, upper_limit)`` tuples, numbered from 1.
      A band spans from the box's coordinate to coordinate + half its size.
    """
    mapping = []

    for _page_num, x, y, w, h in coordinates:
        if is_row:
            if h / w > aspect_ratio_threshold:
                continue
            coord, size = y, h
        else:
            if w / h > aspect_ratio_threshold and w > 14 and h < 8:
                continue
            coord, size = x, w

        reach = coord + int(size / 2)

        # Decide on "is there a band yet?" rather than on the loop index:
        # the first input box may have been filtered out above, in which
        # case the first *kept* box must still open band 1.
        if not mapping or coord > mapping[-1][2]:
            mapping.append((len(mapping) + 1, coord, reach))
        else:
            number, lower_limit, upper_limit = mapping[-1]
            mapping[-1] = (number, lower_limit, max(upper_limit, reach))

    return mapping

def assign_number(coord, mapping):
    """Return the number of the first band containing ``coord``, else -1.

    ``mapping`` holds ``(number, lower_limit, upper_limit)`` tuples; both
    limits are inclusive.
    """
    matches = (
        number
        for number, low, high in mapping
        if low <= coord <= high
    )
    return next(matches, -1)

def extract_alphabets(pdf_path, output_folder, aspect_ratio_threshold=3):
    """Crop every detected glyph of a PDF into its own enlarged PNG.

    Each page is rendered, binarised with ``preprocess_image`` and scanned
    for external contours.  The bounding boxes of all pages are grouped into
    row bands with ``create_mapping``; each kept box is then cropped from
    the page it was found on, enlarged, enhanced and saved as
    ``{page}_row{row}_x{x}_y{y}_w{w}_h{h}.png`` (a numeric suffix is added
    if that name already exists).

    Parameters:
    - pdf_path: path of the PDF to read.
    - output_folder: directory for the crops; created if missing.
    - aspect_ratio_threshold: boxes with ``h / w`` above this are skipped.

    Returns:
    - ``(coordinates, row_mapping)`` where ``coordinates`` is a list of
      ``(page_num, x, y, w, h)`` and ``row_mapping`` is the list produced by
      ``create_mapping``.  When no contour is found both are empty lists so
      that callers can always unpack the result.
    """
    os.makedirs(output_folder, exist_ok=True)

    coordinates = []
    # Rendered page arrays indexed by page number.  Crops must come from the
    # page their box was detected on, not from whichever page was rendered
    # last.
    page_images = []

    pdf_document = fitz.open(pdf_path)
    try:
        for page_num in range(len(pdf_document)):
            pixmap = pdf_document.load_page(page_num).get_pixmap()
            page_array = np.frombuffer(pixmap.samples, dtype=np.uint8).reshape(
                (pixmap.height, pixmap.width, pixmap.n)
            )
            page_images.append(page_array)

            contours, _ = cv2.findContours(
                preprocess_image(page_array),
                cv2.RETR_EXTERNAL,
                cv2.CHAIN_APPROX_SIMPLE,
            )
            for contour in contours:
                x, y, w, h = cv2.boundingRect(contour)
                coordinates.append((page_num, x, y, w, h))
    finally:
        # Release the file handle even if rendering or detection fails.
        pdf_document.close()

    if not coordinates:
        print("No contours found.")
        return [], []

    coordinates_sorted_by_y = sorted(coordinates, key=lambda item: item[2])
    row_mapping = create_mapping(coordinates_sorted_by_y, aspect_ratio_threshold, is_row=True)

    print("Row mapping:", row_mapping)

    for page_num, x, y, w, h in coordinates:
        if h / w > aspect_ratio_threshold:
            continue

        # Specks are never saved, so reject them before the costly
        # enlarge / denoise step.
        if w < 6 and h < 6:
            continue

        row_num = assign_number(y, row_mapping)
        if row_num == -1:
            continue

        alphabet_region = page_images[page_num][y:y + h, x:x + w]
        enhanced_region = enhance_quality(enlarge_image(alphabet_region))

        base_filename = f"{page_num}_row{row_num}_x{x}_y{y}_w{w}_h{h}"
        counter = 1
        filename = f"{base_filename}.png"
        while os.path.exists(os.path.join(output_folder, filename)):
            filename = f"{base_filename}_{counter}.png"
            counter += 1

        Image.fromarray(enhanced_region).save(os.path.join(output_folder, filename))

    return coordinates, row_mapping

# Run the extraction for one PDF.  The crops are written to output_folder;
# coordinates and row_mapping are kept as globals for the following cells.
pdf_path = "Analysis/check2.pdf"
output_folder = "Analysis/check2"
coordinates, row_mapping = extract_alphabets(pdf_path, output_folder)


Row mapping: [(1, 71, 78), (2, 105, 116), (3, 121, 136), (4, 144, 147), (5, 154, 170), (6, 174, 177), (7, 182, 188), (8, 221, 223), (9, 255, 261), (10, 288, 320), (11, 325, 328), (12, 337, 349), (13, 356, 359), (14, 363, 369), (15, 377, 386), (16, 392, 405), (17, 416, 419), (18, 426, 441), (19, 443, 445), (20, 446, 448), (21, 449, 452), (22, 453, 459), (23, 501, 504), (24, 541, 553), (25, 581, 590), (26, 596, 598), (27, 621, 629), (28, 635, 649), (29, 655, 658), (30, 665, 680), (31, 685, 689), (32, 693, 699), (33, 706, 715), (34, 721, 737), (35, 742, 745), (36, 752, 767), (37, 774, 776), (38, 779, 786)]


In [2]:
def find_general_boundaries(coordinates):
    """Return the overall horizontal extent ``(min_x, max_x)`` of all boxes.

    ``coordinates`` holds ``(page_num, x, y, w, h)`` tuples; ``max_x`` is the
    largest right edge ``x + w``.
    """
    left_edge = min(entry[1] for entry in coordinates)
    right_edge = max(entry[1] + entry[3] for entry in coordinates)
    return left_edge, right_edge

def is_row_centered(row_coords, general_min_x, general_max_x, threshold=0.15):
    """Tell whether a row lies entirely inside the central part of the page.

    The central region is the general extent shrunk by ``threshold`` of its
    width on each side.  ``row_coords`` holds ``(x, y, w, h)`` tuples.
    """
    row_left = min(box[0] for box in row_coords)
    row_right = max(box[0] + box[2] for box in row_coords)

    inner_left = general_min_x + (general_max_x - general_min_x) * threshold
    inner_right = general_max_x - (general_max_x - general_min_x) * threshold

    if row_left < inner_left:
        return False
    return row_right <= inner_right

def find_first_valid_row(coordinates, row_mapping):
    """Return the number of the first row that is not centred on the page.

    Rows are scanned in ``row_mapping`` order.  At the first non-empty row
    that ``is_row_centered`` rejects, the row just before it is inspected
    once: it is returned instead when it holds more than two boxes, or when
    none of its boxes satisfies ``(w / h > 1.6 and h > 8) or w < 5``.
    Returns None when every row is empty or centred.

    Parameters:
    - coordinates: list of ``(page_num, x, y, w, h)`` tuples.
    - row_mapping: list of ``(row_num, lower_limit, upper_limit)`` tuples.
    """
    general_min_x, general_max_x = find_general_boundaries(coordinates)

    def boxes_between(lower, upper):
        # Boxes whose top edge y falls inside the inclusive band.
        return [(x, y, w, h) for _, x, y, w, h in coordinates if lower <= y <= upper]

    for index, (row_num, lower_limit, upper_limit) in enumerate(row_mapping):
        current_boxes = boxes_between(lower_limit, upper_limit)
        if not current_boxes:
            continue
        if is_row_centered(current_boxes, general_min_x, general_max_x):
            continue

        if index > 0:
            prev_row_num, prev_lower, prev_upper = row_mapping[index - 1]
            previous_boxes = boxes_between(prev_lower, prev_upper)
            # The size test is only evaluated for rows of at most two boxes.
            if len(previous_boxes) > 2 or not any(
                ((w / h > 1.6) and h > 8) or (w < 5)
                for _, _, w, h in previous_boxes
            ):
                return prev_row_num

        return row_num

    return None  # no non-centred row exists

# Find and store the first valid row number, using the coordinates and
# row_mapping globals returned by extract_alphabets in the first cell.
first_valid_row = find_first_valid_row(coordinates, row_mapping)
print("First valid row number:", first_valid_row)


First valid row number: 4


In [3]:
# import os
# import cv2
# import numpy as np
# from tensorflow.keras.models import load_model

# # Load the trained model
# model = load_model('../CNN_Model/cnn_recognizer_music_13_v2.h5')

# # Preprocess the input image
# def preprocess_image(image_path):
#     image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
#     image = cv2.resize(image, (32, 32))  # Resize to match the model's input size
#     image = np.expand_dims(image, axis=0)  # Add batch dimension
#     return image

# # Pass the image through the model and get predictions
# def predict_class(image_path):
#     preprocessed_image = preprocess_image(image_path)
#     predictions = model.predict(preprocessed_image)
#     predicted_class_index = np.argmax(predictions, axis=1)
#     max_probability = np.max(predictions, axis=1)
#     return predicted_class_index[0], max_probability[0]

# # Function to find important rows based on image filenames and class labels
# def find_important_rows(output_folder, first_valid_row, primary_label, secondary_labels):
#     important_rows = set()

#     for filename in os.listdir(output_folder):
#         if filename.endswith(".png"):
#             # Parse filename to get row number
#             parts = filename.split("_")
#             row_num = int(parts[1].replace("row", ""))

#             # Check if this row is at least the first valid row
#             if row_num < first_valid_row:
#                 continue

#             # Predict class labels for all images in the current row
#             row_images = [f for f in os.listdir(output_folder) if f.startswith(f"{parts[0]}_row{row_num}_")]
#             row_predictions = [predict_class(os.path.join(output_folder, img))[0] for img in row_images]

#             # Check if the row contains the primary label and meets the secondary conditions
#             if primary_label in row_predictions:
#                 if len(row_predictions) == 2:
#                     if all(label in secondary_labels for label in row_predictions if label != primary_label):
#                         important_rows.add(row_num)
#                 else:
#                     secondary_count = sum(1 for label in row_predictions if label in secondary_labels)
#                     if secondary_count >= 1 and secondary_count == len(set(row_predictions) & secondary_labels):
#                         important_rows.add(row_num)

#     return sorted(important_rows)

# # Assuming output_folder, first_valid_row, and important_labels are defined
# # output_folder = "Analysis/alhaiya_bilawal_3_taal"
# primary_label = 35  # Primary label index
# secondary_labels = {31, 32, 36, 37}  # Secondary label indices

# important_rows = find_important_rows(output_folder, first_valid_row, primary_label, secondary_labels)
# print("Important rows:", important_rows)


#  ------------------------------------------------------------------------------------------------------------------

# Hand-entered row numbers (the model-based detection above is commented out).
# assign_column_numbers uses each entry as the last row of one subgroup.
important_rows = [7, 10, 12, 15, 20, 24, 28, 32]  # yaman_1_taal

# important_rows = [9, 15, 20, 25, 28, 34] # bilawal_dhamaar


In [None]:
import os
import re
from PIL import Image

def get_image_details(filename):
    """Parse ``{page}_row{row}_x{x}_y{y}_w{w}_h{h}`` from the start of a name.

    Returns ``(page_num, row_num, x, y, w, h)`` as ints, or None when the
    filename does not begin with that pattern (for instance once a
    ``_colN`` or ``_extra`` tag has been inserted after the row).
    """
    found = re.match(r'(\d+)_row(\d+)_x(\d+)_y(\d+)_w(\d+)_h(\d+)', filename)
    if found is None:
        return None
    return tuple(int(text) for text in found.groups())

def update_image_filename(output_folder, old_filename, new_suffix):
    """Rename a crop so that ``new_suffix`` sits right after its row tag.

    The new name is ``{page}_row{row}_{new_suffix}_x{x}_y{y}_w{w}_h{h}.png``.
    Files whose name ``get_image_details`` cannot parse are left untouched.
    """
    parsed = get_image_details(old_filename)
    if not parsed:
        return

    page_num, row_num, x, y, w, h = parsed
    source_path = os.path.join(output_folder, old_filename)
    target_path = os.path.join(
        output_folder,
        f"{page_num}_row{row_num}_{new_suffix}_x{x}_y{y}_w{w}_h{h}.png",
    )
    os.rename(source_path, target_path)

def find_general_boundaries(coordinates):
    """Return ``(min_x, max_x)`` over ``(image, page, row, x, y, w, h)`` tuples.

    ``x`` is read from index 3 and ``w`` from index 5; ``max_x`` is the
    largest right edge ``x + w``.
    """
    left_edges = [entry[3] for entry in coordinates]
    right_edges = [entry[3] + entry[5] for entry in coordinates]
    return min(left_edges), max(right_edges)

def is_row_centered(row_coords, general_min_x, general_max_x, threshold=0.15):
    """Return True when the row fits inside the page's central region.

    ``row_coords`` holds ``(x, y, w, h)`` tuples.  The central region is the
    span ``[general_min_x, general_max_x]`` with ``threshold`` of its width
    trimmed from both ends.
    """
    span = general_max_x - general_min_x
    inner_left = general_min_x + span * threshold
    inner_right = general_max_x - span * threshold

    leftmost = min(x for x, _, _, _ in row_coords)
    rightmost = max(x + w for x, _, w, _ in row_coords)

    return inner_left <= leftmost and rightmost <= inner_right

def find_first_valid_row(coordinates, row_mapping, subgroup_lower_bound):
    """Return the first non-centred row at or after ``subgroup_lower_bound``.

    Rows below the bound are ignored.  When the first non-empty row that
    ``is_row_centered`` rejects lies above the bound, the preceding
    ``row_mapping`` entry is inspected once and returned instead if it holds
    more than two boxes, or if none of its boxes satisfies
    ``(w / h > 1.6 and h > 8) or w < 5``.  Returns None when no row
    qualifies.

    Parameters:
    - coordinates: ``(image, page_num, row_num, x, y, w, h)`` tuples.
    - row_mapping: ``(row_num, lower_limit, upper_limit)`` tuples.
    - subgroup_lower_bound: first row number belonging to the subgroup.
    """
    general_min_x, general_max_x = find_general_boundaries(coordinates)

    def boxes_between(lower, upper):
        # Boxes whose top edge y falls inside the inclusive band.
        return [
            (x, y, w, h)
            for _, _, _, x, y, w, h in coordinates
            if lower <= y <= upper
        ]

    for index, (row_num, lower_limit, upper_limit) in enumerate(row_mapping):
        if row_num < subgroup_lower_bound:
            continue

        current_boxes = boxes_between(lower_limit, upper_limit)
        if not current_boxes:
            continue
        if is_row_centered(current_boxes, general_min_x, general_max_x):
            continue

        # The previous row is only considered when the hit is not already
        # the first row of the subgroup.
        if row_num > subgroup_lower_bound and index > 0:
            prev_row_num, prev_lower, prev_upper = row_mapping[index - 1]
            previous_boxes = boxes_between(prev_lower, prev_upper)
            # The size test is only evaluated for rows of at most two boxes.
            if len(previous_boxes) > 2 or not any(
                ((w / h > 1.6) and h > 8) or (w < 5)
                for _, _, w, h in previous_boxes
            ):
                return prev_row_num

        return row_num

    return None  # no non-centred row exists

def assign_column_numbers(output_folder, row_mapping, first_valid_row, important_rows, aspect_ratio_threshold=1.6):
    """Tag every crop in ``output_folder`` with a column number or ``extra``.

    The rows are split into subgroups, each ending at one entry of
    ``important_rows``.  Within a subgroup, rows before the subgroup's first
    valid row are renamed with an ``extra`` tag; the remaining crops are
    grouped into x-bands with ``create_mapping`` and renamed with ``colN``.
    Files are renamed on disk through ``update_image_filename``.

    Parameters:
    - output_folder: directory holding the crop images.
    - row_mapping: ``(row_num, lower_limit, upper_limit)`` tuples, passed on
      to ``find_first_valid_row``.
    - first_valid_row: lowest row number to consider.
    - important_rows: ascending row numbers, each the last row of a subgroup.
    - aspect_ratio_threshold: forwarded to ``create_mapping`` (column mode).

    Returns:
    - List of ``(first_valid_row_in_subgroup, end_row - 1)`` tuples, one per
      non-empty subgroup.
    """
    images = os.listdir(output_folder)
    coordinates = []
    subgroup_ranges = []  # Store subgroup ranges

    # Only files still in the un-tagged "{page}_row{row}_x..." form parse here.
    for image in images:
        details = get_image_details(image)
        if details:
            page_num, row_num, x, y, w, h = details
            if first_valid_row <= row_num <= important_rows[-1]:
                coordinates.append((image, page_num, row_num, x, y, w, h))

    # Prepend a sentinel so the first subgroup starts at first_valid_row.
    # This rebinds the local name; the caller's list is not modified.
    important_rows = [first_valid_row - 1] + important_rows

    for i in range(len(important_rows) - 1):
        start_row = important_rows[i] + 1
        end_row = important_rows[i + 1]
        subgroup_coords = [
            (image, page_num, row_num, x, y, w, h) for image, page_num, row_num, x, y, w, h in coordinates
            if start_row <= row_num <= end_row
        ]
        if not subgroup_coords:
            continue

        # Print the current subgroup range
        print(f"Subgroup range: [{start_row}, {end_row}]")

        # Find and mark invalid rows in the subgroup
        # (invalid_rows is filled below but never read afterwards)
        invalid_rows = []
        first_valid_row_in_subgroup = find_first_valid_row(subgroup_coords, row_mapping, start_row)
        # NOTE(review): find_first_valid_row can return None; the comparisons
        # with row_num below would then raise TypeError - confirm the intended
        # handling for a subgroup whose rows are all centred.

        # Print the first valid row in the subgroup
        print("First valid row in subgroup: ", first_valid_row_in_subgroup)

        # Store the current subgroup range as a tuple; the stored range stops
        # one row before end_row
        subgroup_ranges.append((first_valid_row_in_subgroup, end_row-1))

        for image, page_num, row_num, x, y, w, h in subgroup_coords:
            if row_num < first_valid_row_in_subgroup:
                invalid_rows.append(image)
                update_image_filename(output_folder, image, "extra")

        # Filter out invalid rows
        valid_subgroup_coords = [
            (image, page_num, row_num, x, y, w, h) for image, page_num, row_num, x, y, w, h in subgroup_coords
            if row_num >= first_valid_row_in_subgroup
        ]

        if not valid_subgroup_coords:
            continue

        valid_subgroup_coords_sorted_by_x = sorted(valid_subgroup_coords, key=lambda item: item[3])  # Sort by x

        # create_mapping expects (page_num, x, y, w, h) tuples, so drop the
        # filename and row number.
        valid_subgroup_coords_mapping = [
            (page_num, x, y, w, h) for _, page_num, row_num, x, y, w, h in valid_subgroup_coords_sorted_by_x
        ]

        column_mapping = create_mapping(valid_subgroup_coords_mapping, aspect_ratio_threshold, is_row=False)

        # Crops whose x falls in no column band keep their original name.
        for image, page_num, row_num, x, y, w, h in valid_subgroup_coords_sorted_by_x:
            col_num = assign_number(x, column_mapping)
            if col_num != -1:
                update_image_filename(output_folder, image, f"col{col_num}")

    return subgroup_ranges

# NOTE(review): this replaces the first_valid_row computed in the earlier cell
# (printed there as 4) with a fixed 1 - confirm this override is intended.
first_valid_row = 1
subgroup_ranges = assign_column_numbers(output_folder, row_mapping, first_valid_row, important_rows)


Subgroup range: [1, 7]
First valid row in subgroup:  4
Subgroup range: [8, 10]
First valid row in subgroup:  8
Subgroup range: [11, 12]
First valid row in subgroup:  11
Subgroup range: [13, 15]
First valid row in subgroup:  13
Subgroup range: [16, 20]
First valid row in subgroup:  17
Subgroup range: [21, 24]
First valid row in subgroup:  21
Subgroup range: [25, 28]
First valid row in subgroup:  25
Subgroup range: [29, 32]
First valid row in subgroup:  29


In [5]:
import os

def get_image_details(filename):
    """
    Extract row and column details from image filename.
    Filename format: '0_row4_col12_x400_y145_w7_h10' or '0_row3_extra_x282_y116_w40_h18'
    Returns (row_num, col_num); either value is None when the filename has no
    usable 'rowN' / 'colN' part.
    """
    row_num = None
    col_num = None

    for part in filename.split('_'):
        is_row = part.startswith('row')
        if not is_row and not part.startswith('col'):
            continue
        try:
            value = int(part[3:])
        except ValueError:
            # Something like 'rows' or 'column.txt': not one of our tags.
            continue
        if is_row:
            row_num = value
        else:
            col_num = value

    return row_num, col_num


def process_subgroups(folder, subgroups):
    """
    Processes subgroups by checking if the first subgroup needs to be split into two.

    The first subgroup is split when both its first row and the following row
    start at a column greater than 1.  The split point is the first later row
    that has an image in column 1 (or the first row itself if there is none).

    Files without a row tag are ignored, and images without a column tag
    (e.g. '_extra' ones) do not take part in the column checks.

    Parameters:
    - folder: directory holding the tagged image files.
    - subgroups: list of (start_row, end_row) tuples; modified in place when
      a split happens.

    Returns:
    - The same ``subgroups`` list.
    """
    if not subgroups:
        return subgroups

    first_valid_row, first_subgroup_end = subgroups[0]

    # (row, col) of every tagged image from the first valid row on, ordered
    # by row.  Untagged files would otherwise put None into the sort and the
    # comparisons below.
    tagged = sorted(
        (
            details
            for details in map(get_image_details, os.listdir(folder))
            if details[0] is not None and details[0] >= first_valid_row
        ),
        key=lambda details: details[0],
    )

    def lowest_column(row):
        # Smallest column number present in the row, or None when the row has
        # no image carrying a column tag.
        return min(
            (col for r, col in tagged if r == row and col is not None),
            default=None,
        )

    first_row_col = lowest_column(first_valid_row)
    second_row_col = lowest_column(first_valid_row + 1)

    if first_row_col is None or second_row_col is None:
        return subgroups
    if first_row_col <= 1 or second_row_col <= 1:
        return subgroups

    # Find the row where col = 1 begins; default to the first row itself.
    new_first_end = next(
        (row for row, col in tagged if row > first_valid_row and col == 1),
        first_valid_row,
    )

    # Update the subgroups
    subgroups[0] = (first_valid_row, new_first_end)
    subgroups.insert(1, (new_first_end, first_subgroup_end))

    return subgroups

# Example use
# subgroups = [(1, 9), (10, 17), (18, 24), (25, 30), (31, 35)]
# NOTE(review): the folder is given as a literal here while the earlier cells
# worked on output_folder ("Analysis/check2") - confirm both refer to the same
# set of images.
updated_subgroups = process_subgroups("Analysis/yaman_1_taal", subgroup_ranges)

for start, end in updated_subgroups:
    print(f"Updated Subgroup range: [{start}, {end}]")


Updated Subgroup range: [4, 6]
Updated Subgroup range: [8, 9]
Updated Subgroup range: [11, 11]
Updated Subgroup range: [13, 14]
Updated Subgroup range: [17, 19]
Updated Subgroup range: [21, 23]
Updated Subgroup range: [25, 27]
Updated Subgroup range: [29, 31]


In [6]:
import os
import re
from collections import defaultdict

def get_image_details(filename):
    """Parse ``{page}_row{row}_col{col}_x{x}_y{y}_w{w}_h{h}`` from a filename.

    Returns ``(page_num, row_num, col_num, x, y, w, h)`` as ints, or None
    when the name does not start with that pattern (e.g. ``_extra`` files or
    files that never received a column tag).
    """
    found = re.match(r'(\d+)_row(\d+)_col(\d+)_x(\d+)_y(\d+)_w(\d+)_h(\d+)', filename)
    if found is None:
        return None
    return tuple(map(int, found.groups()))

def is_articulation(w, h):
    """Return True for boxes wider than 9 with a height strictly between 4 and 9."""
    if not 4 < h < 9:
        return False
    return w > 9

def classify_rows_in_subgroup(subgroup_coords):
    """Sort the rows of one subgroup into four categories.

    A row is an articulation row when every box in it passes
    ``is_articulation``.  The remaining rows, ordered by row number, are
    assigned by count:

    - 3 rows: kann swar, swar, lyrics.
    - 2 rows: swar + lyrics when the first row has more boxes than the
      second or their box counts differ by at most 2; otherwise
      kann swar + swar.
    - 1 row: swar.
    - any other count: none of them is classified.

    Parameters:
    - subgroup_coords: ``(image, page_num, row_num, x, y, w, h)`` tuples.

    Returns:
    - ``(articulation_rows, kann_swar_rows, swar_rows, lyrics_rows)``, each
      a list of row numbers.
    """
    # Bucket the boxes by row number, keeping first-seen row order.
    boxes_by_row = defaultdict(list)
    for image, page_num, row_num, x, y, w, h in subgroup_coords:
        boxes_by_row[row_num].append((image, page_num, x, y, w, h))

    articulation_rows = []
    other_rows = []
    for row_num, boxes in boxes_by_row.items():
        if all(is_articulation(box[4], box[5]) for box in boxes):
            articulation_rows.append(row_num)
        else:
            other_rows.append((row_num, boxes))

    other_rows.sort(key=lambda pair: pair[0])
    ordered_numbers = [row_num for row_num, _ in other_rows]

    kann_swar_rows, swar_rows, lyrics_rows = [], [], []

    if len(other_rows) == 3:
        kann_swar_rows, swar_rows, lyrics_rows = ([number] for number in ordered_numbers)
    elif len(other_rows) == 2:
        upper_count = len(other_rows[0][1])
        lower_count = len(other_rows[1][1])
        if abs(upper_count - lower_count) <= 2 or upper_count > lower_count:
            swar_rows, lyrics_rows = ([number] for number in ordered_numbers)
        else:
            kann_swar_rows, swar_rows = ([number] for number in ordered_numbers)
    elif len(other_rows) == 1:
        swar_rows = list(ordered_numbers)

    return articulation_rows, kann_swar_rows, swar_rows, lyrics_rows

def process_subgroups(output_folder, subgroup_ranges, aspect_ratio_threshold=1.6):
    """Classify the rows of every subgroup and print the combined result.

    Only files that ``get_image_details`` can parse are considered.  Each
    ``(start_row, end_row)`` range (inclusive) is passed to
    ``classify_rows_in_subgroup`` and the four resulting lists are
    concatenated across subgroups.

    Parameters:
    - output_folder: directory holding the column-tagged crops.
    - subgroup_ranges: iterable of ``(start_row, end_row)`` tuples.
    - aspect_ratio_threshold: accepted but not used by this function.

    Returns:
    - ``(articulation_rows, kann_swar_rows, swar_rows, lyrics_rows)`` lists.
    """
    # Parse image details; the column number is dropped for classification.
    parsed = []
    for name in os.listdir(output_folder):
        details = get_image_details(name)
        if details:
            page_num, row_num, col_num, x, y, w, h = details
            parsed.append((name, page_num, row_num, x, y, w, h))

    articulation_rows_all, kann_swar_rows_all, swar_rows_all, lyrics_rows_all = [], [], [], []

    for start_row, end_row in subgroup_ranges:
        members = [entry for entry in parsed if start_row <= entry[2] <= end_row]
        articulation, kann_swar, swar, lyrics = classify_rows_in_subgroup(members)

        articulation_rows_all += articulation
        kann_swar_rows_all += kann_swar
        swar_rows_all += swar
        lyrics_rows_all += lyrics

    # Print the results
    print("Articulation Rows: ", articulation_rows_all)
    print("Kann Swar Rows: ", kann_swar_rows_all)
    print("Swar Rows: ", swar_rows_all)
    print("Lyrics Rows: ", lyrics_rows_all)

    return articulation_rows_all, kann_swar_rows_all, swar_rows_all, lyrics_rows_all

# Example usage:
# Define the subgroup ranges as identified earlier
# updated_subgroups = [(4, 6), (7, 8), (10, 14), (16, 19), (22, 24), (26, 27), (29, 33)]

# Folder where the images are stored (this rebinds the output_folder global
# set in the first cell)
output_folder = "Analysis/yaman_1_taal"

# Process the subgroups and classify rows, using the updated_subgroups
# produced by the previous cell
articulation_rows, kann_swar_rows, swar_rows, lyrics_rows = process_subgroups(output_folder, updated_subgroups)


Articulation Rows:  []
Kann Swar Rows:  [4, 13, 17, 21, 25, 29]
Swar Rows:  [5, 8, 11, 14, 18, 22, 26, 30]
Lyrics Rows:  [6, 9, 19, 23, 27, 31]


In [7]:
# import bisect

# def add_rows(ls):
#     print(f"Here is your list: {ls}")
#     rows = int(input("How many rows do you want to add?\n"))

#     print(f"Enter {rows} row numbers to add:")
#     for _ in range(rows):
#         num = int(input())
        
#         # Perform binary search to find the insertion point
#         pos = bisect.bisect_left(ls, num)
        
#         # Insert the number at the correct position if it doesn't already exist
#         if pos == len(ls) or ls[pos] != num:
#             ls.insert(pos, num)
    
#     print(f"Updated list: {ls}")


# def delete_rows(ls):
#     print(f"Here is your list: {ls}")
#     rows = int(input("How many rows do you want to delete?\n"))

#     # Using a set to store numbers to delete
#     to_delete = set()

#     print(f"Enter {rows} row numbers to delete:")
#     for _ in range(rows):
#         num = int(input())
#         to_delete.add(num)

#     # Use list comprehension to filter out the rows that need to be deleted
#     ls[:] = [x for x in ls if x not in to_delete]

#     print(f"Updated list: {ls}")


# def modify_rows(articulation_rows, kann_swar_rows, swar_rows, lyrics_rows):
#     while True:
#         print("\n1. Articulation\n2. Kann Swar\n3. Swar\n4. Lyrics\n5. Exit")
#         select = int(input("Which section do you want to modify? (Enter your choice (1-5))\n"))

#         if select == 5:
#             print("Modifications are not required further.")
#             break

#         print("1. Add rows\n2. Delete rows\n3. Exit")
#         choice = int(input("Enter your choice (1-3)\n"))

#         if choice == 3:
#             print("No modifications in this section.")
#             continue

#         if select == 1:
#             if choice == 1:
#                 add_rows(articulation_rows)
#             else:
#                 delete_rows(articulation_rows)
#         elif select == 2:
#             if choice == 1:
#                 add_rows(kann_swar_rows)
#             else:
#                 delete_rows(kann_swar_rows)
#         elif select == 3:
#             if choice == 1:
#                 add_rows(swar_rows)
#             else:
#                 delete_rows(swar_rows)
#         elif select == 4:
#             if choice == 1:
#                 add_rows(lyrics_rows)
#             else:
#                 delete_rows(lyrics_rows)


# # # Example usage:
# # articulation_rows = [5, 8, 12]
# # kann_swar_rows = [6]
# # swar_rows = [7, 9]
# # lyrics_rows = [10]

# # Call the interactive row modification function
# modify_rows(articulation_rows, kann_swar_rows, swar_rows, lyrics_rows)

# ---------------------------------------------------------------------------
# yaman_1_taal

# Row lists entered by hand; they overwrite the values returned by
# process_subgroups in the previous cell (here they equal its printed output).
articulation_rows = []
kann_swar_rows = [4, 13, 17, 21, 25, 29]
swar_rows = [5, 8, 11, 14, 18, 22, 26, 30]
lyrics_rows = [6, 9, 19, 23, 27, 31]

# ---------------------------------------------------------------------------
# # bilawal_dhamaar

# articulation_rows = [12, 14, 31, 33]
# kann_swar_rows = [4, 10, 16, 22, 29]
# swar_rows = [5, 7, 11, 18, 23, 26, 30]
# lyrics_rows = [6, 8, 13, 19, 24, 27, 32]

In [11]:
# Print the results: the row lists that the following cells will use
print("Articulation Rows: ", articulation_rows)
print("Kann Swar Rows: ", kann_swar_rows)
print("Swar Rows: ", swar_rows)
print("Lyrics Rows: ", lyrics_rows)

Articulation Rows:  []
Kann Swar Rows:  [4, 13, 17, 21, 25, 29]
Swar Rows:  [5, 8, 11, 14, 18, 22, 26, 30]
Lyrics Rows:  [6, 9, 19, 23, 27, 31]


In [12]:
# storing image paths in lists for direct access

import os
import re
from collections import defaultdict

# Define the path to the folder containing the images; read as a global by
# extract_info_from_filename and by the directory listing below
image_folder_path = 'Analysis/yaman_1_taal'

# # Define the section-wise row numbers
# articulation_rows = [12, 14, 31, 33]
# kann_swar_rows = [4, 10, 16, 22, 29]
# swar_rows = [5, 7, 11, 18, 23, 26, 30]
# lyrics_rows = [6, 8, 13, 19, 24, 27, 32]

# # Define the subgroup ranges
# subgroup_ranges = [
#     (4, 7),
#     (7, 8),
#     (10, 14),
#     (16, 19),
#     (22, 24),
#     (26, 27),
#     (29, 33)
# ]

# store the final updated subgroup to use further
# (same list object as updated_subgroups, not a copy)
subgroup_ranges = updated_subgroups

# Define the beat count (size of the lists)
beat_count = 12

# Function to extract information from the image filename
def extract_info_from_filename(filename):
    """Parse a crop filename and attach its normalised path.

    Accepts names with or without a ``_colN`` tag after the row tag.

    Returns:
    - ``(page_num, row_num, col_num, x, y, width, height, image_path)`` with
      ``col_num`` set to None when the tag is absent, or None when the
      filename does not match.  ``image_path`` is built from the
      module-level ``image_folder_path``.
    """
    found = re.match(
        r'(\d+)_row(\d+)(?:_col(\d+))?_x(\d+)_y(\d+)_w(\d+)_h(\d+)',
        filename,
    )
    if not found:
        return None

    page_text, row_text, col_text, x_text, y_text, w_text, h_text = found.groups()
    col_num = int(col_text) if col_text else None

    # os.path.join + normpath take care of the platform's path separators.
    image_path = os.path.normpath(os.path.join(image_folder_path, filename))

    return (
        int(page_text),
        int(row_text),
        col_num,
        int(x_text),
        int(y_text),
        int(w_text),
        int(h_text),
        image_path,
    )

# Load all image filenames and extract their information; files whose name
# does not match the pattern yield None and are dropped
image_files = os.listdir(image_folder_path)
image_info = [extract_info_from_filename(f) for f in image_files]
image_info = [info for info in image_info if info is not None]

# Organize images by row and column:
# row_col_images[row_num][col_num] -> list of (x, y, width, height, image_path).
# col_num is None for files without a _colN tag.
row_col_images = defaultdict(lambda: defaultdict(list))
for info in image_info:
    page_num, row_num, col_num, x, y, width, height, image_path = info
    row_col_images[row_num][col_num].append((x, y, width, height, image_path))

# Function to pad lists to match the beat count
def pad_lists(lists, size):
    """Left-pad ``lists`` with empty lists until it has ``size`` entries.

    When ``lists`` already has ``size`` or more entries the very same
    object is returned unchanged; otherwise a new list is built.
    """
    missing = size - len(lists)
    if missing <= 0:
        return lists
    return [[] for _ in range(missing)] + lists

def save_segment(segment, subgroup_range, col, part_type, original_filename):
    """
    Write one half of a split image to disk and return where it was saved.

    The output name reuses the page/row/col tags of the original file and
    carries coordinates recomputed for the segment, followed by the part type.

    Parameters:
    - segment: The segmented image; its height is divided by 3 to get back to
      pre-enlargement pixels.
    - subgroup_range: The subgroup range (not used by this function).
    - col: The column number (not used; the column is read from the filename).
    - part_type: Type of segment ('upper' or 'lower').
    - original_filename: The original filename of the image before segmentation.

    Returns:
    - Path to the saved segment.

    Raises:
    - ValueError: if the filename does not match the expected pattern or
      part_type is neither 'upper' nor 'lower'.
    """
    parsed = re.match(r'(\d+)_row(\d+)_col(\d+)_x(\d+)_y(\d+)_w(\d+)_h(\d+)', original_filename)
    if parsed is None:
        raise ValueError(f"Original filename {original_filename} does not match the expected pattern.")

    # Page, row and column stay as text: they are only echoed into the name.
    page_num, row_num, col_num = parsed.group(1, 2, 3)
    original_x, original_y, original_w, original_h = (
        int(value) for value in parsed.group(4, 5, 6, 7)
    )

    if part_type not in ('upper', 'lower'):
        raise ValueError("Invalid part_type. Must be 'upper' or 'lower'.")

    # x and width are inherited; the height comes from the segment itself.
    new_x = original_x
    new_w = original_w
    new_h = segment.shape[0] // 3

    if part_type == 'upper':
        new_y = original_y
    else:
        # The lower part ends where the original box ended.
        new_y = original_y + (original_h - new_h)

    new_filename = f"{page_num}_row{row_num}_col{col_num}_x{new_x}_y{new_y}_w{new_w}_h{new_h}_{part_type}.png"

    output_folder = os.path.normpath('Analysis/yaman_1_taal_segmented')
    os.makedirs(output_folder, exist_ok=True)

    segment_path = os.path.join(output_folder, new_filename)
    cv2.imwrite(segment_path, segment)
    return segment_path

# Function to preprocess an image
def preprocess_image(image):
    """Binarise a BGR image with an inverted Gaussian adaptive threshold.

    The image is converted to grayscale and thresholded with a 21-pixel
    neighbourhood and an offset of 5. Because the threshold is inverted,
    pixels darker than their local threshold come out as 255 and the rest
    as 0.
    """
    return cv2.adaptiveThreshold(
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY),
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        21,
        5,
    )

# Function to separate articulation in an image
def check_articulation(image):
    """Report whether ``image`` contains an articulation-like contour.

    Only the first external contour whose bounding box is wide and flat
    (``10 < h < 21`` and ``w > 25``) is considered. The flag is True when
    that box leaves at least one pixel row above it.

    Returns:
    - (image, flag): the input image, always unchanged, and the boolean flag.
    """
    binary = preprocess_image(image)
    outlines, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Lazily walk the bounding boxes and stop at the first wide, flat one.
    boxes = (cv2.boundingRect(outline) for outline in outlines)
    first_bar = next((box for box in boxes if 10 < box[3] < 21 and box[2] > 25), None)

    # Later candidates are never inspected, even if the first one sits on the top edge.
    found = first_bar is not None and image[:first_bar[1], :].shape[0] > 0
    return image, found

# Function to process a subgroup and create the lists of lists
def _crop_to_content(part):
    """Crop a BGR image to the bounding box of its Otsu-foreground pixels.

    Returns ``part`` unchanged when the inverted Otsu threshold finds no
    foreground pixel at all.
    """
    gray = cv2.cvtColor(part, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    coords = np.column_stack(np.where(binary > 0))
    if len(coords) == 0:
        return part
    y1, x1 = coords.min(axis=0)
    y2, x2 = coords.max(axis=0)
    return part[y1:y2 + 1, x1:x2 + 1]


def _split_stacked_swar(image):
    """Split a tall image into cropped (upper, lower) parts.

    The cut is placed on the row with the fewest foreground pixels between
    30% and 50% of the image height; if that band is empty the image is cut
    at mid-height.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 21, 5)

    # Morphological closing to connect broken parts of characters
    closing_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, closing_kernel)

    # Per-row count of foreground pixels (mask values are 0 or 255)
    row_projection = np.sum(closed, axis=1) / 255

    height = image.shape[0]
    lower_bound = int(height * 0.3)
    upper_bound = int(height * 0.5)
    candidate_rows = row_projection[lower_bound:upper_bound]
    if candidate_rows.size > 0:
        separation_row = lower_bound + np.argmin(candidate_rows)
    else:
        separation_row = height // 2

    upper_part = image[:separation_row, :]
    lower_part = image[separation_row:, :]
    return _crop_to_content(upper_part), _crop_to_content(lower_part)


def process_subgroup(subgroup_range, is_first_subgroup):
    """Build the swar, kann swar, lyrics and articulation lists for one subgroup.

    Reads the module-level ``swar_rows``, ``kann_swar_rows``,
    ``articulation_rows``, ``lyrics_rows``, ``row_col_images`` and
    ``beat_count``, and calls the module-level ``pad_lists``,
    ``save_segment`` and ``check_articulation``.

    Parameters:
    - subgroup_range: (start_row, end_row), both inclusive.
    - is_first_subgroup: when True the lists are padded with ``pad_lists``;
      otherwise they are extended at the end up to ``beat_count``.

    Returns:
    - (swar_list, kann_swar_list, swar_articulation_checks,
      lyrics_articulation_checks, lyrics_list). The three ``*_list`` values
      are lists of lists of image paths; the two ``*_checks`` values are
      lists of booleans. Five ``None`` values are returned when the subgroup
      contains no swar row.
    """
    start_row, end_row = subgroup_range

    def first_row_in_subgroup(rows):
        # First entry of ``rows`` that lies inside this subgroup, else None.
        return next((row for row in rows if start_row <= row <= end_row), None)

    def image_paths(images):
        # Each image record carries its file path at index 4.
        return [img[4] for img in images]

    swar_row = first_row_in_subgroup(swar_rows)
    if not swar_row:
        return None, None, None, None, None

    kann_swar_row = first_row_in_subgroup(kann_swar_rows)
    lyrics_row = first_row_in_subgroup(lyrics_rows)
    articulation_rows_in_subgroup = [row for row in articulation_rows if start_row <= row <= end_row]

    # Column -> image records for each row kind (empty when the row is absent)
    swar_images = row_col_images[swar_row]
    swar_cols = sorted(swar_images.keys())
    kann_swar_images = row_col_images[kann_swar_row] if kann_swar_row else {}
    kann_swar_cols = sorted(kann_swar_images.keys())
    lyrics_images = row_col_images[lyrics_row] if lyrics_row else {}
    lyrics_cols = sorted(lyrics_images.keys()) if lyrics_row else []

    swar_list = []
    kann_swar_list = []
    swar_articulation_checks = [False] * len(swar_cols)
    lyrics_articulation_checks = [False] * len(lyrics_cols)

    # Case 1: explicit kann swar row -> merge the two sorted column sequences
    if kann_swar_row:
        swar_index = 0
        kann_swar_index = 0

        while swar_index < len(swar_cols) or kann_swar_index < len(kann_swar_cols):
            swar_col = swar_cols[swar_index] if swar_index < len(swar_cols) else None
            kann_swar_col = kann_swar_cols[kann_swar_index] if kann_swar_index < len(kann_swar_cols) else None

            # Both columns exist and match
            if swar_col is not None and kann_swar_col is not None and swar_col == kann_swar_col:
                swar_list.append(image_paths(swar_images[swar_col]))
                kann_swar_list.append(image_paths(kann_swar_images[kann_swar_col]))
                swar_index += 1
                kann_swar_index += 1
            # Swar column with no kann swar at (or before) it
            elif swar_col is not None and (kann_swar_col is None or swar_col < kann_swar_col):
                swar_list.append(image_paths(swar_images[swar_col]))
                kann_swar_list.append([])
                swar_index += 1
            # Kann swar column that precedes the next swar column, or has no swar left
            elif kann_swar_col is not None and (swar_col is None or kann_swar_col < swar_col):
                if swar_index < len(swar_cols):
                    # Attach the kann swar to the next available swar column
                    swar_list.append(image_paths(swar_images[swar_cols[swar_index]]))
                    kann_swar_list.append(image_paths(kann_swar_images[kann_swar_col]))
                    swar_index += 1
                    kann_swar_index += 1
                else:
                    # No swar columns left: keep the kann swar with an empty swar slot
                    swar_list.append([])
                    kann_swar_list.append(image_paths(kann_swar_images[kann_swar_col]))
                    kann_swar_index += 1

    # Case 2: no explicit kann swar row -> look for kann swars hidden in the swar row
    else:
        for col in swar_cols:
            images_in_col = swar_images[col]

            # Index 3 of a record is its height; anything taller than 25 is an outlier
            outlier_images = [img for img in images_in_col if img[3] > 25]
            non_outlier_images = [img for img in images_in_col if img[3] <= 25]

            for img in outlier_images:
                image_path = img[4]
                outlier_image = cv2.imread(image_path)

                # An unreadable file cannot be analysed or split, and an image
                # that check_articulation flags is left whole: in both cases
                # keep the original path as a plain swar.
                if outlier_image is None or check_articulation(outlier_image)[1]:
                    swar_list.append([image_path])
                    kann_swar_list.append([])
                    continue

                upper_part_cropped, lower_part_cropped = _split_stacked_swar(outlier_image)

                # Save both halves under names derived from the original filename
                original_filename = os.path.basename(image_path)
                upper_part_path = save_segment(upper_part_cropped, subgroup_range, col, 'upper', original_filename)
                lower_part_path = save_segment(lower_part_cropped, subgroup_range, col, 'lower', original_filename)

                swar_list.append([lower_part_path])  # Lower part is the swar
                kann_swar_list.append([upper_part_path])  # Upper part is the kann swar

            if len(non_outlier_images) == 1:
                # A single image in this column: no hidden kann swar
                swar_list.append([non_outlier_images[0][4]])
                kann_swar_list.append([])
            elif len(non_outlier_images) > 1:
                # Several images: the one with the smallest y is the kann swar,
                # the next one is the swar; any further images are ignored.
                sorted_images = sorted(non_outlier_images, key=lambda record: record[1])
                kann_swar_list.append([sorted_images[0][4]])
                swar_list.append([sorted_images[1][4]])

    # Articulation rows: flag the columns of the row directly above them
    for articulation_row in articulation_rows_in_subgroup:
        prev_row = articulation_row - 1
        if prev_row in swar_rows:
            target_cols, target_checks = swar_cols, swar_articulation_checks
        elif prev_row in lyrics_rows:
            # Lyrics flags are sized and ordered by lyrics_cols, so they must be
            # indexed by position in lyrics_cols (indexing by swar_cols could
            # flag the wrong entry or run past the end of the list).
            target_cols, target_checks = lyrics_cols, lyrics_articulation_checks
        else:
            continue

        articulation_images = row_col_images[articulation_row]
        for i, col in enumerate(target_cols):
            if col in articulation_images:
                target_checks[i] = True

    # Lyrics are appended column by column, without comparing to swar columns
    if lyrics_row:
        lyrics_list = [image_paths(lyrics_images[col]) for col in lyrics_cols]
    else:
        lyrics_list = [[] for _ in range(len(swar_cols))]

    # Pad lists to match the beat count
    if is_first_subgroup:
        swar_list = pad_lists(swar_list, beat_count)
        kann_swar_list = pad_lists(kann_swar_list, beat_count)
        swar_articulation_checks = pad_lists(swar_articulation_checks, beat_count)
        lyrics_articulation_checks = pad_lists(lyrics_articulation_checks, beat_count)
        lyrics_list = pad_lists(lyrics_list, beat_count)
    else:
        # A non-positive shortfall adds nothing, so longer lists are left alone
        for path_lists in (swar_list, kann_swar_list, lyrics_list):
            path_lists.extend([] for _ in range(beat_count - len(path_lists)))
        for flags in (swar_articulation_checks, lyrics_articulation_checks):
            flags.extend([False] * (beat_count - len(flags)))

    return swar_list, kann_swar_list, swar_articulation_checks, lyrics_articulation_checks, lyrics_list

# Run every subgroup through process_subgroup and keep those that produced data
subgroup_results = {}
for i, subgroup_range in enumerate(subgroup_ranges):
    is_first_subgroup = i == 0
    (
        swar_list,
        kann_swar_list,
        swar_articulation_checks,
        lyrics_articulation_checks,
        lyrics_list,
    ) = process_subgroup(subgroup_range, is_first_subgroup)

    # Subgroups whose swar or kann swar list is None/empty are not recorded
    if not (swar_list and kann_swar_list):
        continue

    subgroup_results[subgroup_range] = {
        'swar_list': swar_list,
        'kann_swar_list': kann_swar_list,
        'swar_articulation_checks': swar_articulation_checks,
        'lyrics_articulation_checks': lyrics_articulation_checks,
        'lyrics_list': lyrics_list
    }

# Dump the collected lists, one section per subgroup
for subgroup_range, results in subgroup_results.items():
    print(
        f"Subgroup Range: {subgroup_range}",
        f"Kann Swar List: {results['kann_swar_list']}",
        f"Swar List: {results['swar_list']}",
        f"Swar Articulation Checks: {results['swar_articulation_checks']}",
        f"Lyrics List: {results['lyrics_list']}",
        f"Lyrics Articulation Checks: {results['lyrics_articulation_checks']}",
        "-" * 80,
        sep="\n",
    )

Subgroup Range: (4, 6)
Kann Swar List: [['Analysis\\yaman_1_taal\\0_row4_col1_x102_y134_w8_h10.png'], [], [], ['Analysis\\yaman_1_taal\\0_row4_col4_x212_y134_w11_h9.png'], ['Analysis\\yaman_1_taal\\0_row4_col5_x249_y133_w10_h10.png'], [], ['Analysis\\yaman_1_taal\\0_row4_col7_x322_y130_w9_h12.png'], [], [], [], [], []]
Swar List: [['Analysis\\yaman_1_taal\\0_row5_col1_x104_y147_w16_h16.png'], ['Analysis\\yaman_1_taal\\0_row5_col2_x145_y147_w16_h16.png'], ['Analysis\\yaman_1_taal\\0_row5_col3_x177_y146_w16_h16.png'], ['Analysis\\yaman_1_taal\\0_row5_col4_x216_y147_w15_h16.png'], ['Analysis\\yaman_1_taal\\0_row5_col5_x254_y143_w10_h20.png'], ['Analysis\\yaman_1_taal\\0_row5_col6_x295_y149_w9_h13.png'], ['Analysis\\yaman_1_taal\\0_row5_col7_x327_y150_w10_h12.png'], ['Analysis\\yaman_1_taal\\0_row5_col8_x364_y150_w9_h12.png'], ['Analysis\\yaman_1_taal\\0_row5_col9_x400_y144_w10_h17.png'], ['Analysis\\yaman_1_taal\\0_row5_col10_x436_y150_w11_h12.png'], ['Analysis\\yaman_1_taal\\0_row5_col11

In [13]:
import os
import cv2

# Function to preprocess an image
def preprocess_image(image):
    """Return an inverted adaptive-threshold mask of a BGR image.

    Grayscale conversion is followed by a Gaussian adaptive threshold
    (block size 21, offset 5). The threshold is inverted, so pixels darker
    than their local threshold become 255.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return cv2.adaptiveThreshold(
        grayscale, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 21, 5
    )

# Function to save segmented parts
def save_segment(segment, subgroup_range, index, part_type):
    """
    Write a segmented image to the segmented-output folder and return its path.

    The file is named ``<range start>_<range end>_<index>_<part_type>.png``
    and the folder is created if it does not exist yet.

    Parameters:
    - segment: The image to write.
    - subgroup_range: Indexable pair; its first two items go into the name.
    - index: Position in the list, used in the name.
    - part_type: Label of the segment (callers pass 'left' or 'right').

    Returns:
    - Path of the written file.
    """
    target_dir = os.path.normpath('Analysis/yaman_1_taal_segmented')
    os.makedirs(target_dir, exist_ok=True)

    segment_path = os.path.join(
        target_dir,
        f"{subgroup_range[0]}_{subgroup_range[1]}_{index}_{part_type}.png",
    )
    cv2.imwrite(segment_path, segment)
    return segment_path

# Function to extract meend and kann swar segments
def extract_alphabets_vertical(image_path):
    """
    Cut an image into left / mid / right column strips around its wide, flat contours.

    Contours whose bounding box satisfies ``10 <= h <= 25 and w > 25`` are
    treated as the wide mark. A cut is made at its left edge when another
    sizeable contour starts at or before it, and at its right edge when
    another sizeable contour starts at or after it.

    Parameters:
    - image_path: Path to the image file.

    Returns:
    - (left_part, mid_part, right_part). All three are None when the file
      cannot be read. With no cut the whole image comes back as mid_part;
      with a single cut only left_part and right_part are set; with two cuts
      all three are set.
    """
    image = cv2.imread(image_path)
    if image is None:
        return None, None, None

    def is_wide_mark(box):
        return 10 <= box[3] <= 25 and box[2] > 25

    outlines, _ = cv2.findContours(preprocess_image(image), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    boxes = [cv2.boundingRect(outline) for outline in outlines]
    wide_marks = sorted((box for box in boxes if is_wide_mark(box)), key=lambda box: box[0])
    other_boxes = [box for box in boxes if not is_wide_mark(box)]

    # Without any wide mark the image is returned whole as the mid part
    if not wide_marks:
        return None, image, None

    first_x, first_y = wide_marks[0][0], wide_marks[0][1]
    last_x, last_y, last_w, _ = wide_marks[-1]
    span_end = last_x + last_w

    # A neighbour counts only if it is larger than 10x10 and reaches down to the mark's top
    cut_on_left = any(
        x <= first_x and (y + h) >= first_y and h > 10 and w > 10
        for x, y, w, h in other_boxes
    )
    cut_on_right = any(
        x >= span_end and (y + h) >= last_y and h > 10 and w > 10
        for x, y, w, h in other_boxes
    )

    if cut_on_left and cut_on_right:
        return image[:, :first_x], image[:, first_x:span_end], image[:, span_end:]
    if cut_on_left:
        return image[:, :first_x], None, image[:, first_x:]
    if cut_on_right:
        return image[:, :span_end], None, image[:, span_end:]

    # Wide mark present but no neighbour on either side: no segmentation
    return None, image, None

# Function to identify meend and kann swar
def identify_meend_and_kann_swar(left_part, mid_part, right_part):
    """
    Arrange the segments so that the meend always ends up in the mid slot.

    Only the case with exactly two non-None segments changes anything: the
    wider segment (the earlier slot wins a tie) is taken as the meend. If it
    was in the left slot, the other segment moves to the right slot; if it
    was in the right slot, the other segment moves to the left slot. Every
    other combination is returned unchanged.

    Parameters:
    - left_part: The left part of the image, or None.
    - mid_part: The mid part of the image, or None.
    - right_part: The right part of the image, or None.

    Returns:
    - (left_part, mid_part, right_part) after the rearrangement.
    """
    slots = (("left", left_part), ("mid", mid_part), ("right", right_part))
    present = [(slot, part) for slot, part in slots if part is not None]

    # Zero, one or three segments: nothing to rearrange
    if len(present) != 2:
        return left_part, mid_part, right_part

    (meend_slot, meend), (other_slot, other) = present
    # Strict comparison keeps the earlier slot on equal widths
    if other.shape[1] > meend.shape[1]:
        meend_slot, meend, other = other_slot, other, meend

    if meend_slot == "left":
        return None, meend, other
    if meend_slot == "right":
        return other, meend, None

    # The meend already sits in the middle
    return left_part, mid_part, right_part

# Function to update kann swar and meend lists
def update_kann_swar_and_meend_lists(subgroup_results):
    """
    Function to update kann swar and meend lists based on segmentation.
    
    Parameters:
    - subgroup_results: Dictionary containing subgroup results.
    """
    for subgroup_range, results in subgroup_results.items():
        kann_swar_list = results['kann_swar_list']
        swar_list = results['swar_list']
        
        # Initialize meend list with empty values
        meend_list = ['' for _ in range(len(swar_list))]
        
        i = 0
        while i < len(kann_swar_list):
            if kann_swar_list[i]:  # Check if the list is not empty
                image_path = kann_swar_list[i][0]
                # Extract width from the filename
                filename = os.path.basename(image_path)
                width = int(filename.split('_w')[1].split('_')[0])
                
                if width > 20:  # Only process if width > 20
                    # Perform segmentation
                    left_part, mid_part, right_part = extract_alphabets_vertical(image_path)
                    
                    # Identify and structure meend and kann swar
                    left_part, mid_part, right_part = identify_meend_and_kann_swar(left_part, mid_part, right_part)
                    
                    if mid_part is not None:  # If meend is found
                        # Mark start of meend
                        meend_list[i] = 'S'
                        
                        # Calculate x + w for the current image
                        x = int(filename.split('_x')[1].split('_')[0])
                        w = width
                        x_end = x + w
                        
                        # Find the end of meend
                        j = i + 1
                        while j < len(swar_list):
                            swar_image_path = swar_list[j][0]
                            swar_filename = os.path.basename(swar_image_path)
                            swar_x = int(swar_filename.split('_x')[1].split('_')[0])
                            
                            if swar_x >= x_end:
                                break  # Stop if swar_x is outside meend area
                            j += 1
                        
                        # Mark end of meend
                        if j > i:
                            meend_list[j - 1] = 'E'
                        
                        # Update kann swar list based on segmentation
                        if left_part is not None:
                            kann_swar_list[i] = [save_segment(left_part, subgroup_range, i, 'left')]
                        if right_part is not None:
                            kann_swar_list[j - 1] = [save_segment(right_part, subgroup_range, j - 1, 'right')]
                        if left_part is None and right_part is None:
                            kann_swar_list[i] = []  # Remove the original image if no segmentation
                        
                        # Skip processed indices
                        i = j
                    else:
                        i += 1
                else:
                    i += 1
            else:
                i += 1
        
        # Update the subgroup results with the meend list
        subgroup_results[subgroup_range]['meend_list'] = meend_list

# Add the meend information to the results computed above
update_kann_swar_and_meend_lists(subgroup_results)

# Dump the updated lists, one section per subgroup
for subgroup_range, results in subgroup_results.items():
    print(
        f"Subgroup Range: {subgroup_range}",
        f"Kann Swar List: {results['kann_swar_list']}",
        f"Meend List: {results['meend_list']}",
        f"Swar List: {results['swar_list']}",
        f"Lyrics List: {results['lyrics_list']}",
        f"Swar Articulation Checks: {results['swar_articulation_checks']}",
        f"Lyrics Articulation Checks: {results['lyrics_articulation_checks']}",
        "-" * 80,
        sep="\n",
    )

Subgroup Range: (4, 6)
Kann Swar List: [['Analysis\\yaman_1_taal\\0_row4_col1_x102_y134_w8_h10.png'], [], [], ['Analysis\\yaman_1_taal\\0_row4_col4_x212_y134_w11_h9.png'], ['Analysis\\yaman_1_taal\\0_row4_col5_x249_y133_w10_h10.png'], [], ['Analysis\\yaman_1_taal\\0_row4_col7_x322_y130_w9_h12.png'], [], [], [], [], []]
Meend List: ['', '', '', '', '', '', '', '', '', '', '', '']
Swar List: [['Analysis\\yaman_1_taal\\0_row5_col1_x104_y147_w16_h16.png'], ['Analysis\\yaman_1_taal\\0_row5_col2_x145_y147_w16_h16.png'], ['Analysis\\yaman_1_taal\\0_row5_col3_x177_y146_w16_h16.png'], ['Analysis\\yaman_1_taal\\0_row5_col4_x216_y147_w15_h16.png'], ['Analysis\\yaman_1_taal\\0_row5_col5_x254_y143_w10_h20.png'], ['Analysis\\yaman_1_taal\\0_row5_col6_x295_y149_w9_h13.png'], ['Analysis\\yaman_1_taal\\0_row5_col7_x327_y150_w10_h12.png'], ['Analysis\\yaman_1_taal\\0_row5_col8_x364_y150_w9_h12.png'], ['Analysis\\yaman_1_taal\\0_row5_col9_x400_y144_w10_h17.png'], ['Analysis\\yaman_1_taal\\0_row5_col10_x4

In [14]:
import os
import cv2
import numpy as np
from skimage.morphology import binary_erosion, binary_dilation, square
from skimage import img_as_ubyte

# Folder that receives every segmented image written by this cell.
# It is created up front; exist_ok=True makes re-running the cell harmless.
segmented_folder_path = os.path.normpath('Analysis/yaman_1_taal_segmented')
os.makedirs(segmented_folder_path, exist_ok=True)

# Function to preprocess an image
def preprocess_image(image):
    """Convert a BGR image to an inverted binary mask.

    Uses a Gaussian adaptive threshold (block size 21, offset 5) on the
    grayscale image. The threshold is inverted, so pixels darker than their
    local threshold map to 255 and all others to 0.
    """
    as_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    mask = cv2.adaptiveThreshold(
        as_gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        21,
        5,
    )
    return mask

# Function to separate articulation in an image
def separate_articulation(image):
    """Cut away an articulation-like contour and everything below it.

    Only the first external contour whose bounding box is wide and flat
    (``10 < h < 21`` and ``w > 25``) is considered.

    Returns:
    - (upper_part, True) when that contour leaves at least one pixel row
      above it; upper_part is the image rows above the contour's top edge.
    - (image, False) otherwise, including when the first matching contour
      touches the top edge.
    """
    binary = preprocess_image(image)
    outlines, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Lazily walk the bounding boxes and stop at the first wide, flat one
    boxes = (cv2.boundingRect(outline) for outline in outlines)
    first_bar = next((box for box in boxes if 10 < box[3] < 21 and box[2] > 25), None)

    if first_bar is not None:
        above = image[:first_bar[1], :]
        if above.shape[0] > 0:
            return above, True

    return image, False

# Function to segment a word into multiple images
def segment_image(img):
    """Split a word image into vertical strips at low-ink columns.

    Steps: blur and binarise (fixed threshold 150, inverted), dilate with a
    2x2 footprint, erode with a 3x3 footprint, then sum each column. Columns
    whose sum is below 15% of the maximum are "valleys". Valleys are walked
    in pairs: when two neighbours are fewer than 13 columns apart the right
    one is kept and both are consumed, otherwise the left one is kept. The
    image is then cut at each kept column that lies more than 10 columns
    after the previous cut.

    Returns a list of column slices of ``img``; the last slice always runs
    to the right edge.
    """
    min_distance = 13

    # Binarise: blurred grayscale -> inverted fixed threshold
    smoothed = cv2.GaussianBlur(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), (5, 5), 0)
    _, ink_mask = cv2.threshold(smoothed, 150, 255, cv2.THRESH_BINARY_INV)

    # Fill small gaps, then erode to pull touching components apart
    filled = binary_dilation(ink_mask, footprint=np.ones((2, 2), dtype=bool))
    thinned = img_as_ubyte(binary_erosion(filled, footprint=square(3)))

    # Column-wise ink profile and the columns that fall below 15% of its peak
    column_ink = np.sum(thinned, axis=0)
    cutoff = 0.15 * np.max(column_ink)
    valleys = np.flatnonzero(column_ink < cutoff).tolist()

    # Pairwise walk over the valleys; a set also removes duplicates
    chosen = set()
    last = len(valleys) - 1
    pos = 0
    while pos < last:
        here, after = valleys[pos], valleys[pos + 1]
        if after - here < min_distance:
            chosen.add(after)
            pos += 2  # The right neighbour has been consumed as well
        else:
            chosen.add(here)
            pos += 1
    if pos == last:
        chosen.add(valleys[pos])  # Trailing valley not consumed by a pair

    # Slice the image, skipping cuts that would give a strip of 10 columns or fewer
    strips = []
    left_edge = 0
    for cut in sorted(chosen):
        if cut - left_edge > 10:
            strips.append(img[:, left_edge:cut])
            left_edge = cut
    strips.append(img[:, left_edge:])

    return strips

# Function to merge segments based on height-to-width ratio
def merge_segments(segments):
    """Join strips that are too narrow to stand on their own.

    A strip is "narrow" when its height/width ratio exceeds a limit. The
    limit is 1.8, or 2.9 when the current strip is taller than 35 pixels.
    Rules, checked in this order for each strip:
    - a narrow first strip is joined with the strip after it;
    - a strip followed by one or more narrow strips absorbs all of them;
    - any other narrow strip is appended to the previous output strip;
    - everything else is kept as is.

    Returns the list of resulting images, joined side by side with
    ``np.hstack``.
    """
    def is_narrow(strip, limit):
        return strip.shape[0] / strip.shape[1] > limit

    merged = []
    total = len(segments)
    idx = 0
    while idx < total:
        piece = segments[idx]
        limit = 2.9 if piece.shape[0] > 35 else 1.8
        piece_is_narrow = is_narrow(piece, limit)
        has_next = idx + 1 < total

        # Narrow leading strip: join it with its right neighbour, if any
        if idx == 0 and piece_is_narrow:
            if has_next:
                piece = np.hstack((piece, segments[idx + 1]))
                idx += 2
            else:
                idx += 1
            merged.append(piece)
            continue

        # Absorb the whole run of narrow strips that follows this one
        if has_next and is_narrow(segments[idx + 1], limit):
            while idx + 1 < total and is_narrow(segments[idx + 1], limit):
                piece = np.hstack((piece, segments[idx + 1]))
                idx += 1
            merged.append(piece)
            idx += 1
            continue

        # Remaining narrow strip: attach it to the previous output strip
        if piece_is_narrow and merged:
            merged[-1] = np.hstack((merged[-1], piece))
        else:
            merged.append(piece)
        idx += 1

    return merged

# Function to process a single image, segment, and save the results in the provided folder
def segment_word(image_path, output_folder):
    """Segment one word image and write the pieces into ``output_folder``.

    The image is split with ``segment_image``, the strips are recombined with
    ``merge_segments``, and each result is written as
    ``<original base name>_seg<k>.png`` with k starting at 1.

    Returns the list of written paths in order, or an empty list when the
    image cannot be read.
    """
    img = cv2.imread(image_path)
    if img is None:
        return []

    final_images = merge_segments(segment_image(img))

    # Base name without extension, reused for every output file
    image_base_name, _ = os.path.splitext(os.path.basename(image_path))

    segmented_paths = []
    for i, piece in enumerate(final_images):
        target = os.path.normpath(os.path.join(output_folder, f'{image_base_name}_seg{i+1}.png'))
        cv2.imwrite(target, piece)
        segmented_paths.append(target)

    return segmented_paths

# Helper: separate articulation for every entry whose articulation flag is still False
def _apply_articulation_segmentation(image_lists, articulation_checks):
    """Run ``separate_articulation`` on the un-flagged entries of one row.

    For each non-empty entry whose flag is False, the first image path is
    loaded and passed to ``separate_articulation`` (defined elsewhere in
    this file). When that call reports a segmentation, the returned image is
    written into ``segmented_folder_path`` (a module-level variable defined
    elsewhere) under the original file name. Only after a successful write
    is the flag set to True and the entry replaced by the new path.

    Both arguments are modified in place.
    """
    for i, image_paths in enumerate(image_lists):
        if articulation_checks[i] or not image_paths:
            continue

        image_path = image_paths[0]
        image = cv2.imread(image_path)
        if image is None:  # unreadable file: leave the entry as it is
            continue

        segmented_image, is_segmented = separate_articulation(image)
        if not is_segmented:
            continue

        # Save the segmented image with the original name
        seg_image_path = os.path.normpath(
            os.path.join(segmented_folder_path, os.path.basename(image_path))
        )
        # cv2.imwrite returns False when the file could not be written. In
        # that case keep the old path and flag, so the entry never points at
        # a file that does not exist.
        if not cv2.imwrite(seg_image_path, segmented_image):
            continue

        articulation_checks[i] = True
        image_lists[i] = [seg_image_path]


# Helper: word-segment every entry whose articulation flag is True
def _apply_word_segmentation(image_lists, articulation_checks):
    """Replace flagged entries of one row with their word-segment paths.

    For each non-empty entry whose flag is True, ``segment_word`` is called
    on the first image path with ``segmented_folder_path`` as output folder.
    The entry is replaced only when at least one segment path comes back.

    ``image_lists`` is modified in place.
    """
    for i, image_paths in enumerate(image_lists):
        if not articulation_checks[i] or not image_paths:
            continue

        segmented_paths = segment_word(image_paths[0], segmented_folder_path)
        if segmented_paths:
            image_lists[i] = segmented_paths


# Function to update lists based on segmentation
def update_lists_with_segmentation(subgroup_results):
    """Apply articulation and word segmentation to every subgroup.

    Args:
        subgroup_results: Mapping whose values are dicts holding the keys
            ``'swar_list'``, ``'lyrics_list'``, ``'swar_articulation_checks'``
            and ``'lyrics_articulation_checks'``. Each list entry is itself a
            list of image paths (possibly empty); each checks list is indexed
            in parallel with its image list.

    Returns:
        None. The lists stored in ``subgroup_results`` are updated in place,
        so no write-back into the mapping is needed.
    """
    for results in subgroup_results.values():
        swar_list = results['swar_list']
        lyrics_list = results['lyrics_list']
        swar_articulation_checks = results['swar_articulation_checks']
        lyrics_articulation_checks = results['lyrics_articulation_checks']

        # Articulation pass runs first: it can turn flags to True, which the
        # word-segmentation pass below then picks up.
        _apply_articulation_segmentation(swar_list, swar_articulation_checks)
        _apply_articulation_segmentation(lyrics_list, lyrics_articulation_checks)

        _apply_word_segmentation(swar_list, swar_articulation_checks)
        _apply_word_segmentation(lyrics_list, lyrics_articulation_checks)

# Run the segmentation pass; the lists inside subgroup_results are updated by the call
update_lists_with_segmentation(subgroup_results)

# Report every subgroup: one labelled line per list, closed by an 80-dash rule
for subgroup_range, results in subgroup_results.items():
    report_lines = [
        f"Subgroup Range: {subgroup_range}",
        f"Meend List: {results['meend_list']}",
        f"Kann Swar List: {results['kann_swar_list']}",
        f"Swar List: {results['swar_list']}",
        f"Lyrics List: {results['lyrics_list']}",
        f"Swar Articulation Checks: {results['swar_articulation_checks']}",
        f"Lyrics Articulation Checks: {results['lyrics_articulation_checks']}",
        "-" * 80,
    ]
    print("\n".join(report_lines))

Subgroup Range: (4, 6)
Meend List: ['', '', '', '', '', '', '', '', '', '', '', '']
Kann Swar List: [['Analysis\\yaman_1_taal\\0_row4_col1_x102_y134_w8_h10.png'], [], [], ['Analysis\\yaman_1_taal\\0_row4_col4_x212_y134_w11_h9.png'], ['Analysis\\yaman_1_taal\\0_row4_col5_x249_y133_w10_h10.png'], [], ['Analysis\\yaman_1_taal\\0_row4_col7_x322_y130_w9_h12.png'], [], [], [], [], []]
Swar List: [['Analysis\\yaman_1_taal\\0_row5_col1_x104_y147_w16_h16.png'], ['Analysis\\yaman_1_taal\\0_row5_col2_x145_y147_w16_h16.png'], ['Analysis\\yaman_1_taal\\0_row5_col3_x177_y146_w16_h16.png'], ['Analysis\\yaman_1_taal\\0_row5_col4_x216_y147_w15_h16.png'], ['Analysis\\yaman_1_taal\\0_row5_col5_x254_y143_w10_h20.png'], ['Analysis\\yaman_1_taal\\0_row5_col6_x295_y149_w9_h13.png'], ['Analysis\\yaman_1_taal\\0_row5_col7_x327_y150_w10_h12.png'], ['Analysis\\yaman_1_taal\\0_row5_col8_x364_y150_w9_h12.png'], ['Analysis\\yaman_1_taal\\0_row5_col9_x400_y144_w10_h17.png'], ['Analysis\\yaman_1_taal\\0_row5_col10_x4

In [15]:
import os
import cv2
import numpy as np
from tensorflow.keras.models import load_model

# Load the trained model
# The relative file name is resolved against the current working directory.
# The loaded object is kept at module level because predict_class() reads the
# global `model` on every call.
model = load_model('cnn_recognizer_music_15_v1.h5')

# Read an image file and shape it as a one-item batch for the model
def preprocess_image(image_path):
    """Load ``image_path`` as grayscale and return it as a 1 x 32 x 32 batch.

    The pixel values are passed through unchanged; only the size and the
    leading batch axis are adjusted.

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if gray is None:
        raise ValueError(f"Unable to read image at path: {image_path}")

    # Match the model's input size, then prepend the batch dimension
    resized = cv2.resize(gray, (32, 32))
    return resized[np.newaxis, ...]

# Run the model on one image file and report its best-scoring class
def predict_class(image_path):
    """Return ``(class index, score)`` of the top prediction for one image.

    The image is prepared with ``preprocess_image`` and fed to the
    module-level ``model``. The index is the position of the largest value
    in the model output for the single batch item, and the score is that
    largest value.
    """
    batch = preprocess_image(image_path)
    scores = model.predict(batch)

    # The batch holds exactly one image, so take element 0 of each reduction
    best_index = np.argmax(scores, axis=1)[0]
    best_score = np.max(scores, axis=1)[0]
    return best_index, best_score

# Define the classes
# Label strings for the recognizer output: generate_predicted_lists() looks up
# classes[predicted_class_index], so position i in this list is the name
# reported for predicted index i and the order must not be changed.
# "\u0951" is the Devanagari stress sign udatta and "\u093C" is the Devanagari
# nukta; presumably they mark upper- and lower-octave notes here -- TODO confirm.
# NOTE(review): the last entry of the second row ends in an ASCII apostrophe
# instead of "\u0951" -- looks deliberate (the base label already carries that
# mark), but confirm against the training labels.
classes = ["सा", "रे", "ग", "म", "प", "ध", "नि", "रे॒", "ग॒", "ध॒", "नि॒", "म॑", 
           "सा\u0951", "रे\u0951", "ग\u0951", "म\u0951", "प\u0951", "ध\u0951", "रे॒\u0951", "ग॒\u0951", "म॑'", 
           "म\u093C", "म॑\u093C", "प\u093C", "ध॒\u093C", "ध\u093C", "नि॒\u093C", "नि\u093C", 
           ")", ",", "-", "४", "O", "(", "^^", "X", "३", "२", "|", "<_>"]

# Build, per subgroup, lists of predicted class names mirroring the image-path lists
def generate_predicted_lists(subgroup_results):
    """Predict a class name for every image path in each subgroup.

    Args:
        subgroup_results: Mapping whose values are dicts with the keys
            ``'swar_list'`` and ``'kann_swar_list'``; each of those is a list
            of (possibly empty) lists of image paths.

    Returns:
        A dict with the same keys as ``subgroup_results``. Each value holds
        ``'predicted_swar_list'`` and ``'predicted_kann_swar_list'``, shaped
        like the corresponding input list but with every image path replaced
        by its predicted class name. Empty entries stay empty lists.
    """

    def _name_for(image_path):
        # Only the class index is needed here; the probability is discarded
        class_index, _ = predict_class(image_path)
        return classes[class_index]

    def _names_per_entry(path_groups):
        # One output list per input entry, keeping positions aligned
        return [
            [_name_for(path) for path in group] if group else []
            for group in path_groups
        ]

    predicted_results = {}
    for subgroup_range, results in subgroup_results.items():
        # Swar entries are predicted before kann swar entries
        swar_names = _names_per_entry(results['swar_list'])
        kann_swar_names = _names_per_entry(results['kann_swar_list'])
        predicted_results[subgroup_range] = {
            'predicted_swar_list': swar_names,
            'predicted_kann_swar_list': kann_swar_names,
        }

    return predicted_results

# Predict class names for the image paths held in subgroup_results
# (the dictionary built by the earlier cells)
predicted_results = generate_predicted_lists(subgroup_results)

# Report the predictions of every subgroup, closed by an 80-dash rule
for subgroup_range, results in predicted_results.items():
    report_lines = [
        f"Subgroup Range: {subgroup_range}",
        f"Predicted Swar List: {results['predicted_swar_list']}",
        f"Predicted Kann Swar List: {results['predicted_kann_swar_list']}",
        "-" * 80,
    ]
    print("\n".join(report_lines))



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 557ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3