In [269]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
from sklearn import linear_model
from scipy.signal import find_peaks
from scipy.ndimage import gaussian_filter1d
import csv


In [270]:
def load_image(image_path):
    """Read an image from disk and return it in RGB and grayscale form.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A tuple ``(im_rgb, im_gray)``: the image converted from OpenCV's BGR
        channel order to RGB, and a grayscale image derived from that RGB one.

    Raises:
        FileNotFoundError: If OpenCV could not read the file. ``cv2.imread``
            reports failure by returning ``None`` instead of raising, which
            would otherwise surface as an opaque error inside ``cvtColor``.
    """
    im_bgr = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if im_bgr is None:
        raise FileNotFoundError(f"Could not read image: {image_path!r}")
    im_rgb = cv2.cvtColor(im_bgr, cv2.COLOR_BGR2RGB)
    im_gray = cv2.cvtColor(im_rgb, cv2.COLOR_RGB2GRAY)
    return im_rgb, im_gray

def apply_thresholding(im_gray, threshold):
    """Binarise a grayscale image with a fixed threshold.

    The image is passed through ``cv2.threshold`` in ``THRESH_BINARY`` mode
    with a maximum value of 255. ``cv2.threshold`` returns a pair; only its
    second element (the thresholded image) is handed back to the caller.
    """
    threshold_result = cv2.threshold(im_gray, threshold, 255, cv2.THRESH_BINARY)
    return threshold_result[1]

def detect_and_draw_lines(sobel_image, original_image, orientation="vertical"):
    """Run a probabilistic Hough transform and overlay near-axis-aligned segments.

    Args:
        sobel_image: Edge map handed directly to ``cv2.HoughLinesP``.
        original_image: Image to annotate; it is copied, never modified.
        orientation: ``"vertical"`` draws, in colour (0, 255, 0), the segments
            whose end points differ by less than 10 px in x. ``"horizontal"``
            draws, in colour (255, 0, 0), the segments whose end points differ
            by less than 10 px in y. Any other value draws nothing.

    Returns:
        ``(result_image, lines)`` where ``result_image`` is the annotated copy
        and ``lines`` is the unfiltered value returned by ``cv2.HoughLinesP``
        (``None`` when no segment was detected).
    """
    hough_params = dict(
        rho=1,                # distance resolution in pixels
        theta=np.pi / 180,    # angle resolution in radians
        threshold=25,         # minimum number of votes for a segment
        minLineLength=10,     # shortest segment that is reported
        maxLineGap=10,        # largest gap bridged within one segment
    )
    lines = cv2.HoughLinesP(sobel_image, **hough_params)

    result_image = original_image.copy()
    segments = [] if lines is None else [entry[0] for entry in lines]
    for x1, y1, x2, y2 in segments:
        if orientation == "vertical":
            # Nearly equal x coordinates -> the segment is (almost) vertical.
            offset, colour = abs(x1 - x2), (0, 255, 0)
        elif orientation == "horizontal":
            # Nearly equal y coordinates -> the segment is (almost) horizontal.
            offset, colour = abs(y1 - y2), (255, 0, 0)
        else:
            continue
        if offset < 10:
            cv2.line(result_image, (x1, y1), (x2, y2), colour, 2)

    return result_image, lines

def plot_processing_log(processing_log):
    """Show every ``(title, image)`` entry of the log as its own figure.

    Each image is rendered with the gray colormap, titled with the entry's
    title, and displayed with the axes hidden.
    """
    for entry in processing_log:
        title, image = entry
        plt.imshow(image, cmap='gray')
        plt.title(title)
        plt.axis('off')
        plt.show()

def process_image(im_path):
    """Load an image, compute its Sobel edge maps and detect the keyboard layout.

    Args:
        im_path: Path of the image, forwarded to ``load_image``.

    Returns:
        ``(top_border, bottom_border, white_keys, processing_log)``: the three
        values returned by ``detect_keys`` followed by the list of
        ``(title, image)`` tuples collected along the way.
    """
    im_rgb, im_gray = load_image(im_path)

    def _abs_sobel(dx, dy):
        # 3x3 Sobel derivative in float64, folded back to 8-bit magnitudes.
        gradient = cv2.Sobel(im_gray, cv2.CV_64F, dx=dx, dy=dy, ksize=3)
        return cv2.convertScaleAbs(gradient)

    vertical_edges = _abs_sobel(1, 0)    # derivative along x
    horizontal_edges = _abs_sobel(0, 1)  # derivative along y

    processing_log = [
        ('Original Image', im_rgb),
        ('Vertical Edge Enhancement (Sobel)', vertical_edges),
        ('Horizontal Edge Enhancement (Sobel)', horizontal_edges),
    ]

    detected = detect_keys(vertical_edges, horizontal_edges, processing_log, im_rgb)
    top_border, bottom_border, white_keys = detected
    return top_border, bottom_border, white_keys, processing_log

def detect_keys(vertical_img, horizontal_img, processing_log, im_rgb):
    """Estimate the keyboard's horizontal borders and white-key boundary columns.

    Horizontal structure is read from the row-wise sums of ``horizontal_img``.
    Vertical structure is read from the column-wise sums of ``vertical_img``,
    restricted to the rows between the last two peaks of the smoothed row
    profile (presumably the strip below the black keys where only white keys
    are visible -- confirm against the dataset).

    Side effects: three diagnostic matplotlib figures are shown, the first and
    last smoothed row peaks are printed, and a ``('Detected Peaks', image)``
    entry is appended to ``processing_log``.

    Args:
        vertical_img: 2-D vertical-edge response image.
        horizontal_img: 2-D horizontal-edge response image.
        processing_log: List of ``(title, image)`` tuples to append to.
        im_rgb: Colour image used as background for the peak markers. It is
            copied before drawing so the caller's array (which may already be
            stored in ``processing_log``) is left untouched.

    Returns:
        ``(top_border, bottom_row, white_keys)`` where ``top_border`` is the
        first peak of the unsmoothed row profile, ``bottom_row`` is the last
        peak of the smoothed row profile, and ``white_keys`` is a sorted array
        holding the x positions of the (at most 53) strongest column peaks.

    Raises:
        ValueError: If the smoothed row profile has fewer than two peaks, or
            the unsmoothed row profile has none, above the height threshold.
    """
    # Upper bound on the number of column peaks kept. Presumably the 53 edges
    # that delimit the 52 white keys of a full-size keyboard -- TODO confirm.
    max_boundaries = 53

    # --- Horizontal borders from the heavily smoothed row profile ---
    row_profile = np.sum(horizontal_img, axis=1)
    row_profile_smooth = gaussian_filter1d(row_profile, sigma=15)
    smooth_peaks, _ = find_peaks(row_profile_smooth, height=1000, distance=5)
    plt.plot(row_profile_smooth)
    plt.plot(smooth_peaks, row_profile_smooth[smooth_peaks], "x")
    plt.show()
    # Checked after plotting so the profile is still visible when this fails.
    if len(smooth_peaks) < 2:
        raise ValueError(
            "Expected at least 2 horizontal-edge peaks in the smoothed row "
            f"profile, found {len(smooth_peaks)}"
        )
    top_row = smooth_peaks[0]
    bottom_row = smooth_peaks[-1]
    bottom_of_black_keys = smooth_peaks[-2]
    print(top_row, bottom_row)

    # --- Vertical boundaries: large column sums mark vertical edges ---
    column_profile = np.sum(vertical_img[bottom_of_black_keys:bottom_row, :], axis=0)
    column_profile_smooth = gaussian_filter1d(column_profile, sigma=1)
    candidates, properties = find_peaks(column_profile_smooth, height=0, distance=20)
    strongest_first = np.argsort(properties['peak_heights'])[::-1]
    # Keep the strongest peaks, then restore left-to-right order.
    white_keys = np.sort(candidates[strongest_first[:max_boundaries]])
    plt.plot(column_profile_smooth)
    plt.plot(white_keys, column_profile_smooth[white_keys], "x")
    plt.show()

    # Mark the detected columns with red dots on a copy of the image; drawing
    # on ``im_rgb`` itself would also alter any log entry sharing that array.
    annotated = im_rgb.copy()
    marker_row = annotated.shape[0] // 2
    for peak_x in white_keys:
        cv2.circle(annotated, (int(peak_x), marker_row), 5, (255, 0, 0), -1)
    processing_log.append(('Detected Peaks', annotated))

    # --- Top border from the unsmoothed row profile ---
    raw_peaks, _ = find_peaks(row_profile, height=1000, distance=5)
    # Plot the profile the peaks were actually detected on, so the markers sit
    # on the curve.
    plt.plot(row_profile)
    plt.title("Horizontal Borders")
    plt.plot(raw_peaks, row_profile[raw_peaks], "x")
    plt.show()
    if len(raw_peaks) == 0:
        raise ValueError("No horizontal-edge peak found in the unsmoothed row profile")

    top_border = raw_peaks[0]
    return top_border, bottom_row, white_keys



In [None]:
import pandas as pd
import json

# Folders holding the hand-free reference frames and the exported border files.
FRAMES_DIR = 'dataset/MIDItest/miditest_videos/no_hand_frames'
BORDERS_DIR = 'dataset/MIDItest/miditest_videos/borders'
# Sub-folder indices of the frames to process.
FRAME_IDS = [5, 10, 21, 23, 24, 25, 26, 27]

# open(..., 'w') does not create missing directories, so make sure the output
# folder exists before the first export.
os.makedirs(BORDERS_DIR, exist_ok=True)

for i in FRAME_IDS:
    path = f'{FRAMES_DIR}/{i}/im.jpg'
    top_border, bottom_border, white_keys, processing_log = process_image(path)
    plot_processing_log(processing_log)
    print(len(white_keys))
    print(white_keys)

    # Pair each boundary with its right-hand neighbour: one (left, right) x
    # range per gap between consecutive detected boundaries.
    white_keys_locations = [
        (int(left), int(right)) for left, right in zip(white_keys, white_keys[1:])
    ]
    print(white_keys_locations)

    # Draw a vertical line at every detected boundary (including the last
    # one), spanning from the top border to the bottom border, on a freshly
    # loaded copy of the frame.
    im_rgb, _ = load_image(path)
    for boundary_x in white_keys:
        cv2.line(
            im_rgb,
            (int(boundary_x), int(top_border)),
            (int(boundary_x), int(bottom_border)),
            (255, 0, 0),
            2,
        )
    plt.imshow(im_rgb)
    plt.show()

    # Plain Python ints/lists only, so the dictionary is JSON-serialisable.
    border_information = {
        "top_border": int(top_border),
        "bottom_border": int(bottom_border),
        "white_keys": white_keys.tolist(),
        "white_keys_locations": white_keys_locations
    }

    output_path = f'{BORDERS_DIR}/{i}.json'
    with open(output_path, 'w') as json_file:
        json.dump(border_information, json_file)
