In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import glob
import csv
import os

In [13]:
def split_video_into_frames(video_path, output_folder):
    """Save every frame of a video as a numbered JPEG in ``output_folder``.

    Frames are written as ``frame_01.jpg``, ``frame_02.jpg``, ... using a
    1-based counter zero-padded to at least two digits.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        output_folder: Directory that receives the JPEG files; it is created
            (including parents) when missing.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        # Without this check an unreadable video would silently produce an
        # empty folder and still report success.
        if not cap.isOpened():
            print(f"Error: Could not open video {video_path}")
            return

        os.makedirs(output_folder, exist_ok=True)

        frame_count = 0
        while True:
            ret, frame = cap.read()

            # ``ret`` is False once no further frame can be read.
            if not ret:
                break

            frame_count += 1
            frame_filename = f"{output_folder}/frame_{frame_count:02d}.jpg"
            # cv2.imwrite signals failure through its return value.
            if not cv2.imwrite(frame_filename, frame):
                print(f"Error: Could not write {frame_filename}")
    finally:
        # Release the capture handle even if reading or writing raised.
        cap.release()

    print(f"Frames extracted and saved to {output_folder}")


def merge_close_contours(contours, distance_threshold):
    """Greedily group contours whose centroids lie close together.

    Contours are visited in order. Each contour that has not been absorbed
    yet starts a new group and pulls in every other not-yet-absorbed contour
    whose centroid is closer than ``distance_threshold`` to the seed's
    centroid. Distances are always measured from the seed, so grouping is
    not transitive.

    Args:
        contours: Sequence of point arrays; the centroid is taken as
            ``np.mean(contour, axis=0)[0]``.
        distance_threshold: Strict upper bound on the centroid distance for
            two contours to share a group.

    Returns:
        A list with one ``np.vstack``-ed point array per group.
    """
    centroids = [np.mean(points, axis=0)[0] for points in contours]
    consumed = [False] * len(contours)
    groups = []

    for seed_idx, seed_centroid in enumerate(centroids):
        if consumed[seed_idx]:
            continue
        consumed[seed_idx] = True
        members = [contours[seed_idx]]

        for other_idx, other_centroid in enumerate(centroids):
            if consumed[other_idx]:
                continue
            gap = np.linalg.norm(seed_centroid - other_centroid)
            if gap < distance_threshold:
                consumed[other_idx] = True
                members.append(contours[other_idx])

        groups.append(np.vstack(members))

    return groups

def numeric_sort(file_path):
    """Sort key: the integer frame number embedded in a frame file path.

    Takes the text after the last underscore, keeps what precedes its first
    dot, and converts that to ``int`` (e.g. ``'.../frame_01.jpg'`` -> ``1``).
    """
    after_underscore = file_path.rsplit('_', 1)[-1]
    digits = after_underscore.partition('.')[0]
    return int(digits)


def _track_green_dots(path_to_frames, output_csv_path, min_max_value, thresh,
                      output_annotation, subtract_background):
    """Detect, track and measure bright green dots in a folder of frames.

    Shared implementation behind ``calculate_mean_pixels_with_tracking_sub``
    and ``calculate_mean_pixels_with_tracking_no_sub``.

    For every readable ``*.jpg`` in ``path_to_frames`` (ordered with
    ``numeric_sort``) the green channel is thresholded, contours with an area
    above 5 are kept and merged with ``merge_close_contours``, each merged
    contour is given a dot ID, one CSV row per dot is appended to
    ``output_csv_path`` and an annotated copy of the frame is saved under
    ``<output_annotation>/annotated_frames_thresh_<thresh>/<stage>/<video>``.

    CSV row layout: video number, stage, frame index, dot ID, bounding-box x,
    bounding-box y, mean green value of the bounding box, contour area,
    normalised mean, number of dot pixels in the bounding box, sum of green
    values in the bounding box, and the mean B, G, R values of all pixels
    outside the dots.

    Args:
        path_to_frames: Folder holding the frames; its last two path
            components are used as ``<stage>/<video>``.
        output_csv_path: CSV file opened in append mode.
        min_max_value: ``(min, max)`` pair used to normalise the mean value.
        thresh: Threshold (anything ``int()`` accepts) for the binary image
            and, when ``subtract_background`` is set, for the red mask.
        output_annotation: Root folder for the annotated frames.
        subtract_background: When true, the mean green value of the pixels
            whose red value exceeds ``thresh`` is subtracted from the green
            channel (clipped at 0) before detection and measurement.
    """
    threshold_value = int(thresh)
    distance_threshold = 20   # centroid distance for merging contours
    min_contour_area = 5      # smaller contours are treated as noise
    same_dot_distance = 50    # max movement for a dot to keep its ID

    jpg_files = glob.glob(path_to_frames + '/*.jpg')
    jpg_files.sort(key=numeric_sort)  # Sort files numerically by frame number

    # normpath makes the stage/video lookup independent of mixed or trailing
    # path separators.
    normalized_path = os.path.normpath(path_to_frames)
    video_num = os.path.basename(normalized_path)
    stage = os.path.basename(os.path.dirname(normalized_path))

    output_folder = os.path.join(
        output_annotation + '/annotated_frames_thresh_' + str(thresh), stage, video_num)
    os.makedirs(output_folder, exist_ok=True)

    # Guard the normalisation against a zero or non-finite range (e.g. the
    # (inf, -inf) pair that results when no dot was found for a stage).
    range_min = min_max_value[0]
    range_span = min_max_value[1] - min_max_value[0]
    can_normalize = bool(np.isfinite(range_span)) and range_span != 0

    next_dot_id = 1
    dot_id_map = {}  # dot ID -> last known centre, persistent across frames

    with open(output_csv_path, 'a', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)

        # enumerate keeps the frame index aligned with the file position even
        # when an unreadable file is skipped.
        for frame_idx, file in enumerate(jpg_files, start=1):
            frame = cv2.imread(file)
            if frame is None:
                print("Error: Could not read the image.")
                continue

            if subtract_background:
                green_channel = frame[:, :, 1]
                red_mask = frame[:, :, 2] > threshold_value
                # With no pixel above the red threshold there is nothing to
                # estimate the background from; subtract nothing instead of NaN.
                avg_green_value = np.mean(green_channel[red_mask]) if red_mask.any() else 0.0
                detection_channel = np.maximum(
                    green_channel.astype(np.float64) - avg_green_value, 0
                ).astype(green_channel.dtype)
                frame[:, :, 1] = detection_channel
            else:
                detection_channel = frame[:, :, 1]

            _, binary_image = cv2.threshold(detection_channel, threshold_value, 255, cv2.THRESH_BINARY)
            contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            filtered_contours = [c for c in contours if cv2.contourArea(c) > min_contour_area]
            merged_contours = merge_close_contours(filtered_contours, distance_threshold)

            # 255 = background, 0 = inside a (filled) dot contour.
            background_mask = np.ones_like(detection_channel, dtype=np.uint8) * 255
            for contour in merged_contours:
                cv2.drawContours(background_mask, [contour], -1, 0, -1)

            # Average pixel value excluding the dots, per colour channel.
            avg_pixel_value_no_dots_b = cv2.mean(frame[:, :, 0], mask=background_mask)[0]
            avg_pixel_value_no_dots_g = cv2.mean(frame[:, :, 1], mask=background_mask)[0]
            avg_pixel_value_no_dots_r = cv2.mean(frame[:, :, 2], mask=background_mask)[0]

            # Draw on a copy so the white outlines and labels of one dot never
            # leak into the measurements of another dot in the same frame.
            annotated = frame.copy()

            new_dot_id_map = {}
            for contour in merged_contours:
                x, y, w, h = cv2.boundingRect(contour)
                roi = frame[y:y+h, x:x+w]
                mean_pixel_value = cv2.mean(roi)[1]
                dot_size = cv2.contourArea(contour)
                if can_normalize:
                    normalized_mean = (mean_pixel_value - range_min) / range_span
                else:
                    normalized_mean = float('nan')
                # Dot pixels are the ones drawn as 0 in the background mask.
                green_pixel_count = int(np.sum(background_mask[y:y+h, x:x+w] == 0))
                pixel_sum = np.sum(roi[:, :, 1])

                center = (x + w // 2, y + h // 2)

                # Reuse the ID of the nearest previously seen dot, skipping IDs
                # already handed out in this frame so that two distinct dots
                # never share an ID within one frame.
                closest_dot_id = None
                closest_distance = float('inf')
                for prev_id, prev_center in dot_id_map.items():
                    if prev_id in new_dot_id_map:
                        continue
                    distance = np.linalg.norm(np.array(center) - np.array(prev_center))
                    if distance < closest_distance:
                        closest_dot_id = prev_id
                        closest_distance = distance

                if closest_distance < same_dot_distance:
                    dot_id_to_use = closest_dot_id
                else:
                    dot_id_to_use = next_dot_id
                    next_dot_id += 1
                new_dot_id_map[dot_id_to_use] = center

                text_position = (center[0] + 30, center[1])
                cv2.drawContours(annotated, [contour], -1, (255, 255, 255), 2)
                cv2.putText(annotated, str(dot_id_to_use), text_position,
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 3)  # Text in white

                csv_writer.writerow([
                    video_num, stage, frame_idx, dot_id_to_use, x, y,
                    mean_pixel_value, dot_size, normalized_mean,
                    green_pixel_count, pixel_sum,
                    avg_pixel_value_no_dots_b, avg_pixel_value_no_dots_g, avg_pixel_value_no_dots_r,
                ])

            dot_id_map.update(new_dot_id_map)

            annotated_frame_path = os.path.join(output_folder, f'frame_{frame_idx:04d}.jpg')
            cv2.imwrite(annotated_frame_path, annotated)

    print('Processing complete.')


# Function to track green dots and calculate properties after background subtraction
def calculate_mean_pixels_with_tracking_sub(path_to_frames, output_csv_path, min_max_value,thresh,output_annotation):
    """Track green dots after subtracting a background estimate.

    The estimate is the mean green value of the pixels whose red value
    exceeds ``thresh``; it is subtracted from the green channel (clipped at 0)
    before thresholding and measuring. See ``_track_green_dots`` for the
    parameters and the CSV layout.
    """
    _track_green_dots(path_to_frames, output_csv_path, min_max_value, thresh,
                      output_annotation, subtract_background=True)


# Function to track green dots and calculate properties on the raw green channel
def calculate_mean_pixels_with_tracking_no_sub(path_to_frames, output_csv_path, min_max_value, thresh,output_annotation):
    """Track green dots on the unmodified green channel.

    See ``_track_green_dots`` for the parameters and the CSV layout.
    """
    _track_green_dots(path_to_frames, output_csv_path, min_max_value, thresh,
                      output_annotation, subtract_background=False)
    

def find_min_max_pixel_value_per_stage(path_to_frames,thresh):
    """Return the darkest and brightest dot brightness found in a frame folder.

    Every ``*.jpg`` in ``path_to_frames`` is thresholded on its green channel;
    for each contour with an area above 5 the mean green value of its
    bounding box is taken as that dot's brightness.

    Args:
        path_to_frames: Folder containing the frame images.
        thresh: Binary threshold (anything ``int()`` accepts).

    Returns:
        ``(min_brightness, max_brightness)``. When no contour qualifies the
        result is ``(inf, -inf)``.
    """
    threshold_value = int(thresh)
    min_contour_area = 5  # smaller contours are treated as noise

    min_brightness = np.inf
    max_brightness = -np.inf

    for file in glob.glob(path_to_frames + '/*.jpg'):
        image = cv2.imread(file)
        # Check before indexing: cv2.imread returns None for unreadable files.
        if image is None:
            print("Error: Could not read the image.")
            continue

        # Threshold the green channel to create a binary image
        _, binary_image = cv2.threshold(image[:, :, 1], threshold_value, 255, cv2.THRESH_BINARY)

        # Find contours in the binary image
        contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        for contour in contours:
            if cv2.contourArea(contour) <= min_contour_area:
                continue
            x, y, w, h = cv2.boundingRect(contour)
            brightness = cv2.mean(image[y:y+h, x:x+w])[1]
            min_brightness = min(min_brightness, brightness)
            max_brightness = max(max_brightness, brightness)

    return (min_brightness,max_brightness)

### Getting frames out of the videos

In [10]:
# Videos are expected at videos/<gene_type>/<stage>_<gene>_<videonum>.avi
# Frames are written to ./data/<gene_type>/<stage>/<videonum>/
for gene_dir in glob.glob("videos/*"):
    # basename/normpath work with either path separator, unlike split('\\').
    gene_name = os.path.basename(os.path.normpath(gene_dir))
    for video_path in glob.glob("videos/"+gene_name+"/*"):
        video_stem = os.path.splitext(os.path.basename(video_path))[0]
        name_parts = video_stem.split('_')
        # First part is the stage, last part is the video number.
        split_video_into_frames(video_path,"./data/"+gene_name+"/"+name_parts[0]+"/"+name_parts[-1])

Frames extracted and saved to ./data/sog_d/nc11/1
Frames extracted and saved to ./data/sog_d/nc12/1
Frames extracted and saved to ./data/sog_d/nc12/2
Frames extracted and saved to ./data/sog_d/nc12/3
Frames extracted and saved to ./data/sog_d/nc12/4
Frames extracted and saved to ./data/sog_d/nc13/1
Frames extracted and saved to ./data/sog_d/nc13/2
Frames extracted and saved to ./data/sog_d/nc13/3
Frames extracted and saved to ./data/sog_d/nc13/4
Frames extracted and saved to ./data/sog_d/nc14/1
Frames extracted and saved to ./data/sog_d/nc14/2
Frames extracted and saved to ./data/sog_d/nc14/3
Frames extracted and saved to ./data/sog_d/nc14/4
Frames extracted and saved to ./data/sog_d/nc14/5
Frames extracted and saved to ./data/sog_d/nc14/6
Frames extracted and saved to ./data/sog_d/nc14/7
Frames extracted and saved to ./data/sog_d_su_h/nc12/1
Frames extracted and saved to ./data/sog_d_su_h/nc12/2
Frames extracted and saved to ./data/sog_d_su_h/nc12/3
Frames extracted and saved to ./dat

### Getting tracking data and dataset with subtraction of red channel

In [18]:
thresh = '55'
gene_type = "sog_d" #change this for different genes
result_root = "result/"+gene_type+"/avg_green_dot_sub"

# List the stage folders once so the min/max values and the tracking loop are
# guaranteed to refer to the same stage in the same order.
stage_folders = sorted(glob.glob("data/"+gene_type+"/*"))

# One (min, max) brightness pair per stage, taken over all videos of the stage.
# NOTE(review): find_min_max_pixel_value_per_stage thresholds the raw green
# channel, while the "sub" tracking measures a background-subtracted channel;
# confirm this range is the intended normalisation reference.
min_max_brightness = []
for stage in stage_folders:
    stage_min = np.inf
    stage_max = -np.inf
    for video_path in glob.glob(stage+"/*"):
        _min,_max = find_min_max_pixel_value_per_stage(video_path,thresh)
        stage_min = min(stage_min, _min)
        stage_max = max(stage_max, _max)
    min_max_brightness.append((stage_min,stage_max))


output_csv_path = result_root+"/annotated_frames_thresh_"+thresh+"_"+gene_type+"_mean_pixel_normalized_tracking.csv"
os.makedirs(result_root+"/annotated_frames_thresh_"+thresh, exist_ok=True)

# Start a fresh CSV containing only the header; the tracking function appends rows.
with open(output_csv_path, 'w', newline='') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(['VideoNum', 'Stage', 'Frame', 'DotID','X', 'Y','MeanPixelValue', 'DotSize', 'NormalizedMean', 'GreenPixelCount', 'PixelSum','AvgPixelValueNoDots_B', 'AvgPixelValueNoDots_G', 'AvgPixelValueNoDots_R'])

for frame_folder, stage_min_max in zip(stage_folders, min_max_brightness):
    for video in sorted(glob.glob(frame_folder+"/*")):
        calculate_mean_pixels_with_tracking_sub(video,output_csv_path,stage_min_max,thresh,result_root)

### Getting tracking data and dataset with no subtraction of red channel

In [17]:
thresh = '55'
gene_type = "sog_d" #change this for different genes
result_root = "result/"+gene_type+"/avg_green_dot_no_sub"

# List the stage folders once so the min/max values and the tracking loop are
# guaranteed to refer to the same stage in the same order.
stage_folders = sorted(glob.glob("data/"+gene_type+"/*"))

# One (min, max) brightness pair per stage, taken over all videos of the stage.
min_max_brightness = []
for stage in stage_folders:
    stage_min = np.inf
    stage_max = -np.inf
    for video_path in glob.glob(stage+"/*"):
        _min,_max = find_min_max_pixel_value_per_stage(video_path,thresh)
        stage_min = min(stage_min, _min)
        stage_max = max(stage_max, _max)
    min_max_brightness.append((stage_min,stage_max))

output_csv_path = result_root+"/annotated_frames_thresh_"+thresh+"_"+gene_type+"_mean_pixel_normalized_pixel_tracking.csv"
os.makedirs(result_root+"/annotated_frames_thresh_"+thresh, exist_ok=True)

# Start a fresh CSV containing only the header; the tracking function appends rows.
with open(output_csv_path, 'w', newline='') as csvfile:
    csv_writer = csv.writer(csvfile)
    csv_writer.writerow(['VideoNum', 'Stage', 'Frame', 'DotID','X', 'Y','MeanPixelValue', 'DotSize', 'NormalizedMean', 'GreenPixelCount', 'PixelSum','AvgPixelValueNoDots_B', 'AvgPixelValueNoDots_G', 'AvgPixelValueNoDots_R'])

for frame_folder, stage_min_max in zip(stage_folders, min_max_brightness):
    for video in sorted(glob.glob(frame_folder+"/*")):
        calculate_mean_pixels_with_tracking_no_sub(video,output_csv_path,stage_min_max,thresh,result_root)