In [1]:
import pandas as pd
import numpy as np
from glob import glob
import matplotlib.pyplot as plt
from PIL import Image
from IPython.display import display, clear_output
from tqdm import tqdm
import time

%matplotlib inline

In [2]:
# Matplotlib styling: render text through LaTeX and use a serif font family.
plt.rcParams.update({'text.usetex': True, 'font.family': 'serif'})

# Widths reported by LaTeX, in TeX points.
fig_width_pt = 244.0   # \the\columnwidth
text_width_pt = 508.0  # \the\textwidth

# Conversion factor and aspect ratio.
inches_per_pt = 1.0 / 72.27               # TeX points -> inches
golden_mean = 0.5 * (np.sqrt(5) - 1.0)    # golden ratio conjugate (~0.618)

# Figure dimensions in inches.
fig_width = fig_width_pt * inches_per_pt * 1.5   # 1.5x column width (~5.06 in)
fig_width_full = text_width_pt * inches_per_pt   # full text width (~7.03 in)
fig_height = fig_width * golden_mean             # ~3.13 in
fig_size = [fig_width, fig_height]

In [3]:
def show_image_and_wait_for_input(image_path):
    """Display an image inline and block until the user enters a valid label.

    The image is opened with Pillow, drawn on a 10x10 inch matplotlib figure
    with the axes hidden, and pushed to the notebook output. The user is then
    prompted repeatedly until the answer (stripped and lower-cased) is one of
    'y', 'n' or 'p'. Once a valid answer is given the cell output is cleared.

    Parameters
    ----------
    image_path : str or path-like
        Path of the image file to show.

    Returns
    -------
    str
        The accepted answer: 'y', 'n' or 'p'.

    Raises
    ------
    FileNotFoundError
        Propagated from ``Image.open`` when ``image_path`` does not exist.
    """
    valid_responses = ('y', 'n', 'p')

    fig, ax = plt.subplots(figsize=[10, 10])
    try:
        # The context manager releases the file handle as soon as the pixel
        # data has been handed to matplotlib; without it one handle leaks for
        # every image shown during a long labelling session.
        with Image.open(image_path) as image:
            ax.imshow(image)
        ax.axis('off')  # Hide axes
        display(fig)  # Show the figure now; input() below would otherwise block rendering

        user_input = None
        while user_input not in valid_responses:
            user_input = input("Is the event real/significant? (y/n/p): ").strip().lower()

        # wait=True defers the clear until the next output arrives, which
        # avoids flicker between consecutive images.
        clear_output(wait=True)
    finally:
        # Close this specific figure even if the prompt is interrupted, so
        # figures do not accumulate in memory.
        plt.close(fig)

    return user_input

In [4]:
# Manual labelling pass: for every detected event listed in the per-target CSV
# files, show its figure and record the user's y/n/p answer next to the
# event's measured quantities.
# NOTE(review): these are absolute, machine-specific paths; consider moving
# them to a configuration cell.
folder = '/Users/zgl12/Modules/Kakapo/detected_events/'
figure_path = '/Users/zgl12/Modules/Kakapo/figures/'

files = sorted(glob(folder + '*.csv'))

figures_length = sorted(glob(figure_path + '*.png'))

print(f'Found {len(figures_length)} lightcurves')

# Brief pause before the progress bar starts (presumably so the message above
# is flushed first).
time.sleep(1)

label_columns = ['figure_path', 'event', 'cluster', 'fwhm', 'roundness', 'snr', 'psfdiff',
                 'correlation', 'poisson_thresh', 'sig_med', 'y/n/p']

# Rows are accumulated in a plain list and turned into a DataFrame once;
# appending to a DataFrame row by row re-allocates on every insertion.
label_rows = []
missing_figures = []

try:
    for file in tqdm(files, total=len(files)):
        df = pd.read_csv(file)
        file_name = file.split('/')[-1]
        # Last '_'-separated token of the file name, without the extension.
        target = file_name.split('_')[-1].split('.csv')[0]
        # Everything after 'events' in the file name, without the extension;
        # the figure files share this suffix.
        end_part = file_name.split('events')[-1].split('.csv')[0]

        for i in range(len(df)):
            line = df.iloc[i]
            image_path = figure_path + 'figures' + end_part + f'_e{int(line["cluster"])}.png'

            try:
                good_or_not = show_image_and_wait_for_input(image_path)
            except FileNotFoundError:
                # A single missing figure should not abort the whole manual
                # session; remember it and report once labelling is done.
                missing_figures.append(image_path)
                continue

            label_rows.append([image_path, target, line['cluster'], line['fwhm'],
                               line['roundness'], line['snr'], line['psfdiff'],
                               line['correlation'], line['poisson_thresh'], line['sig_med'],
                               good_or_not])
finally:
    # Built in ``finally`` so labels entered before an interruption
    # (e.g. KeyboardInterrupt) are still available in ``created_df``.
    created_df = pd.DataFrame(label_rows, columns=label_columns)

    # Reported after the loop because each accepted answer clears the cell
    # output, which would wipe any message printed mid-loop.
    if missing_figures:
        print(f'Skipped {len(missing_figures)} events with no figure file:')
        for missing_path in missing_figures:
            print(f'  {missing_path}')
# c15 t204281574  (original author's note; meaning not evident from this cell)





100%|██████████| 69/69 [1:22:14<00:00, 71.52s/it]


In [13]:
# Persist the collected labels to the working directory, without the row index.
created_df.to_csv(path_or_buf='trained_events.csv', index=False)