In [None]:
import os
import sys
import glob
import numpy as np
import pandas as pd

import astropy.units as u
from astropy.io import fits
from datetime import datetime
from matplotlib import pyplot as plt
from matplotlib.ticker import ScalarFormatter
from matplotlib import cm, colormaps, ticker, colors
from scipy import ndimage, optimize, stats, interpolate
from skimage import morphology, filters, exposure, measure

import sunpy.map
from sunpy.coordinates import frames
from astropy.coordinates import SkyCoord
from sunpy.map.maputils import all_coordinates_from_map, coordinate_is_on_solar_disk


import prepare_data
import detect
import plot_detection
from settings import *

%matplotlib inline
np.set_printoptions(precision=2, suppress=True)

# Data Inspection

## Prepare Data

Load lists of available datetimes of observations in the file system into memory.

For the usage of ACWE maps, the `DATE_RANGE` in settings.py must encompass all available ACWE map files.

In [None]:
# Collect the observation datetimes found on disk for each data product
HE_DATE_LIST = prepare_data.get_fits_date_list(DATE_RANGE, HE_DIR)    # He I
MAG_DATE_LIST = prepare_data.get_fits_date_list(DATE_RANGE, MAG_DIR)  # 6302l magnetograms
EUV_DATE_LIST = prepare_data.get_fits_date_list(DATE_RANGE, EUV_DIR)  # EUV
ACWE_DATE_LIST = prepare_data.get_acwe_date_list(DATE_RANGE)          # ACWE

# The first and last He I dates bound the evaluated range
date_strs = [HE_DATE_LIST[0], HE_DATE_LIST[-1]]
file_date_str = '{}_{}'.format(date_strs[0], date_strs[-1])

# Human-readable title summarizing how many maps span which dates
num_maps = len(HE_DATE_LIST)
datetimes = [
    datetime.strptime(bound_str, DICT_DATE_STR_FORMAT)
    for bound_str in date_strs
]
title_date_strs = [bound.strftime('%m/%d/%Y') for bound in datetimes]
DATE_RANGE_SUPTITLE = (
    f'{num_maps} Maps Evaluated from '
    f'{title_date_strs[0]} to {title_date_strs[-1]}'
)

## Available Data

In [None]:
# Show the He I observation datetimes collected into HE_DATE_LIST
print('Available Datetimes for He I Observations:')
prepare_data.display_dates(HE_DATE_LIST)

In [None]:
# Show the magnetogram datetimes collected into MAG_DATE_LIST
print('Available Datetimes for Magnetograms:')
prepare_data.display_dates(MAG_DATE_LIST)

In [None]:
# Show the EUV observation datetimes collected into EUV_DATE_LIST
print('Available Datetimes for EUV Observations:')
prepare_data.display_dates(EUV_DATE_LIST)

In [None]:
# Show the ACWE confidence-map datetimes collected into ACWE_DATE_LIST
print('Available Datetimes for ACWE Confidence Maps:')
prepare_data.display_dates(ACWE_DATE_LIST)

### Instrumental Darkening Effect

In [None]:
# Load every available He I observation into a list of image arrays
he_data_list = []

for he_date_str in HE_DATE_LIST:
    he_map = prepare_data.get_nso_sunpy_map(HE_DIR + he_date_str + '.fts')
    if not he_map:
        print(f'{he_date_str} He I observation extraction failed.')
        # Nothing was loaded, so there is no `.data` to read: skip this date
        # instead of crashing on the append below.
        continue

    # Rows are flipped vertically before storing, as in the other cells of
    # this notebook that display map data with imshow
    he_data_list.append(np.flipud(he_map.data))

Average of maps

In [None]:
# Pixel-wise mean over all loaded He I maps
avg_he_data = np.array(he_data_list).mean(axis=0)

fig, ax = plt.subplots(figsize=(9, 7))
ax.set_title(DATE_RANGE_SUPTITLE)
# Alternative color limits tried: +/-10 and +/-50
avg_image = ax.imshow(avg_he_data, cmap='RdBu', vmin=-100, vmax=100)
fig.colorbar(avg_image, ax=ax)

SVD

In [None]:
# Stack the maps into one array and flatten each map into a single column
# of the data matrix handed to the SVD
all_he_data = np.array(he_data_list)
num_maps = all_he_data.shape[0]
image_shape = all_he_data.shape[1:]

num_pixels = image_shape[0]*image_shape[1]
data_matrix = all_he_data.reshape((num_maps, num_pixels)).T

# For verification, column k reshapes back into map k:
#   img_idx = 0
#   image = data_matrix[:,img_idx].reshape(image_shape)

In [None]:
# Reduced SVD of the (pixels x maps) data matrix. The columns of U are the
# spatial modes reshaped into images in the next cell; S holds the singular
# values plotted further below.
U, S, Vh = np.linalg.svd(data_matrix, full_matrices=False)

In [None]:
# Half-open range [first, last) of SVD modes to display
mode_range = [0,12]

# Honor the requested start index and never request more modes than the SVD
# produced (U has one column per available mode)
mode_indices = range(mode_range[0], min(mode_range[-1], U.shape[1]))
num_modes = len(mode_indices)

num_cols = 2
image_size = 7
num_rows = max(1, int(np.ceil(num_modes/num_cols)))
fig = plt.figure(figsize=(image_size*num_cols, image_size*num_rows))
axes = {}

for plot_idx, i in enumerate(mode_indices):
    # Subplot slots are numbered from 1 regardless of which mode is shown
    axes[i] = fig.add_subplot(num_rows, num_cols, plot_idx + 1)

    mode = U[:,i].reshape(image_shape)
    # Symmetric color limits at a tenth of the smaller extreme so that
    # structure is not washed out by outlier pixels
    bound = np.min([np.abs(np.min(mode)), np.max(mode)])/10

    axes[i].imshow(-mode, interpolation='none',
                   cmap=plt.cm.RdBu, vmin=-bound, vmax=bound)
    axes[i].set_title(f'Eigen-Sun {i}', fontsize=24)

In [None]:
# Singular value spectrum on a logarithmic axis
fig, ax = plt.subplots(figsize=(6, 4))
ax.semilogy(np.arange(S.size), S)
ax.set_ylabel('Singular Values')
ax.set_xlabel('Rank')

Rotate

In [None]:
# Directory the rotated frames are meant to be written to (also read by the
# video cell below)
out_dir = 'output/Rotated_He/'

for he_date_str in HE_DATE_LIST[:1]:
    # NOTE(review): debugging override -- the loop date is replaced by a
    # fixed observation. Remove this line (and the [:1] slice) to rotate
    # every map in HE_DATE_LIST.
    he_date_str = '2015_06_06__16_08'
    he_map = prepare_data.get_nso_sunpy_map(HE_DIR + he_date_str + '.fts')
    if not he_map:
        print(f'{he_date_str} He I observation extraction failed.')
        # No map to rotate: move on instead of failing on he_map.rotate
        continue

    # rotated_img_file = f'{out_dir}He{he_date_str}.jpg'
    # if os.path.isfile(rotated_img_file):
    #     print((f'He {he_date_str} rotation already exists.'))
    #     continue

    # Use a distinct name: assigning to `datetime` would rebind the imported
    # datetime class for every cell executed afterwards.
    obs_datetime = datetime.strptime(he_date_str, DICT_DATE_STR_FORMAT)
    P_angle = sunpy.coordinates.sun.P(time=obs_datetime)

    # Rotate the map by the solar P angle at the observation time
    he_map_rotated = he_map.rotate(angle=P_angle)
    fig = plt.figure(figsize=(5, 5))
    ax = plot_detection.plot_he_map(fig, (1,1,1), he_map_rotated, he_date_str)
    # ax = plot_detection.plot_he_map(fig, (1, 1, 1), he_map, he_date_str)
    # fig = plt.figure(figsize=(4,4))

    # fig = plt.figure(dpi=300)
    # ax = fig.add_subplot()
    # im = ax.imshow(np.flipud(he_map_rotated.data), vmin=-50, vmax=50, cmap='RdBu',
    #                extent=[0,2700,2700,0])
    # ax.set_title(he_date_str)
    # fig.colorbar(im)

    # plt.savefig(rotated_img_file)
    # plt.close()
    # print(f'{he_date_str} rotated map saved')

In [None]:
# Relies on `out_dir` defined in the rotation cell above.
# NOTE(review): presumably assembles the images found in `out_dir` into a
# video; the savefig call in the rotation cell is currently commented out,
# so no new frames are written there -- confirm before running.
detect.write_video(out_dir, fps=15)

### KPVT Data

In [None]:
# Each KPVT test file is plotted with plot_raw_fits_content and the first
# element of its result is kept for the visualization cells below.

# Medium-resolution daily magnetogram
# https://nispdata.nso.edu/ftp/kpvt/daily/medres/
fits_path = f'{TEST_HE_DIR}011219mag.fits'
kpvt_contents = plot_detection.plot_raw_fits_content(
    fits_path, cmaps=[plt.cm.gray, plt.cm.gray], header_list=['UTDATE'],
)
kpvt_mag_disk_med_res_img = kpvt_contents[0]

# Raw daily He I observation
# https://nispdata.nso.edu/ftp/kpvt/daily/raw/
kpvt_he_fits_path = f'{TEST_HE_DIR}01dec19h.fits'
kpvt_contents = plot_detection.plot_raw_fits_content(
    kpvt_he_fits_path, cmaps=[plt.cm.gray, plt.cm.gray], header_list=['UTDATE'],
)
kpvt_he_disk_img = kpvt_contents[0]

# Raw daily magnetogram
# https://nispdata.nso.edu/ftp/kpvt/daily/raw/
kpvt_mag_fits_path = f'{TEST_HE_DIR}01dec19m.fits'
kpvt_contents = plot_detection.plot_raw_fits_content(
    kpvt_mag_fits_path, cmaps=[plt.cm.gray], header_list=['UTDATE'],
)
kpvt_mag_disk_img = kpvt_contents[0]

# High-resolution He I synoptic map
# https://nispdata.nso.edu/ftp/kpvt/synoptic/hel.hires/
fits_path = f'{TEST_HE_DIR}hB1984.fits'
kpvt_contents = plot_detection.plot_raw_fits_content(
    fits_path, cmaps=[plt.cm.gray], header_list=['DATE9'],
)
kpvt_he_synoptic_img = kpvt_contents[0]

# Low-resolution He I synoptic map
# /ftp/kpvt/synoptic/helium
fits_path = f'{TEST_HE_DIR}h1984.fits'
kpvt_contents = plot_detection.plot_raw_fits_content(
    fits_path, cmaps=[plt.cm.gray], header_list=['DATE'],
)
kpvt_he_synoptic_low_res_img = kpvt_contents[0]

Visualize

In [None]:
# Compare pixel histograms of the KPVT disk image and the synoptic map
kpvt_hist_images = [kpvt_he_disk_img, kpvt_he_synoptic_img]
titles = ['2001_12_19', 'CR1984']
plot_detection.plot_hists(kpvt_hist_images, titles, semilogy=True)

In [None]:
# KPVT He I disk image in grayscale, color limits clipped to [-200, 100]
plt.imshow(kpvt_he_disk_img, vmin=-200, vmax=100, cmap='gray')

In [None]:
# KPVT magnetogram disk image in grayscale, color limits clipped to +/-50
plt.imshow(kpvt_mag_disk_img, vmin=-50, vmax=50, cmap='gray')

In [None]:
# Low-resolution KPVT He I synoptic map in grayscale, limits [-300, 100]
plt.imshow(kpvt_he_synoptic_low_res_img, vmin=-300, vmax=100, cmap='gray')

### VSM Data

In [None]:
# Each VSM file is plotted with plot_raw_fits_content and the first element
# of its result is kept for the visualization cells below.

# 2009 disk observation
fits_path = f'{HE_DIR}2009_10_21__17_50.fts'
vsm_contents = plot_detection.plot_raw_fits_content(
    fits_path, cmaps=[plt.cm.gray, plt.cm.afmhot], header_list=['DATE-OBS'],
)
rockwell_he_disk_img = vsm_contents[0]

# 2011 disk observation
fits_path = f'{HE_DIR}2011_03_28__17_35.fts'
vsm_contents = plot_detection.plot_raw_fits_content(
    fits_path, cmaps=[plt.cm.gray, plt.cm.afmhot], header_list=['DATE-OBS'],
)
sarnoff_2011_he_disk_img = vsm_contents[0]

# 2015 disk observation
fits_path = f'{HE_DIR}2015_03_31__18_13.fts'
vsm_contents = plot_detection.plot_raw_fits_content(
    fits_path, cmaps=[plt.cm.gray, plt.cm.afmhot], header_list=['DATE-OBS'],
)
sarnoff_2015_he_disk_img = vsm_contents[0]

# 2015 synoptic map
fits_path = f'{TEST_HE_DIR}kbv2g150410t1408c2162_000_int-mas_dim-900.fits'
vsm_contents = plot_detection.plot_raw_fits_content(
    fits_path, cmaps=[plt.cm.gray], header_list=['DATE-OBS'],
)
sarnoff_2015_he_synoptic_img = vsm_contents[0]

Visualize

In [None]:
# Compare pixel histograms of the 2015 VSM disk image and the synoptic map
vsm_hist_images = [sarnoff_2015_he_disk_img, sarnoff_2015_he_synoptic_img]
titles = ['2015_03_31__18_13', 'CR2162']
plot_detection.plot_hists(vsm_hist_images, titles, semilogy=True)

In [None]:
# Show the 2009 and 2011 disk observations together
vsm_disk_images = [rockwell_he_disk_img, sarnoff_2011_he_disk_img]
vsm_disk_titles = ['2009_10_21__17_50', '2011_03_28__17_35']
plot_detection.plot_images(
    image_list=vsm_disk_images, title_list=vsm_disk_titles
)

In [None]:
# 2009 VSM He I disk image in grayscale, color limits clipped to +/-100
plt.imshow(rockwell_he_disk_img, vmin=-100, vmax=100, cmap='gray')

# Single Map Data

## Extract Data

Observations and pre-computed segmentations

### Observations

In [None]:
# Catalog of He I observation dates of interest, keyed by a short label.
# Change the key used for `he_date_str` below to inspect another case.
HE_CASE_DATES = {
    # Paper cases
    'kpvt_h_and_h': '2003_07_14__18_07',
    'rockwell_ch_boundary_difference': '2004_11_13__16_31',
    'rockwell_solar_min': '2009_10_22__18_38',
    'sarnoff_poster': '2012_06_11__18_01',
    # Mini-paper COSPAR cases
    'cospar_best': '2015_03_31__18_13',
    'cospar_not_so_bad': '2015_06_06__16_08',
    # Other COSPAR cases
    'cospar_east_axe': '2015_01_04__20_30',
    'cospar_n_s_polar': '2015_01_20__20_25',
    'cospar_center_hook': '2015_02_10__18_45',
    'cospar_qs_blotch': '2015_04_18__17_22',
    # Other cases
    'rockwell_null_detection': '2004_11_20__17_07',
    'rockwell_smiley': '2004_12_03__16_36',
    'sarnoff_pre_updated_fits': '2012_04_01__17_03',
    'sarnoff_updated_fits_start_may': '2012_05_01__18_08',
    'sarnoff_east_limb_hammer': '2012_06_28__16_44',
    'failed_limb_detection': '2012_07_08__19_37',
    'sarnoff_greatest_area_2012_06': '2012_06_09__19_20',
}
he_date_str = HE_CASE_DATES['cospar_not_so_bad']

# Load the selected observation; keep a vertically flipped copy of its data
he_fits_file = DATA_FITS_FORMAT.format(date_str=he_date_str, data_dir=HE_DIR)
he_map = prepare_data.get_nso_sunpy_map(he_fits_file)
he_map_data = np.flipud(he_map.data)

fig = plt.figure(figsize=(4, 4))
ax = plot_detection.plot_he_map(fig, (1, 1, 1), he_map, he_date_str)

In [None]:
# Magnetogram date nearest to the selected He I observation
mag_date_str = prepare_data.get_nearest_date_str(
    MAG_DATE_LIST, selected_date_str=he_date_str
)

# Load the magnetogram for that date
mag_fits_file = DATA_FITS_FORMAT.format(date_str=mag_date_str, data_dir=MAG_DIR)
mag_map = prepare_data.get_nso_sunpy_map(mag_fits_file)

# Display with color limits saturated at +/-50
mag_color_limits = dict(vmin=-50, vmax=50)
fig = plt.figure(figsize=(4, 4))
ax = fig.add_subplot(1, 1, 1, projection=mag_map)
mag_map.plot(axes=ax, **mag_color_limits)

In [None]:
# EUV date nearest to the selected He I observation
euv_date_str = prepare_data.get_nearest_date_str(
    EUV_DATE_LIST, selected_date_str=he_date_str
)

# Load the EUV map for that date
euv_fits_file = DATA_FITS_FORMAT.format(date_str=euv_date_str, data_dir=EUV_DIR)
euv_map = sunpy.map.Map(euv_fits_file)

euv_panel = (1, 1, 1)
fig = plt.figure(figsize=(4, 4))
ax = plot_detection.plot_euv_map(fig, euv_panel, euv_map, euv_date_str)

### Pre-Processed Products

v0.5.1-v1.0

In [None]:
# Load the saved pre-processed map (FITS) and a vertically flipped data copy
pre_process_fits_file = (
    f'{PREPROCESS_MAP_SAVE_DIR}{he_date_str}_pre_processed_map.fits'
)
pre_processed_map = sunpy.map.Map(pre_process_fits_file)
pre_processed_map_data = np.flipud(pre_processed_map.data)

fig = plt.figure(figsize=(7, 4))
ax = fig.add_subplot(1, 1, 1, projection=pre_processed_map)
pre_processed_map.plot(axes=ax)

In [None]:
# Both saved magnetogram products share this file stem
reprojected_stem = f'{ROTATED_MAG_SAVE_DIR}Mag{mag_date_str}_He{he_date_str}'

# Differentially rotated magnetogram
reprojected_fits_file = f'{reprojected_stem}.fits'
reprojected_mag_map = sunpy.map.Map(reprojected_fits_file)

# Saved processed ("smooth") magnetogram
reprojected_smooth_file = f'{reprojected_stem}_smooth.fits'
reprojected_smooth_map = sunpy.map.Map(reprojected_smooth_file)

fig = plt.figure(figsize=(4, 4))
ax = fig.add_subplot(1, 1, 1, projection=reprojected_mag_map)
reprojected_mag_map.plot(axes=ax, vmax=50, vmin=-50)

### Ensemble Map

v1.0+

In [None]:
# Load the saved ensemble map and a vertically flipped copy of its data.
# NOTE(review): a later cell reassigns DETECTION_MAP_SAVE_DIR, so the
# directory read here depends on which cells have already been run.
ensemble_file = DETECTION_MAP_SAVE_DIR + he_date_str + '_ensemble_map.fits'
ensemble_map = sunpy.map.Map(ensemble_file)
ensemble_map_data = np.flipud(ensemble_map.data)

# Title the panel with the map's observation time
ensemble_title = ensemble_map.observer_coordinate.obstime.value

fig = plt.figure(figsize=(6, 4))
ax = fig.add_subplot(1, 1, 1, projection=ensemble_map)
im = ensemble_map.plot(
    axes=ax, title=ensemble_title, cmap='magma', vmin=0, vmax=1
)
# Overlay contours of the smoothed magnetogram
plot_detection.plot_map_contours(ax, reprojected_smooth_map)
fig.colorbar(im)

In [None]:
# TODO: Delete
# NOTE(review): these assignments only affect cells executed after this one.

# Output directory for publication figures
# (alternative: 'paper/paper_plots/2024_08_plots/')
PAPER_PLOT_DIR = 'paper/mini_paper_plots/2024_08_plots/'

# Detection version to read saved maps from
# (alternatives: 'v1_0/', 'v1_0_No_Thresh/', 'v1_1_No_Thresh/')
DETECTION_VERSION_DIR = f'{DETECT_DIR}v1_1/'
DETECTION_MAP_SAVE_DIR = f'{DETECTION_VERSION_DIR}Saved_fits_Files/'

### ACWE & Fused Map

In [None]:
# ACWE map date nearest to the selected He I observation
acwe_date_str = prepare_data.get_nearest_date_str(ACWE_DATE_LIST, he_date_str)
acwe_map = prepare_data.get_acwe_sunpy_map(acwe_date_str, ACWE_DATE_LIST)

# Title the panel with the map's observation time
acwe_title = acwe_map.observer_coordinate.obstime.value

fig = plt.figure(figsize=(6, 4))
ax = fig.add_subplot(1, 1, 1, projection=acwe_map)
im = acwe_map.plot(axes=ax, title=acwe_title, cmap='viridis', vmin=0, vmax=1)
fig.colorbar(im)

In [None]:
# Load the saved fused map for the selected He I observation
fused_file = FUSED_MAP_SAVE_DIR + he_date_str + '_fused_map.fits'
fused_map = sunpy.map.Map(fused_file)

# Title the panel with the map's observation time
fused_title = fused_map.observer_coordinate.obstime.value

fig = plt.figure(figsize=(6, 4))
ax = fig.add_subplot(1, 1, 1, projection=fused_map)
im = fused_map.plot(axes=ax, title=fused_title, cmap='copper', vmin=0, vmax=1)
# Overlay contours of the smoothed magnetogram
plot_detection.plot_map_contours(ax, reprojected_smooth_map)
fig.colorbar(im)

### Past STRIDE-CH Data Products

v0.1-v0.5 Pre-Processed

In [None]:
# Load the saved pre-processed image array (last entry of the .npy file)
# and wrap it in a Sunpy map that reuses the He I map metadata.
# NOTE(review): this rebinds `pre_processed_map` and
# `pre_processed_map_data` from the FITS-based cell above.
pre_process_file = f'{PREPROCESS_NPY_SAVE_DIR}{he_date_str}_pre_processed_map.npy'
saved_arrays = np.load(pre_process_file, allow_pickle=True)
pre_processed_map_data = saved_arrays[-1]
pre_processed_map = sunpy.map.Map(
    np.flipud(pre_processed_map_data), he_map.meta
)

fig = plt.figure(figsize=(4, 4))
ax = fig.add_subplot(1, 1, 1, projection=pre_processed_map)
pre_processed_map.plot(axes=ax, title='')

In [None]:
# Load the magnetogram reprojected to heliographic coordinates
hg_mag_fits_file = (
    HELIOGRAPH_MAG_SAVE_DIR + f'Mag{mag_date_str}_He{he_date_str}.fits'
)
hg_mag_map = sunpy.map.Map(hg_mag_fits_file)

fig = plt.figure(figsize=(7, 4))
ax = fig.add_subplot(1, 1, 1, projection=hg_mag_map)
hg_mag_map.plot(axes=ax, vmin=-50, vmax=50, title='')

v0.5.1

In [None]:
# Load the saved ensemble map and a vertically flipped copy of its data
ensemble_file = DETECTION_MAP_SAVE_DIR + he_date_str + '_ensemble_map.fits'
ensemble_map = sunpy.map.Map(ensemble_file)
ensemble_map_data = np.flipud(ensemble_map.data)

# Load the saved processed ("smooth") magnetogram for contour overlays
reprojected_smooth_file = (
    f'{ROTATED_MAG_SAVE_DIR}Mag{mag_date_str}_He{he_date_str}_smooth.fits'
)
reprojected_smooth_map = sunpy.map.Map(reprojected_smooth_file)

# This version is displayed on a 0-100 color scale
fig = plt.figure(figsize=(6, 4))
ax = fig.add_subplot(1, 1, 1, projection=ensemble_map)
im = ensemble_map.plot(axes=ax, cmap='magma', vmin=0, vmax=100, title='')
plot_detection.plot_map_contours(ax, reprojected_smooth_map)
fig.colorbar(im)

v0.2-0.5

In [None]:
# Load the saved ensemble array (last entry of the .npy file) and wrap it
# in a Sunpy map that reuses the He I map metadata
ensemble_file = DETECTION_NPY_SAVE_DIR + he_date_str + '_ensemble_map.npy'
saved_arrays = np.load(ensemble_file, allow_pickle=True)
ensemble_map_data = saved_arrays[-1]
ensemble_map = sunpy.map.Map(np.flipud(ensemble_map_data), he_map.meta)
ensemble_map.plot_settings['cmap'] = colormaps['magma']

fig = plt.figure(figsize=(4, 4))
ax = fig.add_subplot(1, 1, 1, projection=ensemble_map)
ensemble_map.plot(axes=ax, title='')

#### Single Mask

v0.5.1-v1.0

In [None]:
# Build a test "ensemble" map from one segmentation only.
# Previously tried: percent_of_peak = 70, morph_radius_dist = 15
percent_of_peak = 80
morph_radius_dist = 13

ch_mask_list = detect.get_ch_mask_list_v0_5_1(
    pre_processed_map, [percent_of_peak], [morph_radius_dist]
)
ch_mask_data = ch_mask_list[0]

# 0 wherever the pre-processed map has data, NaN elsewhere; masked pixels
# are then set to 100
has_data = ~np.isnan(np.flipud(pre_processed_map.data))
ensemble_map_data = np.where(has_data, 0, np.nan)
ensemble_map_data = np.where(ch_mask_data, 100, ensemble_map_data)

ensemble_map = sunpy.map.Map(
    np.flipud(ensemble_map_data), pre_processed_map.meta
)
ensemble_map.plot_settings['cmap'] = colormaps['magma']

fig = plt.figure(figsize=(7, 4))
ax = fig.add_subplot(1, 1, 1, projection=ensemble_map)
ensemble_map.plot(axes=ax, title='')

v0.5

In [None]:
# Load the saved single mask (last entry of the .npy file) as a Sunpy map
mask_file = DETECTION_NPY_SAVE_DIR + he_date_str + '_ensemble_map.npy'
saved_arrays = np.load(mask_file, allow_pickle=True)
mask_data = saved_arrays[-1]
mask_map = sunpy.map.Map(np.flipud(mask_data), he_map.meta)
mask_map.plot_settings['cmap'] = colormaps['gray']

# Blank out every pixel that shares the value of the corner pixel
corner_value = he_map.data[0,0]
he_base_data = np.where(he_map.data == corner_value, np.nan, he_map.data)
he_base_map = sunpy.map.Map(he_base_data, he_map.meta)

fig = plt.figure(figsize=(24, 5))

# Panel 1: He I observation
plot_detection.plot_he_map(fig, (1, 4, 1), he_map, he_date_str)

# Panel 2: He I observation with overlaid detection contours
ax = fig.add_subplot(1, 4, 2, projection=he_map)
he_base_map.plot(
    axes=ax, title=he_date_str, cmap='afmhot', vmin=-100, vmax=100
)
mask_contours = mask_map.contour(0)
for contour in mask_contours:
    ax.plot_coord(contour, color='black', linewidth=1)

# Panel 3: EUV observation
plot_detection.plot_euv_map(fig, (1, 4, 3), euv_map, euv_date_str)

# Panel 4: reprojected magnetogram with smoothed-magnetogram contours
ax = fig.add_subplot(1, 4, 4, projection=he_map)
reprojected_mag_map.plot(axes=ax, title=mag_date_str, vmin=-50, vmax=50)
plot_detection.plot_map_contours(ax, reprojected_smooth_map)

## Inspect Observations

In [None]:
# Crop the He I map to a rectangular region of interest.
# Alternative region tried: Tx from -750 to 0 arcsec, Ty from -600 to 0.
he_frame = he_map.coordinate_frame
crop_bottom_left = SkyCoord(
    Tx=-150*u.arcsec, Ty=-600*u.arcsec, frame=he_frame
)
crop_top_right = SkyCoord(
    Tx=600*u.arcsec, Ty=0*u.arcsec, frame=he_frame
)

he_submap = he_map.submap(
    bottom_left=crop_bottom_left, top_right=crop_top_right
)
he_submap.plot(vmin=-100, vmax=100)

In [None]:
# Histogram of the cropped He I map, ignoring pixels that are exactly zero
submap_values = he_submap.data.flatten()
hist_data = np.where(submap_values == 0, np.nan, submap_values)
edges = np.arange(-100, 101, 2)

fig = plt.figure(figsize=(7,5), dpi=150)
# add_subplot takes the 3-digit subplot code; add_axes expects a
# (left, bottom, width, height) rect and rejects a bare integer on
# Matplotlib versions before 3.7
ax = fig.add_subplot(111)

data = ax.hist(
    hist_data, edges, histtype='step',
    color='white', edgecolor='black', linewidth=3,
)
# Scientific notation with a shared exponent on the count axis
ax.yaxis.set_major_formatter(ScalarFormatter(useMathText=True))
ax.ticklabel_format(axis='y', style='sci', scilimits=(0,0))

ax.set_ylabel('Counts')
ax.set_xlabel('Equivalent Width (m\u00C5)')

# Publication Plots

Requires data loaded from the Single Map Data section.

See Multi-Date Outcomes > Confidence Histograms for lat,lon histograms

In [None]:
# File-name prefix prepended to every figure saved in this section.
# Exactly one assignment should be active; the commented lines are the
# prefixes used for other cases.
# NOTE(review): the active prefix is '2015-06-11-' while the observation
# selected in the Single Map Data section is '2015_06_06__16_08' -- confirm
# the prefix matches the loaded data before saving figures.

# PAPER_PLOT_DATE_NAME = 'kpvt-'
# PAPER_PLOT_DATE_NAME = 'rockwell-decline-'
# PAPER_PLOT_DATE_NAME = 'rockwell-min-'
# PAPER_PLOT_DATE_NAME = 'sarnoff-'

# PAPER_PLOT_DATE_NAME = '2012-06-'
# PAPER_PLOT_DATE_NAME = '2015-03-'
# PAPER_PLOT_DATE_NAME = '2015-03-v1_0'
# PAPER_PLOT_DATE_NAME = '2015-04-05-'
# PAPER_PLOT_DATE_NAME = '2015-06-06-'
# PAPER_PLOT_DATE_NAME = '2015-06-06-v1_0-'
PAPER_PLOT_DATE_NAME = '2015-06-11-'

### Data

He I Image

In [None]:
# Publication figure of the He I observation.
# pretty_mode=True paints off-disk pixels with the lower saturation value
# and uses a truncated afmhot colormap; False uses the standard plot helper.
pretty_mode = False
pretty_str = '-pretty' if pretty_mode else ''

fig = plt.figure(figsize=(10, 10), dpi=300)

if not pretty_mode:
    he_plot_map = he_map
    ax = plot_detection.plot_he_map(fig, (1, 1, 1), he_plot_map, he_date_str)
else:
    # Replace off-disk pixels by -100
    all_hp_coords = all_coordinates_from_map(he_map)
    on_disk_mask = coordinate_is_on_solar_disk(all_hp_coords)
    he_plot_map = sunpy.map.Map(
        np.where(on_disk_mask, he_map.data, -100), he_map.meta
    )

    # Sample only the lower 90% of afmhot
    darkened_cmap_array = colormaps['afmhot'](np.linspace(0, 0.9, 256))
    plot_cmap = colors.ListedColormap(darkened_cmap_array)

    # Saturate post-2010 Sarnoff imagery at +/- 100mA
    ax = fig.add_subplot(1, 1, 1, projection=he_map)
    he_plot_map.plot(axes=ax, cmap=plot_cmap, vmin=-100, vmax=100)

# Strip title, grid, ticks and tick labels for a clean image
ax.set_title('')
ax.coords.grid(False)
for coord in ax.coords:
    coord.set_ticks_visible(False)
    coord.set_ticklabel_visible(False)

he_figure_file = PAPER_PLOT_DIR + PAPER_PLOT_DATE_NAME + f'he{pretty_str}.jpeg'
plt.savefig(he_figure_file, bbox_inches='tight')

EUV Image

In [None]:
# Publication figure of the EUV observation
fig = plt.figure(figsize=(10, 10), dpi=300)
euv_panel = (1, 1, 1)
ax = plot_detection.plot_euv_map(fig, euv_panel, euv_map, euv_date_str)

# Strip title, grid, ticks and tick labels for a clean image
ax.set_title('')
ax.coords.grid(False)
for coord in ax.coords:
    coord.set_ticks_visible(False)
    coord.set_ticklabel_visible(False)

euv_figure_file = PAPER_PLOT_DIR + PAPER_PLOT_DATE_NAME + 'euv.jpeg'
plt.savefig(euv_figure_file, bbox_inches='tight')

Magnetogram

In [None]:
# Required to access Sunpy colormaps via matplotlib.colormaps.
# Deliberately NOT aliased as `cm`: that name is already bound to
# matplotlib.cm by the import cell, and the colorbar cells below call
# cm.ScalarMappable, which the Sunpy module does not provide.
import sunpy.visualization.colormaps as sunpy_cm

In [None]:
# Publication figure of the magnetogram.
# pretty_mode=True paints off-disk pixels black under an RdBu_r colormap;
# False sends exactly-zero pixels to the lower saturation value.
pretty_mode = False
pretty_cmap_thresh = 0.2
pretty_str = '-pretty' if pretty_mode else ''

# A 300 dpi version of this figure was also tried
fig = plt.figure(figsize=(10, 10), dpi=100)
ax = fig.add_subplot(1, 1, 1, projection=mag_map)

if not pretty_mode:
    mag_plot_map = sunpy.map.Map(
        np.where(mag_map.data == 0, -50, mag_map.data), mag_map.meta
    )
    mag_plot_map.plot(axes=ax, vmin=-50, vmax=50)
else:
    # Replace off-disk pixels by a value far below the color range
    all_hp_coords = all_coordinates_from_map(mag_map)
    on_disk_mask = coordinate_is_on_solar_disk(all_hp_coords)
    mag_plot_map = sunpy.map.Map(
        np.where(on_disk_mask, mag_map.data, -2000), mag_map.meta
    )

    black_background = [0,0,0,1]

    # Earlier blue-to-black-to-red attempt, built from 'seismic_r':
    #   blue_black_cmap_array = colormaps['seismic_r'](
    #       np.linspace(0.5 + pretty_cmap_thresh, 1, 256))
    #   black_red_cmap_array = colormaps['seismic_r'](
    #       np.linspace(0, 0.5 - pretty_cmap_thresh, 256))
    #   plot_cmap = colors.ListedColormap(np.vstack((
    #       black_background, blue_black_cmap_array, black_red_cmap_array)))

    # RdBu_r with one black entry prepended for the off-disk background
    brightened_cmap_array = colormaps['RdBu_r'](np.linspace(0, 1, 256))
    plot_cmap = colors.ListedColormap(
        np.vstack((black_background, brightened_cmap_array))
    )
    mag_plot_map.plot(axes=ax, cmap=plot_cmap, vmin=-50, vmax=50)

    # HMI colormap alternative:
    #   hmi_cmap_array = colormaps['hmimag'](np.linspace(0, 1, 256))
    #   plot_cmap = colors.ListedColormap(
    #       np.vstack((black_background, hmi_cmap_array)))
    #   mag_plot_map.plot(axes=ax, vmin=-1500, vmax=1500, cmap=plot_cmap)

# Strip title, grid, ticks and tick labels for a clean image
ax.set_title('')
ax.coords.grid(False)
for coord in ax.coords:
    coord.set_ticks_visible(False)
    coord.set_ticklabel_visible(False)

mag_figure_file = PAPER_PLOT_DIR + PAPER_PLOT_DATE_NAME + f'mag{pretty_str}.jpeg'
plt.savefig(mag_figure_file, bbox_inches='tight')

### Ensemble Map

Ensemble Plot with Confidence, Posterior Probability, or Unipolarity-Labeled Colorbar

In [None]:
# Colorbar label / output-name variant: 'confidence', 'v1.1_no_thresh'
# or 'v1.0_no_thresh'
cb_type = 'confidence'
# cb_type = 'v1.1_no_thresh'
# cb_type = 'v1.0_no_thresh'
background_offset = -0.4
confidence_range = [0,1]
cmap = 'magma'

probability_threshold = 0.4
# The "no_thresh" variants render the sub-threshold part of the scale in gray
gray_below_thresh = ('no_thresh' in cb_type)

# Ensemble map to plot with the background solar disk set to an offset value
offset_bg_ensemble_map_data = np.where(
    ensemble_map.data == 0, background_offset, ensemble_map.data
)
ensemble_plot_map = sunpy.map.Map(offset_bg_ensemble_map_data, he_map.meta)

# Create colormap objects with brightened colors for nonzero-valued pixels.
# The plot colormap has a black background as well, but not the colorbar.
# Arrays are first sampled from colormap objects, then modified colormap
# objects are created. -------------------------------------------------------
black_background = [0,0,0,1]
# Position of value 0 on the plot's [background_offset, 1] color scale
zero_confidence_color_val = np.interp(
    0, [background_offset, 1], confidence_range
)

if gray_below_thresh:
    cmap_sample_num = int((1 - probability_threshold)*256)
    gray_cmap_array = colormaps['gray'](
        np.linspace(0.4, 0.6, int(probability_threshold*256))
    )

    cmap_array = colormaps[cmap](np.linspace(0, 1, cmap_sample_num))
    plot_cmap = colors.ListedColormap(
        np.vstack((black_background, gray_cmap_array, cmap_array))
    )

    brightened_cmap_array = colormaps[cmap](
        np.linspace(zero_confidence_color_val, 1, cmap_sample_num)
    )
    cb_cmap = colors.ListedColormap(
        np.vstack((gray_cmap_array, brightened_cmap_array))
    )
    gray_below_thresh_str = '-gray'
else:
    cmap_array = colormaps[cmap](np.linspace(0, 1, 256))
    plot_cmap = colors.ListedColormap(
        np.vstack((black_background, cmap_array))
    )

    brightened_cmap_array = colormaps[cmap](
        np.linspace(zero_confidence_color_val, 1, 256)
    )
    cb_cmap = colors.ListedColormap(brightened_cmap_array)
    gray_below_thresh_str = ''

# Plot figure -----------------------------------------------------------------
fig = plt.figure(figsize=(4, 3), dpi=300)
ax = fig.add_subplot(111, projection=ensemble_plot_map)
ensemble_plot_map.plot(
    axes=ax, cmap=plot_cmap, annotate=False,
    vmin=background_offset, vmax=confidence_range[1]
)
plot_detection.plot_map_contours(ax, reprojected_smooth_map)
ensemble_plot_map.draw_grid(axes=ax)

# Remove xy grid and ticks
ax.coords.grid(False)
for coord in ax.coords:
    coord.set_ticks_visible(False)
    coord.set_ticklabel_visible(False)

# Standalone 0-1 colorbar. `plt.cm` is used instead of the bare name `cm`,
# which the Magnetogram section rebinds to the Sunpy colormap module.
norm = colors.Normalize(vmin=0, vmax=1)
cb = plt.colorbar(plt.cm.ScalarMappable(norm=norm, cmap=cb_cmap), ax=ax)
cb.ax.tick_params(labelsize=8)

if cb_type == 'confidence':
    cb.set_label('Confidence', labelpad=11, rotation=-90)
    plt.savefig(
        PAPER_PLOT_DIR + PAPER_PLOT_DATE_NAME + 'stride.jpeg',
        bbox_inches='tight'
    )
else:
    if cb_type == 'v1.1_no_thresh':
        cb.set_label(r'Posterior Probability', labelpad=13, rotation=-90)
    elif cb_type == 'v1.0_no_thresh':
        cb.set_label(r'Unipolarity $U$', labelpad=13, rotation=-90)

    plt.savefig(
        (PAPER_PLOT_DIR + PAPER_PLOT_DATE_NAME
         + f'no-thresh{gray_below_thresh_str}.jpeg'),
        bbox_inches='tight'
    )

Old v1.0 with optional dark or light mode

In [None]:
# dark=True: magma colors on a black disk; dark=False: viridis with the
# zero-valued pixels set to NaN and black grid/limb lines
dark = True
background_offset = -0.4
confidence_range = [0,1]


# Ensemble map to plot with the background solar disk set to an offset value
if dark:
    cmap = 'magma'
    ensemble_plot_map = sunpy.map.Map(
        np.where(ensemble_map.data == 0, background_offset, ensemble_map.data), he_map.meta
    )
else:
    cmap = 'viridis'
    ensemble_plot_map = sunpy.map.Map(
        np.where(ensemble_map.data == 0, np.nan, ensemble_map.data), he_map.meta
    )

# Create colormap objects with brightened colors for nonzero-valued pixels,
# as well as a black background ------------------------------------------------
zero_confidence_color = np.interp(0, [background_offset, 1], confidence_range)
color_range = np.linspace(zero_confidence_color, 1, 256)
black_background = [0,0,0,1]
brightened_cmap_array = colormaps[cmap](color_range)

black_background_cmap = colors.ListedColormap(
    np.vstack((black_background, brightened_cmap_array))
)
cb_cmap = colors.ListedColormap(brightened_cmap_array)

# Plot figure -----------------------------------------------------------------
fig = plt.figure(figsize=(4, 3), dpi=300)
ax = fig.add_subplot(111, projection=ensemble_plot_map)
ensemble_plot_map.plot(
    axes=ax, cmap=black_background_cmap, annotate=False,
    vmin=confidence_range[0], vmax=confidence_range[1]
)
plot_detection.plot_map_contours(ax, reprojected_smooth_map)

if dark:
    ensemble_plot_map.draw_grid(axes=ax)
else:
    ensemble_plot_map.draw_grid(axes=ax, color='black')
    ensemble_plot_map.draw_limb(axes=ax, color='black')

# Remove xy grid and ticks
ax.coords.grid(False)
for coord in ax.coords:
    coord.set_ticks_visible(False)
    coord.set_ticklabel_visible(False)

# Standalone 0-1 colorbar. `plt.cm` is used instead of the bare name `cm`,
# which the Magnetogram section rebinds to the Sunpy colormap module.
norm = colors.Normalize(vmin=0, vmax=1)
cb = plt.colorbar(plt.cm.ScalarMappable(norm=norm, cmap=cb_cmap), ax=ax)
# custom_font = {'fontname':'Times New Roman'}
cb.set_label(r'Unipolarity $U$', labelpad=13, rotation=-90)#, **custom_font)
cb.ax.tick_params(labelsize=8)

plt.savefig(
    PAPER_PLOT_DIR + PAPER_PLOT_DATE_NAME +'no-thresh.jpeg',
    bbox_inches='tight'
)

### ACWE Map

In [None]:
# background_offset = -0.2
background_offset = 0
confidence_range = [0,1]
cmap = 'inferno'


# ACWE map to plot with the background solar disk set to an offset value
offset_bg_acwe_map_data = np.where(
    acwe_map.data == 0, background_offset, acwe_map.data
)
acwe_plot_map = sunpy.map.Map(offset_bg_acwe_map_data, acwe_map.meta)

# Create colormap objects with brightened colors for nonzero-valued pixels.
# The plot colormap has a black background as well, but not the colorbar.
# Arrays are first sampled from colormap objects, then modified colormap
# objects are created. -------------------------------------------------------
black_background = [0,0,0,1]
# Position of value 0 on the plot's [background_offset, 1] color scale
zero_confidence_color_val = np.interp(
    0, [background_offset, 1], confidence_range
)

cmap_array = colormaps[cmap](np.linspace(0, 1, 256))
plot_cmap = colors.ListedColormap(
    np.vstack((black_background, cmap_array))
)

brightened_cmap_array = colormaps[cmap](
    np.linspace(zero_confidence_color_val, 1, 256)
)
cb_cmap = colors.ListedColormap(brightened_cmap_array)

# Plot figure -----------------------------------------------------------------
fig = plt.figure(figsize=(4, 3), dpi=300)
ax = fig.add_subplot(111, projection=acwe_plot_map)
acwe_plot_map.plot(
    axes=ax, cmap=plot_cmap, annotate=False,
    vmin=background_offset, vmax=confidence_range[1]
)
plot_detection.plot_map_contours(ax, reprojected_smooth_map)
acwe_plot_map.draw_grid(axes=ax)

# Remove xy grid and ticks
ax.coords.grid(False)
for coord in ax.coords:
    coord.set_ticks_visible(False)
    coord.set_ticklabel_visible(False)

# Standalone 0-1 colorbar. `plt.cm` is used instead of the bare name `cm`,
# which the Magnetogram section rebinds to the Sunpy colormap module.
norm = colors.Normalize(vmin=0, vmax=1)
cb = plt.colorbar(plt.cm.ScalarMappable(norm=norm, cmap=cb_cmap), ax=ax)
cb.ax.tick_params(labelsize=8)

cb.set_label('Confidence', labelpad=11, rotation=-90)
plt.savefig(
    PAPER_PLOT_DIR + PAPER_PLOT_DATE_NAME + f'acwe-{cmap}.jpeg',
    bbox_inches='tight'
)

### Fused Map

In [None]:
background_offset = -0.6
confidence_range = [0,1]
cmap = 'copper'

# Fused map to plot with the background solar disk set to an offset value
offset_bg_fused_map_data = np.where(
    fused_map.data == 0, background_offset, fused_map.data
)
fused_plot_map = sunpy.map.Map(offset_bg_fused_map_data, he_map.meta)

# Create colormap objects with brightened colors for nonzero-valued pixels.
# The plot colormap has a black background as well, but not the colorbar.
# Arrays are first sampled from colormap objects, then modified colormap
# objects are created. -------------------------------------------------------
black_background = [0,0,0,1]
# Position of value 0 on the plot's [background_offset, 1] color scale
zero_confidence_color_val = np.interp(
    0, [background_offset, 1], confidence_range
)

cmap_array = colormaps[cmap](np.linspace(0, 1, 256))
plot_cmap = colors.ListedColormap(
    np.vstack((black_background, cmap_array))
)

brightened_cmap_array = colormaps[cmap](
    np.linspace(zero_confidence_color_val, 1, 256)
)
cb_cmap = colors.ListedColormap(brightened_cmap_array)

# Plot figure -----------------------------------------------------------------
fig = plt.figure(figsize=(4, 3), dpi=300)
ax = fig.add_subplot(111, projection=fused_plot_map)
fused_plot_map.plot(
    axes=ax, cmap=plot_cmap, annotate=False,
    vmin=background_offset, vmax=confidence_range[1]
)
plot_detection.plot_map_contours(ax, reprojected_smooth_map)
fused_plot_map.draw_grid(axes=ax)

# Remove xy grid and ticks
ax.coords.grid(False)
for coord in ax.coords:
    coord.set_ticks_visible(False)
    coord.set_ticklabel_visible(False)

# Standalone 0-1 colorbar. `plt.cm` is used instead of the bare name `cm`,
# which the Magnetogram section rebinds to the Sunpy colormap module.
norm = colors.Normalize(vmin=0, vmax=1)
cb = plt.colorbar(plt.cm.ScalarMappable(norm=norm, cmap=cb_cmap), ax=ax)
cb.ax.tick_params(labelsize=8)

cb.set_label('Confidence', labelpad=11, rotation=-90)
plt.savefig(
    PAPER_PLOT_DIR + PAPER_PLOT_DATE_NAME + 'fused.jpeg',
    bbox_inches='tight'
)

### Methods

He I Histogram

In [None]:
# Histogram of the He I map, ignoring pixels that are exactly zero
he_values = he_map_data.flatten()
hist_data = np.where(he_values == 0, np.nan, he_values)
edges = np.arange(-100, 101, 10)

fig = plt.figure(figsize=(7*0.6,5*0.6), dpi=300)
# add_subplot takes the 3-digit subplot code; add_axes expects a
# (left, bottom, width, height) rect and rejects a bare integer on
# Matplotlib versions before 3.7
ax = fig.add_subplot(111)

data = ax.hist(
    hist_data, edges, histtype='step',
    color='white', edgecolor='black', linewidth=2,
)
# Scientific notation with a shared exponent on the count axis
ax.yaxis.set_major_formatter(ScalarFormatter(useMathText=True))
ax.ticklabel_format(axis='y', style='sci', scilimits=(0,0))
ax.set_xticks([-100, -50, 0, 50, 100])

ax.set_ylabel('Counts')
ax.set_xlabel('He I Pixel Intensity')

fig.savefig(
    PAPER_PLOT_DIR + PAPER_PLOT_DATE_NAME + 'hist-he.jpeg',
    bbox_inches='tight'
)

Pre-Processed He I Histogram

In [None]:
# Histogram of the pre-processed He I pixel values.
hist_data = pre_processed_map_data.flatten()
edges = np.arange(-1, 1.1, 0.1)

fig = plt.figure(figsize=(7*0.6,5*0.6), dpi=300)
# add_subplot takes the 3-digit subplot code; add_axes is documented to take
# a (left, bottom, width, height) rectangle.
ax = fig.add_subplot(111)

data = ax.hist(
    hist_data, edges, histtype='step',
    color='white', edgecolor='black', linewidth=2,
)
# Scientific notation with a math-text exponent on the counts axis
ax.yaxis.set_major_formatter(ScalarFormatter(useMathText=True))
ax.ticklabel_format(axis='y', style='sci', scilimits=(0,0))
ax.set_xticks([-1, -0.5, 0, 0.5, 1])

ax.set_ylabel('Counts')
ax.set_xlabel('Rescaled He I Pixel Intensity')

# Save through the figure handle rather than pyplot's current-figure state
fig.savefig(
    PAPER_PLOT_DIR + PAPER_PLOT_DATE_NAME + 'hist-preproc.jpeg',
    bbox_inches='tight'
)

Threshold Mask

In [None]:
# Threshold parameter handed to detect.get_thresh_bound; the commented value
# is an alternative setting.
percent_of_peak = 90
# percent_of_peak = 70

thresh_bound = detect.get_thresh_bound(pre_processed_map_data, percent_of_peak)

# 1 where the pre-processed data exceeds the bound, 0 elsewhere
thresh_mask = np.where(pre_processed_map_data > thresh_bound, 1, 0)
# 0 wherever the pre-processed data is defined, NaN where it is NaN
empty_disk = np.where(~np.isnan(pre_processed_map_data), 0, np.nan)
# Accepted pixels become 1; all others keep the empty-disk value
thresh_disk = np.where(thresh_mask, 1, empty_disk)
# NOTE(review): the array is flipped vertically before being paired with the
# He I metadata -- presumably its row order is inverted relative to the map;
# confirm.
thresh_map = sunpy.map.Map(np.flipud(thresh_disk), he_map.meta)

# fig = plt.figure(figsize=(10,10))
# ax = fig.add_subplot(1, 1, 1)
# ax.imshow(thresh_disk, cmap='magma')

# ax.tick_params(left=False, right=False, labelleft=False,
#                labelbottom=False, bottom=False)

fig = plt.figure(figsize=(3, 3), dpi=300)
ax = fig.add_subplot(111, projection=thresh_map)
thresh_map.plot(axes=ax, cmap='magma', annotate=False)
thresh_map.draw_grid(axes=ax)


# Hide the axes grid, ticks and tick labels on every coordinate axis
ax.coords.grid(False)
for coord in ax.coords:
    coord.set_ticks_visible(False)
    coord.set_ticklabel_visible(False)

# File name encodes the threshold used
plt.savefig(
    PAPER_PLOT_DIR + PAPER_PLOT_DATE_NAME + f't{percent_of_peak}.jpeg',
    bbox_inches='tight'
)

Preliminary Segmentation Mask

In [None]:
# Alternative parameter pair:
# percent_of_peak = 90
# morph_radius = 13

percent_of_peak = 70
morph_radius = 15

# Single-element parameter lists are passed, so the first returned mask is
# the one for this threshold/radius pair.
ch_mask = detect.get_ch_mask_list_v0_5_1(
    pre_processed_map, [percent_of_peak], [morph_radius]
)[0]

# 0 wherever the pre-processed data is defined, NaN where it is NaN
empty_disk = np.where(~np.isnan(pre_processed_map_data), 0, np.nan)
# Masked pixels become 1; all others keep the empty-disk value
ch_disk = np.where(ch_mask, 1, empty_disk)
# NOTE(review): flipped vertically before being paired with the He I
# metadata -- presumably to match the map's row order; confirm.
ch_map = sunpy.map.Map(np.flipud(ch_disk), he_map.meta)

# fig = plt.figure(figsize=(10,10))
# ax = fig.add_subplot(1, 1, 1)
# ax.imshow(ch_disk, cmap='magma')

# ax.tick_params(left=False, right=False, labelleft=False,
#                labelbottom=False, bottom=False)

fig = plt.figure(figsize=(3, 3), dpi=300)
ax = fig.add_subplot(111, projection=ch_map)
ch_map.plot(axes=ax, cmap='magma', annotate=False)
ch_map.draw_grid(axes=ax)


# Hide the axes grid, ticks and tick labels on every coordinate axis
ax.coords.grid(False)
for coord in ax.coords:
    coord.set_ticks_visible(False)
    coord.set_ticklabel_visible(False)

# File name encodes the threshold and morphological radius used
plt.savefig(
    PAPER_PLOT_DIR + PAPER_PLOT_DATE_NAME + f't{percent_of_peak}-r{morph_radius}.jpeg',
    bbox_inches='tight'
)

## ROC

Plots the following:
- Community CH detection method scatter from COSPAR benchmark dataset
- STRIDE v1.1 ROC curves for training vs testing data

Pre-run the CH Labels function cells: the cells below rely on helpers that are not defined in this section.

In [None]:
# Imported here because pickle is not among the notebook's top-level imports.
import pickle

train_fraction = 0.479 # 2012 only
outcomes_for_features = ['unipolarity', 'grad_median', 'cm_foreshort']
labeled_cand_df_file = CH_LABEL_DIR + '2024_06_06__20_29_outcomes.csv'

# Load LDA ---------------------------------------------------------------------
# NOTE: pickle.load can execute arbitrary code; only load a model file that
# was produced by this project.
with open(LDA_FILE_NAME, 'rb') as lda_file:
    lda = pickle.load(lda_file)

# Load classified features and labels ------------------------------------------
labeled_cand_df = pd.read_csv(labeled_cand_df_file)
# Rows whose label_id is exactly 0.5 are left out of classification
classify_cand_df = labeled_cand_df[labeled_cand_df['label_id'] != 0.5]
num_cands = classify_cand_df.shape[0]

# Feature matrix: one row per candidate, one column per outcome
X = np.array([classify_cand_df[outcome] for outcome in outcomes_for_features]).T
y = np.array(classify_cand_df['label_id'])

# Sequential split: the leading train_fraction of rows is the training set and
# the remainder is the test set (no shuffling).
num_train_cands = int(train_fraction*num_cands)
X_train = X[:num_train_cands, :]
y_train = y[:num_train_cands]
X_test = X[num_train_cands:, :]
y_test = y[num_train_cands:]

In [None]:
# Probability thresholds evaluated individually (drawn as labelled markers
# in the ROC plot cell)
scatter_probabilities = [0.1, 0.2, 0.3, 0.4, 0.8, 0.9]
# Dense threshold sweep used to trace the ROC curves
probability_thresholds = np.linspace(0,1,500)

# NOTE(review): get_roc_rates is not defined in this section; it must already
# exist in the kernel. The plot cell uses column 0 of its result as the true
# positive rate and column 1 as the false positive rate.
# Column 1 of predict_proba is the probability that gets thresholded.
train_probabilities = lda.predict_proba(X_train)[:,1]
train_roc_rates = np.array(
    [get_roc_rates(train_probabilities, y_train, probability_thresh)
     for probability_thresh in probability_thresholds]
)

test_probabilities = lda.predict_proba(X_test)[:,1]
test_roc_rates = np.array(
    [get_roc_rates(test_probabilities, y_test, probability_thresh)
     for probability_thresh in probability_thresholds]
)
# Test-set rates at the individually marked thresholds
scatter_roc_rates = np.array(
    [get_roc_rates(test_probabilities, y_test, probability_thresh)
     for probability_thresh in scatter_probabilities]
)

In [None]:
# Display the test-set ROC rates at the 0.4 probability threshold
p_thresh_idx = scatter_probabilities.index(0.4)
scatter_roc_rates[p_thresh_idx]

In [None]:
import seaborn as sns

colors = sns.color_palette('muted')
colors

In [None]:
zoom_mode = True
plt.figure(figsize=(3.5,3.5), dpi=300)

# Marker size
s = 20

# STRIDE ROC ----------------------------------------------------------------------
plt.plot(
    test_roc_rates[:,1], test_roc_rates[:,0], 
    label=r'STRIDE-CH', color='peru', 
    linewidth=2
)
# plt.plot(
#     train_roc_rates[:,1], train_roc_rates[:,0], 
#     label=r'STRIDE-CH Train', color='peru', 
#     linewidth=1
# )

# Corner COSPAR ------------------------------------------------------------------
plt.scatter(0.21,0.77, s,label='Baseline', color='k', marker='s')
plt.scatter(0,0.78, s,label='CHIMERA', color=colors[0])
# plt.scatter(0.04,0.84,label='SPOCA (o:Base | ^:HEK)')
plt.scatter(0.04,0.84, s,label='SPoCA-CH/-HEK', color=colors[1])
# plt.scatter(0.25,0.87,marker='^',color='tab:orange')  # SPOCA-HEK
plt.scatter(0.25,0.87, s,marker='^', color=colors[1])      # SPOCA-HEK
# plt.scatter(0.07,0.83,label='ACWE  (o:03     | ^:  04)')
plt.scatter(0.07,0.83, s,label='ACWE03/04', color=colors[2])
# plt.scatter(0.29,0.86,marker='^',color='tab:green')   # ACWE-04
plt.scatter(0.29,0.86, s,marker='^', color=colors[2])      # ACWE-04
plt.scatter(0.13,0.85, s,label='CRONNOS', color=colors[3])
plt.scatter(0.21,0.79, s,label='CHARM', color=colors[4])
plt.scatter(0.35,0.82, s,label='CNN193', color=colors[5])
plt.scatter(0.38,0.98, s,label='WWWBCS', color=colors[6])

if zoom_mode:
    zoom_str = '-zoom'
    plt.xlim([-0.015,0.45])
    plt.ylim([0.55,1.022])

    # STRIDE Scatter -------------------------------------------------
    plt.scatter(
        scatter_roc_rates[:,1], scatter_roc_rates[:,0],
        color='peru', marker='s'
    )
    for prob, xi, yi in zip(
            scatter_probabilities, scatter_roc_rates[:,1],
            scatter_roc_rates[:,0]
        ):
        
        if prob == 0.4:
            xytext = (-6, 6)
        else:
            xytext = (2, -10)
        
        plt.annotate(
            f'{prob:.1f}', xy=(xi, yi), xycoords='data', 
            xytext=xytext,    # Corner zoom
            # xytext=(-20, 8),  # Unzoomed
            textcoords='offset points', fontsize=7.5
        )
else:
    zoom_str = ''
    plt.xlim([-0.05,1.05])
    plt.ylim([-0.05,1.05])

    # All COSPAR ----------------------------------------------------
    plt.scatter(0.49,0.99, s,label='CHMAP', color=colors[7])
    plt.scatter(0.54,0.86, s,label='CHIPS', color=colors[8])
    plt.scatter(0.62,0.87, s,label='SYNCH', color=colors[9])
    plt.scatter(0.85,0.92, s,label='CHORTLE', marker='d', color=colors[0])


plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right', reverse=True, fontsize=7.5)

plt.savefig(
    PAPER_PLOT_DIR + f'ROC{zoom_str}.jpeg',
    bbox_inches='tight'
)

# Single Segmentation

Requires the single-map extraction from the single map data section to have been run first.

## Pre-Process

### Versions

v0.5.1

In [None]:
# v0.5.1 takes and returns a map; its data is flipped vertically before being
# compared against the he_map_data array.
pre_processed_map_v0_5_1_map = detect.pre_process_v0_5_1(he_map)

arrays = [he_map_data, np.flipud(pre_processed_map_v0_5_1_map.data)]
titles = ['L2 Observation', 'Pre-Processed Observation']

plot_detection.plot_hists(arrays, titles, semilogy=True)

v0.4

In [None]:
# v0.4 is applied to the data array rather than to the map
pre_process_v0_4_he = detect.pre_process_v0_4(he_map_data)

arrays = [he_map_data, pre_process_v0_4_he]
titles = ['L2 Observation', 'Pre-Processed Observation']

plot_detection.plot_hists(arrays, titles, semilogy=True)

In [None]:
# Compare dates: repeat the v0.4 histogram comparison for three consecutive
# He I observations starting at date_idx.
date_idx = 0
for he_date_str in HE_DATE_LIST[date_idx:date_idx + 3]:
    compare_he_fits_file = DATA_FITS_FORMAT.format(
        data_dir=HE_DIR, date_str=he_date_str
    )
    raw_he = prepare_data.get_image_from_fits(compare_he_fits_file)
    pre_process_v0_4_he = detect.pre_process_v0_4(raw_he)
    arrays = [raw_he, pre_process_v0_4_he]
    titles = [he_date_str, 'Pre-Processed']
    plot_detection.plot_hists(arrays, titles, semilogy=True)

v0.1

In [None]:
# v0.1 returns the final array plus two intermediate stages
pre_process_v0_1_he, he_high_cut, he_nan = detect.pre_process_v0_1(
    he_map_data, peak_count_cutoff_percent=0.1
)

arrays = [he_map_data, he_nan, he_high_cut, pre_process_v0_1_he]
titles = ['he', 'he NaN', 'he High Cut', 'he Band Cut']

# Two histogram rows, two stages per row
plot_detection.plot_hists(arrays[0:2], titles[0:2], semilogy=True)
plot_detection.plot_hists(arrays[2:4], titles[2:4], semilogy=True)

In [None]:
# Compare dates: repeat the v0.1 histogram comparison for three consecutive
# He I observations starting at date_idx.
date_idx = 0
for he_date_str in HE_DATE_LIST[date_idx:date_idx + 3]:
    compare_he_fits_file = DATA_FITS_FORMAT.format(
        data_dir=HE_DIR, date_str=he_date_str
    )
    raw_he = prepare_data.get_image_from_fits(compare_he_fits_file)
    # Only the first returned element (the final pre-processed array) is kept
    pre_process_v0_1_he = detect.pre_process_v0_1(raw_he)[0]
    arrays = [raw_he, pre_process_v0_1_he]
    titles = [he_date_str, 'Pre-Processed']
    plot_detection.plot_hists(arrays, titles, semilogy=True)

vY: Pre-Processed Map Reprojected to Heliographic Coordinates

In [None]:
pre_processed_vY_map = detect.pre_process_vY(he_map)

fig = plt.figure(figsize=(11, 10))

# Top left: He I map with default scaling
ax = fig.add_subplot(2, 2, 1, projection=he_map)
he_map.plot(axes=ax, title=he_map.date)

# Top right: same map with the colour range limited to +/-100
ax = fig.add_subplot(2, 2, 2, projection=he_map)
he_map.plot(axes=ax, vmin=-100, vmax=100, title='+/-100 mAngstrom Saturation')

# Bottom row (spanning both columns): vY pre-processed map
ax = fig.add_subplot(2, 2, (3,4), projection=pre_processed_vY_map)
pre_processed_vY_map.plot(axes=ax, title='')

### Save Outputs

v0.1-v0.4

In [None]:
# Pre-process every He I observation with v0.4 and save the result as an
# object-array .npy file holding [date string, pre-processed array].
overwrite = False

# exist_ok replaces the separate isdir check
os.makedirs(PREPROCESS_NPY_SAVE_DIR, exist_ok=True)

for he_date_str in HE_DATE_LIST:

    # Skip observations that already have an output unless overwriting
    pre_process_file = (PREPROCESS_NPY_SAVE_DIR + he_date_str
                        + '_pre_processed_map.npy')
    if os.path.isfile(pre_process_file) and not overwrite:
        print((f'He {he_date_str} pre-processed map already exists.'))
        continue

    he_fits_file = DATA_FITS_FORMAT.format(
        data_dir=HE_DIR, date_str=he_date_str
    )
    he_map_data = prepare_data.get_image_from_fits(he_fits_file)

    # pre_processed_map_data = detect.pre_process_v0_1(he_map_data)[0]
    pre_processed_map_data = detect.pre_process_v0_4(he_map_data)

    # dtype=object keeps the string and the array as two separate elements
    save_list = [he_date_str, pre_processed_map_data]
    np.save(pre_process_file, np.array(save_list, dtype=object),
            allow_pickle=True)
    print(f'{he_date_str} Pre-Processed Map Saved')

vY: Pre-Processed Map Reprojected to Heliographic Coordinates

In [None]:
# Pre-process every He I observation with vY and save the resulting map as a
# FITS file.
overwrite = False

# exist_ok replaces the separate isdir check
os.makedirs(PREPROCESS_MAP_SAVE_DIR, exist_ok=True)

for he_date_str in HE_DATE_LIST:

    # Skip observations that already have an output unless overwriting
    pre_process_file = (PREPROCESS_MAP_SAVE_DIR + he_date_str
                        + '_pre_processed_map.fits')
    if os.path.isfile(pre_process_file) and not overwrite:
        print((f'He {he_date_str} pre-processed map already exists.'))
        continue

    he_fits_file = DATA_FITS_FORMAT.format(
        data_dir=HE_DIR, date_str=he_date_str
    )
    he_map = prepare_data.get_nso_sunpy_map(he_fits_file)

    pre_processed_map = detect.pre_process_vY(he_map)

    pre_processed_map.save(pre_process_file, overwrite=overwrite)
    print(f'{he_date_str} Pre-Processed Map Saved')

He I/EUV Ratio

In [None]:
# For every He I observation, divide it by the square root of the nearest EUV
# observation (reprojected onto the He I map) and save the ratio as FITS.
overwrite = False

# exist_ok replaces the separate isdir check
os.makedirs(RATIO_SAVE_DIR, exist_ok=True)

for he_date_str in HE_DATE_LIST:

    euv_date_str = prepare_data.get_nearest_date_str(
        EUV_DATE_LIST, selected_date_str=he_date_str
    )

    # Optionally overwrite existing files
    ratio_fits_file = f'{RATIO_SAVE_DIR}He{he_date_str}_EUV{euv_date_str}.fits'
    if os.path.isfile(ratio_fits_file):
        if overwrite:
            os.remove(ratio_fits_file)
        else:
            print((f'{he_date_str} to {euv_date_str} ratio already exists.'))
            continue

    # Extract He I observation
    he_fits_file = DATA_FITS_FORMAT.format(
        data_dir=HE_DIR, date_str=he_date_str
    )
    he_map = prepare_data.get_nso_sunpy_map(he_fits_file)
    if not he_map:
        print(f'{he_date_str} He I observation extraction failed.')
        continue

    # Remove error causing keywords which have invalid ascii content.
    # A default is supplied so a header lacking either keyword does not raise.
    he_map.meta.pop('history', None)
    he_map.meta.pop('comment', None)

    # Extract and reproject EUV observation
    euv_fits_file = DATA_FITS_FORMAT.format(
        data_dir=EUV_DIR, date_str=euv_date_str
    )
    euv_map = sunpy.map.Map(euv_fits_file)
    reprojected_euv_map = prepare_data.diff_rotate(
        input_map=euv_map, target_map=he_map
    )

    # Pre-process He I data via background removal and upper cutoff
    # Satisfactory only for Sarnoff camera observations
    # Background: every pixel equal to the corner pixel value becomes NaN.
    he_map_data = np.where(he_map.data == he_map.data[0,0],
                           np.nan, he_map.data)
    # The array now contains NaN, so the percentile must ignore NaN;
    # np.percentile would return NaN and the cutoff would never apply.
    upper_cutoff = np.nanpercentile(he_map_data, 99.9)
    he_map_data = np.where(he_map_data >= upper_cutoff,
                           np.nan, he_map_data)

    ratio_data = np.divide(he_map_data, (reprojected_euv_map.data)**0.5)
    ratio_map = sunpy.map.Map(ratio_data, he_map.meta)

    # Save to FITS files
    ratio_map.save(ratio_fits_file)
    print(f'He{he_date_str} to EUV{euv_date_str} ratio saved.')

He I/EUV Ratio Pre-Process

In [None]:
# Pre-process every saved He I/EUV ratio map with v0.4 and save the result as
# an object-array .npy file holding [date string, pre-processed array].
# NOTE: the output path has the same directory and file name pattern as the
# He I pre-process save cell, so an existing He I output causes the ratio for
# that date to be skipped unless overwrite is True.
overwrite = False

# exist_ok replaces the separate isdir check
os.makedirs(PREPROCESS_NPY_SAVE_DIR, exist_ok=True)

for he_date_str in HE_DATE_LIST:

    # Skip observations that already have an output unless overwriting
    pre_process_file = (PREPROCESS_NPY_SAVE_DIR + he_date_str
                        + '_pre_processed_map.npy')
    if os.path.isfile(pre_process_file) and not overwrite:
        print((f'He {he_date_str} pre-processed map already exists.'))
        continue

    # The EUV date is only needed to locate the ratio file, so it is looked
    # up after the skip check.
    euv_date_str = prepare_data.get_nearest_date_str(
        EUV_DATE_LIST, selected_date_str=he_date_str
    )
    ratio_fits_file = f'{RATIO_SAVE_DIR}He{he_date_str}_EUV{euv_date_str}.fits'

    ratio_map_data = prepare_data.get_image_from_fits(ratio_fits_file)

    # pre_processed_map_data = detect.pre_process_v0_1(ratio_map_data)[0]
    pre_processed_map_data = detect.pre_process_v0_4(ratio_map_data)

    # dtype=object keeps the string and the array as two separate elements
    save_list = [he_date_str, pre_processed_map_data]
    np.save(pre_process_file, np.array(save_list, dtype=object),
            allow_pickle=True)
    print(f'{he_date_str} Pre-Processed Map Saved')

#### Ratio Comparison
He I, EUV, & He I/EUV Ratio

In [None]:
# Save a three-panel image (He I, EUV, He I/EUV ratio) per He I observation.
overwrite = False
out_dir = OUTPUT_DIR + 'Ratio_Comparison/' + DATE_DIR

# exist_ok replaces the separate isdir check
os.makedirs(out_dir, exist_ok=True)

for he_date_str in HE_DATE_LIST:

    # Skip observations that already have an image unless overwriting
    comparison_img_file = f'{out_dir}He{he_date_str}.jpg'
    if os.path.isfile(comparison_img_file) and not overwrite:
        print((f'He {he_date_str} ratio comparison already exists.'))
        continue

    euv_date_str = prepare_data.get_nearest_date_str(
        EUV_DATE_LIST, selected_date_str=he_date_str
    )

    # Extract observations and ratio map
    he_fits_file = DATA_FITS_FORMAT.format(
        data_dir=HE_DIR, date_str=he_date_str
    )
    he_map = prepare_data.get_nso_sunpy_map(he_fits_file)
    if not he_map:
        print(f'{he_date_str} He I observation extraction failed.')
        continue

    euv_fits_file = DATA_FITS_FORMAT.format(
        data_dir=EUV_DIR, date_str=euv_date_str
    )
    euv_map = sunpy.map.Map(euv_fits_file)

    ratio_fits_file = f'{RATIO_SAVE_DIR}He{he_date_str}_EUV{euv_date_str}.fits'
    ratio_map = sunpy.map.Map(ratio_fits_file)

    # Create panel plot
    fig = plt.figure(figsize=(18, 5))

    plot_detection.plot_he_map(fig, (1, 3, 1), he_map, he_date_str)

    plot_detection.plot_euv_map(fig, (1, 3, 2), euv_map, euv_date_str)

    # The ratio panel is drawn on axes that use the He I map's projection
    ax = fig.add_subplot(133, projection=he_map)
    ratio_map.plot(axes=ax, cmap='jet', vmin=-1, vmax=6)

    # Save plot, then close the figure so figures do not accumulate in the loop
    plt.savefig(comparison_img_file)
    plt.close(fig)
    print(f'He {he_date_str} map comparison saved.')

### Alternates

In [None]:
def remove_peak_counts(array):
    """Retrieve an array with the value of peak counts replaced with NaN.

    Every element strictly within 1e-2 of the value returned by
    detect.get_peak_counts_loc(array, bins_as_percent=False) is replaced with
    NaN; all other elements are returned unchanged.
    """
    peak_counts_val = detect.get_peak_counts_loc(array, bins_as_percent=False)
    near_peak = (array > peak_counts_val - 1e-2) & (array < peak_counts_val + 1e-2)

    # np.nan rather than the np.NaN alias, which NumPy 2.0 removed
    return np.where(near_peak, np.nan, array)

def band_pass(raw_he):
    """Pre-process equivalent width array by setting background to NaN
    and a simple brightness band pass.

    Returns a tuple (he_band_cut, he_high_cut, he_nan):
      he_nan       raw_he with exact zeros replaced by NaN
      he_high_cut  he_nan with values above 100 replaced by NaN
      he_band_cut  he_high_cut clipped to [-100, 100]; because values above
                   100 are already NaN, this only raises values below -100
                   up to -100
    """
    # np.nan rather than the np.NaN alias, which NumPy 2.0 removed
    he_nan = np.where(raw_he == 0, np.nan, raw_he)

    he_high_cut = np.where(he_nan > 100, np.nan, he_nan)
    # he_band_cut = np.where(he_high_cut < -100, np.nan, he_high_cut)
    he_band_cut = np.clip(he_high_cut, -100, 100)

    return he_band_cut, he_high_cut, he_nan


def equalize(raw_he):
    """Pre-process equivalent width array with histogram equalization.

    Returns a tuple (he3, he2, he1):
      he1  raw_he histogram-equalized, then passed through
           detect.remove_background
      he2  raw_he with nonzero values shifted up by |min(raw_he)|; exact
           zeros are kept at 0
      he3  he2 histogram-equalized, with its minimum value replaced by NaN
    """
    # Histogram equalization
    he1 = exposure.equalize_hist(raw_he)
    he1 = detect.remove_background(he1)

    # Shift nonzero values into positive range and equalize histogram
    he2 = np.where(raw_he == 0, 0, raw_he + np.abs(np.min(raw_he)))
    he3 = exposure.equalize_hist(he2)

    # np.nan rather than the np.NaN alias, which NumPy 2.0 removed
    he3 = np.where(he3 == np.min(he3), np.nan, he3)

    return he3, he2, he1


def rescale(raw_he):
    """Linearly stretch an equivalent width array between its 2nd and 98th
    percentiles, which gives a milder contrast enhancement than histogram
    equalization.

    Returns a tuple (he3, he2, he1):
      he3  raw_he stretched directly, then passed through
           detect.remove_background
      he2  he1 stretched over the same percentile range
      he1  raw_he with nonzero values shifted up by |min(raw_he)|; exact
           zeros are kept at 0
    """
    # Percentiles are taken over the non-NaN values only
    valid_values = raw_he[~np.isnan(raw_he)]
    p2, p98 = np.percentile(valid_values, (2, 98))

    # Direct stretch followed by background removal
    stretched = exposure.rescale_intensity(raw_he, in_range=(p2, p98))
    he3 = detect.remove_background(stretched)

    # Shifted copy and its stretch
    shift = np.abs(np.min(raw_he))
    he1 = np.where(raw_he == 0, 0, raw_he + shift)
    he2 = exposure.rescale_intensity(he1, in_range=(p2, p98))

    return he3, he2, he1


def rescale_center(raw_he):
    """Pre-process equivalent width array by applying linear rescaling
    to normalize the contrast, removing the background, and shifting by the
    peak-count value.

    Returns a tuple (he3, he2, he1):
      he1  raw_he stretched between its 2nd and 98th percentiles
      he2  he1 passed through detect.remove_background
      he3  he2 minus its peak-count value, plus 1
    """
    # NaN-aware percentiles: identical to np.percentile when raw_he has no
    # NaN, and still finite when it does.
    p2, p98 = np.nanpercentile(raw_he, (2, 98))

    # Linearly rescale
    he1 = exposure.rescale_intensity(raw_he, in_range=(p2, p98))
    he2 = detect.remove_background(he1)

    # Shift by the peak-count location.
    # NOTE(review): the +1 places the peak at 1 rather than 0 if
    # get_peak_counts_loc returns the peak value itself -- confirm intent.
    peak_counts_val = detect.get_peak_counts_loc(he2, bins_as_percent=False)
    he3 = he2 - peak_counts_val + 1

    return he3, he2, he1

Band Pass

In [None]:
he_band_cut, he_high_cut, he_nan = band_pass(he_map_data)

# Ordered from raw input to final output
arrays = [he_map_data, he_nan, he_high_cut, he_band_cut]
titles = ['he', 'he NaN', 'he High Cut', 'he Band Cut']

# Two histogram rows, two stages per row
plot_detection.plot_hists(arrays[0:2], titles[0:2], semilogy=True)
plot_detection.plot_hists(arrays[2:4], titles[2:4], semilogy=True)

Equalize

In [None]:
he3, he2, he1 = equalize(he_map_data)

# he1: equalized, he2: shifted, he3: shifted then equalized
arrays = [he_map_data, he1, he2, he3]
titles = ['he', 'Equalized', 'Shifted', 'Shifted & Equalized']

# Two histogram rows, two stages per row
plot_detection.plot_hists(arrays[0:2], titles[0:2], semilogy=True)
plot_detection.plot_hists(arrays[2:4], titles[2:4], semilogy=True)

In [None]:
# Histogram of the most recently computed he3 on a log count axis, followed
# by its peak-count location (displayed as the cell output).
hist, edges = detect.get_hist(he3, bins_as_percent=True, n=1000)
plt.semilogy(edges[0:-1], hist)
detect.get_peak_counts_loc(he3)

Rescaled

In [None]:
he3, he2, he1 = rescale(he_map_data)

# he3: stretched directly, he1: shifted, he2: shifted then stretched
arrays = [he_map_data, he3, he1, he2]
titles = ['he', 'Stretched', 'Shifted', 'Shifted & Stretched']

# Two histogram rows, two stages per row
plot_detection.plot_hists(arrays[0:2], titles[0:2], semilogy=True)
plot_detection.plot_hists(arrays[2:4], titles[2:4], semilogy=True)

In [None]:
he3, he2, he1 = rescale_center(he_map_data)

# rescale_center returns he1: stretched, he2: background removed,
# he3: shifted by the peak-count value. The titles follow that order.
arrays = [he_map_data, he1, he2, he3]
titles = ['he', 'Stretched', 'Removed Background', 'Shifted']

# Two histogram rows, two stages per row
plot_detection.plot_hists(arrays[0:2], titles[0:2], semilogy=True)
plot_detection.plot_hists(arrays[2:4], titles[2:4], semilogy=True)

Pre-Processed Ratio

In [None]:
# NOTE(review): he_date_str and euv_date_str are not assigned in this cell;
# they hold whatever values an earlier cell (e.g. a save loop) left in the
# kernel.
ratio_fits_file = f'{RATIO_SAVE_DIR}He{he_date_str}_EUV{euv_date_str}.fits'
raw_ratio = prepare_data.get_image_from_fits(ratio_fits_file)
ratio = detect.pre_process_v0_4(raw_ratio)

arrays = [raw_ratio, ratio]
titles = ['Raw Ratio', 'Pre-Processed Ratio']

plot_detection.plot_hists(arrays, titles, semilogy=True)

Off-Limb Masking

In [None]:
# Boolean mask that is True for pixels whose coordinates lie on the solar disk
all_hp_coords = all_coordinates_from_map(he_map)
mask = coordinate_is_on_solar_disk(all_hp_coords)
# Off-disk pixels are replaced with NaN in the data itself
limb_removed_he_map = sunpy.map.Map(
    np.where(mask, he_map.data, np.nan), he_map.meta
)

fig = plt.figure(figsize=(4, 4))
ax = fig.add_subplot(111, projection=limb_removed_he_map)
limb_removed_he_map.plot(axes=ax, title='', vmin=-100, vmax=100)

In [None]:
# Boolean mask that is True for pixels whose coordinates lie on the solar disk
all_hp_coords = all_coordinates_from_map(mag_map)
mask = coordinate_is_on_solar_disk(all_hp_coords)
# Here the data is left untouched and the off-disk pixels are passed as the
# map's mask instead (inverted, so True marks off-disk pixels).
limb_removed_mag_map = sunpy.map.Map(mag_map.data, mag_map.meta, mask=~mask)

fig = plt.figure(figsize=(4, 4))
ax = fig.add_subplot(111, projection=limb_removed_mag_map)
limb_removed_mag_map.plot(axes=ax, title='', vmin=-50, vmax=50)

## Reprojection

Save Heliographic Reprojection Outputs

In [None]:
# For every He I observation, reproject the nearest magnetogram (raw and
# smoothed) onto the grid of the saved pre-processed He I map and save both
# results as FITS files.
overwrite = True
smooth_size_percent = 10

# exist_ok replaces the separate isdir check
os.makedirs(HELIOGRAPH_MAG_SAVE_DIR, exist_ok=True)

for he_date_str in HE_DATE_LIST:

    mag_date_str = prepare_data.get_nearest_date_str(
        MAG_DATE_LIST, selected_date_str=he_date_str
    )

    fits_file_name = f'{HELIOGRAPH_MAG_SAVE_DIR}Mag{mag_date_str}_He{he_date_str}'
    reprojected_fits_file = f'{fits_file_name}.fits'
    reprojected_smooth_fits_file = f'{fits_file_name}_smooth.fits'

    # Optionally overwrite existing files: skipped when either output exists
    # and overwriting is off.
    output_exists = (os.path.isfile(reprojected_fits_file)
                     or os.path.isfile(reprojected_smooth_fits_file))
    if output_exists and not overwrite:
        print((f'{mag_date_str} magnetogram reprojected '
                + f'to {he_date_str} already exists.'))
        continue

    # Extract observations
    pre_process_file = (PREPROCESS_MAP_SAVE_DIR + he_date_str
                        + '_pre_processed_map.fits')
    pre_processed_map = sunpy.map.Map(pre_process_file)

    mag_fits_file = DATA_FITS_FORMAT.format(
        data_dir=MAG_DIR, date_str=mag_date_str
    )
    mag_map = prepare_data.get_nso_sunpy_map(mag_fits_file)

    # Process magnetogram: to CEA first, then onto the pre-processed map's WCS
    hg_mag_map = detect.reproject_to_cea(mag_map)
    reprojected_mag_map = hg_mag_map.reproject_to(
        pre_processed_map.wcs, algorithm='adaptive'
    )

    # Same two-step reprojection for the smoothed magnetogram
    smoothed_map = prepare_data.get_smoothed_map(mag_map, smooth_size_percent)
    hg_smoothed_map = detect.reproject_to_cea(smoothed_map)
    reprojected_smooth_map = hg_smoothed_map.reproject_to(
        pre_processed_map.wcs, algorithm='adaptive'
    )

    # Save to FITS files
    reprojected_mag_map.save(reprojected_fits_file, overwrite=overwrite)
    reprojected_smooth_map.save(reprojected_smooth_fits_file, overwrite=overwrite)
    print(f'{mag_date_str} magnetogram reprojected to {he_date_str} map saved.')

Differential rotation

In [None]:
# NOTE(review): with the two assignments below commented out, he_date_str and
# mag_date_str are whatever values an earlier cell left in the kernel.
# he_date_str = HE_DATE_LIST[3]
# mag_date_str = MAG_DATE_LIST[4]

he_fits_file = DATA_FITS_FORMAT.format(
   data_dir=HE_DIR, date_str=he_date_str
)
mag_fits_file = DATA_FITS_FORMAT.format(
    data_dir=MAG_DIR, date_str=mag_date_str
)
he_map = prepare_data.get_nso_sunpy_map(he_fits_file)
mag_map = prepare_data.get_nso_sunpy_map(mag_fits_file)

fig = plt.figure(figsize=(12, 5))

# Left panel: original magnetogram with contours of its smoothed version
ax1 = fig.add_subplot(121, projection=mag_map)
mag_map.plot(axes=ax1, vmin=-50, vmax=50,
               title=f'Original: {mag_map.date}')

smoothed_map = prepare_data.get_smoothed_map(mag_map, smooth_size_percent=10)
plot_detection.plot_map_contours(ax1, smoothed_map)

# Right panel: magnetogram passed through diff_rotate with the He I map as
# target
reprojected_map = prepare_data.diff_rotate(
   input_map=mag_map, target_map=he_map
)

ax2 = fig.add_subplot(122, projection=reprojected_map)
reprojected_map.plot(axes=ax2, vmin=-50, vmax=50,
                     title=f'Reprojection: {reprojected_map.date}')

# Contours come from the smoothed map after the same diff_rotate step
reprojected_smooth_map = prepare_data.diff_rotate(
   input_map=smoothed_map, target_map=he_map
)
plot_detection.plot_map_contours(ax2, reprojected_smooth_map)

Helioprojective Scale

In [None]:
# (HP Tx, Ty arcsec)/pix > (HP Tx, Ty arcsec)
Tx_scale = he_map.scale.axis1.to(u.arcsec/u.pix) * u.pix
Ty_scale = he_map.scale.axis2.to(u.arcsec/u.pix) * u.pix

f'Tx: {Tx_scale.value:.5f} Ty: {Ty_scale.value:.5f} arcsec'

In [None]:
# (HP Tx, Ty arcsec) > (HC Mm)
hp_delta_coords = frames.Helioprojective(
    he_map.scale.axis1*u.pix,
    he_map.scale.axis2*u.pix,
    observer='earth', obstime=he_map.date
)
hc_delta_coords = hp_delta_coords.transform_to(
    frames.Heliocentric(observer='earth', obstime=he_map.date)
)
f'x: {hc_delta_coords.x.to(u.Mm).value:.5f} y: {hc_delta_coords.x.to(u.Mm).value:.5f} Mm'

#### Carrington Non-CEA

In [None]:
# Reprojection map shape scaling factors
# Increase to increase map resolution and reduce distance scale / pixel
# Aim to match Helioprojective scale to preserve resolution
NON_CEA_LON_FACTOR = 1.55
NON_CEA_LAT_FACTOR = 1.55

# Solar radius in image pixels: pixel distance along x between the map's
# reference coordinate and a point one observed solar radius away at Ty = 0.
Rs_hp_coord = SkyCoord(
    he_map.rsun_obs, 0*u.arcsec, frame='helioprojective',
    observer='earth', obstime=he_map.date
)
Rs_pixel_pair = he_map.world_to_pixel(Rs_hp_coord)
ref_pixel_pair = he_map.world_to_pixel(he_map.reference_coordinate)
Rs_dim = int((Rs_pixel_pair.x - ref_pixel_pair.x).value)

# Output shape: 2 radii of rows and 4 radii of columns, each scaled
new_rows = int(2*Rs_dim*NON_CEA_LAT_FACTOR)
new_cols = int(4*Rs_dim*NON_CEA_LON_FACTOR)

hg_header = sunpy.map.header_helper.make_heliographic_header(
    he_map.date, he_map.observer_coordinate,
    shape=(new_rows, new_cols), frame='stonyhurst'
)

# Convert to 180 deg center longitude for Carrington map visualization
hg_header['crval1'] = 180
non_cea_map = he_map.reproject_to(
    hg_header#, algorithm='adaptive'
)

fig = plt.figure(figsize=(16, 5))

# Left: original He I map. Right (two columns wide): reprojected map.
ax = fig.add_subplot(1, 3, 1, projection=he_map)
he_map.plot(axes=ax, vmin=-100, vmax=100, title=he_map.date)

ax = fig.add_subplot(1, 3, (2,3), projection=non_cea_map)
non_cea_map.plot(axes=ax, vmin=-100, vmax=100, title='')

# Displayed as the cell output
(new_rows, new_cols)

Non-CEA Scale

In [None]:
# (HG lon, lat deg)/pix > (HG lon, lat deg)
lon_scale = non_cea_map.scale.axis1.to(u.deg/u.pix) * u.pix
lat_scale = non_cea_map.scale.axis2.to(u.deg/u.pix) * u.pix

f'lon: {lon_scale.value:.5f} lat: {lat_scale.value:.5f} deg'

In [None]:
# (HG lon, lat deg) > (HC Mm)
x_scale = (non_cea_map.rsun_meters * lon_scale.to(u.rad)/u.rad).to(u.Mm)
y_scale = (non_cea_map.rsun_meters * lat_scale.to(u.rad)/u.rad).to(u.Mm)

f'x: {x_scale.value:.5f} y: {y_scale.value:.5f} Mm'

In [None]:
# Two approaches the author has marked as WRONG, kept for reference. Both
# overwrite x_scale and y_scale, and only the final f-string is displayed
# because it is the last expression of the cell.

# WRONG
# (HG lon, lat deg) > (HC Mm)
hg_delta_coords = frames.HeliographicStonyhurst(lon_scale, lat_scale)
hc_delta_coords = hg_delta_coords.transform_to(
    frames.Heliocentric(observer='earth', obstime=non_cea_map.date)
)

x_scale = hc_delta_coords.x.to(u.Mm)
y_scale = hc_delta_coords.y.to(u.Mm)
f'x: {x_scale.value:.5f} y: {y_scale.value:.5f} Mm'

# WRONG
# (HP??? lon, lat deg) > (HC Mm)
hp_delta_coords = frames.Helioprojective(
    lon_scale, lat_scale,
    observer='earth', obstime=he_map.date
)
hc_delta_coords = hp_delta_coords.transform_to(
    frames.Heliocentric(observer='earth', obstime=non_cea_map.date)
)

x_scale = hc_delta_coords.x.to(u.Mm)
y_scale = hc_delta_coords.y.to(u.Mm)
f'x: {x_scale.value:.5f} y: {y_scale.value:.5f} Mm'

#### Stonyhurst CEA

In [None]:
# Reprojection map shape scaling factors
# Increase to increase map resolution and reduce distance scale / pixel
# Aim to match Helioprojective scale within 0.01 tolerance to preserve resolution
CEA_X_SCALE_FACTOR = np.pi/2
CEA_Y_SCALE_FACTOR = 1

# Solar radius in image pixels: pixel distance along x between the map's
# reference coordinate and a point one observed solar radius away at Ty = 0.
Rs_hp_coord = SkyCoord(
    he_map.rsun_obs, 0*u.arcsec, frame='helioprojective',
    observer='earth', obstime=he_map.date
)
Rs_pixel_pair = he_map.world_to_pixel(Rs_hp_coord)
ref_pixel_pair = he_map.world_to_pixel(he_map.reference_coordinate)
Rs_dim = int((Rs_pixel_pair.x - ref_pixel_pair.x).value)

# Output shape: 2 radii of rows and 4 radii of columns, each scaled
new_row_num = int(2*Rs_dim*CEA_Y_SCALE_FACTOR)
new_col_num = int(4*Rs_dim*CEA_X_SCALE_FACTOR)

hg_header = sunpy.map.header_helper.make_heliographic_header(
    he_map.date,
    he_map.observer_coordinate,
    # observer,
    shape=(new_row_num, new_col_num), frame='stonyhurst',
    projection_code='CEA'
)

# Specify Earth-based observer for solar radius, distance to Sun,
# and Heliographic coordinates to avoid warning messages due to
# missing keywords
earth_hp_coords = frames.Helioprojective(
    Tx=0*u.arcsec, Ty=0*u.arcsec,
    observer='earth', obstime=he_map.date,
)
# A throwaway 1x1 header is built only to read the observer keywords from it
earth_header = sunpy.map.make_fitswcs_header((1,1), earth_hp_coords)
for earth_coord_key in ['RSUN_REF', 'DSUN_OBS', 'HGLN_OBS', 'HGLT_OBS']:
    hg_header[earth_coord_key] = earth_header[earth_coord_key]

cea_he_map = he_map.reproject_to(
    hg_header, #algorithm='adaptive'
)

# Crop map to within 90 degrees of the central meridian
top_right = SkyCoord(
    lon=90*u.deg, lat=90*u.deg, frame=cea_he_map.coordinate_frame
)
bottom_left = SkyCoord(
    lon=-90*u.deg, lat=-90*u.deg, frame=cea_he_map.coordinate_frame
)
cea_he_map = cea_he_map.submap(bottom_left, top_right=top_right)

fig = plt.figure(figsize=(8, 4))
ax = fig.add_subplot(111, projection=cea_he_map)
cea_he_map.plot(axes=ax, vmin=-100, vmax=100)

# Displayed as the cell output (shape requested before cropping)
(new_row_num, new_col_num)

Raw Scale

In [None]:
# (HP Tx, Ty arcsec)/pix > (HP Tx, Ty arcsec)
Tx_scale = he_map.scale.axis1.to(u.arcsec/u.pix) * u.pix
Ty_scale = he_map.scale.axis2.to(u.arcsec/u.pix) * u.pix

f'Tx: {Tx_scale.value:.5f} Ty: {Ty_scale.value:.5f} arcsec'

In [None]:
# (HP Tx, Ty arcsec) > (HC Mm)
hp_delta_coords = frames.Helioprojective(
    he_map.scale.axis1*u.pix,
    he_map.scale.axis2*u.pix,
    observer='earth', obstime=he_map.date
)
hc_delta_coords = hp_delta_coords.transform_to(
    frames.Heliocentric(observer='earth', obstime=he_map.date)
)
f'x: {hc_delta_coords.x.to(u.Mm).value:.5f} y: {hc_delta_coords.x.to(u.Mm).value:.5f} Mm'

In [None]:
# Magnetogram to CEA, then onto the cropped CEA He I map's WCS so the two
# maps share a pixel grid.
cea_mag_map = detect.reproject_to_cea(mag_map)

reprojected_mag_map = cea_mag_map.reproject_to(
    cea_he_map.wcs, #algorithm='adaptive'
)

fig = plt.figure(figsize=(8, 4))
ax = fig.add_subplot(111, projection=reprojected_mag_map)
reprojected_mag_map.plot(axes=ax, vmin=-50, vmax=50)

CEA Scale

In [None]:
# Scale Available: (HG lon, lat deg)/pix > (HG lon, lat deg)
lon_scale = cea_he_map.scale.axis1.to(u.deg/u.pix) * u.pix
lat_scale = cea_he_map.scale.axis2.to(u.deg/u.pix) * u.pix

f'lon: {lon_scale.value:.5f} lat: {lat_scale.value:.5f} deg'

In [None]:
# (HG lon, lat deg) > (HC Mm)
x_scale = (cea_he_map.rsun_meters * lon_scale.to(u.rad)/u.rad).to(u.Mm)
y_scale = (cea_he_map.rsun_meters * lat_scale.to(u.rad)/u.rad).to(u.Mm)

f'x: {x_scale.value:.5f} y: {y_scale.value:.5f} Mm'

In [None]:
# Two approaches the author has marked as WRONG, kept for reference. Both
# overwrite x_scale and y_scale, and only the final f-string is displayed
# because it is the last expression of the cell.
# NOTE(review): these use pre_processed_map's scale rather than cea_he_map's.

# WRONG
# (HG lon, lat deg) > (HC Mm)
hg_delta_coords = frames.HeliographicStonyhurst(
    pre_processed_map.scale.axis1*u.pix,
    pre_processed_map.scale.axis2*u.pix,
)
hc_delta_coords = hg_delta_coords.transform_to(
    frames.Heliocentric(observer='earth', obstime=pre_processed_map.date)
)
x_scale = hc_delta_coords.x.to(u.Mm)
y_scale = hc_delta_coords.y.to(u.Mm)
f'x: {x_scale.value:.5f} y: {y_scale.value:.5f} Mm'

# WRONG
# (HP??? lon, lat deg) > (HC Mm)
hp_delta_coords = frames.Helioprojective(
    pre_processed_map.scale.axis1*u.pix,
    pre_processed_map.scale.axis2*u.pix,
    observer='earth', obstime=he_map.date
)
hc_delta_coords = hp_delta_coords.transform_to(
    frames.Heliocentric(observer='earth', obstime=pre_processed_map.date)
)
x_scale = hc_delta_coords.x.to(u.Mm)
y_scale = hc_delta_coords.y.to(u.Mm)
f'x: {x_scale.value:.5f} y: {y_scale.value:.5f} Mm'

## Preliminary Segmentations

In [None]:
GREEN = '#6ece58'
BLUE = '#3e4989'
ORANGE = '#fd9668'
PURPLE = '#721f81'


def get_thresh_px_percent_list(array, percent_of_peak_list):
    """Compute the percentage of pixels exceeding each candidate threshold.

    Args
        array: Numpy array of image data to threshold
        percent_of_peak_list: list of percent of peak values, each converted
            to a threshold bound by detect.get_thresh_bound
    Returns
        List of percentages of all array elements lying strictly above each
        threshold bound, ordered as percent_of_peak_list.
    """
    px_percent_list = []
    for percent_of_peak in percent_of_peak_list:
        thresh_bound = detect.get_thresh_bound(array, percent_of_peak)
        accepted_px_num = np.count_nonzero(array > thresh_bound)
        px_percent_list.append(accepted_px_num*100/array.size)
    return px_percent_list


def get_parameter_stats(outcome_list):
    """Locate the largest jump between consecutive parameter sweep outcomes.

    Args
        outcome_list: sequence of at least two numeric outcomes, e.g. the
            area percentage detected at each swept parameter value
    Returns
        max_diff: largest absolute difference between consecutive outcomes,
            as a percentage of the outcome preceding that jump
        cutoff: mean of the two outcomes on either side of the largest jump
        selected_parameter_num: number of outcomes strictly above the cutoff
        outcome_diffs: array of absolute differences between consecutive
            outcomes
    Raises
        ValueError: if fewer than two outcomes are given
    """
    # Convert once so indexing and the elementwise comparison below do not
    # depend on the container type of the input
    outcomes = np.asarray(outcome_list)
    if outcomes.size < 2:
        raise ValueError(
            'At least two outcomes are required to compute differences.'
        )

    outcome_diffs = np.abs(np.diff(outcomes))

    max_diff_i = np.argmax(outcome_diffs)
    # Relative to the outcome before the jump; non-finite if that outcome is 0
    max_diff = outcome_diffs[max_diff_i]*100/outcomes[max_diff_i]

    cutoff = np.mean(outcomes[max_diff_i:max_diff_i + 2])

    selected_parameter_num = np.count_nonzero(outcomes > cutoff)

    return max_diff, cutoff, selected_parameter_num, outcome_diffs


def plot_pixel_percent_bars(ax, parameter_list, pixel_percent_list,
                            max_diff, cutoff, selected_parameter_num,
                            step, title, unit, xlabel, thresh=True):
    """Draw a bar chart of pixel percentage against a swept parameter.

    Args
        ax: Matplotlib axes to draw on
        parameter_list: list of swept parameter values for the x axis
        pixel_percent_list: list of pixel percentages, one per parameter
        max_diff: float maximum percent difference shown in the title
        cutoff: float pixel percentage at which to draw a dashed line
        selected_parameter_num: int index of the first highlighted bar
        step: spacing between parameter values, used to size the bars
        title: str title prefix
        unit: str unit appended to the cutoff parameter in the title
        xlabel: str x axis label
        thresh: boolean to select the color pair
            True for BLUE bars with GREEN highlights
            False for PURPLE bars with ORANGE highlights
    """
    base_color, highlight_color = (BLUE, GREEN) if thresh else (PURPLE, ORANGE)
    bar_width = 0.8*step
    selected_parameters = parameter_list[selected_parameter_num:]

    title_text = (f'{title}\n Cutoff: {selected_parameters[0]}{unit} | ' +
                  f'Max Difference: {max_diff:.1f}%')
    ax.set_title(title_text, fontsize=28)
    ax.set_xlabel(xlabel, fontsize=24)
    ax.set_ylabel('Pixel Percentage (%)', fontsize=24)

    # Dashed cutoff line spanning half a step beyond the outermost bars
    line_x = [parameter_list[0] - step/2, parameter_list[-1] + step/2]
    ax.plot(line_x, [cutoff, cutoff],
            linestyle='--', color='k', linewidth=3)

    # All bars first, then the highlighted bars drawn over them
    ax.bar(parameter_list, pixel_percent_list,
           width=bar_width, color=base_color)
    ax.bar(selected_parameters,
           pixel_percent_list[selected_parameter_num:],
           width=bar_width, color=highlight_color)

### Threshold

#### Versions

v0.5.1 Pre-Process

In [None]:
# Pre-process the He I map with the v0.5.1 routine
pre_processed_v0_5_1_map = detect.pre_process_v0_5_1(he_map)

# Map data is flipped vertically before being plotted as a plain array;
# bounds are given as percentages (bounds_as_percent=True)
plot_detection.plot_thresholds(
    np.flipud(pre_processed_v0_5_1_map.data), bounds=[75, 90, 105],
    bounds_as_percent=True
)

v0.4 Pre-Process

In [None]:
# v0.4 pre-processing is applied to he_map_data (defined in an earlier cell)
# rather than to the map object
pre_process_v0_4_he = detect.pre_process_v0_4(he_map_data)

# Bounds are given as percentages (bounds_as_percent=True)
plot_detection.plot_thresholds(
    pre_process_v0_4_he, bounds=[75, 90, 105], bounds_as_percent=True
)

v0.1 Pre-Process

In [None]:
# Only the first element returned by the v0.1 routine is used here
pre_process_v0_1_he = detect.pre_process_v0_1(he_map_data)[0]

# Bounds are given as percentages (bounds_as_percent=True)
plot_detection.plot_thresholds(
    pre_process_v0_1_he, bounds=[75, 90, 105], bounds_as_percent=True
)

In [None]:
# Compare dates
# Plot thresholds for three consecutive He I observations from date_idx on
date_idx = 0
for he_date_str in HE_DATE_LIST[date_idx:date_idx + 3]:
    # Build the FITS path for this date and load its image data
    compare_he_fits_file = DATA_FITS_FORMAT.format(
        data_dir=HE_DIR, date_str=he_date_str
    )
    raw_he = prepare_data.get_image_from_fits(compare_he_fits_file)
    # Only the first element returned by the v0.1 routine is used
    he = detect.pre_process_v0_1(raw_he)[0]

    plot_detection.plot_thresholds(he, bounds=[75, 85, 100], bounds_as_percent=True)

In [None]:
# Parameter sweep
# Sweep the threshold over a coarse range and a narrower range, both with
# the same step, and plot one bar chart per range
step = 5
percent_of_peak_lists = [
    list(np.arange(0,200,step)), list(np.arange(80,130,step))
]

for percent_of_peak_list in percent_of_peak_lists:
    # Requires pre_process_v0_1_he from the v0.1 Pre-Process cell
    px_percent_list = get_thresh_px_percent_list(pre_process_v0_1_he, percent_of_peak_list)
    
    parameter_stats = get_parameter_stats(px_percent_list)
    max_diff, cutoff, selected_parameter_num, pixel_percent_diffs = parameter_stats
    
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot()

    plot_pixel_percent_bars(
        ax, percent_of_peak_list, px_percent_list, max_diff, cutoff, selected_parameter_num,
        step, title='Threshold', unit='%', xlabel='Percent of Peak Pixel Count (%)', thresh=True)

vY Pre-Process: Pre-Processed Map Reprojected to Heliographic Coordinates

In [None]:
# Pre-process the He I map with the vY routine
pre_processed_vY_map = detect.pre_process_vY(he_map)

# Map data is flipped vertically before being plotted as a plain array
plot_detection.plot_thresholds(
    np.flipud(pre_processed_vY_map.data), bounds=[75, 90, 105],
    bounds_as_percent=True
)

### Structuring Element Radius

In [None]:
def plot_varied_morph_radius(pre_processed_map_data, percent_of_peak_list,
                             morph_radius_list, ch_mask_list, px=False):
    """Plot mask contours over the pre-processed image for varied radii.

    Args
        pre_processed_map_data: image array shown alone and behind each mask
        percent_of_peak_list: list of int threshold percentages for titles
        morph_radius_list: list of int structuring element radii for titles
        ch_mask_list: list of masks to draw as contours, one per grid image
        px: boolean to select the title wording
            True to label the radius in px
            False to label the radius in Mm
    """
    plot_detection.plot_images([pre_processed_map_data], image_size=4)

    # One copy of the background image per mask in a 3 column grid
    background_list = [pre_processed_map_data] * len(ch_mask_list)
    axes = plot_detection.plot_image_grid(
        background_list, num_cols=3, cmap='afmhot', image_size=7
    )

    for ax, percent_of_peak, radius, ch_mask in zip(
            axes.values(), percent_of_peak_list,
            morph_radius_list, ch_mask_list):
        ax_title = (
            f'{percent_of_peak:d}% of Peak | {radius:d}px Radius' if px
            else (f'{percent_of_peak:d}% of Mode Threshold | '
                  f'{radius:d}Mm SE Disk Radius')
        )
        ax.set_title(ax_title)

        ax.tick_params(left=False, right=False, labelleft=False,
                       labelbottom=False, bottom=False)

        ax.contour(ch_mask, cmap='gray')

#### Versions

v0.5.1 Pre-Process

In [None]:
# Vary the SE disk radius at a fixed threshold of 80
morph_radius_list = [8, 12, 16]
percent_of_peak_list = [80] * len(morph_radius_list)

pre_processed_v0_5_1_map = detect.pre_process_v0_5_1(he_map)

ch_mask_list = detect.get_ch_mask_list_v0_5_1(
    pre_processed_v0_5_1_map, percent_of_peak_list, morph_radius_list
)

# Map data is flipped vertically before being plotted as a plain array
plot_varied_morph_radius(
    np.flipud(pre_processed_v0_5_1_map.data),
    percent_of_peak_list, morph_radius_list, ch_mask_list
)

v0.4 Pre-Process

In [None]:
# Vary the SE disk radius at a fixed threshold of 90
morph_radius_list = [12, 16, 20]
percent_of_peak_list = [90] * len(morph_radius_list)

pre_process_v0_4_he = detect.pre_process_v0_4(he_map_data)

# One mask per (threshold, radius) pair
ch_mask_list = [
    detect.get_ch_mask(pre_process_v0_4_he, percent_of_peak, morph_radius)
    for percent_of_peak, morph_radius in zip(percent_of_peak_list,
                                             morph_radius_list)
]
plot_varied_morph_radius(
    pre_process_v0_4_he, percent_of_peak_list, morph_radius_list,
    ch_mask_list, px=True
)

v0.1 Pre-Process

In [None]:
# Vary the SE disk radius at a fixed threshold of 90
morph_radius_list = [12, 16, 20]
percent_of_peak_list = [90] * len(morph_radius_list)

# Only the first element returned by the v0.1 routine is used
pre_process_v0_1_he = detect.pre_process_v0_1(he_map_data)[0]

# One mask per (threshold, radius) pair
ch_mask_list = [
    detect.get_ch_mask(pre_process_v0_1_he, percent_of_peak, morph_radius)
    for percent_of_peak, morph_radius in zip(percent_of_peak_list,
                                             morph_radius_list)
]
plot_varied_morph_radius(
    pre_process_v0_1_he, percent_of_peak_list, morph_radius_list,
    ch_mask_list, px=True
)

In [None]:
# Parameter sweep
# Sweep the SE disk radius over a coarse range and a narrower range at a
# fixed threshold of 90, and plot one bar chart per range
step = 2
morph_radius_lists = [
    list(np.arange(1,21,step)), list(np.arange(8,15,step))
]

for morph_radius_list in morph_radius_lists:
    percent_of_peak_list = [90 for _ in range(len(morph_radius_list))]
    # Requires pre_process_v0_1_he from the v0.1 Pre-Process cell
    ch_mask_list = [
        detect.get_ch_mask(pre_process_v0_1_he, percent_of_peak, morph_radius)
        for percent_of_peak, morph_radius
        in zip(percent_of_peak_list, morph_radius_list)
    ]
    
    px_percent_list = detect.get_px_percent_list(ch_mask_list)
    
    parameter_stats = get_parameter_stats(px_percent_list)
    max_diff, cutoff, selected_parameter_num, pixel_percent_diffs = parameter_stats
    
    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot()

    plot_pixel_percent_bars(
        ax, morph_radius_list, px_percent_list, max_diff, cutoff, selected_parameter_num,
        step, title='SE Disk Radius', unit='px', xlabel='SE Disk Radius (px)', thresh=False
    )

vY Pre-Process: Pre-Processed Map Reprojected to Heliographic Coordinates

In [None]:
# NOTE(review): he_date_str is reassigned here but he_map is not reloaded in
# this cell - confirm he_map corresponds to this date
he_date_str = HE_DATE_LIST[1]

# Vary the SE disk radius at a fixed threshold of 90
morph_radius_list = [8, 12, 16]
percent_of_peak_list = [90] * len(morph_radius_list)

pre_processed_vY_map = detect.pre_process_vY(he_map)

ch_mask_list = detect.get_ch_mask_list_vY(
    pre_processed_vY_map, percent_of_peak_list, morph_radius_list
)

# Map data is flipped vertically before being plotted as a plain array
plot_varied_morph_radius(
    np.flipud(pre_processed_vY_map.data),
    percent_of_peak_list, morph_radius_list, ch_mask_list
)

#### Alternates

In [None]:
# Initial distance definition of SE disk radius ideas

# Area square: (HP Tx, Ty arcsec) > (HC Mm)
# Get HG coords: (pixel) > (HP Tx, Ty arcsec) > (HG lon, lat deg)
# Reproject CEA: (HP Tx, Ty arcsec) > (pixel)

# SE Disk Radius: (HC Mm) > (pixel)
# HC to HP/HG to pixel
# To pixel step fails
# Place each radius along the heliocentric x axis (y = z = 0)
empty_dim_list = [0*u.km for _ in morph_radius_list]
morph_radius_hc_coords = SkyCoord(
    x=[morph_radius_dist*u.Mm for morph_radius_dist in morph_radius_list],
    y=empty_dim_list, z=empty_dim_list, frame='heliocentric',
    observer='earth', obstime=pre_processed_map.date
)
morph_radius_hp_coords = morph_radius_hc_coords.transform_to(
    frames.Helioprojective(observer='earth', obstime=pre_processed_map.date)
)
# NOTE(review): the heliocentric coordinates are passed here, not the
# helioprojective ones computed just above - confirm which was intended
morph_radius_pixel_coord = pre_processed_map.world_to_pixel(
    morph_radius_hc_coords
).x
# Pixel x position of the map's reference coordinate
ref_pixel_coord = pre_processed_map.world_to_pixel(
    pre_processed_map.reference_coordinate
).x

### Save Single Mask Outputs

v0.2-v0.4

In [None]:
overwrite = False

percent_of_peak = 80
morph_radius = 18


# Create the output directory if needed; no error if it already exists
os.makedirs(DETECTION_NPY_SAVE_DIR, exist_ok=True)

for he_date_str in HE_DATE_LIST:
    
    # Optionally overwrite existing files
    # NOTE(review): this file name matches the one written by the ensemble
    # save cells for the same directory - confirm the overlap is intended
    mask_file = f'{DETECTION_NPY_SAVE_DIR}{he_date_str}_ensemble_map.npy'
    if os.path.isfile(mask_file) and not overwrite:
        print(f'He {he_date_str} single mask already exists.')
        continue
    
    # The pre-processed array is the last element of the saved object array
    pre_process_file = (PREPROCESS_NPY_SAVE_DIR + he_date_str
                        + '_pre_processed_map.npy')
    pre_processed_map = np.load(pre_process_file, allow_pickle=True)[-1]

    ch_mask = detect.get_ch_mask(
        pre_processed_map, percent_of_peak, morph_radius
    )
    
    # Saved as an object array of [date string, mask]
    save_list = [he_date_str, ch_mask]
    np.save(mask_file, np.array(save_list, dtype=object), allow_pickle=True)
    print(f'{he_date_str} Single Mask Saved')

### Design Variable Grid

Includes fill and remove steps

In [None]:
def plot_design_var_grid(pre_processed_map_data, percent_of_peak_list,
                         morph_radius_list, ch_mask_list, num_cols):
    """Plot mask contours over the pre-processed image in a titled grid.

    Args
        pre_processed_map_data: image array shown behind each mask
        percent_of_peak_list: list of int threshold percentages for titles
        morph_radius_list: list of int radii in Mm for titles
        ch_mask_list: list of masks to draw as contours, one per grid image
        num_cols: int number of grid columns
    """
    # One copy of the background image per mask
    background_list = [pre_processed_map_data] * len(ch_mask_list)
    axes = plot_detection.plot_image_grid(
        background_list, num_cols, cmap='afmhot', image_size=7
    )

    for ax, percent_of_peak, radius, ch_mask in zip(
            axes.values(), percent_of_peak_list,
            morph_radius_list, ch_mask_list):
        ax_title = f'{percent_of_peak:d}% of Peak | {radius:d}Mm Radius'
        ax.set_title(ax_title)

        ax.tick_params(left=False, right=False, labelleft=False,
                       labelbottom=False, bottom=False)

        ax.contour(ch_mask, cmap='gray')

In [None]:
def plot_varied_masks(pre_processed_map_data, percent_of_peak_list,
                      morph_radius_list, ch_mask_list, title=True):
    """Plot each mask as an individual image with no axis ticks.

    Args
        pre_processed_map_data: image array plotted first on its own; its
            NaN pixels stay NaN in every mask image
        percent_of_peak_list: list of int threshold percentages for titles
        morph_radius_list: list of int radii in Mm for titles
        ch_mask_list: list of masks, each plotted in its own figure
        title: boolean to specify whether each mask image is titled
    """
    plot_detection.plot_images([pre_processed_map_data], image_size=4)

    # Base disk is the same for every mask: 0 where data exist, NaN elsewhere
    empty_disk = np.where(~np.isnan(pre_processed_map_data), 0, np.nan)

    zipped_items = zip(percent_of_peak_list,
                       morph_radius_list, ch_mask_list)

    for percent_of_peak, radius, ch_mask in zipped_items:
        # Truthy mask pixels become 1 over the base disk
        ch_disk = np.where(ch_mask, 1, empty_disk)

        axes = plot_detection.plot_image_grid(
            [ch_disk], num_cols=3, cmap='magma', image_size=7
        )

        if title:
            axes[0].set_title((f'{percent_of_peak:d}% of Mode Threshold | '
                               f'{radius:d}Mm SE Disk Radius'))

        axes[0].tick_params(left=False, right=False, labelleft=False,
                            labelbottom=False, bottom=False)

# Two design variable combinations, plotted as untitled individual masks
percent_of_peak_list = [90, 70]
morph_radius_list = [13, 15]

# Requires pre_processed_map (a map object) from an earlier cell
ch_mask_list = detect.get_ch_mask_list_v0_5_1(
    pre_processed_map, percent_of_peak_list, morph_radius_list
)
# Map data is flipped vertically before being plotted as a plain array
plot_varied_masks(
    np.flipud(pre_processed_map.data), percent_of_peak_list,
    morph_radius_list, ch_mask_list, title=False
)

#### Versions

##### v0.5.1 Pre-Process

Design Grid

In [None]:
# v0.5.1 SOLIS Design
# Entries at the same index form one (threshold, radius) design pair
percent_of_peak_list = [70, 70, 80, 90]
morph_radius_list = [   15, 17, 13, 13] # Mm

# # v0.5.1 KPVT Design
# percent_of_peak_list = [85, 105, 85, 95]
# morph_radius_list = [   17, 13, 15, 13] # Mm

# Requires pre_processed_map and pre_processed_map_data from earlier cells
ch_mask_list = detect.get_ch_mask_list_v0_5_1(
    pre_processed_map, percent_of_peak_list, morph_radius_list
)

plot_design_var_grid(
    pre_processed_map_data, percent_of_peak_list, morph_radius_list,
    ch_mask_list, num_cols=2
)

Exploratory Grid

In [None]:
# # Breaks up QS merger for COSPAR triangular CH
# percent_of_peak_list = [80, 70, 80, 90]
# morph_radius_list = [   12, 17, 13, 13] # Mm

percent_of_peaks = [80, 90, 100]
morph_radii = [      9, 13, 17] # Mm
# Full grid of combinations in row-major order: thresholds cycle within each
# row (one column per threshold) while the radius decreases from row to row
percent_of_peak_list = [percent_of_peak 
                        for _ in morph_radii
                        for percent_of_peak in percent_of_peaks]
morph_radius_list = [morph_radius 
                     for morph_radius in reversed(morph_radii)
                     for _ in percent_of_peaks]

# Requires pre_processed_map and pre_processed_map_data from earlier cells
ch_mask_list = detect.get_ch_mask_list_v0_5_1(
    pre_processed_map, percent_of_peak_list, morph_radius_list
)

plot_design_var_grid(
    pre_processed_map_data, percent_of_peak_list, morph_radius_list,
    ch_mask_list, num_cols=len(percent_of_peaks)
)

##### vY Pre-Process
Pre-Processed Map Reprojected to Heliographic Coordinates

In [None]:
# Two radii at a fixed threshold of 90
percent_of_peak_list = [90, 90]
morph_radius_list = [10, 15]

pre_processed_vY_map = detect.pre_process_vY(he_map)

ch_mask_list = detect.get_ch_mask_list_vY(
    pre_processed_vY_map, percent_of_peak_list, morph_radius_list
)
# Map data is flipped vertically before being plotted as a plain array
plot_varied_morph_radius(
    np.flipud(pre_processed_vY_map.data), percent_of_peak_list,
    morph_radius_list, ch_mask_list
)

## Ensemble

### Versions

Appropriate pre-processed products must be extracted

v1.1: Before running the cells below, run the function definitions under CH Labels and the cells under CH Labels > Load Saved Outcomes.

In [None]:
# pickle is not among the notebook's explicit top-level imports, so import
# it here to keep this cell runnable on a fresh kernel
import pickle

# SECURITY: pickle.load can execute arbitrary code; only load trusted files
with open(LDA_FILE_NAME, 'rb') as lda_file:
    lda = pickle.load(lda_file)

In [None]:
# v1.0 SOLIS Design
# Entries at the same index form one (threshold, radius) design pair
percent_of_peak_list = [70, 70, 80, 90]
morph_radius_list = [   15, 17, 13, 13] # Mm

# # v1.0 KPVT Design
# percent_of_peak_list = [85, 105, 85, 95]
# morph_radius_list = [   17, 13, 15, 13] # Mm

In [None]:
# Feature keys read from each candidate's outcome dictionary, in the column
# order of the feature array passed to the classifier
V1_1_CLASSIFY_FEATURES = ['unipolarity', 'grad_median', 'cm_foreshort']
# NOTE(review): the star import supplies the unqualified detect functions
# used below; explicit imports would make their origin clearer
from detect import *

morph_radius_dist_list = morph_radius_list
# This cell is the inlined body of a prospective function; its intended
# signature and docstring are kept below as comments.
# TODO: load saved lda
# # def get_ensemble_v1_1(he_map_data, pre_processed_map, reprojected_mag_map,
# #                       percent_of_peak_list, morph_radius_dist_list,
# #                       probability_threshold):
# #     """Retrieve an ensemble of segmentations sorted by CH unipolarity.
    
# #     Args
# #         he_map_data: Numpy array of He I observation
# #         pre_processed_map: Sunpy map object to segment
# #         reprojected_mag_map: Sunpy map object of magnetogram reprojected
# #             to align with the ensemble map
# #         percent_of_peak_list: list of float percentage measured from the zero
# #             value up to or beyond the histogram value
# #         morph_radius_dist_list: list of float distances in Mm for radius of
# #             disk structuring element in morphological operations
# #         probability_threshold: float probability in [0,1) at which to
# #             threshold candidate CHs
# #     Returns
# #         Ensemble greyscale coronal holes mask sorted by unipolarity.
# #         List of coronal holes masks.
# #         List of confidence levels in mask layers.
# #         Confidence assignment metric list of unipolarity.
# #     """

# Create segmentation masks across the full solar disk of candidate
# regions for varied design variable combinations
full_disk_cand_mask_list = get_ch_mask_list_v0_5_1(
    pre_processed_map, percent_of_peak_list, morph_radius_dist_list
)

# List to be extended by masks for distinct CHs from all segmentations
cand_masks = []
ones_array = np.ones_like(he_map_data)

# Split each full disk mask into per-candidate masks built from an array
# of ones, and pool the candidates from every segmentation
for full_disk_cand_mask in full_disk_cand_mask_list:
    cand_masks_in_full_disk_mask = get_map_data_by_ch(
        ones_array, full_disk_cand_mask
    )
    cand_masks.extend(cand_masks_in_full_disk_mask)

num_cand = len(cand_masks)

# Compute constant area per square pixel once for all CHs
A_per_square_px = get_A_per_square_px(pre_processed_map)

# Array to hold candidate feature values: one row per candidate and one
# column per feature in V1_1_CLASSIFY_FEATURES
cand_feature_array = np.zeros((num_cand, len(V1_1_CLASSIFY_FEATURES)))

# TODO: Takes 10s, speed-up?
for cand_idx in range(num_cand):
    distinct_cand_mask = cand_masks[cand_idx]
    distinct_cand_map = sunpy.map.Map(
        distinct_cand_mask, # Passed unflipped; author notes this works right
        pre_processed_map.meta
    )
    outcome_dict = get_outcomes(
        distinct_cand_map, reprojected_mag_map, A_per_square_px,
        he_map_data=he_map_data
    )
    cand_feature_values = [
        outcome_dict[feature] for feature in V1_1_CLASSIFY_FEATURES
    ]
    cand_feature_array[cand_idx, :] = cand_feature_values

# Column 1 of the predicted class probabilities
# NOTE(review): presumably the CH class - confirm against the saved model
cand_probabilities = lda.predict_proba(cand_feature_array)[:,1]

# np.argsort is ascending: indices run from least to greatest probability
sorted_idxs = np.argsort(cand_probabilities)

# Sort candidate regions from least to greatest predicted probability so
# that the most probable candidates come last in the list
cand_masks = [cand_masks[i] for i in sorted_idxs]
cand_probabilities = [
    cand_probabilities[i] for i in sorted_idxs
]

In [None]:
# Must be below 1: the rescaling below divides by (1 - probability_threshold)
probability_threshold = 0 # All false alarms
# probability_threshold = 0.04 # No misses
# probability_threshold = 0.4 # Balance hits vs misses


# Assign confidence by probability above a threshold: linearly rescale so
# the threshold maps to 0 and a probability of 1 maps to 1
confidence_levels = np.array(
    [(probability - probability_threshold)/(1 - probability_threshold)
    for probability in cand_probabilities]
)
# Candidates at or below the threshold get zero confidence
confidence_levels = np.where(
    confidence_levels > 0, confidence_levels, 0
)

# Construct ensemble map by adding distinct CHs with assigned
# confidence level values to an empty base disk (0 where the flipped
# pre-processed data exist, NaN elsewhere)
ensemble_map_data = np.where(
    ~np.isnan(np.flipud(pre_processed_map.data)), 0, np.nan
)
# Candidates are painted in list order, so a later candidate overwrites an
# earlier one wherever their non-NaN pixels overlap
for distinct_cand, confidence in zip(cand_masks, confidence_levels):
    ensemble_map_data = np.where(
        ~np.isnan(distinct_cand), confidence, ensemble_map_data
    )
# return ensemble_map_data, cand_masks, confidence_levels, cand_feature_array

In [None]:
# EUV comparison
# One row of three panels: He I observation, ensemble map, EUV observation
nrows = 1
# Ensemble data is flipped vertically before being wrapped as a map with
# the pre-processed map's metadata
ensemble_map = sunpy.map.Map(
    np.flipud(ensemble_map_data), pre_processed_map.meta
)

fig = plt.figure(figsize=(21, 5))

# Subplot specifications as (rows, columns, index)
ax1_gridspec = (nrows, 3, 1)
ax2_gridspec = (nrows, 3, 2)
ax3_gridspec = (nrows, 3, 3)

# Plot He observation
plot_detection.plot_he_map(fig, ax1_gridspec, he_map, he_date_str)

# Plot ensemble map with overlayed neutral lines
# Requires reprojected_smooth_map from an earlier cell
ax = plot_detection.plot_ensemble_map_v1_0(fig, ax2_gridspec, ensemble_map)
plot_detection.plot_map_contours(ax, reprojected_smooth_map)
ax.set_title(f'Probability Threshold {probability_threshold:.2f}')

plot_detection.plot_euv_map(fig, ax3_gridspec, euv_map, euv_date_str)
# Ends the cell with a statement so no return value is echoed as output
print()

In [None]:
# Metric passed to the plot alongside each candidate: the predicted
# probability, or alternatively the first feature column (commented out)
# metric_list = list(cand_feature_array[:,0])
metric_list = cand_probabilities

plot_detection.plot_ensemble(
    pre_processed_map_data, ensemble_map_data, cand_masks,
    confidence_levels, metric_list
)

v0.5.1

In [None]:
# Conservative Design
# percent_of_peak_list = [80, 80, 90, 100]
# morph_radius_list = [   15, 17, 13, 13] # Mm

# Aggressive Design
percent_of_peak_list = [70, 70, 80, 90]
morph_radius_list = [   15, 17, 13, 13] # Mm

unipolarity_threshold = 0.5


# Requires pre_processed_map and reprojected_mag_map from earlier cells
out = detect.get_ensemble_v0_5_1(
    pre_processed_map, reprojected_mag_map,
    percent_of_peak_list, morph_radius_list,
    unipolarity_threshold
)
# The function returns four items, unpacked here in order
ensemble_map_data, masks_by_ch, confidence_list, unipolarity_by_ch = out

plot_detection.plot_ensemble(
    pre_processed_map_data, ensemble_map_data, masks_by_ch,
    confidence_list, unipolarity_by_ch
)

v0.5: Directly Assigned Unipolarity

In [None]:
# Entries at the same index form one (threshold, radius) design pair
percent_of_peak_list = [80,80, 90, 100,100]
morph_radius_list = [   18,20, 16, 16, 20] # px
unipolarity_threshold = 0.5


# Requires pre_processed_map and reprojected_mag_map from earlier cells
out = detect.get_ensemble_v0_5(
    pre_processed_map, reprojected_mag_map,
    percent_of_peak_list, morph_radius_list, unipolarity_threshold
)
# The function returns four items, unpacked here in order
ensemble_map_data, map_data_by_ch, confidence_list, unipolarity_by_ch = out

plot_detection.plot_ensemble(
    pre_processed_map_data, ensemble_map_data, map_data_by_ch,
    confidence_list, unipolarity_by_ch
)

v0.3: Evenly Assigned Smoothness

In [None]:
# Entries at the same index form one (threshold, radius) design pair
percent_of_peak_list = [80,80, 90, 100,100]
morph_radius_list = [   13,17, 15, 13, 17] # px


# v0.3 takes the pre-processed data rather than the map object
out = detect.get_ensemble_v0_3(
    pre_processed_map_data, percent_of_peak_list, morph_radius_list
)
# NOTE(review): ensemble_map is rebound here to this function's first return
# value, while other cells use the same name for a Sunpy map
ensemble_map, map_data_by_ch, confidence_list, gradient_medians = out
plot_detection.plot_ensemble(
    pre_processed_map_data, ensemble_map, map_data_by_ch,
    confidence_list, gradient_medians
)

v0.2: Detected Pixel Percentage Sort

In [None]:
# Entries at the same index form one (threshold, radius) design pair
percent_of_peak_list = [80,80,90,100,100]
morph_radius_list = [13,17,15,13,17] # px


# v0.2 takes the pre-processed data rather than the map object
out = detect.get_ensemble_v0_2(
    pre_processed_map_data, percent_of_peak_list, morph_radius_list
)
# The function returns four items, unpacked here in order
ensemble_map_data, ch_mask_list, confidence_list, px_percent_list = out
plot_detection.plot_ensemble(
    pre_processed_map_data, ensemble_map_data, ch_mask_list,
    confidence_list, px_percent_list, mask_contour=True
)

vY

In [None]:
# Entries at the same index form one (threshold, radius) design pair
percent_of_peak_list = [85, 73, 95, 85]
morph_radius_list = [   10, 14, 10, 14]
unipolarity_threshold = 0.5

# Requires pre_processed_map and hg_mag_map from earlier cells
out = detect.get_ensemble_vY(
    pre_processed_map, hg_mag_map,
    percent_of_peak_list, morph_radius_list,
    unipolarity_threshold
)
# The function returns four items, unpacked here in order
ensemble_map_data, masks_by_ch, confidence_list, unipolarity_by_ch = out

plot_detection.plot_ensemble(
    pre_processed_map_data, ensemble_map_data, masks_by_ch,
    confidence_list, unipolarity_by_ch
)

### Save Outputs

v0.5.1

In [None]:
overwrite = False

# v0.5.1 SOLIS Design
percent_of_peak_list = [70, 70, 80, 90]
morph_radius_list = [   15, 17, 13, 13] # Mm
# unipolarity_threshold = 0.5
unipolarity_threshold = 0

# # v0.5.1 KPVT Design
# percent_of_peak_list = [85, 105, 85, 95]
# morph_radius_list = [   17, 13, 15, 13] # Mm
# unipolarity_threshold = 0


# Create the output directory if needed; no error if it already exists
os.makedirs(DETECTION_MAP_SAVE_DIR, exist_ok=True)

for he_date_str in HE_DATE_LIST:
    
    # Optionally overwrite existing files
    ensemble_file = f'{DETECTION_MAP_SAVE_DIR}{he_date_str}_ensemble_map.fits'
    if os.path.isfile(ensemble_file) and not overwrite:
        print(f'He {he_date_str} ensemble map already exists.')
        continue
    
    # Extract pre-processed map
    pre_process_file = (PREPROCESS_MAP_SAVE_DIR + he_date_str
                        + '_pre_processed_map.fits')
    pre_processed_map = sunpy.map.Map(pre_process_file)

    # Extract saved processed magnetograms, selecting the magnetogram date
    # nearest to this He I observation
    mag_date_str = prepare_data.get_nearest_date_str(
        MAG_DATE_LIST, selected_date_str=he_date_str
    )
    reprojected_fits_file = (f'{ROTATED_MAG_SAVE_DIR}'
                             f'Mag{mag_date_str}_He{he_date_str}.fits')
    reprojected_mag_map = sunpy.map.Map(reprojected_fits_file)

    # Only the ensemble data (first returned item) is saved
    ensemble_map_data = detect.get_ensemble_v0_5_1(
        pre_processed_map, reprojected_mag_map,
        percent_of_peak_list, morph_radius_list,
        unipolarity_threshold
    )[0]
    # Data is flipped vertically before being wrapped as a map with the
    # pre-processed map's metadata
    ensemble_map = sunpy.map.Map(
        np.flipud(ensemble_map_data), pre_processed_map.meta
    )
    
    ensemble_map.save(ensemble_file, overwrite=overwrite)
    print(f'{he_date_str} Ensemble Map Saved')

v0.5

In [None]:
overwrite = False

percent_of_peak_list = [80,80, 90, 100,100]
morph_radius_list = [18,20, 16, 16,20]
unipolarity_threshold = 0.5


# Create the output directory if needed; no error if it already exists
os.makedirs(DETECTION_NPY_SAVE_DIR, exist_ok=True)

for he_date_str in HE_DATE_LIST:
    
    # Optionally overwrite existing files
    ensemble_file = f'{DETECTION_NPY_SAVE_DIR}{he_date_str}_ensemble_map.npy'
    if os.path.isfile(ensemble_file) and not overwrite:
        print(f'He {he_date_str} ensemble map already exists.')
        continue
    
    # Extract He I observation
    he_fits_file = DATA_FITS_FORMAT.format(
        data_dir=HE_DIR, date_str=he_date_str
    )
    he_map = prepare_data.get_nso_sunpy_map(he_fits_file)

    # The pre-processed array is the last element of the saved object array;
    # it is flipped vertically and wrapped as a map with the He I metadata
    pre_process_file = (PREPROCESS_NPY_SAVE_DIR + he_date_str
                        + '_pre_processed_map.npy')
    pre_processed_map_data = np.load(pre_process_file, allow_pickle=True)[-1]
    pre_processed_map = sunpy.map.Map(
        np.flipud(pre_processed_map_data), he_map.meta
    )

    # Extract saved processed magnetograms, selecting the magnetogram date
    # nearest to this He I observation
    mag_date_str = prepare_data.get_nearest_date_str(
        MAG_DATE_LIST, selected_date_str=he_date_str
    )
    reprojected_fits_file = (f'{ROTATED_MAG_SAVE_DIR}'
                            f'Mag{mag_date_str}_He{he_date_str}.fits')
    reprojected_mag_map = sunpy.map.Map(reprojected_fits_file)

    # Only the ensemble data (first returned item) is saved
    ensemble_map_data = detect.get_ensemble_v0_5(
        pre_processed_map, reprojected_mag_map,
        percent_of_peak_list, morph_radius_list,
        unipolarity_threshold
    )[0]
    
    # Saved as an object array of [date string, ensemble data]
    save_list = [he_date_str, ensemble_map_data]
    np.save(ensemble_file, np.array(save_list, dtype=object), allow_pickle=True)
    print(f'{he_date_str} Ensemble Map Saved')

v0.2-v0.4

In [None]:
# NOTE(review): overwrite is enabled here, unlike the other save cells, and
# the file name matches the other .npy save cells - confirm before running
overwrite = True

# percent_of_peak_list = [80,80, 90, 100,100]
# morph_radius_list = [18,20, 16, 16,20]
percent_of_peak_list = [100,100, 110, 120,120]
morph_radius_list = [18,20, 16, 16,20]


# Create the output directory if needed; no error if it already exists
os.makedirs(DETECTION_NPY_SAVE_DIR, exist_ok=True)

for he_date_str in HE_DATE_LIST:
    
    # Optionally overwrite existing files
    ensemble_file = f'{DETECTION_NPY_SAVE_DIR}{he_date_str}_ensemble_map.npy'
    if os.path.isfile(ensemble_file) and not overwrite:
        print(f'He {he_date_str} ensemble map already exists.')
        continue
    
    # The pre-processed array is the last element of the saved object array
    pre_process_file = (PREPROCESS_NPY_SAVE_DIR + he_date_str
                        + '_pre_processed_map.npy')
    pre_processed_map = np.load(pre_process_file, allow_pickle=True)[-1]

    # Only the ensemble data (first returned item) is saved
    # ensemble_map_data = detect.get_ensemble_v0_2(
    #     pre_processed_map, percent_of_peak_list, morph_radius_list
    # )[0]
    ensemble_map_data = detect.get_ensemble_v0_3(
        pre_processed_map, percent_of_peak_list, morph_radius_list
    )[0]
    
    # Saved as an object array of [date string, ensemble data]
    save_list = [he_date_str, ensemble_map_data]
    np.save(ensemble_file, np.array(save_list, dtype=object), allow_pickle=True)
    print(f'{he_date_str} Ensemble Map Saved')

vY

In [None]:
overwrite = False

# percent_of_peak_list = [  62, 68, 73, 80]
# morph_radius_list = [11, 13,  8, 10]
# unipolarity_threshold = 0.01
percent_of_peak_list = [85, 73, 95, 85]
morph_radius_list = [   10, 14, 10, 14]
unipolarity_threshold = 0.5



# Create the output directory if needed; no error if it already exists
os.makedirs(DETECTION_MAP_SAVE_DIR, exist_ok=True)

for he_date_str in HE_DATE_LIST:
    
    # Optionally overwrite existing files
    ensemble_file = f'{DETECTION_MAP_SAVE_DIR}{he_date_str}_ensemble_map.fits'
    if os.path.isfile(ensemble_file) and not overwrite:
        print(f'He {he_date_str} ensemble map already exists.')
        continue
    
    # Extract pre-processed map
    pre_process_file = (PREPROCESS_MAP_SAVE_DIR + he_date_str
                        + '_pre_processed_map.fits')
    pre_processed_map = sunpy.map.Map(pre_process_file)

    # Extract saved processed magnetograms, selecting the magnetogram date
    # nearest to this He I observation
    mag_date_str = prepare_data.get_nearest_date_str(
        MAG_DATE_LIST, selected_date_str=he_date_str
    )
    reprojected_fits_file = (f'{HELIOGRAPH_MAG_SAVE_DIR}'
                            f'Mag{mag_date_str}_He{he_date_str}.fits')
    reprojected_mag_map = sunpy.map.Map(reprojected_fits_file)

    # Only the ensemble data (first returned item) is saved
    ensemble_map_data = detect.get_ensemble_vY(
        pre_processed_map, reprojected_mag_map,
        percent_of_peak_list, morph_radius_list,
        unipolarity_threshold
    )[0]
    # Data is flipped vertically before being wrapped as a map with the
    # pre-processed map's metadata
    ensemble_map = sunpy.map.Map(
        np.flipud(ensemble_map_data), pre_processed_map.meta
    )
    
    ensemble_map.save(ensemble_file, overwrite=overwrite)
    print(f'{he_date_str} Ensemble Map Saved')

### Alternates

v0.5b: Evenly Assigned Unipolarity, No threshold

In [None]:
def get_unipol_ensemble(pre_processed_map, reprojected_mag_map,
                        percent_of_peak_list, morph_radius_list,
                        even_confidence=True):
    """Retrieve an ensemble of segmentations sorted by CH unipolarity.
    
    Args
        pre_processed_map: Sunpy map object to segment
        reprojected_mag_map: Sunpy map object of magnetogram reprojected
            to align with the ensemble map
        percent_of_peak_list: list of float percentage values
            at which to take threshold
        morph_radius_list: list of int pixel number for radius of disk 
            structuring element in morphological operations
        even_confidence: boolean to specify confidence assignment
            True to assign confidence by even ranking in (0,100]%
            False to assign confidence as 100% for unipolarity of 0
                and 0% for unipolarity of 1
    Returns
        Ensemble greyscale coronal holes mask sorted by unipolarity.
        List of coronal holes masks, sorted by ascending unipolarity.
        List of confidence levels in mask layers.
        List of unipolarity values, sorted ascending.
    """
    # NOTE(review): with even_confidence=True confidence rises with
    # unipolarity rank, while with False it falls as unipolarity rises -
    # confirm the two modes are meant to be opposed

    # Map data is flipped vertically for use as a plain array
    pre_processed_map_data = np.flipud(pre_processed_map.data)
    
    # Create global segmentations for varied design variable combinations
    ch_masks = [
        detect.get_ch_mask(pre_processed_map_data, percent_of_peak, morph_radius)
        for percent_of_peak, morph_radius
        in zip(percent_of_peak_list, morph_radius_list)
    ]
    
    # List to be extended by masks for distinct CHs from all segmentations
    masks_by_ch = []
    
    ones_array = np.ones_like(pre_processed_map_data)
    
    # Split each segmentation into per-candidate masks built from an array
    # of ones, and pool the candidates from every segmentation
    for ch_mask in ch_masks:
        masks_by_ch.extend(
            detect.get_map_data_by_ch(ones_array, ch_mask)
        )
    
    num_ch = len(masks_by_ch)
    
    # Compute constant area per square pixel once for all CHs
    A_per_square_px = detect.get_A_per_square_px(pre_processed_map)
    
    # List to hold unipolarity for distinct CHs from all segmentations
    unipolarity_by_ch = []
    
    for ch_label in range(num_ch):
        distinct_ch_mask = masks_by_ch[ch_label]
        
        # Not flipping works right
        distinct_ch_map = sunpy.map.Map(
            distinct_ch_mask, pre_processed_map.meta
        )
        # Disabled debugging code: per-candidate plot and a synthetic
        # magnetogram check
        # ax = fig.add_subplot(num_rows, num_cols, ch_label + 1, projection=pre_processed_map)
        # distinct_ch_map.plot(cmap='magma')
        
        # fake_mag_map_data = np.where(~np.isnan(np.flipud(distinct_ch_mask)), 25,
        #                              reprojected_mag_map.data)
        # fake_mag_map = sunpy.map.Map(fake_mag_map_data, reprojected_mag_map.meta)
        # outcomes = get_outcomes(
        #     distinct_ch_map, fake_mag_map, A_per_square_px
        # )
        
        # NOTE(review): the positional arguments here differ from the
        # get_outcomes call in the v1.1 ensemble cell - confirm which
        # matches the current detect.get_outcomes signature
        outcome_dict = detect.get_outcomes(
            distinct_ch_map, pre_processed_map_data, reprojected_mag_map,
            A_per_square_px
        )
        unipolarity_by_ch.append(outcome_dict['unipolarity'])
    
    # np.argsort is ascending: indices run from least to greatest unipolarity
    sorted_idxs = np.argsort(unipolarity_by_ch)
    
    # Sort candidate CHs and their unipolarities from least to greatest
    # unipolarity
    masks_by_ch = [masks_by_ch[i] for i in sorted_idxs]
    unipolarity_by_ch = [unipolarity_by_ch[i] for i in sorted_idxs]
    
    # Assign confidence by direct ranking or by unipolarity value
    if even_confidence:
        # Evenly spaced ranks: the last sorted candidate receives 100%
        confidence_list = [(c + 1)*100/num_ch
                           for c in range(num_ch)]
    else:
        confidence_list = [100 - unipolarity*100
                           for unipolarity in unipolarity_by_ch]

    # Construct ensemble map by adding distinct CHs with assigned
    # confidence level values to an empty base disk    
    ensemble_map_data = np.where(
        ~np.isnan(pre_processed_map_data), 0, np.nan
    )
    # Candidates are painted in sorted order, so a later candidate overwrites
    # an earlier one wherever their non-NaN pixels overlap
    for distinct_ch, confidence in zip(masks_by_ch, confidence_list):
        ensemble_map_data = np.where(
            ~np.isnan(distinct_ch), confidence, ensemble_map_data
        )
    return ensemble_map_data, masks_by_ch, confidence_list, unipolarity_by_ch

In [None]:
# Entries at the same index form one (threshold, radius) design pair
percent_of_peak_list = [80,80, 90, 100,100]
morph_radius_list = [18,20, 16, 16,20]


pre_process_v0_4_he_map_data = detect.pre_process_v0_4(he_map_data)
# Build the map from the array computed just above (previously read a
# variable left over from an earlier cell); the data is flipped vertically
# before being wrapped with the He I metadata
pre_process_v0_4_he_map = sunpy.map.Map(
    np.flipud(pre_process_v0_4_he_map_data), he_map.meta
)

out = get_unipol_ensemble(
    pre_process_v0_4_he_map, reprojected_mag_map,
    percent_of_peak_list, morph_radius_list,
    even_confidence=True
)
ensemble_map_data, map_data_by_ch, confidence_list, unipolarity_by_ch = out

plot_detection.plot_ensemble(
    pre_process_v0_4_he_map_data, ensemble_map_data, map_data_by_ch,
    confidence_list, unipolarity_by_ch
)

v0.5c: EUV Ratio, v0.4

In [None]:
# Entries at the same index form one (threshold, radius) design pair
percent_of_peak_list = [80,80, 90, 100,100]
morph_radius_list = [18,20, 16, 16,20]


# Load the saved ratio image for the current He I and EUV dates
# Requires he_date_str and euv_date_str from earlier cells
ratio_fits_file = f'{RATIO_SAVE_DIR}He{he_date_str}_EUV{euv_date_str}.fits'
raw_ratio = prepare_data.get_image_from_fits(ratio_fits_file)

# Apply the v0.4 pre-processing to the ratio image
pre_process_v0_4_ratio_map_data = detect.pre_process_v0_4(raw_ratio)

out = detect.get_ensemble_v0_3(
    pre_process_v0_4_ratio_map_data, percent_of_peak_list, morph_radius_list
)
# NOTE(review): ensemble_map is rebound here to this function's first return
# value, while other cells use the same name for a Sunpy map
ensemble_map, map_data_by_ch, confidence_list, gradient_medians = out
plot_detection.plot_ensemble(
    pre_process_v0_4_ratio_map_data, ensemble_map, map_data_by_ch,
    confidence_list, gradient_medians
)

v0.3b: Percentile Assigned Smoothness

In [None]:
def get_smooth_ensemble(pre_processed_map, percent_of_peak_list,
                        morph_radius_list, even_confidence=True):
    """Retrieve an ensemble of segmentations sorted by CH smoothness.

    Args
        pre_processed_map: pre-processed image array to segment
        percent_of_peak_list: list of float percentage values
            at which to take threshold
        morph_radius_list: list of int pixel number for radius of disk
            structuring element in morphological operations
        even_confidence: boolean to specify confidence assignment
            True to assign confidence by even ranking in (0,100]%
            False to assign confidence by percentile of gradient
                median among values from other candidate CHs in [0,100]%
    Returns
        Ensemble greyscale coronal holes mask sorted by gradient median.
        List of per-CH map data arrays sorted from greatest to least
            gradient median.
        List of confidence levels in mask layers.
        List of gradient medians in the same sorted order.
    """
    # Create global segmentations for varied design variable combinations
    ch_masks = [
        detect.get_ch_mask(pre_processed_map, percent_of_peak, morph_radius)
        for percent_of_peak, morph_radius
        in zip(percent_of_peak_list, morph_radius_list)
    ]

    # Lists to hold pre processed map and gradient data respectively
    # for distinct CHs from all segmentations
    map_data_by_ch = []
    grad_data_by_ch = []

    for ch_mask in ch_masks:
        # Masked array of candidate CHs
        masked_candidates = detect.get_masked_candidates(pre_processed_map, ch_mask)

        # Compute spatial gradient
        gradient_candidates = filters.sobel(masked_candidates)

        map_data_by_ch.extend(
            detect.get_map_data_by_ch(pre_processed_map, ch_mask)
        )
        grad_data_by_ch.extend(
            detect.get_map_data_by_ch(gradient_candidates, ch_mask)
        )

    # Obtain sorting indices from greatest to least gradient median
    gradient_medians = [np.median(grad_data[~np.isnan(grad_data)])
                        for grad_data in grad_data_by_ch]
    sorted_idxs = np.flip(np.argsort(gradient_medians))

    # Sort candidate CHs from greatest to least gradient median
    map_data_by_ch = [map_data_by_ch[i] for i in sorted_idxs]
    gradient_medians = [gradient_medians[i] for i in sorted_idxs]

    # Assign confidence by percentile or direct ranking
    num_ch = len(map_data_by_ch)
    if even_confidence:
        confidence_list = [(c + 1)*100/num_ch
                           for c in range(num_ch)]
    elif num_ch == 0:
        # No candidates: np.max/np.min would raise on an empty list
        confidence_list = []
    else:
        min_median = np.min(gradient_medians)
        median_range = np.max(gradient_medians) - min_median
        if median_range > 0:
            percent_conversion = 100 / median_range
            confidence_list = [
                100 - (grad_median - min_median)*percent_conversion
                for grad_median in gradient_medians
            ]
        else:
            # All medians are equal (e.g. a single CH). Every candidate
            # sits at the minimum, so assign full confidence instead of
            # dividing by a zero range, which would produce NaN
            confidence_list = [100.0 for _ in range(num_ch)]

    # Construct ensemble map by adding distinct CHs with assigned
    # confidence level values to an empty base disk
    ensemble_map = np.where(~np.isnan(pre_processed_map), 0, np.nan)

    # Later (smoother) candidates overwrite earlier ones where they overlap
    for distinct_ch, confidence in zip(map_data_by_ch, confidence_list):
        ensemble_map = np.where(
            ~np.isnan(distinct_ch), confidence, ensemble_map
        )
    return ensemble_map, map_data_by_ch, confidence_list, gradient_medians

In [None]:
# Paired threshold (percent of peak) and morphological radius values;
# get_smooth_ensemble zips the two lists into one segmentation per pair
percent_of_peak_list = [80,80,90,100,100]
morph_radius_list = [13,17,15,13,17]


# Only the first element returned by pre_process_v0_1 is used here
pre_process_v0_1_he = detect.pre_process_v0_1(he_map_data)[0]
# even_confidence=False assigns confidence from the gradient median
# of each CH rather than by even ranking
out = get_smooth_ensemble(
    pre_process_v0_1_he, percent_of_peak_list, morph_radius_list,
    even_confidence=False
)
ensemble_map, map_data_by_ch, confidence_list, gradient_medians = out
plot_detection.plot_ensemble(
    pre_process_v0_1_he, ensemble_map, map_data_by_ch,
    confidence_list, gradient_medians
)

## ACWE Fuse

### Verification

Downsampling test

In [None]:
# Step size for down sampling from 2048x2048 to 512x512.
# NOTE: the strided down sampling below is commented out, so this
# constant is currently unused in this cell
ENSEMBLE_MAP_DOWNSAMPLE_STEP = 4

# stride_map_data = ensemble_map_data[
#     ::ENSEMBLE_MAP_DOWNSAMPLE_STEP, ::ENSEMBLE_MAP_DOWNSAMPLE_STEP
# ]
# Replace NaN values with 0 at full resolution
stride_map_data = np.where(np.isnan(ensemble_map_data), 0, ensemble_map_data)

# NOTE(review): ensemble_map must be a Sunpy map here (its .meta is
# read), not the array returned by get_smooth_ensemble — confirm which
# earlier cell defines it
stride_map = sunpy.map.Map(np.flipud(stride_map_data), ensemble_map.meta)
# resized_stride_map = sunpy.map.Map(np.flipud(stride_map_data), ensemble_map.meta)

Map alignment via differential rotation test

In [None]:
# STRIDE to ACWE (4096, 4096) takes 45 sec
# NOTE(review): diff_rotate is defined outside this section; presumably
# its arguments are (map to rotate, target map) — confirm
rotated_stride_map = diff_rotate(ensemble_map, acwe_map)
plt.imshow(np.flipud(rotated_stride_map.data))

In [None]:
# ACWE to STRIDE (2048, 2048) takes 10 sec
# NOTE(review): diff_rotate is defined outside this section; presumably
# its arguments are (map to rotate, target map) — confirm
rotated_acwe_map = diff_rotate(acwe_map, ensemble_map)
plt.imshow(np.flipud(rotated_acwe_map.data))

### Fuse

In [None]:
# Align ACWE map to STRIDE map datetime via differential rotation
rotated_acwe_map = diff_rotate(acwe_map, ensemble_map)
# Flip vertically to match the orientation of ensemble_map_data
rotated_acwe_map_data = np.flipud(rotated_acwe_map.data)

# Fuse by taking pixel-wise maximum confidence.
# NOTE(review): np.max propagates NaN, so a pixel that is NaN in either
# input is NaN in the fused result — confirm this is intended where the
# rotated ACWE map has no data
fused_map_data = np.max(
    np.stack((ensemble_map_data, rotated_acwe_map_data)), axis=0
)
fused_map = sunpy.map.Map(np.flipud(fused_map_data), ensemble_map.meta)

In [None]:
# Plot the fused confidence array with a labeled colorbar
fig = plt.figure(figsize=(4,3), dpi=400)
ax = fig.add_subplot(1, 1, 1)
im = ax.imshow(fused_map_data, cmap='copper')
# ax.set_title(f' He I: {he_date_str} \nEUV: {acwe_date_str}')
ax.set_title(f' He I: {he_date_str}')

# Hide pixel-coordinate ticks and tick labels
ax.tick_params(left=False, right=False, labelleft=False,
               labelbottom=False, bottom=False)

cb = fig.colorbar(im)
cb.set_label('Confidence', labelpad=11, rotation=-90)
cb.ax.tick_params(labelsize=8)

# Single Date Outcomes

Requires a single map to have been extracted first (see the single map data section).

## Calc Verification

### Properties Per CH

#### Calculate on CHs in Confidence Range

In [None]:
confidence_range = [0, 1]
# confidence_range = [0.5,0.9]

# Keep confidence values inside the inclusive range and zero the rest.
# NaN pixels fail both comparisons and therefore become 0 here
thresh_ensemble_map_data = np.where(
    (ensemble_map.data >= confidence_range[0])
    & (ensemble_map.data <= confidence_range[1]),
    ensemble_map.data, 0
)

# Remove leftover edges: zero every connected region below the minimum
# pixel count
large_obj_mask = morphology.remove_small_objects(
    thresh_ensemble_map_data > 0, min_size=5000
)
thresh_ensemble_map_data[np.logical_not(large_obj_mask)] = 0

# Restore NaN where the ensemble map is NaN. Done for plotting, though
# not necessary for outcomes
thresh_ensemble_map_data[np.isnan(ensemble_map.data)] = np.nan

thresh_ensemble_map = sunpy.map.Map(
    thresh_ensemble_map_data, ensemble_map.meta
)
fig = plt.figure(figsize=(4, 4))
ax = fig.add_subplot(1, 1, 1, projection=thresh_ensemble_map)
thresh_ensemble_map.plot(
    axes=ax, title='', cmap='magma',
    vmin=confidence_range[0], vmax=confidence_range[1]
)

In [None]:
confidence_level = 0

# Compute outcomes by CH, then order every outcome list from greatest
# to least unipolarity
outcome_by_ch_dict = detect.get_outcomes_by_ch(
    thresh_ensemble_map, he_map_data, reprojected_mag_map, confidence_level
)
sorted_idxs = np.argsort(outcome_by_ch_dict['unipolarity'])[::-1]

sorted_outcome_by_ch_dict = {}
for key, outcome_by_ch in outcome_by_ch_dict.items():
    sorted_outcome_by_ch_dict[key] = [outcome_by_ch[i] for i in sorted_idxs]


# Use a small positive level so zero-valued pixels are not selected
if confidence_level <= 0:
    confidence_level = 1e-3

# Binary mask of detected CHs at the given confidence level
confidence_ch_mask = (
    thresh_ensemble_map.data >= confidence_level
).astype(int)

# List of ensemble map data for distinct CHs, in the same sorted order
ensemble_map_data_by_ch = detect.get_map_data_by_ch(
    thresh_ensemble_map.data, confidence_ch_mask
)
ensemble_map_data_by_ch = [ensemble_map_data_by_ch[i] for i in sorted_idxs]


# One tab-separated row per outcome, one column per CH
print(''.join(f'{area:.1e} Mm^2\t'
              for area in sorted_outcome_by_ch_dict['area']))
print(''.join(f'Lat: {cm_lat:.1f} deg\t'
              for cm_lat in sorted_outcome_by_ch_dict['cm_lat']))
print(''.join(f'Lon: {cm_lon:.1f} deg\t'
              for cm_lon in sorted_outcome_by_ch_dict['cm_lon']))
print(''.join(f'{signed_flux:.4e} Mx\t'
              for signed_flux in sorted_outcome_by_ch_dict['signed_flux']))
print(''.join(f'Skew: {mag_skew:.4f}\t'
              for mag_skew in sorted_outcome_by_ch_dict['mag_skew']))
print(''.join(f'U: {unipolarity:.4f}\t'
              for unipolarity in sorted_outcome_by_ch_dict['unipolarity']))
print(''.join(f'Grad: {grad_median:.4f}\t'
              for grad_median in sorted_outcome_by_ch_dict['grad_median']))

In [None]:
# Alternative ways to select a CH by outcome; a fixed index is used below
# ch_idx = np.argmin(sorted_outcome_by_ch_dict['unipolarity'])
# ch_idx = np.argmax(sorted_outcome_by_ch_dict['area'])
# ch_idx = np.argmax(sorted_outcome_by_ch_dict['grad_median'])
# ch_idx = np.argmax(np.abs(sorted_outcome_by_ch_dict['mag_skew']))
# ch_idx = np.argmin(np.abs(sorted_outcome_by_ch_dict['cm_lon']))
ch_idx = 2

# Outcomes of the selected CH (index into the unipolarity-sorted lists)
# signed_flux = sorted_outcome_by_ch_dict['signed_flux'][ch_idx]
# mag_skew = sorted_outcome_by_ch_dict['mag_skew'][ch_idx]
unipolarity = sorted_outcome_by_ch_dict['unipolarity'][ch_idx]
area = sorted_outcome_by_ch_dict['area'][ch_idx]
cm_lon = sorted_outcome_by_ch_dict['cm_lon'][ch_idx]
cm_lat = sorted_outcome_by_ch_dict['cm_lat'][ch_idx]
# grad_median = sorted_outcome_by_ch_dict['grad_median'][ch_idx]

# Calculate foreshortening factor. 0: 90deg inc angle, 1: 0deg inc angle
# B0 is the observer latitude taken from the ensemble map
B0 = ensemble_map.observer_coordinate.lat.value
foreshort_factor = np.cos(np.deg2rad(cm_lon))*np.cos(np.deg2rad(cm_lat - B0))

# title = f'{area:.2e} Mm^2 | {signed_flux:.2e} Mx | {mag_skew:.2f} Skew'
# title = f'{signed_flux:.2e} Mx | {unipolarity:.2f} Unipolarity | {mag_skew:.2f} Skew'
title = rf'{unipolarity:.2f} Unipolarity | {area:.1e} $Mm^2$ | {foreshort_factor:.2f} Foreshort Factor'
# title = f'{unipolarity:.2f} Unipolarity | {grad_median:.2f} Gradient Median'

# NOTE: A_per_square_px is not used later in this cell
A_per_square_px = detect.get_A_per_square_px(ensemble_map)        


# Replace NaN (pixels outside the selected CH) with -100 so that the
# contour at level 0 below traces the CH boundary
selected_ch_map_data = ensemble_map_data_by_ch[ch_idx]
selected_ch_map_data = np.where(np.isnan(selected_ch_map_data), -100,
                                selected_ch_map_data)
selected_ch_map = sunpy.map.Map(selected_ch_map_data, he_map.meta)

fig = plt.figure(figsize=(6, 6))
fig.suptitle(he_date_str)

# Plot the ensemble map with a coordinate grid and outline the selected
# CH in green
ax = fig.add_subplot(projection=ensemble_map)
ensemble_map.plot(axes=ax, title=title)
ensemble_map.draw_grid(axes=ax)
for contour in selected_ch_map.contour(0):
    ax.plot_coord(contour, color='g')

In [None]:
# Plot the reprojected magnetogram with a symmetric color scale
fig = plt.figure(figsize=(4, 4))
ax = fig.add_subplot(111, projection=reprojected_mag_map)
reprojected_mag_map.plot(axes=ax, vmin=-50, vmax=50)

Rank each CH

In [None]:
def get_ranked_mag_map(ensemble_map, confidence_level, sorted_idxs):
    """Retrieve a map of CHs from an ensemble map as ranked by a
    precomputed per-CH ordering.

    Args
        ensemble_map: Sunpy map of ensemble confidence values
        confidence_level: float confidence level at or above which
            pixels are kept as detected CHs
        sorted_idxs: sequence of int indices into the list of distinct
            CHs. The CH at sorted_idxs[0] receives the highest rank
            value and the CH at sorted_idxs[-1] the lowest
    Returns
        Sunpy map with the metadata of ensemble_map in which each
        distinct CH is filled with its rank value in (0,100], other
        non-NaN pixels are 0, and NaN pixels remain NaN.
    """
    # Ensemble data of detected CHs at the given confidence level,
    # zero elsewhere
    confidence_ch_mask = np.where(
        ensemble_map.data >= confidence_level, ensemble_map.data, 0
    )

    # List of ensemble map data for distinct CHs
    ensemble_map_data_by_ch = detect.get_map_data_by_ch(
        ensemble_map.data, confidence_ch_mask
    )
    num_ch = len(ensemble_map_data_by_ch)

    # Reverse the ordering so that rank values increase toward the CH
    # listed first in sorted_idxs
    ensemble_map_data_by_ch = [ensemble_map_data_by_ch[i]
                               for i in np.flip(sorted_idxs)]

    ranked_map_data = np.where(
        ~np.isnan(ensemble_map.data), 0, np.nan
    )
    for ch_num, map_data in enumerate(ensemble_map_data_by_ch):
        ranked_map_data = np.where(
            ~np.isnan(map_data), (ch_num + 1)*100/num_ch, ranked_map_data
        )

    # Use the metadata of the map that was passed in rather than a
    # notebook-global He I map, so the result always matches its input
    ranked_map = sunpy.map.Map(ranked_map_data, ensemble_map.meta)
    return ranked_map

In [None]:
num_ch = len(ensemble_map_data_by_ch)

# image_list = [pre_processed_map_data for _ in range(num_ch)]

# Smooth the magnetogram with a uniform filter sized at 5% of the image
# height. NaN values are zeroed before filtering and restored after,
# then the result is clipped to [-2, 2]
mag_data = np.flipud(reprojected_mag_map.data)
smooth_size = 0.05 *mag_data.shape[0]
smoothed_mag_data = np.where(np.isnan(mag_data), 0, mag_data)
smoothed_mag_data = ndimage.uniform_filter(
    smoothed_mag_data, smooth_size
)
mag_data = np.clip(
    np.where(~np.isnan(mag_data), smoothed_mag_data, np.nan), -2, 2
)
# The same background image is repeated once per CH
image_list = [mag_data for _ in range(num_ch)]

axes = plot_detection.plot_image_grid(image_list, num_cols=3, cmap='gray')

# Outline one CH per panel and title it with its magnetic outcomes
for ax, i, ch_data in zip(axes.values(), range(num_ch), ensemble_map_data_by_ch):
    mask = np.where(np.isnan(ch_data), 0, 1)
    
    ax.set_title((f'{sorted_outcome_by_ch_dict["unipolarity"][i]:.2f} Unipolarity | '
                  f'{sorted_outcome_by_ch_dict["mag_skew"][i]:.2f} Skew'))
    ax.contour(np.flipud(mask), cmap=plt.cm.plasma)

Save maps thresholded above a confidence level, with the detected CHs ranked by unipolarity

In [None]:
# NEUTRAL LINE COMPARISON NEEDS UPDATE
overwrite = False
confidence_level = 0

out_dir = DETECTION_IMAGE_DIR + 'Unipolarity_Rank/'

# Create the output directory if it does not exist yet
os.makedirs(out_dir, exist_ok=True)

for he_date_str in HE_DATE_LIST:

    # Optionally overwrite existing files. Output files are keyed by the
    # He I datetime, which is the only datetime known at this point
    comparison_img_file = f'{out_dir}He{he_date_str}.jpg'
    if os.path.isfile(comparison_img_file) and not overwrite:
        print(f'He I {he_date_str} comparison already exists.')
        continue

    # Extract He I observation
    he_map = prepare_data.get_nso_sunpy_map(HE_DIR + he_date_str + '.fts')
    if not he_map:
        print(f'{he_date_str} He I observation extraction failed.')
        continue

    # Extract saved ensemble map array and convert to Sunpy map.
    # NOTE: allow_pickle=True can execute code embedded in the file, so
    # only load arrays saved by this project
    ensemble_file = f'{DETECTION_NPY_SAVE_DIR}{he_date_str}_ensemble_map.npy'
    ensemble_map_data = np.load(ensemble_file, allow_pickle=True)[-1]
    ensemble_map = sunpy.map.Map(np.flipud(ensemble_map_data), he_map.meta)
    ensemble_map.plot_settings['cmap'] = colormaps['magma']

    # Extract saved processed magnetograms
    mag_date_str = prepare_data.get_nearest_date_str(
        MAG_DATE_LIST, selected_date_str=he_date_str
    )
    mag_fits_name = f'{ROTATED_MAG_SAVE_DIR}Mag{mag_date_str}_He{he_date_str}'
    reprojected_fits_file = f'{mag_fits_name}.fits'
    reprojected_mag_map = sunpy.map.Map(reprojected_fits_file)

    # Compute outcomes by CH and sort from least to greatest unipolarity.
    # The outcomes are read by key, matching the other calls to
    # get_outcomes_by_ch in this notebook.
    # NOTE(review): pre_processed_map is not recomputed per date in this
    # loop, so it is whatever an earlier cell left behind — confirm it
    # should be derived from the current he_map
    outcomes_by_ch = detect.get_outcomes_by_ch(
        ensemble_map, pre_processed_map, reprojected_mag_map, confidence_level
    )
    unipolarity_by_ch = outcomes_by_ch['unipolarity']
    sorted_idxs = np.argsort(unipolarity_by_ch)

    # Obtain ranked map
    ranked_map = get_ranked_mag_map(
        ensemble_map, confidence_level, sorted_idxs
    )
    ranked_map.plot_settings['cmap'] = colormaps['magma']

    euv_date_str = prepare_data.get_nearest_date_str(
        EUV_DATE_LIST, selected_date_str=he_date_str
    )

    fig = plt.figure(figsize=(18, 5))
    plot_detection.plot_he_neutral_lines_euv_comparison(
        fig, he_date_str, mag_date_str, euv_date_str, ranked_map
    )

    # Save plot and close the figure to release memory inside the loop
    plt.savefig(comparison_img_file)
    plt.close(fig)
    print(f'{euv_date_str} map comparison saved.')

Skewness Histogram in Single CH

In [None]:
def plot_sym_log_hist(data_list, num_bins):
    """Plot a histogram of signed data on symmetric logarithmic bins.

    Args
        data_list: array-like of signed values to histogram
        num_bins: int approximate total number of bin edges, of which
            num_bins//2 are negative and num_bins//2 are positive
    Returns
        Matplotlib figure and axes holding the histogram.
    """
    data = np.asarray(data_list)

    # Symmetric log bins. Edges run from -10**bin_min to -1 and from 1
    # to 10**bin_max. Magnitudes are floored at 1 so both exponents are
    # at least 0: without the floor, data with no negative values gives
    # log10(0) = -inf and magnitudes below 0.1 give decreasing edges,
    # neither of which is a valid set of histogram bins
    neg_outcomes = np.where(data < 0, np.abs(data), 0)
    bin_min = np.ceil(np.log10(max(np.max(neg_outcomes), 1)))
    bin_max = np.ceil(np.log10(max(np.max(np.abs(data)), 1)))
    bins = np.hstack((-np.logspace(bin_min, 0, num_bins//2),
                      np.logspace(0, bin_max, num_bins//2)))

    fig = plt.figure(figsize=(10,6))
    ax = fig.add_subplot(1, 1, 1)
    ax.set_xscale('symlog')
    ax.hist(data_list, bins)

    return fig, ax

In [None]:
# Histogram of per-pixel signed flux for the CH selected by ch_idx
data_list = sorted_outcome_by_ch_dict['pixel_signed_fluxes'][ch_idx]
fig, ax = plot_sym_log_hist(data_list, num_bins=500)
fig.suptitle(f'CH Index: {ch_idx}')
ax.set_title('Pixel Magnetic Flux Histogram')
# NOTE(review): the axis label uses Wb while other cells print signed
# flux in Mx — confirm the unit
ax.set_xlabel('Magnetic Flux (Wb)')
ax.set_ylim([0,3500])
# Displayed as the cell output: the summed pixel fluxes next to the
# pre-computed signed flux for comparison
f'Summed: {np.sum(data_list):.3e} | Pre-Computed {sorted_outcome_by_ch_dict["signed_flux"][ch_idx]:.3e}'

Line of Sight B

In [None]:
confidence_level = 0

# Use a small positive level so zero-valued pixels are not selected
if confidence_level <= 0:
    confidence_level = 1e-3

percent_unipolar_by_ch = []
signed_B_by_ch = []
unsigned_B_by_ch = []
unipolarity_by_ch = []

# Thresholded array at the given confidence level
confidence_map_data = np.where(
    ensemble_map.data >= confidence_level, ensemble_map.data, 0
)

# Array with number labels per distinct CH and number of labeled CHs.
# ndimage.label numbers the regions 1..num_labels inclusive; the
# background (0) is not included in the count
labeled_map_data, num_labels = ndimage.label(confidence_map_data)

# List of magnetic field data images for distinct CHs.
# Equivalent to obtain Helioprojective coordinates of pixels per CH
# and then obtaining magnetic data at HP coordinates
mag_map_data_by_ch = detect.get_map_data_by_ch(
    reprojected_mag_map.data, confidence_map_data
)

# # Obtain masked magnetic data per CH > Compute on magnetic data per CH
# mask_idxs_by_ch = [np.where(labeled_map_data == label)
#                    for label in range(1, num_labels + 1)]
# mag_data_by_ch = [reprojected_mag_map.data[mask_idxs]
#                   for mask_idxs in mask_idxs_by_ch]

# List of masks for distinct CHs, including the last label
masks_by_ch = [np.where(labeled_map_data == label, 1, 0)
               for label in range(1, num_labels + 1)]

# NOTE(review): assumes detect.get_map_data_by_ch returns one image per
# labeled region in label order — confirm. The smaller count is taken
# so a mismatch cannot index out of range
num_ch = min(num_labels, len(mag_map_data_by_ch))

for ch_label in range(num_ch):
    mag_map_data = mag_map_data_by_ch[ch_label]
    mag_data = mag_map_data[~np.isnan(mag_map_data)]

    # Pixel percent unipolarity among nonzero pixels.
    # NaN when the CH has no nonzero magnetic pixels
    num_positive = np.count_nonzero(mag_data > 0)
    num_negative = np.count_nonzero(mag_data < 0)
    num_px = np.count_nonzero(mag_data)
    if num_px > 0:
        percent_unipolarity = max(num_positive, num_negative)*100/num_px
    else:
        percent_unipolarity = np.nan
    percent_unipolar_by_ch.append(percent_unipolarity)

    # Magnitude of the signed average magnetic field
    signed_B = np.abs(np.mean(mag_data))
    signed_B_by_ch.append(signed_B)

    # Unsigned average magnetic field
    unsigned_B = np.mean(np.abs(mag_data))
    unsigned_B_by_ch.append(unsigned_B)

    # Unipolarity. NaN when the unsigned average field is zero or NaN
    if unsigned_B > 0:
        unipolarity = (unsigned_B - signed_B)/unsigned_B
    else:
        unipolarity = np.nan
    unipolarity_by_ch.append(unipolarity)


# Sort outcomes by CH from greatest to least percent unipolarity.
# Every list is reordered with the same indices so that entries stay
# aligned even when NaN values are present
sorted_idxs = np.flip(np.argsort(percent_unipolar_by_ch))
percent_unipolar_by_ch = [percent_unipolar_by_ch[i] for i in sorted_idxs]

masks_by_ch = [masks_by_ch[i] for i in sorted_idxs]
signed_B_by_ch = [signed_B_by_ch[i] for i in sorted_idxs]
unsigned_B_by_ch = [unsigned_B_by_ch[i] for i in sorted_idxs]
unipolarity_by_ch = [unipolarity_by_ch[i] for i in sorted_idxs]

In [None]:
# The same background image is repeated once per CH
image_list = [pre_processed_map for _ in range(num_ch)]
axes = plot_detection.plot_image_grid(image_list, num_cols=3, cmap='gray')

# Outline one CH per panel and title it with its unipolarity outcomes
for ax, i, mask in zip(axes.values(), range(num_ch), masks_by_ch):
    ax.set_title((f'{percent_unipolar_by_ch[i]:.1f}% Unipolar '
                  + f'| {unipolarity_by_ch[i]:.2f} Unipolarity'))
    ax.contour(np.flipud(mask), cmap=plt.cm.plasma)

#### Test Magnetic

Verify the magnetic property calculations by assigning a uniform field value to the detected regions and checking that perfect unipolarity is reported

In [None]:
# Set the magnetic field to a uniform 25 wherever the ensemble value
# exceeds 1 and keep the reprojected magnetogram elsewhere.
# NOTE(review): the threshold of 1 presumes confidence values above 1
# (e.g. percent) — confirm against the ensemble map scale
fake_mag_map_data = np.where(ensemble_map.data > 1, 25, reprojected_mag_map.data)
fake_mag_map = sunpy.map.Map(fake_mag_map_data, reprojected_mag_map.meta)
fake_mag_map.plot(vmin=-50, vmax=50)

In [None]:
confidence_level = 0

# Outcomes per CH computed against the uniform test magnetogram
outcome_by_ch_dict = detect.get_outcomes_by_ch(
    ensemble_map, he_map_data, fake_mag_map, confidence_level
)

# Ensemble data of detected CHs at the given confidence level,
# zero elsewhere
confidence_ch_mask = np.where(
    ensemble_map.data >= confidence_level,
    ensemble_map.data,
    0,
)

# List of ensemble map data for distinct CHs
ensemble_map_data_by_ch = detect.get_map_data_by_ch(
    ensemble_map.data, confidence_ch_mask
)

# Tab-separated unipolarity of every CH on a single row
print(''.join(f'{unipolarity:.4f}\t'
              for unipolarity in outcome_by_ch_dict['unipolarity']))

Verify vs Global Properties

In [None]:
# Full solar surface area in Mm^2
SOLAR_AREA = 4*np.pi*(1*u.solRad).to(u.Mm)**2

# Sum of per-CH areas as a percentage of the solar surface, compared
# with the first value returned by the global open area calculation
summed_area = np.sum(outcome_by_ch_dict['area'])*u.Mm**2 /SOLAR_AREA*100
global_area = detect.get_open_area(ensemble_map, confidence_level=0)[0]

f'Summed: {summed_area:.6f} % | Global: {global_area:.6f} %'

In [None]:
# Sum of per-CH unsigned fluxes compared with the global unsigned open
# flux calculation
summed_flux = np.sum(outcome_by_ch_dict['unsigned_flux'])
global_flux = detect.get_unsigned_open_flux(
    ensemble_map, reprojected_mag_map, confidence_level=0
)
f'{summed_flux:.6e} Wb | Global: {global_flux:.6e} Wb'

#### He I Smoothness

In [None]:
def get_ch_band_widths(map_data_by_ch):
    """Retrieve the 5th to 95th percentile band width of each detected CH.

    Args
        map_data_by_ch: list of isolated CH images from a segmentation
    Returns
        List of band widths, one per CH image, computed over the
        non-NaN pixels of that image.
    """
    lower_percentile, upper_percentile = 5, 95

    hole_band_widths = []
    for ch_image in map_data_by_ch:
        # Only pixels that belong to the CH carry values
        ch_pixels = ch_image[~np.isnan(ch_image)]
        lower_bound, upper_bound = np.percentile(
            ch_pixels, [lower_percentile, upper_percentile]
        )
        hole_band_widths.append(upper_bound - lower_bound)
    return hole_band_widths


def get_ch_lower_tail_widths(map_data_by_ch):
    """Retrieve the lower tail width of each detected CH.

    The lower tail width is the peak counts location of a CH minus its
    1st percentile value.

    Args
        map_data_by_ch: list of isolated CH images from a segmentation
    Returns
        List of lower tail widths, one per CH image.
    """
    # Non-NaN pixel values of each CH
    filt_map_data_by_ch = []
    for map_data in map_data_by_ch:
        filt_map_data_by_ch.append(map_data[~np.isnan(map_data)])

    # 1st percentile value of each CH
    first_percentile_list = []
    for ch_pixels in filt_map_data_by_ch:
        first_percentile_list.append(np.percentile(ch_pixels, 1))

    # Peak counts location of each CH
    peak_counts_value_list = []
    for ch_pixels in filt_map_data_by_ch:
        peak_counts_value_list.append(
            detect.get_peak_counts_loc(ch_pixels, bins_as_percent=False)
        )

    # Lower tail width of each CH
    ch_lower_tail_width_list = []
    for peak_count, first_percentile in zip(peak_counts_value_list,
                                            first_percentile_list):
        ch_lower_tail_width_list.append(peak_count - first_percentile)

    return ch_lower_tail_width_list


def plot_sorted_ch_hists(array, ch_mask, apply_gradient, hist_stat,
                         descend_sort=False):
    """Plot segmented CH histograms sorted by histogram statistics.

    Each figure row shows two CHs. Every CH takes three panels: its
    contour over the full image, a zoomed view, and its histogram.

    Args
        array: image to process
        ch_mask: binary coronal holes mask
        apply_gradient: boolean to specify taking spatial gradient of image
        hist_stat: str to specify histogram sorting statistic
            'median', 'width', 'tail_width'
        descend_sort: boolean to specify sorting CHs from greatest to least
            statistic
    Raises
        ValueError: if hist_stat is not a supported statistic
    """
    if hist_stat not in ('median', 'width', 'tail_width'):
        raise ValueError(
            f"hist_stat must be 'median', 'width' or 'tail_width', "
            f"got {hist_stat!r}"
        )

    # Masked array of candidate CHs
    masked_candidates = detect.get_masked_candidates(array, ch_mask)
    if apply_gradient:
        masked_candidates = filters.sobel(masked_candidates)

    # Isolated images of detected CHs
    map_data_by_ch = detect.get_map_data_by_ch(
        masked_candidates, ch_mask
    )
    num_ch = len(map_data_by_ch)
    if num_ch == 0:
        # Nothing to plot; the statistics below are undefined for no CHs
        return

    # Compute statistics for each CH
    medians = [np.nanmedian(map_data) for map_data in map_data_by_ch]
    ch_band_widths = get_ch_band_widths(map_data_by_ch)

    # Histogram x limit bounds. Gradient data keeps the mean median as
    # its lower bound
    hist_xlim_min = np.mean(medians)
    if not apply_gradient:
        hist_xlim_min = hist_xlim_min - 2*np.max(ch_band_widths)
    hist_xlim_max = np.mean(medians) + 2*np.max(ch_band_widths)

    # Obtain indices of candidates sorted by specified mode
    if hist_stat == 'median':
        sorted_candidate_idxs = np.argsort(medians)
        titles = [f'Median: {median:.2f}'
                  for median in medians]
    elif hist_stat == 'width':
        sorted_candidate_idxs = np.argsort(ch_band_widths)
        titles = [f'90% Band Width: {ch_band_width:.1f}'
                  for ch_band_width in ch_band_widths]
    else:
        ch_lower_tail_width_list = get_ch_lower_tail_widths(
            map_data_by_ch
        )
        sorted_candidate_idxs = np.argsort(ch_lower_tail_width_list)
        titles = [f'1% to Peak Width: {ch_lower_tail_width:.1f}'
                  for ch_lower_tail_width in ch_lower_tail_width_list]

    if descend_sort:
        sorted_candidate_idxs = np.flip(sorted_candidate_idxs)

    # Colormap of the zoomed view depends only on the gradient option
    if apply_gradient:
        cmap = plt.cm.viridis
    else:
        cmap = plt.cm.magma

    for r in range(int(np.ceil(num_ch/2))):
        fig, axes = plt.subplots(nrows=1, ncols=6, figsize=(60, 10))
        ax = axes.ravel()

        for c in range(2):
            i = 2*r + c
            ax_i = 3*c
            if i + 1 > num_ch:
                return

            # Retrieve isolated CH image and contour
            ch_num = sorted_candidate_idxs[i]
            ch_im = map_data_by_ch[ch_num]
            ch_contour = np.where(~np.isnan(ch_im), 1, 0)

            # Zoom in on an isolated CH. The start indices are clamped
            # at 0 because a negative start would wrap around to the
            # far edge and give an empty or wrong crop
            y, x = np.where(~np.isnan(ch_im))
            row_start = max(np.min(y) - 10, 0)
            col_start = max(np.min(x) - 10, 0)
            ch_zoom = ch_im[row_start:np.max(y) + 10,
                            col_start:np.max(x) + 10]

            hist, edges = detect.get_hist(
                ch_zoom[~np.isnan(ch_zoom)], bins_as_percent=False, n=200
            )

            ax[ax_i].set_title(f'Hole {ch_num + 1}', fontsize=32)
            ax[ax_i].imshow(array, cmap='gray', vmin=-100, vmax=100)
            ax[ax_i].contour(ch_contour, cmap='plasma')

            ax[ax_i + 1].imshow(ch_zoom, cmap)

            ax[ax_i + 2].set_title(titles[ch_num], fontsize=32)
            ax[ax_i + 2].bar(edges[0:-1], hist)
            ax[ax_i + 2].set_xlim([hist_xlim_min, hist_xlim_max])

v0.3

In [None]:
# Requires single mask ensemble map
ch_mask = np.where(ensemble_map_data > 0, 1, 0)
plot_sorted_ch_hists(
    he_map_data, ch_mask, apply_gradient=True,
    hist_stat='median'
)

#### Alternate Statistics

In [None]:
# Plot all CHs and histograms for a single date.
# Display ranked maps for all dates.
def get_ranked_map(array, ch_mask, apply_gradient, hist_stat,
                   ascend_sort=True):
    """Retrieve a map of CHs from a single segmentation as ranked by a
    histogram statistic.

    Args
        array: image to process
        ch_mask: binary coronal holes mask
        apply_gradient: boolean to specify taking spatial gradient of image
        hist_stat: str to specify histogram sorting statistic
            'median', 'width', 'tail_width'
        ascend_sort: boolean to specify ranking direction
            True to give the highest rank value (100) to the CH with
                the least statistic
            False to give the highest rank value (100) to the CH with
                the greatest statistic
    Returns
        Array shaped like the input image in which each CH is filled
        with its rank value in (0,100], other non-NaN pixels are 0, and
        NaN pixels of the input image remain NaN.
    Raises
        ValueError: if hist_stat is not a supported statistic
    """
    # Masked array of candidate CHs
    masked_candidates = detect.get_masked_candidates(array, ch_mask)
    if apply_gradient:
        masked_candidates = filters.sobel(masked_candidates)

    # Isolated images of detected CHs
    map_data_by_ch = detect.get_map_data_by_ch(
        masked_candidates, ch_mask
    )
    num_ch = len(map_data_by_ch)

    # Rank candidates by histogram statistic, least to greatest
    if hist_stat == 'median':
        medians = [np.nanmedian(map_data) for map_data in map_data_by_ch]
        sorted_candidate_idxs = np.argsort(medians)
    elif hist_stat == 'width':
        ch_band_widths = get_ch_band_widths(map_data_by_ch)
        sorted_candidate_idxs = np.argsort(ch_band_widths)
    elif hist_stat == 'tail_width':
        ch_lower_tail_width_list = get_ch_lower_tail_widths(
            map_data_by_ch
        )
        sorted_candidate_idxs = np.argsort(ch_lower_tail_width_list)
    else:
        raise ValueError(
            f"hist_stat must be 'median', 'width' or 'tail_width', "
            f"got {hist_stat!r}"
        )

    # Rank values grow with position in the list, so reversing the
    # order hands the highest rank value to the least statistic
    if ascend_sort:
        sorted_candidate_idxs = np.flip(sorted_candidate_idxs)

    # Index the list directly rather than stacking every CH image into
    # one large 3D array
    ranked_ch_ims = [map_data_by_ch[i] for i in sorted_candidate_idxs]

    ranked_map = np.where(
        ~np.isnan(array), 0, np.nan
    )
    for ch_num, isolated_ch_im in enumerate(ranked_ch_ims):
        ranked_map = np.where(
            ~np.isnan(isolated_ch_im), (ch_num + 1)*100/num_ch, ranked_map
        )
    return ranked_map


def plot_ensemble_comparison(he_map, ensemble_map, euv_map):
    """Plot a He I observation, an ensemble map, and an EUV observation
    side by side in one figure with three panels.
    
    Args
        he_map: map of the He I observation; also supplies the projection
            for the ensemble map panel
        ensemble_map: map plotted with the 'magma' colormap and no title
        euv_map: map of the EUV observation
    """
    fig = plt.figure(figsize=(18, 5))
    
    # (map to draw, map providing the axes projection, plot keyword args)
    panel_specs = [
        (he_map, he_map, {'vmin': -100, 'vmax': 100}),
        (ensemble_map, he_map, {'title': '', 'cmap': 'magma'}),
        (euv_map, euv_map, {}),
    ]
    for panel_num, (panel_map, projection_map, plot_kwargs) in enumerate(
            panel_specs, start=1):
        panel_ax = fig.add_subplot(1, 3, panel_num, projection=projection_map)
        panel_map.plot(axes=panel_ax, **plot_kwargs)

Brightness Width

In [None]:
# Segmentation design variables: threshold level and morphological radius
percent_of_peak = 90
morph_radius = 15

# NOTE(review): `he_map_data` and `plot_sorted_ch_hists` come from earlier
# cells. Only the first element of the pre-processing output is used.
pre_process_v0_1_he = detect.pre_process_v0_1(he_map_data)[0]
ch_mask = detect.get_ch_mask(pre_process_v0_1_he, percent_of_peak, morph_radius)

# Statistic: histogram width of the brightness image (no spatial gradient)
plot_sorted_ch_hists(pre_process_v0_1_he, ch_mask,
                     apply_gradient=False, hist_stat='width')

Brightness Tail Width

In [None]:
# Segmentation design variables: threshold level and morphological radius
percent_of_peak = 90
morph_radius = 15

# NOTE(review): `he_map_data` and `plot_sorted_ch_hists` come from earlier
# cells. Only the first element of the pre-processing output is used.
pre_process_v0_1_he = detect.pre_process_v0_1(he_map_data)[0]
ch_mask = detect.get_ch_mask(pre_process_v0_1_he, percent_of_peak, morph_radius)

# Statistic: lower tail width of the brightness image (no spatial gradient)
plot_sorted_ch_hists(pre_process_v0_1_he, ch_mask,
                     apply_gradient=False, hist_stat='tail_width')

Gradient Median

In [None]:
# Segmentation design variables: threshold level and morphological radius
percent_of_peak = 90
morph_radius = 15

# NOTE(review): `he_map_data` and `plot_sorted_ch_hists` come from earlier
# cells. Only the first element of the pre-processing output is used.
pre_process_v0_1_he = detect.pre_process_v0_1(he_map_data)[0]
ch_mask = detect.get_ch_mask(pre_process_v0_1_he, percent_of_peak, morph_radius)

# Statistic: histogram median of the spatial gradient image
plot_sorted_ch_hists(pre_process_v0_1_he, ch_mask,
                     apply_gradient=True, hist_stat='median')

Gradient Width

In [None]:
# Segmentation design variables: threshold level and morphological radius
percent_of_peak = 90
morph_radius = 15

# NOTE(review): `he_map_data` and `plot_sorted_ch_hists` come from earlier
# cells. Only the first element of the pre-processing output is used.
pre_process_v0_1_he = detect.pre_process_v0_1(he_map_data)[0]
ch_mask = detect.get_ch_mask(pre_process_v0_1_he, percent_of_peak, morph_radius)

# Statistic: histogram width of the spatial gradient image
plot_sorted_ch_hists(pre_process_v0_1_he, ch_mask,
                     apply_gradient=True, hist_stat='width')

Statistic Comparison

In [None]:
# Compare ranking by different smoothness statistics for a single date
percent_of_peak = 90
morph_radius = 15
# The two lists are paired element-wise: brightness width, brightness tail
# width, gradient median, gradient width
apply_gradient_list = [False, False, True, True]
hist_stat_list = ['width', 'tail_width', 'median', 'width']


# NOTE(review): `he_map_data`, `he_map` and `euv_map` come from earlier cells
pre_process_v0_1_he = detect.pre_process_v0_1(he_map_data)[0]

ch_mask = detect.get_ch_mask(pre_process_v0_1_he, percent_of_peak, morph_radius)
    
# One comparison figure per (gradient, statistic) pair
for apply_gradient, hist_stat in zip(apply_gradient_list, hist_stat_list):
    ranked_map_data = get_ranked_map(
        pre_process_v0_1_he, ch_mask, apply_gradient, hist_stat
    )
    
    # Ranked data is flipped vertically before being paired with the He I
    # map metadata
    ranked_map = sunpy.map.Map(np.flipud(ranked_map_data), he_map.meta)
    plot_ensemble_comparison(he_map, ranked_map, euv_map)

### Coordinates: Missing Helioprojective Keywords

In [None]:
# Test without modifying when reading FITS file
# NOTE(review): `he_fits_file` must be defined by an earlier cell
with fits.open(he_fits_file) as hdu_list:
    # Header and data are taken from the last HDU in the file
    header = hdu_list[-1].header
    num_data_arrays = header.get('NAXIS3')
    
    # Without a (non-zero) NAXIS3 keyword the data is used as is; otherwise
    # only the first array along the leading axis is kept
    if not num_data_arrays:
        data = hdu_list[-1].data
    else:
        data = hdu_list[-1].data[0]

# Build the map directly from the unmodified header
he_map = sunpy.map.Map(data, header)

Backup Header Heliocentric delt/pix To Helioprojective delt/pix

In [None]:
# Cartesian distance change per pixel
# Built from the alternate WCS keywords (suffix 'A') of `header`, placed at
# z = 0 in a Heliocentric frame with an Earth observer
hc_delta_coords = frames.Heliocentric(
    header['CDELT1A']*u.Unit(header['CUNIT1A']),
    header['CDELT2A']*u.Unit(header['CUNIT2A']),
    z=0*u.m, observer='earth', obstime=header['DATE-OBS']
)

In [None]:
# Sunpy coordinate transform
# Converts the per-pixel Heliocentric distance step into Helioprojective
# angles (Tx, Ty) for an Earth observer at the observation time
hp_delta_coords = hc_delta_coords.transform_to(
    frames.Helioprojective(
        header['CRVAL1']*u.arcsec, header['CRVAL2']*u.arcsec,
        observer='earth', obstime=header['DATE-OBS']
    )
)
print(f'HP Scale: {hp_delta_coords.Tx.value:.11f}, {hp_delta_coords.Ty.value:.11f} arcsec/pix')

In [None]:
# Thompson 2005 method section 4.1
# An Earth-observer header is generated only to read the Sun-observer
# distance from it
earth_hp_coords = frames.Helioprojective(
    header['CRVAL1']*u.arcsec, header['CRVAL2']*u.arcsec,
    observer='earth', obstime=header['DATE-OBS'],
)
earth_header = sunpy.map.make_fitswcs_header(data, earth_hp_coords)

# Sun-Earth distance
Ds = earth_header['dsun_obs']*u.m

# Angle per pixel taken as (distance per pixel)/(Sun-Earth distance) in
# radians, then expressed in arcsec
hp_delta_Tx = (hc_delta_coords.x/Ds).to(u.dimensionless_unscaled)
hp_delta_Tx = (hp_delta_Tx*u.rad).to(u.arcsec)
hp_delta_Ty = (hc_delta_coords.y/Ds).to(u.dimensionless_unscaled)
hp_delta_Ty = (hp_delta_Ty*u.rad).to(u.arcsec)

print(f'HP Scale: {hp_delta_Tx.value:.11f}, {hp_delta_Ty.value:.11f} arcsec/pix')

### Foreshortening Factor

Testing with maps from around June nearly removes the influence of the B0 angle, which is close to zero at that time of year.

In [None]:
# He I map with limb and grid, plus test points placed along the equator
# NOTE(review): `he_date_str` comes from an earlier cell
fig = plt.figure(figsize=(4, 4))
ax = plot_detection.plot_he_map(fig, (1, 1, 1), he_map, he_date_str)

he_map.draw_limb(axes=ax, color="k")
he_map.draw_grid(axes=ax, color="k")

# Observer latitude of the map, used below as the B0 angle
B0 = he_map.observer_coordinate.lat
# lon = np.linspace(0,90,5) * u.deg
lon = [0,19,42,90] * u.deg
# lon = [-0.13] * u.deg
# lon = np.arccos(np.linspace(1,0,5)) * u.rad
# One zero latitude per test longitude
lat = [0 for _ in range(len(lon))] * u.deg

coords = SkyCoord(
    lon, lat, frame=frames.HeliographicStonyhurst, obstime=he_map.date
)
ax.plot_coord(coords, 'o')

In [None]:
# Foreshortening factor per test point: cos(lon)*cos(lat - B0), with all
# angles in radians
pixel_lons = lon.to(u.rad).value
pixel_lats = lat.to(u.rad).value - B0.to(u.rad).value
foreshort_factors = np.cos(pixel_lons)*np.cos(pixel_lats)
foreshort_factors

### Area

Ensemble Map Masking

In [None]:
# Ensemble map drawn twice with white contours at two map values: 35 on the
# left panel and 65 on the right
# NOTE(review): `ensemble_map` comes from an earlier cell
fig = plt.figure(figsize=(12, 5))
ax = fig.add_subplot(121, projection=ensemble_map)
ensemble_map.plot(axes=ax)
ensemble_map.draw_grid(axes=ax)
for contour in ensemble_map.contour(35):
    ax.plot_coord(contour, color='white')
    
ax = fig.add_subplot(122, projection=ensemble_map)
ensemble_map.plot(axes=ax)
ensemble_map.draw_grid(axes=ax)
for contour in ensemble_map.contour(65):
    ax.plot_coord(contour, color='white')

In [None]:
# Mask out detections below a CL threshold
fig = plt.figure(figsize=(12, 5))

# Left: the boolean mask itself (True where the map value exceeds 50),
# flipped vertically for display with imshow
ensemble_map.mask = (ensemble_map.data > 50)
ax = fig.add_subplot(121)
ax.imshow(np.flipud(ensemble_map.mask), cmap='magma')

# Right: the map plotted with pixels below 50 masked out
ensemble_map.mask = (ensemble_map.data < 50)
ax = fig.add_subplot(122, projection=ensemble_map)
ensemble_map.plot(axes=ax)

# Reset the mask so later cells see the unmasked map
ensemble_map.mask = None

Threshold Map by Confidence

In [None]:
# Minimum map value for a pixel to be kept
confidence_level = 20


# Keep values at or above the confidence level and zero the rest (NaN pixels
# fail the comparison and become 0), then label connected non-zero regions
# NOTE(review): `ensemble_map_data`, `he` and `he_date_str` come from
# earlier cells
confidence_map = np.where(ensemble_map_data >= confidence_level, ensemble_map_data, 0)
labeled_map, num_ch = ndimage.label(confidence_map)

# Restore NaN wherever the ensemble data is NaN
confidence_map = np.where(np.isnan(ensemble_map_data), np.nan, confidence_map)
labeled_map = np.where(np.isnan(ensemble_map_data), np.nan, labeled_map)

fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(30, 10))

ax[0].set_title(he_date_str)
ax[0].imshow(he, cmap=plt.cm.gray)

ax[1].imshow(ensemble_map_data, cmap=plt.cm.magma)
ax[2].imshow(labeled_map, cmap=plt.cm.bone)
# Number of labeled regions
print(num_ch)

#### Verification

In [None]:
# Verification input: 1 on every non-NaN pixel of the (vertically flipped)
# ensemble data and NaN elsewhere.
# NOTE(review): this overwrites `ensemble_map` for all later cells, and
# `pre_processed_map` must come from an earlier cell.
fake_ensemble_data = np.where(~np.isnan(np.flipud(ensemble_map.data)), 1, np.nan)
ensemble_map = sunpy.map.Map(fake_ensemble_data, pre_processed_map.meta)

fig = plt.figure(figsize=(7, 4))
ax = fig.add_subplot(111, projection=ensemble_map)
ensemble_map.plot(axes=ax, title='')

Open Area: Compare official function with decomposed to investigate failed retrievals

In [None]:
# Surface area of a sphere of one solar radius, in Mm^2
SOLAR_AREA = 4*np.pi *(1*u.solRad).to(u.Mm)**2

confidence_level = 0

# A non-positive level is replaced by a small positive value so that zero
# valued pixels do not pass the >= comparison below
if confidence_level <= 0:
    confidence_level = 1e-3

# ----------------------------------------------------------------------------
# Step 1: area covered by one square pixel
if ensemble_map.coordinate_frame.name == 'helioprojective':
    # Angular step of one pixel along each axis, expressed as a
    # Helioprojective coordinate and transformed to Heliocentric distances
    hp_delta_coord = frames.Helioprojective(
        ensemble_map.scale.axis1*u.pix,
        ensemble_map.scale.axis2*u.pix,
        observer='earth', obstime=ensemble_map.date
    )
    hc_delta_coord = hp_delta_coord.transform_to(
        frames.Heliocentric(observer='earth', obstime=ensemble_map.date)
    )
    A_per_square_px = np.abs(
        hc_delta_coord.x.to(u.Mm)*hc_delta_coord.y.to(u.Mm)
    )
elif ensemble_map.coordinate_frame.name == 'heliographic_stonyhurst':

    # NOTE(review): the scales are read from `pre_processed_map`, not from
    # `ensemble_map` - confirm both share the same grid
    x_scale, y_scale = detect.get_hg_map_dist_scales(pre_processed_map)
    A_per_square_px = x_scale*y_scale
else:
    raise Exception(('Coordinate frame not recognized for obtaining '
                     'area per square pixel.'))

# ----------------------------------------------------------------------------
# Step 2: heliographic coordinates of detected pixels
# Detected pixels at a confidence level
# Flip upside down to align Sunpy coordinates and Numpy indices
detected_px_coords = np.where(
    np.flipud(ensemble_map.data) >= confidence_level
)

if ensemble_map.coordinate_frame.name == 'helioprojective':
    
    # Convert detected pixels to Helioprojective Tx, Ty
    # (column indices are passed as x, row indices as y)
    detected_hp_coords = ensemble_map.pixel_to_world(
        detected_px_coords[1]*u.pix, detected_px_coords[0]*u.pix
    )

    # Convert detected Helioprojective Tx, Ty to Heliographic lon, lat
    raw_detected_hg_coords = detected_hp_coords.transform_to(
        frames.HeliographicStonyhurst(obstime=ensemble_map.date)
    )

    # Remove pixels with failed conversion and longitudes outside (-90,90)
    failed_coord_idxs = np.where(
        np.isnan(raw_detected_hg_coords.lon) 
        | (np.abs(raw_detected_hg_coords.lon.to(u.deg).value) >= 90)
    )
    detected_hg_coords = np.delete(
        raw_detected_hg_coords, failed_coord_idxs
    )
    
elif ensemble_map.coordinate_frame.name == 'heliographic_stonyhurst':
    
    # Convert detected pixels to Heliographic lon, lat
    detected_hg_coords = ensemble_map.pixel_to_world(
        detected_px_coords[1]*u.pix, detected_px_coords[0]*u.pix
    )
    # No pixels are treated as failed in this frame
    failed_coord_idxs = np.array([], dtype=np.int8)

# ----------------------------------------------------------------------------
# Step 3: area per detected pixel
if ensemble_map.coordinate_frame.name == 'helioprojective':
    
    # B-angle to subtract from latitude
    B0 = ensemble_map.observer_coordinate.lat

    # Projected pixel area divided by the foreshortening factor
    # cos(lon)*cos(lat - B0)
    pixel_lons = detected_hg_coords.lon.to(u.rad).value
    pixel_lats = detected_hg_coords.lat.to(u.rad).value - B0.to(u.rad).value
    pixel_areas = A_per_square_px/(np.cos(pixel_lons)*np.cos(pixel_lats))

elif ensemble_map.coordinate_frame.name == 'heliographic_stonyhurst':
    # Every pixel is assigned the same area in this frame
    pixel_areas = np.ones(detected_hg_coords.shape)*A_per_square_px

# Sum area detected in all pixels
area = np.sum(pixel_areas)
# Detected area as a percentage of SOLAR_AREA
area_percent = area/SOLAR_AREA*100
area_percent

In [None]:
# Official function result, for comparison with the decomposed computation
detect.get_open_area(ensemble_map, confidence_level=0)

In [None]:
# Check whether any detected pixel has a positive heliographic latitude
np.any(detected_hg_coords.lat > 0)

In [None]:
from astropy.coordinates import CylindricalRepresentation

# WRONG BC RHO IS CYLINDRICAL AS OPPOSED TO SPHERICAL
# # Convert detected Helioprojective Sky Coords to Heliocentric radial rho, psi
# raw_pixel_hc_coords = pixel_hp_coords.transform_to(
#     frames.Heliocentric(observer='earth', obstime=obstime)
# )
# # raw_pixel_hc_coords = raw_pixel_hc_coords.represent_as(CylindricalRepresentation)
# pixel_hc_coords = raw_pixel_hc_coords[np.where(~np.isnan(raw_pixel_hc_coords.x))]

# # Compute area per pixel while accounting for foreshortening
# rho = np.sqrt(pixel_hc_coords.x**2 + pixel_hc_coords.y**2 + pixel_hc_coords.z**2)
# pixel_sol_rad_ratios = (pixel_hc_coords.rho/u.solRad).to(u.dimensionless_unscaled)
# pixel_angles_to_limb = pixel_sol_rad_ratios*np.pi/2 *u.rad
# pixel_areas = A_per_square_px/np.cos(pixel_angles_to_limb.value)

Failed Coordinate Conversion Points

In [None]:
# Test for a non-empty set of failed indices. np.any() on the index arrays
# would test for non-zero index values and miss a lone failure at index 0.
has_failed_coords = np.size(failed_coord_idxs) > 0

if has_failed_coords:
    # NOTE(review): `detected_hp_coords` is only defined when the map is in
    # a helioprojective frame
    failed_hp_coords = detected_hp_coords[failed_coord_idxs]
    failed_pixel_pairs = ensemble_map.world_to_pixel(failed_hp_coords)
else:
    print('No points matched condition')

ensemble_map.mask = None

fig = plt.figure(figsize=(12, 5))
failed_point_color = '#1ed950'

# Left: raw data array with failed pixels marked in pixel coordinates
ax = fig.add_subplot(121)
ax.imshow(ensemble_map.data, cmap='magma')
if has_failed_coords:
    ax.scatter(failed_pixel_pairs.x.value, failed_pixel_pairs.y.value,
               color=failed_point_color)

ax.invert_yaxis()

# Right: map in world coordinates with the contour at the confidence level
ax = fig.add_subplot(122, projection=ensemble_map)
ensemble_map.plot(axes=ax)
ensemble_map.draw_grid(axes=ax)
for contour in ensemble_map.contour(confidence_level):
    ax.plot_coord(contour, color='white')

if has_failed_coords:
    ax.plot_coord(failed_hp_coords, 'o', color=failed_point_color)

In [None]:
# Heliographic pixels with too large longitude
# Selects converted pixels whose longitude is not NaN and is >= 90 deg.
# NOTE(review): unlike the failed-pixel filter, no absolute value is taken
# here, so longitudes <= -90 deg are not selected - confirm intent.
large_lon_pixel_hg_coords = raw_detected_hg_coords[
    np.where(~np.isnan(raw_detected_hg_coords.lon)
             & (raw_detected_hg_coords.lon.to(u.deg).value >= 90))
]
large_lon_pixel_hg_coords
# large_lon_pixel_hg_coords[np.argsort(large_lon_pixel_hg_coords.lon.to(u.deg).value)]

In [None]:
# All failed conversion pixels
# `failed_coord_idxs` is the index set computed together with
# `raw_detected_hg_coords` (NaN longitude or |lon| >= 90 deg)
pixel_hg_coords = raw_detected_hg_coords[failed_coord_idxs]
pixel_hg_coords

# HG
# nan_idx = np.argmax(pixel_hg_coords.lon.to(u.deg).value)

#### Errors

Correct B-Angle

In [None]:
# EPH_B0 keyword: [deg] Disk center solar latitude at DATE-AVG
# Yields a lat offset in HG Stonyhurst coordinates
# Only the last expression (observer latitude at the map center) is
# displayed as cell output; the first two lines are evaluated and discarded
he_map.center
he_map.reference_coordinate
he_map.center.observer.lat

In [None]:
# Earth observer HGLT_OBS keyword
# A header is generated for an Earth observer at the observation time, and
# its HGLT_OBS value is displayed for comparison with the B-angle above
earth_hp_coords = frames.Helioprojective(
    header['CRVAL1']*u.arcsec, header['CRVAL2']*u.arcsec,
    observer='earth', obstime=header['DATE-OBS'],
)
earth_header = sunpy.map.make_fitswcs_header(data, earth_hp_coords)
earth_header['HGLT_OBS']

Limb Size Correction

In [None]:
# Overlay one line of constant longitude and one of constant latitude on
# the He I map to inspect where they meet the drawn limb
stonyhurst_frame = frames.HeliographicStonyhurst(obstime=he_map.date)

num_points = 100
lon_value = -50 * u.deg
lat_value = 0 * u.deg
# Meridian at lon_value sampled from -90 to 90 deg latitude
constant_lon = SkyCoord(lon_value, np.linspace(-90, 90, num_points) * u.deg,
                        frame=stonyhurst_frame)
# Parallel at lat_value sampled from -90 to 90 deg longitude
constant_lat = SkyCoord(np.linspace(-90, 90, num_points) * u.deg, lat_value,
                        frame=stonyhurst_frame)

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(projection=he_map)

# north = SkyCoord(0 * u.deg, 10 * u.deg, frame="heliographic_stonyhurst")
# offset_frame = NorthOffsetFrame(north=north)
# overlay = ax.get_coords_overlay(offset_frame)
# overlay[0].set_ticks(spacing=30. * u.deg)
# overlay.grid(ls='--', color='blue')

ax.plot_coord(constant_lon, color="lightblue")
ax.plot_coord(constant_lat, color="tomato")
# he_map.draw_grid(axes=ax, grid_spacing=10*u.deg)
he_map.draw_limb(axes=ax)
he_map.plot(axes=ax, vmin=-100, vmax=100)

In [None]:
# Recompute foreshortening-corrected pixel areas: projected pixel area
# divided by cos(lon)*cos(lat - B0)
# NOTE(review): `detected_hg_coords`, `B0` and `A_per_square_px` come from
# the decomposed open area cell above
pixel_lons = detected_hg_coords.lon.to(u.rad).value
pixel_lats = detected_hg_coords.lat.to(u.rad).value - B0.to(u.rad).value
pixel_areas = A_per_square_px/(np.cos(pixel_lons)*np.cos(pixel_lats))
pixel_areas

In [None]:
# Retrieve Off Limb Coords: 
# https://docs.sunpy.org/en/stable/generated/api/sunpy.coordinates.utils.get_limb_coordinates.html

In [None]:
# Percentage by which the reference solar radius is enlarged below
LIMB_FACTOR = 1.5

# Inlined body of the following function, for experimentation:
# def get_nso_sunpy_map(fits_file):
#     """Retrieve a Sunpy map with a Helioprojective Cartesian
#     coordinate system and the first data array in a SOLIS VSM FITS file.
    
#     Args
#         fits_file: path to FITS file
#     Returns
#         Sunpy map object.
#     """
# NOTE(review): `fits_file` must be defined by an earlier cell
with fits.open(fits_file) as hdu_list:
    header = hdu_list[-1].header
    num_data_arrays = header.get('NAXIS3')
    
    # Keep only the first data array when the file holds several
    if not num_data_arrays:
        data = hdu_list[-1].data
    else:
        data = hdu_list[-1].data[0]

# # Apply absolute value of coordinate change per pixel such that
# # Solar-X is positive
# header['CDELT1'] = abs(header['CDELT1'])

# Helioprojective Cartesian coordinates must have
# arcsec units for further processing. Warning messages
# will appear but the map will be produced successfully.
# NOTE(review): placeholder with no effect in this inlined version
if (header['WCSNAME'] == 'Helioprojective-cartesian'
    and header['CUNIT1'] != 'arcsec'):
    pass
    
# Heliocentric Cartesian coordinates must have zero
# centered coordinates. In this inlined version the problem is only
# reported; processing continues.
if (header['WCSNAME'] == 'Heliocentric-cartesian (approximate)'
    and (header['CRVAL1'] != 0 or header['CRVAL2'] != 0)):
    print(f'Failed to convert {fits_file} into a Sunpy map. '
          'Coordinates were Heliocentric but were not zero centered.')
    
# Specify Earth-based observer for solar radius, distance to Sun,
# and Heliographic coordinates to avoid warning messages due to
# missing keywords
earth_hp_coords = frames.Helioprojective(
    header['CRVAL1']*u.arcsec, header['CRVAL2']*u.arcsec,
    observer='earth', obstime=header['DATE-OBS'],
)
earth_header = sunpy.map.make_fitswcs_header(data, earth_hp_coords)
for earth_coord_key in ['DSUN_OBS', 'HGLN_OBS', 'HGLT_OBS']:
    header[earth_coord_key] = earth_header[earth_coord_key]

# Enlarge solar radius by a factor to account for larger apparent solar
# limb in He I observations
header['RSUN_REF'] = (100 + LIMB_FACTOR)/100 * earth_header['RSUN_REF']

# Change primary World Coordinate System from Heliocentric Cartesian
# to Helioprojective Cartesian for Sunpy to create map
if header['WCSNAME'] == 'Heliocentric-cartesian (approximate)':
    
    # Cartesian coordinate units
    coord_u1 = u.Unit(header['CUNIT1'])
    coord_u2 = u.Unit(header['CUNIT2'])
    
    # Convert center pixel coordinates from distance to angle
    hc_coords = frames.Heliocentric(
        header['CRVAL1']*coord_u1,
        header['CRVAL2']*coord_u2, z=0*u.m,
        observer='earth', obstime=header['DATE-OBS']
    )
    hp_coords = hc_coords.transform_to(earth_hp_coords)
    header['CRVAL1'] = hp_coords.Tx.value
    header['CRVAL2'] = hp_coords.Ty.value
    
    # Convert change per pixel from distance to angle
    hc_delta_coords = frames.Heliocentric(
        header['CDELT1']*coord_u1,
        header['CDELT2']*coord_u2, z=0*u.m,
        observer='earth', obstime=header['DATE-OBS']
    )
    hp_delta_coords = hc_delta_coords.transform_to(earth_hp_coords)
    header['CDELT1'] = hp_delta_coords.Tx.value
    header['CDELT2'] = hp_delta_coords.Ty.value
    
    # Modify keywords
    header['WCSNAME'] = 'Helioprojective-cartesian'
    header['CTYPE1'] = 'HPLN-TAN'
    header['CTYPE2'] = 'HPLT-TAN'
    header['CUNIT1'] = 'arcsec'
    header['CUNIT2'] = 'arcsec'

    # Remove error causing keywords that indicate presence of
    # coordinate transformation. A default is supplied so that a header
    # without these keywords does not raise KeyError.
    header.pop('PC1_1', None)
    header.pop('PC2_2', None)
        
he_map = sunpy.map.Map(data, header)

In [None]:
# Map solar radius expressed as a dimensionless multiple of one solar radius
(he_map.rsun_meters/u.solRad).to(u.dimensionless_unscaled)

### Magnetic Data

Magnetogram Masking

In [None]:
# NOTE(review): `reprojected_mag_map` comes from an earlier cell
fig = plt.figure(figsize=(12, 5))

# Left: magnetogram with ensemble map contours drawn at level 0
ax = fig.add_subplot(121, projection=reprojected_mag_map)
reprojected_mag_map.plot(axes=ax, vmin=-50, vmax=50)
for contour in ensemble_map.contour(0):
    ax.plot_coord(contour, color='yellow')

# Mask out data
# Pixels where the ensemble map value is below 1 are masked
reprojected_mag_map.mask = (ensemble_map.data < 1)
ax = fig.add_subplot(122, projection=reprojected_mag_map)
reprojected_mag_map.plot(axes=ax, vmin=-0.1, vmax=0.1, cmap='coolwarm')

# Reset the mask so later cells see the unmasked magnetogram
reprojected_mag_map.mask = None
# Red: positive

#### Verification

Open Flux: Compare official function with decomposed to investigate failed retrievals

In [None]:
# Official function result, for comparison with the decomposed computation
# in the next cell; displayed in scientific notation with a Wb label
open_flux = detect.get_unsigned_open_flux(
    ensemble_map, reprojected_mag_map, confidence_level=0
)
f'{open_flux:.7e} Wb'

In [None]:
confidence_level = 0

# A non-positive level is replaced by a small positive value so that zero
# valued pixels do not pass the >= comparison below
if confidence_level <= 0:
    confidence_level = 1e-3
    
A_per_square_px = detect.get_A_per_square_px(ensemble_map)
        
detected_hg_coords, failed_coord_idxs = detect.get_detected_hg_coords(
    ensemble_map, confidence_level
)

pixel_areas = detect.get_pixel_areas(
    ensemble_map, A_per_square_px, detected_hg_coords
)

# Magnetic field strength per detected pixel
# Flip upside down to align Sunpy coordinates and Numpy indices
# NOTE(review): the indices come from the flipped ensemble data but are
# applied to the unflipped magnetogram data - confirm the two arrays are
# stored with the same orientation
detected_idxs = np.where(np.flipud(ensemble_map.data) >= confidence_level)
pixel_B_LOS = reprojected_mag_map.data[detected_idxs]*u.G

# Remove pixels with failed coordinate conversion
pixel_B_LOS = np.delete(pixel_B_LOS, failed_coord_idxs)

# Remove pixels with failed magnetic data retrieval
# (indices refer to the array after the deletion above)
failed_mag_idxs = np.where(np.isnan(pixel_B_LOS))
pixel_B_LOS = np.delete(pixel_B_LOS, failed_mag_idxs)


# Drop the same pixels from the areas so both arrays stay aligned
pixel_areas = np.delete(pixel_areas, failed_mag_idxs)
    
# Sum of |B_LOS| * area over the remaining pixels, converted to Wb
unsigned_open_flux = np.sum(np.abs(pixel_B_LOS)*pixel_areas).to(u.Wb)

unsigned_open_flux

Failed Magnetic Retrieval Points

In [None]:
# Test for a non-empty set of failed indices. np.any() on the index arrays
# would test for non-zero index values and miss a lone failure at index 0.
has_failed_mag = np.size(failed_mag_idxs) > 0

if has_failed_mag:
    # `failed_mag_idxs` index the field strength array *after* pixels with
    # failed coordinate conversion were deleted, so the same pixels are
    # dropped from the detected coordinates before indexing.
    # NOTE(review): `detected_hp_coords` comes from the decomposed open area
    # cell and must correspond to the same map and confidence level.
    converted_px_mask = np.ones(detected_hp_coords.shape, dtype=bool)
    converted_px_mask[failed_coord_idxs] = False
    converted_hp_coords = detected_hp_coords[converted_px_mask]
    
    failed_hp_coords = converted_hp_coords[failed_mag_idxs]
    failed_pixel_pairs = ensemble_map.world_to_pixel(failed_hp_coords)
else:
    print('No points matched condition')

fig = plt.figure(figsize=(12, 5))
failed_point_color = '#1ed950'

# Left: raw magnetogram array with failed pixels marked in pixel coordinates
ax = fig.add_subplot(121)
ax.imshow(reprojected_mag_map.data, vmin=-50, vmax=50, cmap='gray')
if has_failed_mag:
    ax.scatter(failed_pixel_pairs.x.value, failed_pixel_pairs.y.value,
               color=failed_point_color)

ax.invert_yaxis()

# Right: magnetogram in world coordinates with the ensemble contour at the
# confidence level
ax = fig.add_subplot(122, projection=he_map)
reprojected_mag_map.plot(axes=ax, vmin=-50, vmax=50)
reprojected_mag_map.draw_grid(axes=ax)
for contour in ensemble_map.contour(confidence_level):
    ax.plot_coord(contour, color='yellow')

if has_failed_mag:
    ax.plot_coord(failed_hp_coords, 'o', color=failed_point_color)

### Boundary Complexity

In [None]:
from lib.CDM.fracstat import *

def get_fractal_D(img, scale_range):
    """Compute the fractal dimension of an image by box counting.
    
    Args
        img: image array passed to box_counting
        scale_range: pair of scale bounds passed to box_counting and
            power_law
    Returns
        Negative of the leading coefficient of the power law fit, or NaN
        if any scale has zero filled boxes.
    """
    box_scales, box_counts = box_counting(img, scale_range, f=1.1)[:2]
    
    # A scale with no filled boxes cannot enter the fit
    if np.any(box_counts == 0):
        return np.nan
    
    power_fit = power_law(box_scales, box_counts, scale_range)[0]
    return -power_fit[0]
    

def plot_fractal_D(img, scale_range, title_var, contours=False):
    """Plot an image above its box counting curve and power law fit.
    
    Args
        img: image array to analyze
        scale_range: pair of scale bounds over which the power law is fit
        title_var: title of the image panel
        contours: boolean to specify overlaying contours on the image
    Returns
        Matplotlib figure with the image panel and the log-log panel, whose
        title reports the fractal dimension and its fit uncertainty.
    """
    fig = plt.figure(figsize=(8, 10))
    num_rows = img.shape[0]

    # Top panel: the image, optionally with contours
    img_ax = fig.add_subplot(211)
    img_ax.imshow(img, cmap='gray')
    if contours:
        img_ax.contour(img, cmap='gray')
    img_ax.set_title(title_var)
    
    # Bottom panel: box counts over a range widened to at least
    # [5, number of image rows], with scales normalized by the row number
    wide_range = [min([5, scale_range[0]]), max(num_rows, scale_range[1])]
    wide_scales, wide_counts = box_counting(
        img, scale_range=wide_range, f=1.1
    )[:2]
    
    count_ax = fig.add_subplot(212)
    count_ax.loglog(wide_scales/num_rows, wide_counts, c='k')
    
    # Power law fit restricted to the requested scale range
    fit_scales, fit_counts = box_counting(img, scale_range, f=1.1)[:2]
    fit, cov = power_law(fit_scales/num_rows, fit_counts)
    fit_line = np.poly1d(fit)
    fractal_dim = -fit[0]
    fractal_dim_err = np.sqrt(cov[0,0])
    
    # Fit line and dashed markers at the bounds of the fit range
    norm_range = np.array(scale_range)/num_rows
    count_ax.loglog(norm_range, 10**fit_line(np.log10(norm_range)),
                    c='C0', linewidth=2)
    count_ax.vlines(norm_range, ymin=0, ymax=1e6, linestyles='--', colors='k')
    
    count_ax.set_xlabel(r'$\epsilon$')
    count_ax.set_ylabel('Box Number Containing Boundary')
    count_ax.set_title(
        f'Fractal Dimension: {fractal_dim:.3f} +/- {fractal_dim_err:.4f}'
    )
    count_ax.set_ylim([1,1e5])

    return fig

In [None]:
# Rasterize the mask boundaries: contour vertices are truncated to integer
# pixel indices and set to 1 in an otherwise zero image
# NOTE(review): `ch_mask_data` and `morph_radius_dist` come from earlier
# cells
contours = measure.find_contours(ch_mask_data)
ch_boundary_data = np.zeros(ch_mask_data.shape)

for contour in contours:
    contour = contour.astype(int)
    ch_boundary_data[contour[:,0], contour[:,1]] = 1

fig = plot_fractal_D(
    ch_boundary_data,
    scale_range=[10, 500],
    title_var=f'SE Disk Radius: {morph_radius_dist} Mm',
    contours=True
)
# plt.savefig(f'{OUTPUT_DIR}Fractal/{percent_of_peak}_{morph_radius_dist}_Boundary')

In [None]:
# Fractal dimension of the rasterized boundary over the same scale range
get_fractal_D(ch_boundary_data, scale_range=[10, 500])

In [None]:
# Same analysis applied to the filled mask instead of its boundary
fig = plot_fractal_D(
    ch_mask_data,
    scale_range=[10, 500],
    # scale_range=[10, np.min(ch_mask_data.shape)],
    title_var=f'SE Disk Radius: {morph_radius_dist} Mm'
)
# NOTE(review): the target directory is not created here - confirm that
# `{OUTPUT_DIR}Fractal/` exists before running
plt.savefig(f'{OUTPUT_DIR}Fractal/{morph_radius_dist}_Mask')

## Outcome Comparison

#### Pre-Process Outcomes vs Method

In [None]:
# Threshold levels at which outcomes are evaluated for each method
percent_of_peak_list = [80, 90, 100, 110]
area_percent_df_by_method_list = []
mad_by_thresh_by_method_list = []

# NOTE(review): `out_dir` must already be defined by an earlier cell; in
# this part of the notebook it is only assigned further below
for pre_process_version_dir in ['v0_1/', 'v0_4/']:
    pre_process_save_dir = out_dir + PREPROCESS_DIR + pre_process_version_dir
    
    # Second element of the returned outcomes: detected area percentages
    area_percent_df = detect.get_thresh_outcome_time_series_dfs(
        HE_DATE_LIST, percent_of_peak_list, HE_DIR, pre_process_save_dir
    )[1]
    area_percent_df_by_method_list.append(area_percent_df)
    
    # get_mad_by_confidences returns (MAD, normalized MAD); only the MAD is
    # plotted in the following cell
    mad_by_thresh = detect.get_mad_by_confidences(
        area_percent_df, percent_of_peak_list
    )[0]
    mad_by_thresh_by_method_list.append(mad_by_thresh)

In [None]:
x_ticks = np.arange(len(percent_of_peak_list))
threshold_label_list = [
    f'{thresh_level}% of Peak Threshold'
    for thresh_level in percent_of_peak_list
]

# One label per pre-processing version evaluated in the previous cell
method_label_list = ['v0.1', 'v0.4']

# Center the group of bars on each tick, one bar per method, so the cell
# works for any number of methods instead of indexing a fixed count
bar_width = 0.2
num_methods = len(mad_by_thresh_by_method_list)
bar_offsets = (np.arange(num_methods) - (num_methods - 1)/2)*bar_width

plt.figure(1, figsize=(8,6))

for mad_by_thresh, bar_offset, method_label in zip(
    mad_by_thresh_by_method_list, bar_offsets, method_label_list):
    plt.bar(x_ticks + bar_offset, mad_by_thresh, width=bar_width,
            label=method_label)

plt.xticks(x_ticks, threshold_label_list, rotation=10)
plt.ylabel('MAD of Detected Area Percentage (%)')
plt.legend()

### Ensemble Outcomes vs Method

Compare outcomes between confidence levels and/or methods

In [None]:
# Output directory for comparison products, created if missing
out_dir = DETECT_DIR + '_Outcome_Comparison/' + DATE_DIR
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

# Confidence levels at which outcomes are compared
confidence_level_list = [0, 35, 65, 95]
# confidence_level_list = list(range(0,96,5))

# Method versions to compare; alternatives kept for reference
# version_dirs = ['v0_3/', 'Band_Pass/', 'Rescale/', 'Rescale_Center/']
# version_dirs = ['v0_3/', 'Rescale/']
# version_dirs = ['v0_3/', 'Rescale/', 'v0_4/']
# version_dirs = ['v0_3/', 'v0_4/']
# version_dirs = ['v0_4_Single/', 'v0_4/']
# version_dirs = ['v0_4_Unipolar']
version_dirs = ['v0_1', 'v0_2', 'v0_3', 'v0_4', 'v0_5']
# Name parts later joined into output file names
descript_list = version_dirs + [f'cl{cl}' for cl in confidence_level_list]

Plot Formatting

In [None]:
# Legend label per compared method, in the same order as `version_dirs`
method_list = ['Bright & Coherent Mask', 'Ensemble', 'Smoothness',
               'Consistency', 'Unipolarity']

# Alternative label sets kept for reference
# method_list = ['Unipolarity']
# method_list = ['v0.3', 'v0.3 Design + Band Pass', 'v0.3 Design + Rescale',
#               'v0.3 Design + Rescale & Center']
# method_list = ['v0.1', 'v0.2', 'v0.3', 'v0.4']
# method_list = ['v0.3', 'v0.3 Design + Rescale']
# method_list = ['v0.3', 'v0.4']
# method_list = ['v0.4 Single', 'v0.4 Ensemble']
# method_list = ['v0.3', 'v0.3 Design + Rescale', 'v0.4']

# Horizontal bar offsets, one per method, centered on the tick and spaced
# by the bar width of 0.2 used in the plotting cells. Deriving them from the
# number of methods keeps zip() in the plotting cells from silently dropping
# methods: 4 methods give [-0.3, -0.1, 0.1, 0.3], 5 give [-0.4, ..., 0.4].
num_methods = len(method_list)
cl_dx_list = (np.arange(num_methods) - (num_methods - 1)/2)*0.2

# One color per method, since colors are paired with methods when plotting
cmap = colormaps['viridis']
color_list = cmap(np.linspace(0, 0.75, num_methods))
# cmap = colormaps['plasma_r']
# color_list = cmap(np.linspace(0.25, 1, num_methods))

v0.1-v0.5 Compute Outcomes

In [None]:
# Outcomes collected per method; each entry holds one value per confidence
# level (or a per-confidence time series for the area percent entries)
area_percent_df_by_method_list = []
autocorr_by_conf_by_method_list = []
mad_by_conf_by_method_list = []
norm_mad_by_conf_by_method_list = []


for version_dir in version_dirs:
    DETECTION_NPY_SAVE_DIR = os.path.join(DETECT_DIR, version_dir, 'Saved_npy_Files/')
    
    outcome_time_series_dict = detect.get_outcome_time_series_dict(
        HE_DATE_LIST, confidence_level_list, DETECTION_NPY_SAVE_DIR
    )
    area_percent_df_by_method_list.append(
        outcome_time_series_dict['area_percent']
    )
    
    # Autocorrelation of the detected area time series per confidence level
    autocorr_by_confidences = [
        outcome_time_series_dict['area'][cl].autocorr()
        for cl in confidence_level_list
    ]
    autocorr_by_conf_by_method_list.append(autocorr_by_confidences)
    # MAD and normalized MAD of the detected area per confidence level
    out = detect.get_mad_by_confidences(
        outcome_time_series_dict['area'], confidence_level_list
    )
    mad_by_confidences, norm_mad_by_confidences = out
    mad_by_conf_by_method_list.append(mad_by_confidences)
    norm_mad_by_conf_by_method_list.append(norm_mad_by_confidences)
    print(f'Outcomes computed for {version_dir}')

# Save autocorrelations under a name listing versions and confidence levels
# NOTE(review): `out_dir` is concatenated directly with the file name, so it
# is presumably expected to end with a path separator - confirm
descript_list = version_dirs + [f'cl{cl}' for cl in confidence_level_list]
autocorr_file = f'{out_dir}Autocorr_comp_{"_".join(descript_list)}.npy'
np.save(autocorr_file, np.array(autocorr_by_conf_by_method_list),
        allow_pickle=True)

MAD

In [None]:
# Grouped bar chart: one group per confidence level, one bar per method
x_ticks = np.arange(len(confidence_level_list))
confidence_label_list = [
    f'{confidence_level}% Confidence'
    for confidence_level in confidence_level_list
]

plt.figure(1, figsize=(9,6))
# zip() stops at the shortest of the four lists, so a method without a
# matching offset or color is not drawn
for mad_by_confidences, cl_dx, method, color in zip(
    mad_by_conf_by_method_list, cl_dx_list, method_list, color_list):
    plt.bar(x_ticks + cl_dx, mad_by_confidences, width=0.2,
            label=method, color=color)

plt.suptitle(DATE_RANGE_SUPTITLE)
plt.title('Method Comparison')
plt.xticks(x_ticks, confidence_label_list, rotation=10)
plt.ylabel(f'MAD of Detected Area (Mm^2)')
plt.legend()

Normalized MAD

In [None]:
# Grouped bar chart: one group per confidence level, one bar per method
x_ticks = np.arange(len(confidence_level_list))
confidence_label_list = [
    f'{confidence_level}% Confidence'
    for confidence_level in confidence_level_list
]

plt.figure(1, figsize=(9,6))
# zip() stops at the shortest of the four lists, so a method without a
# matching offset or color is not drawn
for norm_mad_by_confidences, cl_dx, method, color in zip(
    norm_mad_by_conf_by_method_list, cl_dx_list, method_list, color_list):
    plt.bar(x_ticks + cl_dx, norm_mad_by_confidences, width=0.2,
            label=method, color=color)
    
plt.suptitle(DATE_RANGE_SUPTITLE)
plt.title('Method Comparison')
plt.xticks(x_ticks, confidence_label_list, rotation=10)
# Fixed y range; bars above 50% are clipped
plt.ylim([0, 50])
plt.ylabel(f'Normalized MAD of Detected Area (%)')
plt.legend()

## Contour Plots

### Evaluate Outcomes on Grid

In [None]:
# Set to True to recompute outcomes even if a saved file exists
overwrite = False
output_dir = DETECTION_IMAGE_DIR + 'Outcome_Maps/'
save_file = f'{output_dir}{file_date_str}.npy'

# Grid definition as (start, step, number of values) per design variable
# v0.5.1
p_start, p_step, p_num = (70, 10, 5)
r_start, r_step, r_num = (8, 4, 4)

# # vY
# p_start, p_step, p_num = (60, 7.5, 7)
# r_start, r_step, r_num = (8, 2, 6)

# Grid axes: threshold levels and morphological radii
PERCENTS_OF_PEAK = np.arange(p_start, p_start + p_num*p_step, p_step)
MORPH_RADII = np.arange(r_start, r_start + r_num*r_step, r_step)

# thresh_step = 5
# radius_step = 2
# PERCENTS_OF_PEAK = np.arange(50,86,thresh_step)
# # PERCENTS_OF_PEAK = np.array(range(65,101,thresh_step))
# # PERCENTS_OF_PEAK = np.array(range(45,101,thresh_step))
# MORPH_RADII = np.arange(14,25,radius_step)

print(PERCENTS_OF_PEAK)
print(MORPH_RADII)

v0.5-

In [None]:
# def get_area_for_date_str_1D(percent_of_peak, he_date_str):
#     """Retrieve detected area percentages for the specified date.
#     """
#     he_map = prepare_data.get_nso_sunpy_map(HE_DIR + he_date_str + '.fts')
#     he = detect.pre_process_he_v0_4(he_map.data)
#     ch_mask_data = detect.get_ch_mask(
#         he, percent_of_peak, MORPH_RADIUS
#     )
#     ch_mask_map = sunpy.map.Map(np.flipud(ch_mask_data), he_map.meta)
#     return detect.get_area(ch_mask_map, 0)[0]


def get_area_for_date_str(percent_of_peak, morph_radius, he_date_str):
    """Retrieve the open area outcomes of a single segmentation for the
    specified date.
    
    Args
        percent_of_peak: percentage value at which to take threshold
        morph_radius: radius of the structuring element in morphological
            operations
        he_date_str: date string naming the He I FITS file in HE_DIR
    Returns
        Output of detect.get_open_area for the segmentation mask at a
        confidence level of 0.
    """
    fits_path = HE_DIR + he_date_str + '.fts'
    he_map = prepare_data.get_nso_sunpy_map(fits_path)
    
    pre_processed_he = detect.pre_process_v0_4(he_map.data)
    ch_mask_data = detect.get_ch_mask(
        pre_processed_he, percent_of_peak, morph_radius
    )
    
    # Mask is flipped vertically before being paired with the map metadata
    flipped_mask_data = np.flipud(ch_mask_data)
    ch_mask_map = sunpy.map.Map(flipped_mask_data, he_map.meta)
    
    return detect.get_open_area(ch_mask_map, 0)
    

def get_outcomes(percent_of_peak, morph_radius):
    """Retrieve segmentation map outcomes with specified design
    variables over time.
    
    Evaluates one segmentation per date in HE_DATE_LIST and summarizes the
    resulting time series.
    
    Args
        percent_of_peak: float percentage value at which to take threshold
        morph_radius: int pixel number for radius of disk 
            structuring element in morphological operations
    Returns
        Array of [median area percentage, autocorrelation of the detected
        area time series, normalized MAD of the detected area in percent].
    """
    # Progress indicator: design variables of the node being evaluated
    print((percent_of_peak, morph_radius), end='  ')
    area_tuple_list = [
        get_area_for_date_str(percent_of_peak, morph_radius, he_date_str)
        for he_date_str in HE_DATE_LIST
    ]
    # First tuple element: area percentage; second: area
    area_percent_list = [
        area_tuple[0] for area_tuple in area_tuple_list
    ]
    area_list = [
        area_tuple[1] for area_tuple in area_tuple_list
    ]
    # Earlier one-dimensional variants, kept for reference:
    # percent_of_peak = design_vars
    # print((design_vars, MORPH_RADIUS))
    # area_percent_list = [
    #     get_area_for_date_str_1D(percent_of_peak, he_date_str)
    #     for he_date_str in HE_DATE_LIST
    # ]
    # area_percent_list = OPTIMIZER.get_area_list(design_vars)
    
    area_percent_median = np.median(area_percent_list)
    autocorr = pd.Series(area_list).autocorr()
    
    # Compute normalized MAD of detected area
    mad = np.median(np.abs(area_list - np.median(area_list)))
    # A zero median area would divide by zero; report 0 instead
    if np.median(area_list) == 0:
        norm_mad = 0
    else:
        norm_mad = mad/np.median(area_list)*100
    
    return np.array([area_percent_median, autocorr, norm_mad])

# Vectorized wrapper: takes scalar design variables element-wise and stacks
# the three outcomes along a trailing axis
get_vect_outcomes = np.vectorize(get_outcomes, signature='(),()->(3)')

In [None]:
# (Expensive computation: 31 min for 1 month, 48 nodes)

os.makedirs(output_dir, exist_ok=True)

# Reuse saved outcomes unless overwriting is requested. Loading keeps the
# notebook runnable from top to bottom and defines `outcome_array` either
# way, instead of aborting the run with sys.exit.
if os.path.isfile(save_file) and not overwrite:
    print(f'{file_date_str} data already exists. Loading saved outcomes.')
    outcome_array = np.load(save_file, allow_pickle=True)
else:
    # One node per (percent of peak, morphological radius) pair
    num_nodes = len(PERCENTS_OF_PEAK)*len(MORPH_RADII)
    print(f'Evaluating outcomes for {num_nodes} nodes')

    # ~1min/step
    # Rows of the grid follow MORPH_RADII, columns follow PERCENTS_OF_PEAK
    X, Y = np.meshgrid(PERCENTS_OF_PEAK, MORPH_RADII)
    outcome_array = get_vect_outcomes(X, Y)

    np.save(save_file, outcome_array, allow_pickle=True)

In [None]:
# Merge outcome maps
# NOTE(review): this reassigns `output_dir` and `save_file`, which the grid
# evaluation cells above also use - rerun the grid configuration cell before
# evaluating again
# Left part: first outcome (median area percentage) of each node
output_dir = DETECTION_IMAGE_DIR + 'Outcome_Maps_L/'
save_file = f'{output_dir}{file_date_str}.npy'
area_percent_median_array_L = np.load(save_file, allow_pickle=True)[:,:,0]

# Right part
output_dir = DETECTION_IMAGE_DIR + 'Outcome_Maps_R/'
save_file = f'{output_dir}{file_date_str}.npy'
area_percent_median_array_R = np.load(save_file, allow_pickle=True)[:,:,0]

# Join the first four columns of the left map with the full right map
area_percent_median_array = np.hstack([
    area_percent_median_array_L[:,:4], area_percent_median_array_R
])

v0.5.1+

In [None]:
def get_contour_outcomes_for_date_str(he_date_str, percent_of_peak, morph_radius_dist):
    """Compute area and boundary-complexity outcomes for one observation.

    Loads the saved pre-processed map for the given date, segments it with
    a single (threshold, radius) design and measures the resulting mask.

    Args
        he_date_str: date string of the He I observation, used to locate
            the saved pre-processed map file
        percent_of_peak: threshold design variable passed to the detector
        morph_radius_dist: structuring element radius design variable
            passed to the detector
    Returns
        Tuple (area_percent, open_area, fractal_D): the first two as
            returned by detect.get_open_area for the mask map, the last
            as returned by get_fractal_D for the mask boundary image.
    """
    map_path = ''.join(
        [PREPROCESS_MAP_SAVE_DIR, he_date_str, '_pre_processed_map.fits']
    )
    pre_processed_map = sunpy.map.Map(map_path)

    # Segmentation mask for the single requested design, plus a Sunpy map
    # built from the vertically flipped mask and the source metadata
    mask_list = detect.get_ch_mask_list_vY(
        pre_processed_map, [percent_of_peak], [morph_radius_dist]
    )
    ch_mask_data = mask_list[0]
    ch_mask_map = sunpy.map.Map(np.flipud(ch_mask_data), pre_processed_map.meta)

    # Rasterize contour vertices (truncated to integer pixels) into a
    # binary boundary image with the same shape as the mask
    ch_boundary_data = np.zeros(ch_mask_data.shape)
    for contour in measure.find_contours(ch_mask_data):
        rows, cols = contour.astype(int).T
        ch_boundary_data[rows, cols] = 1

    area_percent, open_area = detect.get_open_area(ch_mask_map, 0)
    fractal_D = get_fractal_D(ch_boundary_data, scale_range=[10, 500])

    return area_percent, open_area, fractal_D


def get_outcomes_v0_5_1(percent_of_peak, morph_radius_dist):
    """Summarize segmentation outcomes for one design over all dates.

    Evaluates get_contour_outcomes_for_date_str for every date string in
    HE_DATE_LIST and reduces the per-date results to three scalars.

    Args
        percent_of_peak: float percentage measured from the zero
            value up to or beyond the histogram value
        morph_radius_dist: float distances in Mm for radius of
            disk structuring element in morphological operations
    Returns
        Array of [median area percentage, autocorrelation of the detected
            area time series, NaN-ignoring median fractal dimension].
    """
    # Progress trace: one tuple per evaluated design, all on one line
    print((percent_of_peak, morph_radius_dist), end='  ')

    area_percent_list = []
    area_list = []
    fractal_D_list = []
    for he_date_str in HE_DATE_LIST:
        area_percent, open_area, fractal_D = get_contour_outcomes_for_date_str(
            he_date_str, percent_of_peak, morph_radius_dist
        )
        area_percent_list.append(area_percent)
        area_list.append(open_area)
        fractal_D_list.append(fractal_D)

    return np.array([
        np.median(area_percent_list),
        pd.Series(area_list).autocorr(),
        np.nanmedian(fractal_D_list),
    ])

# Vectorized wrapper: broadcasts get_outcomes_v0_5_1 over arrays of its two
# scalar arguments; the result gains a trailing axis of size 3
# (median area percentage, autocorrelation, median fractal dimension).
get_vect_outcomes_v0_5_1 = np.vectorize(get_outcomes_v0_5_1, signature='(),()->(3)')

In [None]:
# Create the output directory if it does not exist yet
os.makedirs(output_dir, exist_ok=True)

# Refuse to clobber an existing result file unless `overwrite` is set
if os.path.isfile(save_file) and not overwrite:
    sys.exit(f'{file_date_str} data already exists.')

# One node per (threshold, radius) pair of the evaluation grid
num_nodes = len(PERCENTS_OF_PEAK)*len(MORPH_RADII)
print(f'Evaluating outcomes for {num_nodes} nodes and {num_maps} dates')

# ~1min/step
# Rows of the grid follow MORPH_RADII, columns follow PERCENTS_OF_PEAK
X, Y = np.meshgrid(PERCENTS_OF_PEAK, MORPH_RADII)
outcome_array = get_vect_outcomes_v0_5_1(X, Y)

np.save(save_file, outcome_array, allow_pickle=True)

### Area

In [None]:
# Filled contour map of the median detected area percentage (layer 0 of the
# saved outcome array) over the threshold/radius design grid.
area_percent_median_array = np.load(save_file, allow_pickle=True)[:,:,0]
upper_level = np.ceil(np.max(area_percent_median_array))
# levels = [0, 0.1, 0.5]
# levels.extend(list(np.linspace()))
# Contour levels every 2 units from 0 up to at least the rounded-up maximum
step = 2
levels = np.arange(0,upper_level + step,step)

plt.figure(figsize=(12,8))
plt.contourf(PERCENTS_OF_PEAK, MORPH_RADII,
             area_percent_median_array, levels, cmap='plasma')
plt.colorbar()

plt.title('Median Detected Area Percentage (%)')
plt.suptitle(DATE_RANGE_SUPTITLE)
plt.xlabel('Threshold Relative to Mode (%)')
# plt.ylabel('SE Disk Radius (px)')
plt.ylabel('SE Disk Radius (Mm)')
plt.savefig(f'{output_dir}Area_fill_{file_date_str}.jpg')

In [None]:
# Labelled contour lines of median detected area, with the selected design
# points overlaid as black markers. Exactly one design set below should be
# left uncommented.

# v0.5.1 Design
percent_of_peak_design = [70, 70, 80, 90]
morph_radius_design = [   15, 17, 13, 13] # Mm

# # v0.5.1 KPVT Design
# percent_of_peak_design = [80, 80, 90, 100]
# morph_radius_design = [   15, 17, 13, 13] # Mm

# # vY Design 1
# percent_of_peak_design = [62, 68, 73, 80]
# morph_radius_design = [   11, 13,  8, 10]

# # vY Design 2
# percent_of_peak_design = [85, 73, 95, 85]
# morph_radius_design = [   10, 14, 10, 14]

area_percent_median_array = np.load(save_file, allow_pickle=True)[:,:,0]
upper_level = np.ceil(np.max(area_percent_median_array))
# Contour levels: unit steps from 0 to 5, then steps of 2 from 6 upward
step = 1
levels_1 = np.arange(0,5 + step,step)
step = 2
levels_2 = np.arange(6,upper_level + step,step)
levels = np.hstack((levels_1, levels_2))

plt.figure(figsize=(10,8))
cp = plt.contour(PERCENTS_OF_PEAK, MORPH_RADII,
                 area_percent_median_array, levels, cmap='plasma')
plt.clabel(cp, fontsize=14)

plt.title('Median Detected Area Percentage (%)')
plt.suptitle(DATE_RANGE_SUPTITLE)
plt.xlabel('Threshold Relative to Mode (%)')
# plt.ylabel('SE Disk Radius (px)')
plt.ylabel('SE Disk Radius (Mm)')
# plt.savefig(f'{output_dir}Area_{file_date_str}.jpg')

# Mark the chosen designs on top of the contours before saving
plt.scatter(percent_of_peak_design, morph_radius_design, color='k')
plt.savefig(f'{output_dir}Area_Point_{file_date_str}.jpg')

### Robustness

In [None]:
# Sensitivity of the median detected area to the threshold design variable.
area_percent_median_array = np.load(save_file, allow_pickle=True)[:,:,0]
# Forward finite difference along the threshold axis (columns), scaled by
# the threshold grid spacing p_step
d_area_d_thresh = np.diff(area_percent_median_array, axis=1)/p_step

plt.figure(figsize=(12,8))
# The difference has one column fewer than the grid, hence [:-1]
plt.contourf(PERCENTS_OF_PEAK[:-1], MORPH_RADII,
             d_area_d_thresh, cmap='bone')
plt.colorbar()

# Raw string: keeps the LaTeX backslashes without invalid-escape warnings
plt.title(r'$\partial(Median\ Area)/\partial(Threshold)$ (%/%)')
plt.suptitle(DATE_RANGE_SUPTITLE)
plt.xlabel('Threshold Relative to Mode (%)')
# plt.ylabel('SE Disk Radius (px)')
plt.ylabel('SE Disk Radius (Mm)')
plt.savefig(f'{output_dir}Partial_Thresh_{file_date_str}.jpg')

In [None]:
# Sensitivity of the median detected area to the SE disk radius.
area_percent_median_array = np.load(save_file, allow_pickle=True)[:,:,0]
# Forward finite difference along the radius axis (rows), scaled by the
# radius grid spacing r_step
d_area_d_radius = np.diff(area_percent_median_array, axis=0)/r_step

plt.figure(figsize=(12,8))
# The difference has one row fewer than the grid, hence [:-1]
plt.contourf(PERCENTS_OF_PEAK, MORPH_RADII[:-1],
             d_area_d_radius, cmap='bone')
plt.colorbar()

# Raw string: keeps the LaTeX backslashes without invalid-escape warnings.
# Unit matches the radius axis below (Mm).
plt.title(r'$\partial(Median\ Area)/\partial(Radius)$ (%/Mm)')
plt.suptitle(DATE_RANGE_SUPTITLE)
plt.xlabel('Threshold Relative to Mode (%)')
# plt.ylabel('SE Disk Radius (px)')
plt.ylabel('SE Disk Radius (Mm)')
plt.savefig(f'{output_dir}Partial_Radius_{file_date_str}.jpg')

In [None]:
# Spot check: radius sensitivity at radius 10 and threshold 115.
# d_area_d_radius has one row fewer than MORPH_RADII, so the last radius on
# the grid cannot be looked up this way.
d_area_d_radius[np.where(MORPH_RADII == 10), np.where(PERCENTS_OF_PEAK == 115)]

### Persistence

In [None]:
# Not available in updated versions
# Layer 2 holds the normalized MAD only in files written by get_outcomes;
# get_outcomes_v0_5_1 stores the median fractal dimension in that layer.
norm_mad_array = np.load(save_file, allow_pickle=True)[:,:,2]
upper_level = np.ceil(np.max(norm_mad_array))
# Fixed low levels followed by 7 evenly spaced levels from 15 to the maximum
levels = [0, 1, 10]
levels.extend(list(np.linspace(15,upper_level,7)))

plt.figure(figsize=(12,8))
plt.contourf(PERCENTS_OF_PEAK, MORPH_RADII, norm_mad_array, levels)
plt.colorbar()

plt.title('Normalized MAD of Detected Area (%)')
plt.suptitle(DATE_RANGE_SUPTITLE)
plt.xlabel('Threshold Relative to Mode (%)')
plt.ylabel('SE Disk Radius (px)')
plt.savefig(f'{output_dir}Norm_MAD_{file_date_str}.jpg')

In [None]:
# Layer 1 of the outcome array: autocorrelation of the detected area series
autocorr_array = np.load(save_file, allow_pickle=True)[:,:,1]

plt.figure(figsize=(12,8))
# Plotted with flipped sign, so lower values mean higher autocorrelation
plt.contourf(PERCENTS_OF_PEAK, MORPH_RADII, -autocorr_array)
plt.colorbar()

plt.title('Negative Autocorrelation')
plt.suptitle(DATE_RANGE_SUPTITLE)
plt.xlabel('Threshold Relative to Mode (%)')
# plt.ylabel('SE Disk Radius (px)')
plt.ylabel('SE Disk Radius (Mm)')
plt.savefig(f'{output_dir}Autocorr_{file_date_str}.jpg')

### Boundary Complexity

In [None]:
# Layer 2 of outcome files written by get_outcomes_v0_5_1: median fractal
# dimension of the detected boundaries
fractal_D_bound_median_array = np.load(save_file, allow_pickle=True)[:,:,2]
# fractal_D_bound_median_array = np.load(save_file, allow_pickle=True)[:,:,3]
# Level range rounded outward to one decimal, ignoring NaN entries
lower_level = np.floor(np.nanmin(fractal_D_bound_median_array)*10)/10
upper_level = np.ceil(np.nanmax(fractal_D_bound_median_array)*10)/10
step = 0.025
levels = np.arange(lower_level, upper_level + step, step)

plt.figure(figsize=(12,8))
plt.contourf(PERCENTS_OF_PEAK, MORPH_RADII, fractal_D_bound_median_array, levels)
plt.colorbar()

plt.title('Fractal Dimension')
plt.suptitle(DATE_RANGE_SUPTITLE)
plt.xlabel('Threshold Relative to Mode (%)')
plt.ylabel('SE Disk Radius (Mm)')
plt.savefig(f'{output_dir}Fractal_Dim_{file_date_str}.jpg')

In [None]:
# Interpolate the fractal dimension map over the (radius, threshold) grid and
# evaluate it at radius 8, threshold 110. Axis order is (rows, columns) of the
# outcome array, i.e. radius first.
f = interpolate.RegularGridInterpolator(
    (MORPH_RADII, PERCENTS_OF_PEAK), fractal_D_bound_median_array
)
f((8,110))

In [None]:
# Needs verification
# Fractal dimension relative to the mean of all designs with similar median
# area: interpolate both maps onto a fine grid, bin the grid points by area,
# and subtract each bin's mean fractal dimension.
interp_points = 500
num_bins = 17

# Interpolators take points as (radius, threshold), matching the array axes
area_interp = interpolate.RegularGridInterpolator(
    (MORPH_RADII, PERCENTS_OF_PEAK), area_percent_median_array
)
fractal_D_interp = interpolate.RegularGridInterpolator(
    (MORPH_RADII, PERCENTS_OF_PEAK), fractal_D_bound_median_array
)

interp_percents_of_peak = np.linspace(np.min(PERCENTS_OF_PEAK), np.max(PERCENTS_OF_PEAK), interp_points)
interp_morph_radii = np.linspace(np.min(MORPH_RADII), np.max(MORPH_RADII), interp_points)
XX, YY = np.meshgrid(interp_percents_of_peak, interp_morph_radii)

interpolated_area = area_interp((YY, XX))
interpolated_fractal_D = fractal_D_interp((YY, XX))

# Zero-based area bin index of every fine-grid point
bins = np.linspace(np.min(area_percent_median_array), np.max(area_percent_median_array), num_bins)
binned_idx_array = np.digitize(interpolated_area, bins) - 1

X = np.zeros_like(binned_idx_array, dtype=float)

for bin_num in range(num_bins):
    bin_idxs = np.where(binned_idx_array == bin_num)
    bin_fractal_D = interpolated_fractal_D[bin_idxs]

    # Skip bins that contain no grid points (their entries stay at zero)
    if bin_fractal_D.size == 0:
        continue

    X[bin_idxs] = bin_fractal_D - np.mean(bin_fractal_D)

# Symmetric level range so zero difference sits at the colormap midpoint
bound = max([np.max(X), np.abs(np.min(X))])
levels = np.linspace(-bound, bound, 15)

plt.figure(figsize=(12,8))
plt.contourf(interp_percents_of_peak, interp_morph_radii, X,
             levels, cmap='RdBu_r')
plt.colorbar()

plt.title('Binned Difference Fractal Boundary Dimension')
plt.suptitle(DATE_RANGE_SUPTITLE)
plt.xlabel('Threshold Relative to Mode (%)')
plt.ylabel('SE Disk Radius (Mm)')

### Objective Functions

v0.5.1

In [None]:
def obj_func_v0_5_1(norm_mad_array, area_percent_median_array, mu, M):
    """Evaluate the v0.5.1 objective function.

    The objective is the normalized MAD plus a penalty made of a weighted
    squared deviation of the median area from its target and the
    reciprocal of the median area.

    Args
        norm_mad_array: normalized MAD of detected area
        area_percent_median_array: median detected area percentage
        mu: weight of the squared area deviation term
        M: target median area percentage
    Returns
        Objective function values, elementwise over the inputs.
    """
    target_deviation_term = mu*(area_percent_median_array - M)**2
    small_area_term = 1/area_percent_median_array
    return norm_mad_array + (target_deviation_term + small_area_term)

def get_optim_vars(mu, M):
    """Locate the design that minimizes the v0.5.1 objective function.

    Reads the module-level NORM_MAD_ARRAY and AREA_PERCENT_MEDIAN_ARRAY
    outcome maps, whose rows follow MORPH_RADII and whose columns follow
    PERCENTS_OF_PEAK.

    Args
        mu: weight of the squared area deviation term
        M: target median area percentage
    Returns
        Array of [optimal percent of peak, optimal morphological radius].
    """
    # Uses the v0.5.1 objective defined alongside this function
    obj_func_array = obj_func_v0_5_1(
        NORM_MAD_ARRAY, AREA_PERCENT_MEDIAN_ARRAY, mu, M
    )
    # Row index selects the radius, column index selects the threshold
    radius_idx, percent_idx = np.unravel_index(
        np.argmin(obj_func_array), obj_func_array.shape
    )
    return np.array([PERCENTS_OF_PEAK[percent_idx], MORPH_RADII[radius_idx]])

# Vectorized wrapper: broadcasts get_optim_vars over arrays of (mu, M); the
# result gains a trailing axis of size 2 (optimal threshold, optimal radius).
get_vect_optim_vars = np.vectorize(get_optim_vars, signature='(),()->(2)')

In [None]:
# Area penalty weight
mu = 1

# Target median area
M = 3

area_percent_median_array = np.load(save_file, allow_pickle=True)[:,:,0]
norm_mad_array = np.load(save_file, allow_pickle=True)[:,:,2]
obj_func_array = obj_func_v0_5_1(norm_mad_array, area_percent_median_array, mu, M)

# Log-spaced contour levels spanning the finite objective values
# (the reciprocal area term is infinite wherever the median area is zero)
z = obj_func_array[~np.isinf(obj_func_array)]
lev_exp = np.linspace(
    np.log10(z.min()), np.log10(z.max()), 10
)
levels = np.power(10, lev_exp)

plt.figure(figsize=(12,8))
cs = plt.contourf(PERCENTS_OF_PEAK, MORPH_RADII, obj_func_array, levels,
                  norm=colors.LogNorm(), cmap='gray')
plt.colorbar(cs, format=ticker.FuncFormatter(lambda x, pos: f'{x:.1f}'))

plt.suptitle(DATE_RANGE_SUPTITLE)
# Raw f-string: keeps the LaTeX backslash without an invalid-escape warning
plt.title(rf'Objective Function | $\mu$: {mu} $M$: {M}%')
plt.xlabel('Threshold Relative to Mode (%)')
plt.ylabel('SE Disk Radius (px)')
plt.savefig(f'{output_dir}Obj_Func_Mu{mu}_M{M}_{file_date_str}.jpg')

In [None]:
# Spot check: objective function value at radius 20 and threshold 115
obj_func_array[np.where(MORPH_RADII == 20), np.where(PERCENTS_OF_PEAK == 115)]

In [None]:
# Sweep the penalty weight and target area, and count how often each design
# is the minimizer of the v0.5.1 objective function.
mu_step = 5
M_step = 0.1
mu = np.arange(0, 500 + mu_step, mu_step)
M = np.arange(2, 6 + M_step, M_step)


X, Y = np.meshgrid(mu, M)
AREA_PERCENT_MEDIAN_ARRAY = np.load(save_file, allow_pickle=True)[:,:,0]
# Normalized MAD is layer 2 of files written by get_outcomes, consistent with
# the single-design objective function cell (layer 1 is the autocorrelation)
NORM_MAD_ARRAY = np.load(save_file, allow_pickle=True)[:,:,2]

optim_var_array = get_vect_optim_vars(X, Y)
# Flatten the (M, mu) grid into a list of (threshold, radius) pairs
optim_var_pairs = optim_var_array.reshape((np.prod(optim_var_array.shape[:2]), 2))
unique_optim_var_pairs, optim_counts = np.unique(
    optim_var_pairs,axis=0, return_counts=True
)

plt.figure(figsize=(12,8))
# Marker color encodes how many (mu, M) combinations selected each design
plt.scatter(unique_optim_var_pairs[:,0], unique_optim_var_pairs[:,1], s=250,
            c=optim_counts, cmap='turbo')
plt.colorbar()
plt.xticks(PERCENTS_OF_PEAK)
plt.yticks(MORPH_RADII)

plt.suptitle(DATE_RANGE_SUPTITLE)
# Raw string: keeps the LaTeX backslashes without invalid-escape warnings
plt.title(r'Optimal Design Variables with Varied $\mu,\ M$')
plt.xlabel('Threshold Relative to Mode (%)')
plt.ylabel('SE Disk Radius (px)')

In [None]:
# Optimal threshold (first component of the optimum) over the (mu, M) sweep
plt.figure(figsize=(12,8))
plt.contourf(mu, M, optim_var_array[:,:,0], cmap='gray')
plt.colorbar()

plt.suptitle(DATE_RANGE_SUPTITLE)
plt.title('Optimal Threshold Relative to Mode (%)')
# \mu renders the Greek letter, matching the objective function plot titles
plt.xlabel(r'Penalty Weight $\mu$')
plt.ylabel('Target Median Area Percentage $M$ (%)')

v0.5.2

In [None]:
def obj_func_v0_5_2(autocorr_array, area_percent_median_array, mu, M):
    """Evaluate the v0.5.2 objective function.

    The objective is the negated autocorrelation plus a weighted squared
    deviation of the median area from its target.

    Args
        autocorr_array: autocorrelation of the detected area time series
        area_percent_median_array: median detected area percentage
        mu: weight of the squared area deviation term
        M: target median area percentage
    Returns
        Objective function values, elementwise over the inputs.
    """
    area_deviation = area_percent_median_array - M
    area_penalty = mu*area_deviation**2
    return -autocorr_array + area_penalty

In [None]:
# Target median area
M = 3

output_dir = DETECTION_IMAGE_DIR + 'Outcome_Maps/Obj_Func_v0_5_2/'
os.makedirs(output_dir, exist_ok=True)

area_percent_median_array = np.load(save_file, allow_pickle=True)[:,:,0]
autocorr_array = np.load(save_file, allow_pickle=True)[:,:,1]

# Area penalty weight: 20 log-spaced values from 1e-5 to 1, one figure each
for mu in np.logspace(-5,0,20):
    obj_func_array = obj_func_v0_5_2(autocorr_array, area_percent_median_array, mu, M)
    # Shift so the minimum is 0.1; the log color scale needs positive values
    if np.min(obj_func_array) < 0:
        obj_func_array = obj_func_array + np.abs(np.min(obj_func_array)) + 0.1

    # Log-spaced contour levels spanning the finite objective values
    z = obj_func_array[~np.isinf(obj_func_array)]
    lev_exp = np.linspace(
        np.log10(z.min()), np.log10(z.max()), 10
    )
    levels = np.power(10, lev_exp)

    fig = plt.figure(figsize=(12,8))

    cs = plt.contourf(PERCENTS_OF_PEAK, MORPH_RADII, obj_func_array, levels,
                      norm=colors.LogNorm(), cmap='gray')
    plt.colorbar(cs, format=ticker.FuncFormatter(lambda x, pos: f'{x:.1f}'))

    plt.suptitle(DATE_RANGE_SUPTITLE)
    # Raw f-string: keeps the LaTeX backslash without an invalid-escape warning
    plt.title(rf'Objective Function | $\mu$: {mu:.2e} $M$: {M}%')
    plt.xlabel('Threshold Relative to Mode (%)')
    plt.ylabel('SE Disk Radius (px)')
    # Dots in the file stem are replaced so only the extension contains one
    plt.savefig(f'{output_dir}Mu{mu:.6f}_M{M}_{file_date_str}'.replace('.','_') + '.jpg')
    # Close each figure so the loop does not accumulate open figures
    plt.close(fig)

    print(f'Mu: {mu:.2e} saved.')

In [None]:
# NOTE(review): presumably assembles the images saved in output_dir into a
# video at 8 frames per second — confirm against detect.write_ensemble_video.
detect.write_ensemble_video(output_dir, fps=8)

### v0.1 Heat Maps

#### Create CH Masks

In [None]:
# Path of the saved single-date CH mask list. he_date_str must already be set
# by an earlier cell. This rebinds save_file, which the outcome-map cells
# above also use.
save_file = f'{DETECTION_IMAGE_DIR}Outcome_Maps/{he_date_str}_maps.npy'

Create CH Mask List (Expensive computation)

In [None]:
# Lower threshold accepts more and lets morphology carry the load in selection and removal
thresh_step = 5
radius_step = 1
percent_of_peak_list = list(np.arange(70,106,thresh_step))
morph_radius_list = list(np.arange(6,21,radius_step))

# CH masks for every (threshold, radius) combination, threshold-major. This
# ordering is what title_list and the reshape in plot_heat_map expect; zip
# would only pair the two lists element by element and stop at the shorter.
all_ch_mask_list = [
    detect.get_ch_mask(pre_processed_map_data, percent_of_peak, morph_radius)
    for percent_of_peak in percent_of_peak_list
    for morph_radius in morph_radius_list
]

# Saved as an object array: [date string, thresholds, radii, masks]
save_list = [he_date_str, percent_of_peak_list, morph_radius_list, all_ch_mask_list]
np.save(save_file, np.array(save_list, dtype=object), allow_pickle=True)

Load CH Mask List

In [None]:
# Restore the saved entries in the order they were written:
# date string, thresholds, radii, masks
save_list = np.load(save_file, allow_pickle=True)
(date_str, percent_of_peak_list,
 morph_radius_list, all_ch_mask_list) = save_list[:4]

In [None]:
# One title per (threshold, radius) combination, threshold-major order
title_list = [f'{percent_of_peak:d}% of Peak | {radius:d}px Radius'
              for percent_of_peak in percent_of_peak_list
              for radius in morph_radius_list]

In [None]:
def plot_heat_map(outcome_list, title, percent_of_peak_list, morph_radius_list,
                  color_scale='Magma'):
    """Show a heat map of one outcome over the design variable grid.

    Args
        outcome_list: flat list of outcomes, ordered threshold-major
            (all radii for the first threshold, then the next, ...)
        title: figure title
        percent_of_peak_list: list of threshold levels (x axis)
        morph_radius_list: list of SE disk radii (y axis)
        color_scale: name of the continuous color scale
    """
    # NOTE(review): `px` is presumably plotly.express; it is not among the
    # imports of the first notebook cell — confirm it is imported elsewhere.

    # Radii in descending order to label the vertically flipped map
    y_axis_list = morph_radius_list[::-1]

    # Arrange outcomes as (radius, threshold), then flip the radius axis
    grid_shape = (len(percent_of_peak_list), len(morph_radius_list))
    outcome_grid = np.reshape(outcome_list, grid_shape)
    outcome_map = np.flipud(outcome_grid.T)

    axis_labels = dict(x='Threshold Level as Percent of Peak (%)',
                       y='SE Disk Radius (px)')
    fig = px.imshow(outcome_map, labels=axis_labels,
                    x=percent_of_peak_list, y=y_axis_list,
                    aspect='auto', color_continuous_scale=color_scale)
    fig.update_layout(title=title, width=700)
    fig.show()
    
    
def plot_heat_map_band(outcome_list, heat_map_title, lower_bound, upper_bound,
                       percent_of_peak_list, morph_radius_list,
                       array, all_ch_masks_list, title_list, color_scale='Magma'):
    """Highlight designs whose outcome lies in a band and show their masks.

    Plots the outcome heat map with in-band entries raised to twice the
    maximum outcome, then draws each in-band mask as contours over the
    image, one panel per mask in a single row.

    Args
        outcome_list: flat list of outcomes, one per mask
        heat_map_title: title of the heat map
        lower_bound: inclusive lower bound of the outcome band
        upper_bound: inclusive upper bound of the outcome band
        percent_of_peak_list: list of threshold levels
        morph_radius_list: list of SE disk radii
        array: image shown beneath the mask contours
        all_ch_masks_list: list of masks, indexed like outcome_list
        title_list: list of panel titles, indexed like outcome_list
        color_scale: name of the heat map color scale
    """
    # Work on a copy so the caller's outcomes are left untouched
    highlighted_outcomes = outcome_list.copy()

    idx_list = [
        idx for idx, outcome in enumerate(highlighted_outcomes)
        if lower_bound <= outcome <= upper_bound
    ]
    num_ch_masks = len(idx_list)

    if num_ch_masks == 0:
        print('No masks in outcome range')
        return

    # Raise in-band entries above every other value so they stand out
    highlight_value = 2*max(highlighted_outcomes)
    for idx in idx_list:
        highlighted_outcomes[idx] = highlight_value

    plot_heat_map(highlighted_outcomes, heat_map_title,
                  percent_of_peak_list, morph_radius_list, color_scale)

    # squeeze=False always yields an axes array, so a single mask needs
    # no special case
    _, axes = plt.subplots(nrows=1, ncols=num_ch_masks,
                           figsize=(6*num_ch_masks, 6), squeeze=False)
    for panel, outcome_idx in zip(axes.ravel(), idx_list):
        panel.imshow(array, cmap=plt.cm.afmhot)
        panel.contour(all_ch_masks_list[outcome_idx], linewidths=0.5,
                      cmap=plt.cm.gray)
        panel.set_title(title_list[outcome_idx], fontsize=18)

#### Pixel Percentage

In [None]:
# One pixel-percentage outcome per CH mask (computed by the detect module)
area_percent_list = detect.get_px_percent_list(all_ch_mask_list)

In [None]:
# Heat map of segmented area percentage over the design grid
heat_map_title = f'{date_str} Segmented Area Percentage'

plot_heat_map(
    area_percent_list, heat_map_title, percent_of_peak_list, morph_radius_list
)

In [None]:
# Distribution of segmented area percentages across the evaluated masks
fig = plt.figure(figsize=(12, 7))
ax = fig.add_subplot()

# NOTE(review): the last entry of area_percent_list is excluded here —
# confirm this is intentional.
ax.hist(area_percent_list[:-1], bins=30)
ax.set_title(f'{date_str} Segmented Area Bins', fontsize=20)
ax.set_xlabel('Area Percentage', fontsize=18)
ax.set_ylabel('Number of Masks in Area Bin', fontsize=18)

Heat Map Bands

In [None]:
# Highlight designs with 16-18% segmented area and show their masks.
# NOTE(review): `he` is defined outside this section; presumably the He I
# image array — confirm.
lower_bound = 16
upper_bound = 18

heat_map_title = f'{date_str} {lower_bound}-{upper_bound}% Segmented Area'

plot_heat_map_band(
    area_percent_list, heat_map_title, lower_bound, upper_bound,
    percent_of_peak_list, morph_radius_list, 
    he, all_ch_mask_list, title_list
)

In [None]:
# Highlight designs with 6.8-7.3% segmented area and show their masks
lower_bound = 6.8
upper_bound = 7.3

heat_map_title = f'{date_str} {lower_bound}-{upper_bound}% Segmented Area'

plot_heat_map_band(
    area_percent_list, heat_map_title, lower_bound, upper_bound,
    percent_of_peak_list, morph_radius_list, 
    he, all_ch_mask_list, title_list
)

In [None]:
# Highlight designs with 5.4-6% segmented area and show their masks
lower_bound = 5.4
upper_bound = 6

heat_map_title = f'{date_str} {lower_bound}-{upper_bound}% Segmented Area'

plot_heat_map_band(
    area_percent_list, heat_map_title, lower_bound, upper_bound,
    percent_of_peak_list, morph_radius_list, 
    he, all_ch_mask_list, title_list
)

In [None]:
# Highlight designs with 3.2-3.3% segmented area and show their masks
lower_bound = 3.2
upper_bound = 3.3

heat_map_title = f'{date_str} {lower_bound}-{upper_bound}% Segmented Area'

plot_heat_map_band(
    area_percent_list, heat_map_title, lower_bound, upper_bound,
    percent_of_peak_list, morph_radius_list, 
    he, all_ch_mask_list, title_list
)

#### CH Number

In [None]:
# One CH-count outcome per CH mask (computed by the detect module)
num_ch_list = detect.get_num_CH_list(all_ch_mask_list)

In [None]:
# Heat map of the number of segmented holes over the design grid
heat_map_title = f'{date_str} Segmented Hole Number'
color_scale = 'Aggrnyl'

plot_heat_map(
    num_ch_list, heat_map_title, percent_of_peak_list, morph_radius_list, color_scale
)

In [None]:
# Distribution of the number of segmented holes across the evaluated masks
fig = plt.figure(figsize=(12, 7))
ax = fig.add_subplot()

# Counts outside 0-25 are not shown
ax.hist(num_ch_list, bins=30, range=(0,25))
ax.set_title(f'{date_str} Segmented Hole Number Bins', fontsize=20)
ax.set_xlabel('Segmented Hole Number', fontsize=18)
ax.set_ylabel('Number of Masks in Hole Number Bin', fontsize=18)

Heat Map Bands

In [None]:
# Highlight designs that segment exactly 17 holes and show their masks
lower_bound = 17
upper_bound = 17

heat_map_title = f'{date_str} {lower_bound}-{upper_bound} Segmented Holes'
color_scale = 'Aggrnyl'

plot_heat_map_band(
    num_ch_list, heat_map_title, lower_bound, upper_bound,
    percent_of_peak_list, morph_radius_list, 
    he, all_ch_mask_list, title_list, color_scale
)

In [None]:
# Highlight designs that segment exactly 11 holes and show their masks
lower_bound = 11
upper_bound = 11

heat_map_title = f'{date_str} {lower_bound}-{upper_bound} Segmented Holes'
color_scale = 'Aggrnyl'

plot_heat_map_band(
    num_ch_list, heat_map_title, lower_bound, upper_bound,
    percent_of_peak_list, morph_radius_list, 
    he, all_ch_mask_list, title_list, color_scale
)

In [None]:
# Highlight designs that segment exactly 9 holes and show their masks
lower_bound = 9
upper_bound = 9

heat_map_title = f'{date_str} {lower_bound}-{upper_bound} Segmented Holes'
color_scale = 'Aggrnyl'

plot_heat_map_band(
    num_ch_list, heat_map_title, lower_bound, upper_bound,
    percent_of_peak_list, morph_radius_list, 
    he, all_ch_mask_list, title_list, color_scale
)

#### Lower Tail Width

In [None]:
def get_ch_lower_tail_width_list(array, ch_mask_list):
    """Retrieve the average of the histogram lower tail width across CHs
    for each segmentation in a list.

    Args
        array: image to process
        ch_mask_list: binary coronal holes mask list
    Returns
        List of mean histogram tail widths of CHs detected in segmentations,
            one entry per mask.
    """
    # List of average lower tail widths across all CHs of each segmentation
    lower_tail_width_list = []

    # Single pass: each mask is labeled and measured before moving on, so
    # only one labeled mask is held in memory at a time
    for ch_mask in ch_mask_list:
        labeled_ch_mask = ndimage.label(ch_mask)[0]
        num_ch = get_num_ch(ch_mask)

        map_data_by_ch = get_map_data_by_ch(array, labeled_ch_mask, num_ch)
        ch_mask_lower_tail_width_list = get_ch_lower_tail_widths(map_data_by_ch)

        lower_tail_width_list.append(np.mean(ch_mask_lower_tail_width_list))

    return lower_tail_width_list

In [None]:
# One mean lower-tail-width outcome per CH mask. This calls the detect
# module's implementation, not the notebook-level function of the same name.
lower_tail_width_list = detect.get_ch_lower_tail_width_list(he, all_ch_mask_list)

In [None]:
# Heat map of the mean CH histogram tail width over the design grid
heat_map_title = f'{date_str} Mean CH Tail Width'
color_scale = 'ice'

plot_heat_map(
    lower_tail_width_list, heat_map_title, percent_of_peak_list, morph_radius_list, color_scale
)

In [None]:
# Distribution of mean CH tail widths across the evaluated masks
fig = plt.figure(figsize=(12, 7))
ax = fig.add_subplot()

# Widths outside 10-40 are not shown
ax.hist(lower_tail_width_list, bins=30, range=(10,40))
ax.set_title(f'{date_str} Mean CH Tail Width Bins', fontsize=20)
ax.set_xlabel('Mean CH Tail Width', fontsize=18)
ax.set_ylabel('Number of Masks in Tail Width Bin', fontsize=18)

Heat Map Bands

In [None]:
# Highlight designs with mean CH tail width in 29-29.5 and show their masks
lower_bound = 29
upper_bound = 29.5

heat_map_title = f'{date_str} {lower_bound}-{upper_bound} Mean CH Tail Width'
color_scale = 'ice'

plot_heat_map_band(
    lower_tail_width_list, heat_map_title, lower_bound, upper_bound,
    percent_of_peak_list, morph_radius_list, 
    he, all_ch_mask_list, title_list, color_scale
)

In [None]:
# Highlight designs with mean CH tail width in 24.5-25 and show their masks
lower_bound = 24.5
upper_bound = 25

heat_map_title = f'{date_str} {lower_bound}-{upper_bound} Mean CH Tail Width'
color_scale = 'ice'

plot_heat_map_band(
    lower_tail_width_list, heat_map_title, lower_bound, upper_bound,
    percent_of_peak_list, morph_radius_list, 
    he, all_ch_mask_list, title_list, color_scale
)

#### Multiplied

In [None]:
# Heat map of the elementwise product of the three per-mask outcomes
# (area percentage, hole count, mean tail width)
heat_map_title = f'{date_str} Multiplied Metrics'
color_scale = 'dense_r'

outcome_list = list(np.array(area_percent_list)*np.array(num_ch_list)*np.array(lower_tail_width_list))

plot_heat_map(
    outcome_list, heat_map_title, percent_of_peak_list, morph_radius_list, color_scale
)

# Multi-Date Outcomes

## Confidence Histograms

Histograms of candidate-region outcomes, stratified by confidence level, as in Figure 4 of the paper.

v0.5.1+

Requires setting DETECTION_VERSION_DIR = DETECT_DIR + 'v0_5_1_No_Thresh/'

In [None]:
def get_outcomes_by_all_date_ch_v0_5_1(cl_list):
    """Collect per-CH outcomes from the ensemble maps of all datetimes
    at the specified confidence levels.

    For every date in HE_DATE_LIST, the saved ensemble map, the He I
    observation and the reprojected magnetogram are loaded and passed to
    detect.get_outcomes_by_ch once per confidence level.

    Args
        cl_list: list of float confidence levels at which
            to threshold ensemble maps for computing outcomes
    Returns
        Nested dictionary keyed by outcome (detect.OUTCOME_KEY_LIST), then
            by confidence level, holding a list of that outcome for every
            CH found across all dates.
    """
    # One empty list per outcome and confidence level
    outcomes_by_all_ch_dict = {
        outcome_key: {cl: [] for cl in cl_list}
        for outcome_key in detect.OUTCOME_KEY_LIST
    }

    for he_date_str in HE_DATE_LIST:

        # Saved ensemble map for this date
        ensemble_map = sunpy.map.Map(
            f'{DETECTION_MAP_SAVE_DIR}{he_date_str}_ensemble_map.fits'
        )

        # He I observation data, flipped vertically
        he_fits_file = DATA_FITS_FORMAT.format(
            data_dir=HE_DIR, date_str=he_date_str
        )
        he_map_data = np.flipud(
            prepare_data.get_nso_sunpy_map(he_fits_file).data
        )

        # Reprojected magnetogram saved for the nearest magnetogram date
        mag_date_str = prepare_data.get_nearest_date_str(
            MAG_DATE_LIST, selected_date_str=he_date_str
        )
        reprojected_mag_map = sunpy.map.Map(
            f'{ROTATED_MAG_SAVE_DIR}Mag{mag_date_str}_He{he_date_str}.fits'
        )

        # Outcomes per CH for this date, one dictionary per confidence level
        outcome_by_ch_dict_by_cl = [
            detect.get_outcomes_by_ch(ensemble_map, he_map_data,
                                      reprojected_mag_map, cl)
            for cl in cl_list
        ]

        # Append this date's CHs to the running lists for each level
        for cl, outcome_by_ch_dict in zip(cl_list, outcome_by_ch_dict_by_cl):
            for outcome_key in detect.OUTCOME_KEY_LIST:
                outcomes_by_all_ch_dict[outcome_key][cl].extend(
                    outcome_by_ch_dict[outcome_key]
                )

    return outcomes_by_all_ch_dict

In [None]:
# Confidence levels at which the ensemble maps are thresholded
cl_list = [50, 75, 90]
# cl_list = [0]
# NOTE(review): unipolarity_confidence is not read in this cell; presumably
# consumed by later cells or helpers — confirm.
unipolarity_confidence = True
outcomes_by_all_ch_dict = get_outcomes_by_all_date_ch_v0_5_1(cl_list)

# Number of candidate regions in 1st confidence level
len(outcomes_by_all_ch_dict[detect.OUTCOME_KEY_LIST[0]][cl_list[0]])

# Number of total candidate regions, set cl_list=[0]
# len(outcomes_by_all_ch_dict[detect.OUTCOME_KEY_LIST[0]][0])

Prior to v0.5.1

In [None]:
def get_outcomes_by_all_date_ch(cl_list):
    """Collect per-CH outcomes over all He I dates at several confidence levels.

    For every date in HE_DATE_LIST the saved ensemble map, the saved
    pre-processed He I map and the saved reprojected magnetogram are loaded
    and passed to detect.get_outcomes_by_ch once per confidence level.
    Dates whose He I observation cannot be read are reported and skipped.

    Args
        cl_list: list of float confidence levels at which
            to threshold ensemble maps for computing outcomes
    Returns
        Tuple of seven dictionaries, in this order:
        (area_dict, lat_dict, lon_dict, unsigned_flux_dict,
        signed_flux_dict, mag_skew_dict, unipolarity_dict).
        Each maps a confidence level from cl_list to a flat list holding
        one value per detected CH, concatenated over all dates.
    """
    # Keys read from detect.get_outcomes_by_ch, in the order returned
    outcome_keys = [
        'area', 'cm_lat', 'cm_lon', 'unsigned_flux',
        'signed_flux', 'mag_skew', 'unipolarity'
    ]
    # One accumulator per outcome, holding a list per confidence level
    outcomes_by_key = {
        key: {cl: [] for cl in cl_list} for key in outcome_keys
    }

    for he_date_str in HE_DATE_LIST:

        # Extract He I observation
        he_file = f'{HE_DIR}{he_date_str}.fts'
        he_map = prepare_data.get_nso_sunpy_map(he_file)
        if not he_map:
            print(f'{he_date_str} He I observation extraction failed.')
            continue

        # Extract saved ensemble map
        # NOTE: allow_pickle=True unpickles arbitrary objects; only load
        # files produced by this project.
        ensemble_file = f'{DETECTION_NPY_SAVE_DIR}{he_date_str}_ensemble_map.npy'
        ensemble_map_data = np.load(ensemble_file, allow_pickle=True)[-1]
        ensemble_map = sunpy.map.Map(np.flipud(ensemble_map_data), he_map.meta)

        # Extract the saved pre-processed map for THIS date. Previously the
        # name was never assigned inside the function, so the call below
        # either raised NameError or reused a stale notebook global that
        # belonged to a different date.
        pre_process_file = (PREPROCESS_NPY_SAVE_DIR + he_date_str
                            + '_pre_processed_map.npy')
        pre_processed_map = np.load(pre_process_file, allow_pickle=True)[-1]

        # Extract saved processed magnetograms
        mag_date_str = prepare_data.get_nearest_date_str(
            MAG_DATE_LIST, selected_date_str=he_date_str
        )
        reprojected_fits_file = (f'{ROTATED_MAG_SAVE_DIR}'
                                 + f'Mag{mag_date_str}_He{he_date_str}.fits')
        reprojected_mag_map = sunpy.map.Map(reprojected_fits_file)

        # Outcomes per CH detected at the given or greater confidence
        # levels, appended to the running per-level lists
        for cl in cl_list:
            outcome_by_ch_dict = detect.get_outcomes_by_ch(
                ensemble_map, pre_processed_map, reprojected_mag_map, cl
            )
            for key in outcome_keys:
                outcomes_by_key[key][cl].extend(outcome_by_ch_dict[key])

    return tuple(outcomes_by_key[key] for key in outcome_keys)


In [None]:
# Pre-v0.5.1 extraction. Note this rebinds cl_list, which the histogram
# cells below also read.
cl_list = [0, 35, 65, 95]
outcome_dicts = get_outcomes_by_all_date_ch(cl_list)

# The function returns seven per-confidence-level dictionaries in this order
(area_dict, lat_dict, lon_dict,
 unsigned_flux_dict, signed_flux_dict,
 mag_skew_dict, unipolarity_dict) = outcome_dicts

### Lat/Lon

Sine Lat/Lon

In [None]:
# Histogram of sin(latitude) or sin(longitude) of CH centers of mass, one
# step outline per confidence level. Exactly one of the two setup blocks
# below (longitude / latitude) is meant to be active at a time.

# Scale factor applied to every figure dimension
sz_factor = 0.6

# Unscaled widths: longitude panel is lon_width wide, latitude panel takes
# the remainder of total_width
total_width = 13.5
lon_width = 8

# # Uncomment for longitude -----------------------------------------------
# lat_or_lon = 'lon'
# outcome_dict = outcomes_by_all_ch_dict['cm_lon']
# plt.figure(figsize=(lon_width*sz_factor,5*sz_factor), dpi=300)
# plt.title('Longitude Histogram')
# plt.ylabel('Number of CH Detections')
# plt.xlabel(r'sin($\phi$)')
# orientation = 'vertical'
# plt.xlim([-1,1])
# plt.ylim([0,50])
# plt.xticks([-1, -0.5, 0, 0.5, 1])
# loc = 'upper left'

# Uncomment for latitude -----------------------------------------------
lat_or_lon = 'lat'
outcome_dict = outcomes_by_all_ch_dict['cm_lat']
plt.figure(
    figsize=((total_width - lon_width)*sz_factor,5*sz_factor), dpi=300
)
plt.title('Latitude Histogram')
plt.ylabel(r'sin($\theta$)')
plt.xlabel('Number of CH Detections')
orientation = 'horizontal'
plt.ylim([-1,1])
plt.xlim([0,70])
plt.yticks([-1, -0.5, 0, 0.5, 1])
loc = 'upper right'

cmap = colormaps['bone_r']
color_list = cmap(np.linspace(0.25, 1, len(cl_list)))

# 16 equal-width bins over [-1, 1]
bins = np.arange(-1,1.01,0.125)
# zip() stops at its shortest input, so at most three confidence levels
# are drawn (one per line style)
for cl, color, linestyle in zip(cl_list, color_list, ['-', '--', '-.']):
    # Angles are converted from degrees before taking the sine
    sine_angle = np.sin(np.deg2rad(outcome_dict[cl]))
    
    if unipolarity_confidence:
        label = fr'$U \geq${cl/100}'
    else:
        label = f'{cl}th % Smoothness'
    
    plt.hist(
        sine_angle, bins, histtype='step',
        color='white', edgecolor=color,
        linestyle=linestyle, linewidth=2,
        orientation=orientation
    )
    # Line drawn only to carry the legend entry: its coordinates (2 to 3)
    # fall outside the [-1, 1] axis range set above, so it is not visible
    plt.plot([2,3], [2,3], color=color, linestyle=linestyle, label=label)
    
# Commented out for cm_lat in paper
# plt.legend(loc=loc)

# NOTE(review): PAPER_PLOT_DIR is assigned in the cell that follows this
# one; unless settings.py also provides it, that cell has to run first.
plt.savefig(
    PAPER_PLOT_DIR + f'center-of-mass-hists-{lat_or_lon}.jpeg',
    bbox_inches='tight'
)

In [None]:
# Output directory for paper figures (relative path).
# NOTE(review): the savefig call in the preceding histogram cell reads this
# name, so this assignment must be executed before that cell.
PAPER_PLOT_DIR = 'paper/paper_plots/2024_08_plots/'

No Sine

In [None]:
# Histogram of CH center-of-mass longitude in degrees (no sine transform),
# one filled histogram per confidence level. The commented block further
# down is the latitude alternative.
outcome_dict = outcomes_by_all_ch_dict['cm_lon']
plt.figure(figsize=(10,6))
plt.title('Longitude Histogram')
plt.ylabel('Number of CH Detections')
plt.xlabel('Longitude (deg)')
orientation = 'vertical'
plt.xlim([-90,90])
plt.ylim([0,50])

# plt.ylim([0,120])

# outcome_dict = outcomes_by_all_ch_dict['cm_lat']
# plt.figure(figsize=(7,6))
# plt.title('Latitude Histogram')
# plt.ylabel('Latitude (deg)')
# plt.xlabel('Number of CH Detections')
# orientation = 'horizontal'
# plt.ylim([-90,90])

cmap = colormaps['bone_r']
color_list = cmap(np.linspace(0.25, 1, len(cl_list)))

plt.suptitle(DATE_RANGE_SUPTITLE)

# 10 degree bins over [-90, 90]
bins = np.arange(-90,90.1,10)
for cl, color in zip(cl_list, color_list):
    if unipolarity_confidence:
        label = f'>= {cl/100} Unipolarity'
    else:
        label = f'{cl}th % Smoothness'
    
    # Histograms are drawn on top of each other in cl_list order
    plt.hist(
        outcome_dict[cl], bins, color=color,
        orientation=orientation, label=label
    )
    
plt.legend()

### Unsigned Flux

In [None]:
# Histogram of unsigned flux per CH candidate on a logarithmic flux axis,
# one filled histogram per confidence level in cl_list.
outcome_dict = outcomes_by_all_ch_dict['unsigned_flux']

# Bin edges are derived from the lowest confidence level that was actually
# computed rather than a hard-coded key 0, which is missing whenever the
# outcomes were extracted with e.g. cl_list = [50, 75, 90]. When level 0 is
# present the result is unchanged.
reference_cl = min(outcome_dict)
bin_min = np.log10(np.min(outcome_dict[reference_cl]))
bin_max = np.ceil(np.log10(np.max(outcome_dict[reference_cl])))
# 25 log-spaced edges between the smallest flux and the next decade above
# the largest flux
bins = 10**(np.linspace(bin_min,bin_max,25))

cmap = colormaps['bone_r']
color_list = cmap(np.linspace(0.25, 1, len(cl_list)))

plt.figure(figsize=(10,6))
plt.xscale('log')
plt.title('Unsigned Flux Histogram')
plt.ylabel('Number of CH Candidates')
plt.xlabel('Unsigned Open Flux (Wb)')

for cl, color in zip(cl_list, color_list):
    if unipolarity_confidence:
        label = f'{cl/100} Unipolarity'
    else:
        label = f'{cl}th % Smoothness'

    # Bars are always vertical here: flux is on the (log) x axis. The
    # previous version passed the `orientation` global left behind by
    # whichever lat/lon histogram cell had run last.
    plt.hist(
        outcome_dict[cl], bins, color=color, label=label
    )

plt.legend()

### Unipolarity

In [None]:
# Unipolarity histograms of all candidates, stratified by lower bounds on
# their smoothness percentile.

# Position of the vertical reference line drawn at the end of the cell
unipolarity_threshold = 0.5
# Lower bounds on smoothness percentile; one histogram per bound
smooth_percentile_bounds = [0, 50, 80]

# Outcomes for all candidates (>=0% confidence)
cl = 0
unipolarity_list = outcomes_by_all_ch_dict['unipolarity'][cl]
grad_median_list = outcomes_by_all_ch_dict['grad_median'][cl]

# Smoothness percentile by candidate
# Mapped in [0,100) and reversed order from gradient median quantifying roughness
smooth_percentiles = 100 - stats.rankdata(grad_median_list)/len(grad_median_list)*100

# Stratify candidate CH unipolarity by percentiles of smoothness
u_by_smooth_pct_dict = {}
    
for smooth_pct_bound in smooth_percentile_bounds:
    
    # Unipolarity of every candidate at or above this smoothness percentile
    candidate_u_above_smooth_pct_list = [
        unipolarity for unipolarity, smooth_percentile
        in zip(unipolarity_list, smooth_percentiles)
        if smooth_percentile >= smooth_pct_bound
    ]
    u_by_smooth_pct_dict[smooth_pct_bound] = candidate_u_above_smooth_pct_list

# Bin width 0.05 over [0, 1]
bins = np.arange(0,1.01,0.05)

cmap = colormaps['bone_r']
color_list = cmap(np.linspace(0.25, 1, len(smooth_percentile_bounds)))

line_styles = ['-', '--', '-.']

plt.figure(figsize=(10,6))
plt.suptitle(DATE_RANGE_SUPTITLE)
plt.title('Unipolarity Histogram')
plt.ylabel('Number of CH Candidates')
plt.xlabel('Unipolarity')
plt.xticks([0,0.25,0.5,0.75,1])
plt.xlim([0,1])
plt.ylim([0,140])

for smooth_pct, color, linestyle in zip(
    smooth_percentile_bounds, color_list, line_styles):
    
    plt.hist(
        u_by_smooth_pct_dict[smooth_pct], bins, histtype='step',
        color='white', edgecolor=color,
        linestyle=linestyle, linewidth=3,
        label=f'{smooth_pct}th % Smoothness'
    )

plt.legend()
# Dashed reference line at the unipolarity threshold; it extends past the
# y-limits set above so it spans the full axes height. The threshold is
# listed twice, so two coincident lines are drawn.
plt.vlines([unipolarity_threshold, unipolarity_threshold], ymin=-10, ymax=150,
           linestyles=['--'], color='k', linewidth=1)

In [None]:
# Key 0 holds every candidate (smoothness percentile >= 0)
num_candidates = len(u_by_smooth_pct_dict[0])

u_among_all_candidates = np.array(u_by_smooth_pct_dict[0])
# Split at 0.5, the same value as unipolarity_threshold in the histogram
# cell; the literal is repeated here rather than read from that variable
num_bipolar_candidates = np.count_nonzero(u_among_all_candidates < 0.5)
num_unipolar_candidates = np.count_nonzero(u_among_all_candidates >= 0.5)

# Displayed as the cell output
f'Bipolar candidate fraction: {num_bipolar_candidates/num_candidates*100:.2f}%'

In [None]:
# Candidates at or above the 50th smoothness percentile
u_among_smooth_candidates = np.array(u_by_smooth_pct_dict[50])

num_bipolar_smooth_candidates = np.count_nonzero(u_among_smooth_candidates < 0.5)
num_unipolar_smooth_candidates = np.count_nonzero(u_among_smooth_candidates >= 0.5)

# NOTE(review): the ratio below is (bipolar AND smooth) / (all bipolar),
# i.e. the smooth share of bipolar candidates, while the label says
# "unsmooth". Either the label or the numerator looks out of date - confirm
# which quantity is intended.
('Bipolar, unsmooth candidate fraction: '
 f'{num_bipolar_smooth_candidates/num_bipolar_candidates*100:.2f}%')

In [None]:
# Share of unipolar candidates (U >= 0.5) that are also at or above the
# 50th smoothness percentile. Label typo 'Unpolar' corrected.
('Unipolar, smooth candidate fraction: '
 f'{num_unipolar_smooth_candidates/num_unipolar_candidates*100:.2f}%')

In [None]:
# Number of candidates below the 50th smoothness percentile: all candidates
# minus the smooth subset. The previous expression referenced
# `u_among_unsmooth_candidates`, a name no cell defines (NameError on a
# fresh kernel); `u_among_smooth_candidates` is the array that exists.
len(u_among_all_candidates) - len(u_among_smooth_candidates)

In [None]:
# Unipolarity histogram per confidence level, using the dictionary unpacked
# from the pre-v0.5.1 extraction (get_outcomes_by_all_date_ch).
outcome_dict = unipolarity_dict

# bins = np.linspace(0,1,25)
# Bin width 0.05 over [0, 1]
bins = np.arange(0,1.01,0.05)

cmap = colormaps['bone_r']
color_list = cmap(np.linspace(0.25, 1, len(cl_list)))

plt.figure(figsize=(10,6))
plt.suptitle(DATE_RANGE_SUPTITLE)
plt.title('Unipolarity Histogram')
plt.ylabel('Number of CH Candidates')
plt.xlabel('Unipolarity')

for cl, color in zip(cl_list, color_list):
    if unipolarity_confidence:
        label = f'{cl/100} Unipolarity'
    else:
        label = f'{cl}th % Smoothness'
    
    # Filled histograms are drawn on top of each other in cl_list order
    plt.hist(
        outcome_dict[cl], bins, color=color, label=label
    )

plt.legend()
plt.xlim([0,1])

## Time Series Outcomes

v0.5.1-1.0

In [None]:
# Build the outcome time series (v0.5.1 onward) for every He I date at the
# confidence levels below; the commented lists are alternative settings.
confidence_level_list = [50, 75, 95]
# confidence_level_list = [1, 50, 75, 95]
# confidence_level_list = [0, 35, 65, 95]
outcome_time_series_dict = detect.get_outcome_time_series_dict_v0_5_1(
    HE_DATE_LIST, confidence_level_list, DETECTION_MAP_SAVE_DIR
)
# Quick check: median of the 'area_percent' series at confidence level 50
outcome_time_series_dict['area_percent'][50].median()

In [None]:
# When False, dates whose comparison image already exists are skipped
overwrite = True

# Each settings tuple is (output directory, key into
# outcome_time_series_dict, colormap name, y-axis label)
region_num_settings = (
    DETECTION_IMAGE_DIR + 'Region_Number/',
    'num_ch', 'viridis', 'Detected CH Number'
)
px_percent_settings = (
    DETECTION_IMAGE_DIR + 'EUV_Px_Percentage/',
    'px_percent', 'plasma', 'Detected Pixel Percentage (%)'
)
area_percent_settings = (
    DETECTION_IMAGE_DIR + 'EUV_Area_Percentage/',
    'area_percent', 'plasma', 'Detected Area Percentage (%)'
)
area_settings = (
    DETECTION_IMAGE_DIR + 'EUV_Area/',
    'area', 'plasma', 'Detected Area (Mm^2)'
)
# Select which outcome the plotting cell below will draw
out_dir, outcome_key, cmap, ylabel = area_percent_settings

In [None]:
# Option to display Heliographic reprojected maps. Leave as False
# Comparison figure per EUV date: He I / neutral lines / EUV panels on the
# top row and the selected outcome time series on the bottom row.

# Option to display Heliographic reprojected maps. Leave as False
hg_reproject = False

os.makedirs(out_dir, exist_ok=True)

# Only the first EUV date is processed (preview mode; saving is disabled
# at the bottom of the loop)
for euv_date_str in EUV_DATE_LIST[:1]:

    # Optionally overwrite existing files
    comparison_img_file = f'{out_dir}EUV{euv_date_str}.jpg'
    if os.path.isfile(comparison_img_file) and not overwrite:
        print((f'EUV {euv_date_str} comparison already exists.'))
        continue

    he_date_str = prepare_data.get_latest_date_str(
        HE_DATE_LIST, selected_date_str=euv_date_str
    )
    mag_date_str = prepare_data.get_nearest_date_str(
        MAG_DATE_LIST, selected_date_str=he_date_str
    )

    # Wider canvas when the reprojected layout is requested
    if hg_reproject:
        fig = plt.figure(figsize=(22, 10))
    else:
        fig = plt.figure(figsize=(18, 10))

    # Pass the flag through so the map panels agree with the figure size
    # and subplot grid chosen from the same flag. It was previously
    # hard-coded to False here, which left the two out of sync whenever
    # hg_reproject was switched on.
    plot_detection.plot_he_neutral_lines_euv_v0_5_1(
        fig, he_date_str, mag_date_str, euv_date_str,
        nrows=2, hg_reproject=hg_reproject
    )

    # Time series axis spans the whole bottom row of the grid
    if hg_reproject:
        ax = fig.add_subplot(2, 7, (8, 14))
    else:
        ax = fig.add_subplot(2, 3, (4, 6))

    plot_detection.plot_outcome_df_vs_time(
        ax, outcome_time_series_dict[outcome_key], he_date_str, cmap,
        ylabel, #ylim=[0,3.75]
    )

    # # Save plot
    # plt.savefig(comparison_img_file)
    # plt.close(fig)
    # print(f'{euv_date_str} map comparison saved.')

v0.2-v0.5

In [None]:
# Build the outcome time series with the v0.2-v0.5 loader. This rebinds
# confidence_level_list and outcome_time_series_dict from the v0.5.1 cells.
confidence_level_list = [1, 50, 75, 95]
# confidence_level_list = [0, 35, 65, 95]
outcome_time_series_dict = detect.get_outcome_time_series_dict(
    HE_DATE_LIST, confidence_level_list, DETECTION_SAVE_DIR
)
# Quick check: median of the 'area_percent' series at confidence level 50
outcome_time_series_dict['area_percent'][50].median()

In [None]:
# Same settings block as in the v0.5.1 section, repeated so this section
# can be run independently.
# When False, dates whose comparison image already exists are skipped
overwrite = True

# Each settings tuple is (output directory, key into
# outcome_time_series_dict, colormap name, y-axis label)
region_num_settings = (
    DETECTION_IMAGE_DIR + 'Region_Number/',
    'num_ch', 'viridis', 'Detected CH Number'
)
px_percent_settings = (
    DETECTION_IMAGE_DIR + 'EUV_Px_Percentage/',
    'px_percent', 'plasma', 'Detected Pixel Percentage (%)'
)
area_percent_settings = (
    DETECTION_IMAGE_DIR + 'EUV_Area_Percentage/',
    'area_percent', 'plasma', 'Detected Area Percentage (%)'
)
area_settings = (
    DETECTION_IMAGE_DIR + 'EUV_Area/',
    'area', 'plasma', 'Detected Area (Mm^2)'
)
# Select which outcome the plotting cell below will draw
out_dir, outcome_key, cmap, ylabel = area_percent_settings

In [None]:
# Save one comparison figure per EUV date (v0.2-v0.5 outputs): map panels
# on the top row, selected outcome time series on the bottom row.
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

for euv_date_str in EUV_DATE_LIST:
    
    # Optionally overwrite existing files
    comparison_img_file = f'{out_dir}EUV{euv_date_str}.jpg'
    if os.path.isfile(comparison_img_file) and not overwrite:
        print((f'EUV {euv_date_str} comparison already exists.'))
        continue
    
    he_date_str = prepare_data.get_latest_date_str(
        HE_DATE_LIST, selected_date_str=euv_date_str
    )
    mag_date_str = prepare_data.get_nearest_date_str(
        MAG_DATE_LIST, selected_date_str=he_date_str
    )
    
    # Extract He I observation
    he_map = prepare_data.get_nso_sunpy_map(HE_DIR + he_date_str + '.fts')
    if not he_map:
        print(f'{he_date_str} He I observation extraction failed.')
        continue
    
    # Extract saved ensemble map array and convert to Sunpy map
    # NOTE(review): ensemble_map is built here but not passed to either
    # plotting call below; the plot helper receives only date strings.
    ensemble_file = f'{DETECTION_SAVE_DIR}{he_date_str}_ensemble_map.npy'
    ensemble_map_data = np.load(ensemble_file, allow_pickle=True)[-1]
    ensemble_map = sunpy.map.Map(np.flipud(ensemble_map_data), he_map.meta)
    ensemble_map.plot_settings['cmap'] = colormaps['magma']
    
    fig = plt.figure(figsize=(18, 10))
    plot_detection.plot_he_neutral_lines_euv_comparison(
        fig, he_date_str, mag_date_str, euv_date_str,
        ROTATED_MAG_SAVE_DIR, nrows=2
    )
    
    # Time series axis spans the whole bottom row of the 2x3 grid
    ax = fig.add_subplot(2, 3, (4, 6))
    plot_detection.plot_outcome_df_vs_time(
        ax, outcome_time_series_dict[outcome_key], he_date_str, cmap, ylabel
    )
    
    # Save plot
    plt.savefig(comparison_img_file)
    # Close each figure so open figures do not accumulate over the loop
    plt.close(fig)
    print(f'{euv_date_str} map comparison saved.')

v0.1

In [None]:
# Build the outcome time series with the v0.1 loader, which takes no
# confidence level list. Rebinds outcome_time_series_dict.
outcome_time_series_dict = detect.get_outcome_time_series_dict_v0_1(
    HE_DATE_LIST, DETECTION_SAVE_DIR
)

In [None]:
# Same settings block as in the later-version sections, repeated so this
# section can be run independently.
# When False, dates whose comparison image already exists are skipped
overwrite = True

# Each settings tuple is (output directory, key into
# outcome_time_series_dict, colormap name, y-axis label)
region_num_settings = (
    DETECTION_IMAGE_DIR + 'Region_Number/',
    'num_ch', 'viridis', 'Detected CH Number'
)
px_percent_settings = (
    DETECTION_IMAGE_DIR + 'EUV_Px_Percentage/',
    'px_percent', 'plasma', 'Detected Pixel Percentage (%)'
)
area_percent_settings = (
    DETECTION_IMAGE_DIR + 'EUV_Area_Percentage/',
    'area_percent', 'plasma', 'Detected Area Percentage (%)'
)
area_settings = (
    DETECTION_IMAGE_DIR + 'EUV_Area/',
    'area', 'plasma', 'Detected Area (Mm^2)'
)
# Select which outcome the plotting cell below will draw
out_dir, outcome_key, cmap, ylabel = area_percent_settings

In [None]:
# Save a comparison figure per EUV date for v0.1 outputs: He I map, He I
# map with detection contours, EUV map, and the outcome time series.
os.makedirs(out_dir, exist_ok=True)

# Only the first EUV date is processed
for euv_date_str in EUV_DATE_LIST[:1]:

    # Optionally overwrite existing files
    comparison_img_file = f'{out_dir}EUV{euv_date_str}.jpg'
    if os.path.isfile(comparison_img_file) and not overwrite:
        print((f'EUV {euv_date_str} comparison already exists.'))
        continue

    he_date_str = prepare_data.get_latest_date_str(
        HE_DATE_LIST, selected_date_str=euv_date_str
    )

    # Extract He I observation
    he_map = prepare_data.get_nso_sunpy_map(HE_DIR + he_date_str + '.fts')
    if not he_map:
        print(f'{he_date_str} He I observation extraction failed.')
        continue

    # Extract He I observation for mask base and convert to Sunpy map
    he_fits_file = DATA_FITS_FORMAT.format(
        data_dir=HE_DIR, date_str=he_date_str
    )
    raw_he = prepare_data.get_image_from_fits(he_fits_file)
    # Pixels equal to the corner value are blanked out. np.nan is used
    # instead of the np.NaN alias, which was removed in NumPy 2.0.
    he_base_data = np.where(raw_he == raw_he[0,0], np.nan, raw_he)
    he_base_map = sunpy.map.Map(np.flipud(he_base_data), he_map.meta)

    # Extract saved single mask array and convert to Sunpy map
    mask_file = f'{DETECTION_SAVE_DIR}{he_date_str}_ensemble_map.npy'
    mask_data = np.load(mask_file, allow_pickle=True)[-1]
    mask_map = sunpy.map.Map(np.flipud(mask_data), he_map.meta)
    mask_map.plot_settings['cmap'] = colormaps['gray']

    euv_map = sunpy.map.Map(EUV_DIR + euv_date_str + '.fts')

    fig = plt.figure(figsize=(18, 10))

    plot_detection.plot_he_map(fig, (2, 3, 1), he_map, he_date_str)

    # Plot He I observation with overlayed detection contours
    ax = fig.add_subplot(232, projection=he_map)
    he_base_map.plot(axes=ax, vmin=-100, vmax=100, title=he_date_str,
                     cmap='afmhot')
    for contour in mask_map.contour(0):
        ax.plot_coord(contour, color='black', linewidth=1)

    plot_detection.plot_euv_map(fig, (2, 3, 3), euv_map, euv_date_str)

    # Time series axis spans the whole bottom row of the 2x3 grid
    ax = fig.add_subplot(2, 3, (4, 6))
    plot_detection.plot_outcome_series_vs_time(
        ax, outcome_time_series_dict[outcome_key], he_date_str, cmap,
        ylabel, ylim=[0,3.75]
    )

    # Save plot
    plt.savefig(comparison_img_file)
    plt.close(fig)
    print(f'{euv_date_str} map comparison saved.')

### Exploration of Time Series Plots

Extract Outcomes

In [None]:
# Outcome time series as three separate dataframes, restricted to the first
# five He I dates
confidence_level_list = [1, 50, 75, 95]
num_ch_df, area_percent_df, px_percent_df = detect.get_outcome_time_series_dfs(
    HE_DATE_LIST[:5], confidence_level_list, DETECTION_NPY_SAVE_DIR
)

In [None]:
# Detected pixel percentage over time.
# NOTE(review): he_date_str is not assigned in this cell; it is whatever
# value an earlier loop left in the kernel namespace - confirm which date
# is meant to be passed to the plot helper.
outcome_df = px_percent_df

fig = plt.figure(figsize=(18, 5))
ax = fig.add_subplot(111)
plot_detection.plot_outcome_df_vs_time(
    ax, outcome_df, he_date_str, cmap='plasma', ylabel='Detected Pixel Percentage (%)'
)

In [None]:
# Detected area percentage over time.
# NOTE(review): he_date_str is not assigned in this cell; it is whatever
# value an earlier loop left in the kernel namespace - confirm which date
# is meant to be passed to the plot helper.
outcome_df = area_percent_df

fig = plt.figure(figsize=(18, 5))
ax = fig.add_subplot(111)
plot_detection.plot_outcome_df_vs_time(
    ax, outcome_df, he_date_str, cmap='bone', ylabel='Detected Area Percentage (%)'
)

#### Pre-Process

Histogram Moments vs Time

In [None]:
# Per-date histogram statistics of the saved pre-processed maps: location
# of the histogram peak and the NaN-ignoring standard deviation.
hist_stat_list = []

for he_date_str in HE_DATE_LIST:
    pre_process_file = (PREPROCESS_NPY_SAVE_DIR + he_date_str
                        + '_pre_processed_map.npy')
    # The last element of the saved array is used as the map data
    pre_processed_map_data = np.load(pre_process_file, allow_pickle=True)[-1]
    
    peak_counts_val = detect.get_peak_counts_loc(
        pre_processed_map_data, bins_as_percent=False
    )
    # One [peak, standard deviation] row per date
    hist_stat_list.append(
        [peak_counts_val, np.nanstd(pre_processed_map_data)]
    )

# Convert to dataframes
# Rows are indexed by the parsed observation datetime, in HE_DATE_LIST order
datetime_list = [datetime.strptime(he_date_str, DICT_DATE_STR_FORMAT)
                 for he_date_str in HE_DATE_LIST]
hist_df = pd.DataFrame(
    hist_stat_list, columns=['Peak', 'StDev'],
    index=datetime_list
)

In [None]:
# Save one figure per He I date: pre-processed map, its semilog histogram,
# the ensemble map, and the histogram statistics from hist_df over time
# with a marker at the current date.
overwrite = True
out_dir = DETECTION_IMAGE_DIR + 'Histogram_Moments/'
cmap = 'plasma'
ylabel = 'Histogram Moments'


if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

for he_date_str in HE_DATE_LIST:
    
    # Optionally overwrite existing files
    img_file = f'{out_dir}{he_date_str}.jpg'
    if os.path.isfile(img_file) and not overwrite:
        print((f'He {he_date_str} map already exists.'))
        continue
    
    pre_process_file = (PREPROCESS_NPY_SAVE_DIR + he_date_str
                        + '_pre_processed_map.npy')
    pre_processed_map = np.load(pre_process_file, allow_pickle=True)[-1]
    
    hist, edges = detect.get_hist(pre_processed_map,
                                         bins_as_percent=False)
    
    ensemble_file = f'{DETECTION_NPY_SAVE_DIR}{he_date_str}_ensemble_map.npy'
    ensemble_map = np.load(ensemble_file, allow_pickle=True)[-1]
    
    fig = plt.figure(figsize=(18, 10))
    
    # Top-left: pre-processed map
    ax = fig.add_subplot(231)
    ax.set_title(he_date_str)
    ax.imshow(pre_processed_map, cmap=plt.cm.gray)
    
    # Top-middle: histogram counts plotted against the upper bin edges
    ax = fig.add_subplot(232)
    ax.set_title('Semilog Histogram')
    ax.semilogy(edges[1:], hist)
    # Axis limits depend on whether the detection version directory name
    # contains 'Rescale'
    if 'Rescale' in DETECTION_VERSION_DIR:
        ax.set_xlim([-1.3, 1.1])
        ax.set_ylim([1E2, 5E4])
    else:
        ax.set_xlim([-110, 110])
        ax.set_ylim([1E1, 5E4])
    
    # Top-right: ensemble map
    ax = fig.add_subplot(233)
    ax.imshow(ensemble_map, cmap=plt.cm.magma)
    
    # Bottom row: statistics over time
    ax = fig.add_subplot(2, 3, (4, 6))
    # Rebinds the notebook-level name `datetimes`
    datetimes = hist_df.index
    ax.plot(hist_df['StDev'], label='Standard Deviation', linewidth=3)
    ax.plot(hist_df['Peak'], label='Mode', linewidth=3)
    
    # Vertical line for datetime indicator
    vline_datetime = datetime.strptime(he_date_str, DICT_DATE_STR_FORMAT)
    # Smallest and largest value over both columns
    min_moment = min(hist_df.min())
    max_moment = max(hist_df.max())
    ax.vlines(x=[vline_datetime, vline_datetime], ymax=2*max_moment, ymin=0,
              colors='k', linestyles='dashed')
    
    for tick in ax.get_xticklabels():
        tick.set_rotation(45)
    ax.set_ylabel(ylabel)
    
    ax.set_xlim([datetimes[0], datetimes[-1]])
    if 'Rescale' in DETECTION_VERSION_DIR:
        ax.set_ylim([0.4, 0.8])
    else:
        ax.set_ylim([0.9*min_moment, 1.1*max_moment])
    
    ax.legend(reverse=True)

    plt.savefig(img_file)
    # Close each figure so open figures do not accumulate over the loop
    plt.close(fig)
    print(f'{he_date_str} map saved.')

Pre-Process Outcomes vs Time

In [None]:
# Threshold levels handed to the threshold-based outcome extraction
percent_of_peak_list = [80, 90, 100, 110]

# Four outcome dataframes computed from the pre-processed maps of all dates
thresh_outcome_dfs = detect.get_thresh_outcome_time_series_dfs(
    HE_DATE_LIST, percent_of_peak_list, HE_DIR, PREPROCESS_NPY_SAVE_DIR
)
num_ch_df, area_percent_df, area_df, px_percent_df = thresh_outcome_dfs

In [None]:
# Save a figure for the selected He I date(s): pre-processed map, its
# semilog histogram, the ensemble map, and the threshold-based outcome
# time series.
overwrite = True
out_dir = DETECTION_IMAGE_DIR + 'Thresh_Area_Percentage/'
outcome_df = area_percent_df
cmap = 'plasma'
ylabel = 'Detected Area Percentage (%)'


if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

# Only the date at index 24 is processed
for he_date_str in HE_DATE_LIST[24:25]:
    
    # Optionally overwrite existing files
    img_file = f'{out_dir}{he_date_str}.jpg'
    if os.path.isfile(img_file) and not overwrite:
        print((f'He {he_date_str} map already exists.'))
        continue
    
    pre_process_file = (PREPROCESS_NPY_SAVE_DIR + he_date_str
                        + '_pre_processed_map.npy')
    pre_processed_map = np.load(pre_process_file, allow_pickle=True)[-1]
    
    hist, edges = detect.get_hist(pre_processed_map,
                                         bins_as_percent=False)
    
    ensemble_file = f'{DETECTION_NPY_SAVE_DIR}{he_date_str}_ensemble_map.npy'
    ensemble_map = np.load(ensemble_file, allow_pickle=True)[-1]
    
    fig = plt.figure(figsize=(18, 10))
    
    # Top-left: pre-processed map
    ax = fig.add_subplot(231)
    ax.set_title(he_date_str)
    ax.imshow(pre_processed_map, cmap=plt.cm.gray)
    
    # Top-middle: histogram counts plotted against the upper bin edges
    ax = fig.add_subplot(232)
    ax.set_title('Semilog Histogram')
    ax.semilogy(edges[1:], hist)
    # NOTE(review): this test matches 'Rescale/' (with slash) and sets no
    # x-limits, unlike the otherwise identical histogram-moments cell which
    # matches 'Rescale' - confirm the difference is intentional.
    if 'Rescale/' in DETECTION_VERSION_DIR:
        ax.set_ylim([1E2, 5E4])
    else:
        ax.set_xlim([-110, 110])
        ax.set_ylim([1E1, 5E4])
    
    # Top-right: ensemble map
    ax = fig.add_subplot(233)
    ax.imshow(ensemble_map, cmap=plt.cm.magma)
    
    # Bottom row: outcome time series
    ax = fig.add_subplot(2, 3, (4, 6))    
    plot_detection.plot_thresh_outcome_vs_time(
        ax, outcome_df, he_date_str, cmap, ylabel)

    plt.savefig(img_file)
    plt.close(fig)
    print(f'{he_date_str} map saved.')

# Version Comparison

Compare outcomes between versions

#### Preparation

Compare outcomes between confidence levels and/or methods

In [None]:
# Output directory for the version comparison, created if missing
out_dir = DETECT_DIR + '_Outcome_Comparison/' + DATE_DIR
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

# Confidence levels compared in every version; commented lists are
# alternative settings
# confidence_level_list = [0, 35, 65, 95]
confidence_level_list = [0, 50, 80]
# confidence_level_list = list(range(0,96,5))

# Version sub-directories of DETECT_DIR to compare; commented lists are
# earlier comparisons
# version_dirs = ['v0_3/', 'Band_Pass/', 'Rescale/', 'Rescale_Center/']
# version_dirs = ['v0_3/', 'Rescale/']
# version_dirs = ['v0_3/', 'Rescale/', 'v0_4/']
# version_dirs = ['v0_3/', 'v0_4/']
# version_dirs = ['v0_4_Single/', 'v0_4/']
# version_dirs = ['v0_4_Unipolar']
# version_dirs = ['v0_1', 'v0_2', 'v0_3', 'v0_4', 'v0_5']
version_dirs = ['v0_1', 'v0_2', 'v0_4', 'v0_5']
# Name parts joined with '_' to build the saved autocorrelation file name
descript_list = version_dirs + [f'cl{cl}' for cl in confidence_level_list]

Plot Formatting

In [None]:
# Bar plot formatting for the comparison cells below.
#   cl_dx_list:  horizontal offset added to the bar positions, one per
#                confidence level
#   method_list: tick labels, one per entry of version_dirs
# Commented pairs are settings used for other comparisons.

# cl_dx_list = np.arange(-0.3,0.31,0.2)
# method_list = ['Bright & Coherent Mask', 'Ensemble', 'Smoothness',
#                'Consistency', 'Unipolarity']

cl_dx_list = np.arange(-0.2,0.21,0.2)
method_list = ['Single Preliminary Mask', 'Area Ensemble',
               'Smoothness Ensemble', 'Unipolarity Ensemble']

# cl_dx_list = np.arange(-0.9,0.91,0.2)
# method_list = ['Unipolarity']

# cl_dx_list = np.arange(0,1,0.05)
# method_list = ['Unipolarity']

# cl_dx_list = np.arange(-0.3,0.31,0.2)
# # method_list = ['v0.3', 'v0.3 Design + Band Pass', 'v0.3 Design + Rescale',
# #               'v0.3 Design + Rescale & Center']
# method_list = ['v0.1', 'v0.2', 'v0.3', 'v0.4']

# cl_dx_list = [-0.1, 0.1]
# # method_list = ['v0.3', 'v0.3 Design + Rescale']
# # method_list = ['v0.3', 'v0.4']
# method_list = ['v0.4 Single', 'v0.4 Ensemble']

# cl_dx_list = [-0.2, 0, 0.2]
# method_list = ['v0.3', 'v0.3 Design + Rescale', 'v0.4']

# One color per confidence level, sampled from the colormap
# cmap = colormaps['viridis']
cmap = colormaps['bone_r']
color_list = cmap(np.linspace(0.25, 0.9, len(confidence_level_list)))
# cmap = colormaps['plasma_r']
# color_list = cmap(np.linspace(0.25, 1, len(confidence_level_list)))

v0.2-v0.5 Compute Outcomes

In [None]:
# Per-version accumulators, appended in version_dirs order
area_percent_df_by_method_list = []
autocorr_by_conf_by_method_list = []
mad_by_conf_by_method_list = []
norm_mad_by_conf_by_method_list = []


for version_dir in version_dirs:
    # Saved detections of this version
    detection_save_dir = os.path.join(DETECT_DIR, version_dir, 'Saved_npy_Files/')
    
    outcome_time_series_dict = detect.get_outcome_time_series_dict(
        HE_DATE_LIST, confidence_level_list, detection_save_dir
    )
    area_percent_df_by_method_list.append(
        outcome_time_series_dict['area_percent']
    )
    
    # Autocorrelation of the area time series, one value per confidence level
    autocorr_by_confidences = [
        outcome_time_series_dict['area'][cl].autocorr()
        for cl in confidence_level_list
    ]
    autocorr_by_conf_by_method_list.append(autocorr_by_confidences)
    # Helper returns a pair, unpacked below as (MAD, normalized MAD)
    out = detect.get_mad_by_confidences(
        outcome_time_series_dict['area'], confidence_level_list
    )
    mad_by_confidences, norm_mad_by_confidences = out
    mad_by_conf_by_method_list.append(mad_by_confidences)
    norm_mad_by_conf_by_method_list.append(norm_mad_by_confidences)
    print(f'Outcomes computed for {version_dir}')

# Save autocorrelations as an array with one row per version and one
# column per confidence level; the file name encodes both lists
descript_list = version_dirs + [f'cl{cl}' for cl in confidence_level_list]
autocorr_file = f'{out_dir}Autocorr_comp_{"_".join(descript_list)}.npy'
np.save(autocorr_file, np.array(autocorr_by_conf_by_method_list),
        allow_pickle=True)

v0.5.1 Compute Outcomes

In [None]:
# Per-version accumulators, appended in version_dirs order
area_percent_df_by_method_list = []
autocorr_by_conf_by_method_list = []


for version_dir in version_dirs:
    # Versions whose directory name contains 'v0_5_1' are read from
    # 'Saved_fits_Files/' with the v0.5.1 loader; all others are read from
    # 'Saved_npy_Files/' with the earlier loader
    if 'v0_5_1' in version_dir:
        detection_save_dir = os.path.join(
            DETECT_DIR, version_dir, 'Saved_fits_Files/'
        )
        outcome_time_series_dict = detect.get_outcome_time_series_dict_v0_5_1(
            HE_DATE_LIST, confidence_level_list, detection_save_dir
        )
    else:
        detection_save_dir = os.path.join(
            DETECT_DIR, version_dir, 'Saved_npy_Files/'
        )
        outcome_time_series_dict = detect.get_outcome_time_series_dict(
            HE_DATE_LIST, confidence_level_list, detection_save_dir
        )
    
    area_percent_df_by_method_list.append(
        outcome_time_series_dict['area_percent']
    )
    
    # Autocorrelation of the area time series, one value per confidence level
    autocorr_by_confidences = [
        outcome_time_series_dict['area'][cl].autocorr()
        for cl in confidence_level_list
    ]
    autocorr_by_conf_by_method_list.append(autocorr_by_confidences)
    print(f'Outcomes computed for {version_dir}')

# Save autocorrelations as an array with one row per version and one
# column per confidence level; the file name encodes both lists
descript_list = version_dirs + [f'cl{cl}' for cl in confidence_level_list]
autocorr_file = f'{out_dir}Autocorr_comp_{"_".join(descript_list)}.npy'
np.save(autocorr_file, np.array(autocorr_by_conf_by_method_list),
        allow_pickle=True)

#### Design Variable Sweep

Area Sweep

In [None]:
# NOTE(review): despite its name, confidence_level is used here as an index
# into area_percent_df_by_method_list, which holds one dataframe per entry
# of version_dirs - i.e. it selects a version, not a confidence level.
confidence_level = 0
area_percent_df = area_percent_df_by_method_list[confidence_level]
# Median detected area percentage per confidence level for that version
median_area_percent_by_cl = [
    np.median(area_percent_df[cl]) for cl in confidence_level_list
]

x_ticks = np.arange(len(method_list))
plt.figure(1, figsize=(9,6))

# Loop over confidence levels to plot bars for all methods at once
# (each median is a scalar, so the same height is drawn at every x_ticks
# position, shifted by cl_dx)
for median_area_percent, cl_dx, color in zip(
    median_area_percent_by_cl, cl_dx_list, color_list):
    plt.bar(x_ticks + cl_dx, median_area_percent, width=0.05, color=color)

plt.suptitle(DATE_RANGE_SUPTITLE)
plt.title('Design Variable Sweep')
plt.ylabel('Median Detected Area Percentage (%)')
plt.xlabel('Unipolarity Threshold')
# Only bar positions within this x range are visible
plt.xlim([-0.025,1.025])

Autocorrelation Sweep

In [None]:
# Reload the autocorrelations saved by the compute cell; the file name is
# rebuilt from the current descript_list
autocorr_file_name = f'{out_dir}Autocorr_comp_{"_".join(descript_list)}'
autocorrs_by_cl_by_method = np.load(autocorr_file_name + '.npy', allow_pickle=True)
# Transpose so that iterating yields one row per confidence level
autocorrs_by_method_by_cl = autocorrs_by_cl_by_method.T

x_ticks = np.arange(len(method_list))

plt.figure(1, figsize=(9,6))

# Loop over confidence levels to plot bars for all methods at once
for autocorrs_by_method, cl_dx, color in zip(
    autocorrs_by_method_by_cl, cl_dx_list, color_list):
    plt.bar(x_ticks + cl_dx, autocorrs_by_method, width=0.05,
            color=color)

plt.suptitle(DATE_RANGE_SUPTITLE)
plt.title('Design Variable Sweep')
plt.ylabel(f'Autocorrelation')
plt.xlabel('Unipolarity Threshold')
# Only bar positions within this x range are visible
plt.xlim([-0.025,1.025])

#### Method Comparison

Autocorrelation by Method

In [None]:
# Grouped bar chart of area time series autocorrelation: one group per
# method, one bar per confidence level. Saved as a PNG next to the .npy.

# Reload the autocorrelations saved by the compute cell; the file name is
# rebuilt from the current descript_list
autocorr_file_name = f'{out_dir}Autocorr_comp_{"_".join(descript_list)}'
autocorrs_by_cl_by_method = np.load(autocorr_file_name + '.npy', allow_pickle=True)
# Transpose so that iterating yields one row per confidence level
autocorrs_by_method_by_cl = autocorrs_by_cl_by_method.T

x_ticks = np.arange(len(method_list))
# Legend labels, one per confidence level
confidence_label_list = [
    f'{confidence_level}% Confidence'
    for confidence_level in confidence_level_list
]

plt.figure(1, figsize=(10,6))

# Loop over confidence levels to plot bars for all methods at once
for autocorrs_by_method, cl_dx, confidence, color in zip(
    autocorrs_by_method_by_cl, cl_dx_list, confidence_label_list, color_list):
    plt.bar(x_ticks + cl_dx, autocorrs_by_method, width=0.2,
            label=confidence, color=color)

plt.suptitle(DATE_RANGE_SUPTITLE)
plt.title('Method Comparison')
plt.xticks(x_ticks, method_list)
plt.ylabel(f'Time Series Autocorrelation')

# plt.ylim([0, 0.8])
# plt.ylim([-0.1, 0.8])
# plt.axhline(0, color='k', linestyle='--')
plt.legend(loc='upper left')

# Save plot
plt.savefig(autocorr_file_name + '.png')
plt.close()
print(f'{autocorr_file_name.split("/")[-1]} method comparison saved.')

In [None]:
# Display the transposed autocorrelation array (one row per confidence level)
autocorrs_by_method_by_cl

# CH Labels

Saved in: output/Labels/

In [None]:
import pickle
import plotly.express as px
import plotly.graph_objects as go
from sklearn.discriminant_analysis import (
    LinearDiscriminantAnalysis,
    QuadraticDiscriminantAnalysis,
)
from sklearn import svm
from sklearn.metrics import (
    RocCurveDisplay, make_scorer, recall_score, confusion_matrix
)
from sklearn.inspection import DecisionBoundaryDisplay


# File name for a saved LDA model (not read or written in the cells shown here)
LDA_FILE_NAME = 'v1_1_LDA_model.pkl'
# Values written into labeled mask arrays: on-disk background and regions
# that have not been given a label yet
DISK_VAL = -0.5
UNLABELED_VAL = 1.5
# Pickled label dictionaries are stored one per He I date as '<he_date_str>.dat'
CH_LABEL_DIR = OUTPUT_DIR + 'Labels/'
CH_LABEL_FILE_FORMAT = CH_LABEL_DIR + '{he_date_str}.dat'
LABEL_FILE_GLOB_PATTERN = CH_LABEL_DIR + '*.dat'
# Five colors, in order of increasing array value:
# disk (-0.5), False (0), Marginal (0.5), True (1), unlabeled (1.5)
LABEL_CMAP = colors.ListedColormap(
    ['black', 'indianred', 'gold', 'forestgreen', 'gray']
)

# Colormap for numbered regions: black for value 0, then the 20 tab20 colors
cmap_object = colormaps['tab20']
black_start_cmap = np.vstack(([0,0,0,1], cmap_object(np.arange(0,20,1))))
NUMBERED_CMAP = colors.ListedColormap(black_start_cmap)

# ----------------------------------------------------------------
# Outcomes CSV path; the name embeds the time at which this cell is run
LABEL_OUTCOME_FILE = (
    CH_LABEL_DIR + f'{datetime.today().strftime(DICT_DATE_STR_FORMAT)}_outcomes.csv'
)
# Label IDs: 0 (False), 0.5 (Marginal), 1 (True)
LABEL_ID_LIST = [0,0.5,1]
# Number of masks per date iterated over when computing outcomes
NUM_MASKS = 2

# ----------------------------------------------------------------
# Display name and color per label ID, in the same order as LABEL_ID_LIST
LABEL_NAME_LIST = ['False', 'Marginal', 'True']
LABEL_COLOR_LIST = ['indianred', 'gold', 'forestgreen']
# Smallest and largest marker size used when scaling markers by area
MARKER_SIZE_RANGE = [10,20]

# DataFrame columns attached to each 3D scatter point; HOVER_TEMPLATE refers
# to them by position, so the order here must match the customdata indices
CUSTOMDATA_COLS = [
    'he_date_str', 'mask_idx',
    'grad_median', 'unipolarity', 'cm_foreshort',
    'cm_lat', 'cm_lon', 'all_cand_idx', 'area'
]
HOVER_TEMPLATE = (
    '%{customdata[0]} | Mask %{customdata[1]}<br>' +
    'GM: %{customdata[2]:.3f} | U: %{customdata[3]:.2f} | ' +
    'F: %{customdata[4]:.2f}<br>' +
    'Lat: %{customdata[5]:.1f} deg | ' +
    'Lon: %{customdata[6]:.1f} deg<br>' +
    'Idx: %{customdata[7]}'
    + '<br>Area: %{customdata[8]:.1e} Mm^2<br>'
)

def get_numbered_mask_on_disk(pre_processed_map_data, ch_mask):
    """Number each connected candidate region and blank out off-disk pixels.

    Args:
        pre_processed_map_data: 2D array whose NaN pixels mark the off-disk
            background.
        ch_mask: 2D candidate mask; connected nonzero regions are numbered
            1, 2, ... by scipy.ndimage.label.

    Returns:
        2D float array holding the region number in each candidate region,
        0 elsewhere on the disk, and NaN wherever pre_processed_map_data is
        NaN.
    """
    region_numbers = ndimage.label(ch_mask)[0]
    is_off_disk = np.isnan(pre_processed_map_data)
    return np.where(is_off_disk, np.nan, region_numbers)


def get_labeled_mask_on_disk(pre_processed_map_data, ch_mask,
                             ch_label_list, mask_idx, allow_unlabeled):
    """Retrieve 2D array with candidate regions labeled 0, 0.5, or 1
    and a removed background.

    Args:
        pre_processed_map_data: 2D array whose NaN pixels mark the off-disk
            background.
        ch_mask: 2D candidate mask; connected regions are numbered with
            scipy.ndimage.label and the i-th region takes ch_label_list[i-1].
        ch_label_list: list of labels per region, in region-number order.
            The value 2 is the "not yet labeled" placeholder.
        mask_idx: index of the mask, used only in the assertion message.
        allow_unlabeled: if True, regions beyond the end of ch_label_list
            receive UNLABELED_VAL. If False, every region must have a
            non-placeholder label.

    Returns:
        2D float array with the assigned label in each candidate region,
        DISK_VAL on the remaining disk, and NaN off-disk.

    Raises:
        AssertionError: if allow_unlabeled is False and any of the labels
            used for the detected regions is missing or is the placeholder.
    """
    # Array with number labels per distinct CH and number of labels
    numbered_mask, num_candidates = ndimage.label(ch_mask)

    # Remove background to display CHs on disk
    numbered_mask_on_disk = np.where(
        np.isnan(pre_processed_map_data), np.nan, numbered_mask
    )

    if allow_unlabeled:
        # Extend label list for unlabeled CHs
        ch_label_list = (
            ch_label_list[:num_candidates]
            + [UNLABELED_VAL]*(num_candidates - len(ch_label_list))
        )
    else:
        # Only the first num_candidates labels are applied, so only those
        # count; placeholders or real labels further along are irrelevant.
        num_labeled = len([ch_label
                           for ch_label in ch_label_list[:num_candidates]
                           if ch_label != 2])
        assert num_labeled >= num_candidates, \
            f'Candidates in mask {mask_idx} have been left unlabeled. ' \
            f'There are {num_candidates} candidates.'

    # Initialize and iterate to assign labels to CHs
    labeled_mask_on_disk = np.where(
        numbered_mask_on_disk == 0, DISK_VAL, numbered_mask_on_disk
    )
    for ch_idx in range(num_candidates):
        # Select pixels from the untouched numbered mask. Selecting from the
        # array being relabeled would re-match an already assigned label that
        # equals a later region number (e.g. placeholder 2 vs. region 2).
        labeled_mask_on_disk = np.where(
            numbered_mask_on_disk == ch_idx + 1,
            ch_label_list[ch_idx], labeled_mask_on_disk
        )
    return labeled_mask_on_disk


def plot_labeled_chs(he_date_str, pre_processed_map_data, smooth_mag_map_data,
                     ch_label_dict, ch_mask_list, numbered_mode):
    """Retrieve numbered (1-num_candidates) or labeled (True, Marginal, False)
    candidate regions from CH masks, then plot them in a row.

    Args:
        he_date_str: date string used as the figure title.
        pre_processed_map_data: 2D array whose NaN pixels mark off-disk.
        smooth_mag_map_data: 2D magnetogram array; its zero-level contour is
            overlaid in yellow on every panel.
        ch_label_dict: dict with 'percent_of_peak_list' and
            'morph_radius_list' (panel titles) and, when numbered_mode is
            False, 'label_list_dict' holding one label list per 'mask_{i}'.
        ch_mask_list: list of 2D candidate masks, one panel each.
        numbered_mode: True colors each region by its number; False colors
            regions by label (unlabeled regions are allowed).
    """
    percent_of_peak_list = ch_label_dict['percent_of_peak_list']
    morph_radius_list = ch_label_dict['morph_radius_list']

    image_size = 5
    num_cols = len(ch_mask_list)
    fig = plt.figure(figsize=((image_size+1.5)*num_cols, image_size))
    fig.suptitle(he_date_str, fontsize=16)

    for mask_idx, ch_mask in enumerate(ch_mask_list):

        ax = fig.add_subplot(1, num_cols, mask_idx + 1)
        # An integer `levels` asks matplotlib to pick levels automatically;
        # a one-element list pins the contour to exactly zero.
        ax.contour(smooth_mag_map_data, levels=[0], colors='y')

        if numbered_mode:
            numbered_mask_on_disk = get_numbered_mask_on_disk(
                pre_processed_map_data, ch_mask
            )
            # Half-integer limits center each integer region number on one
            # color of the 21-entry NUMBERED_CMAP
            im = ax.imshow(numbered_mask_on_disk, NUMBERED_CMAP,
                           interpolation='nearest', vmin=-0.5, vmax=20.5)
            fig.colorbar(im, ax=ax, ticks=np.arange(0,20.5,4))
        else:
            ch_label_list = ch_label_dict['label_list_dict'][f'mask_{mask_idx}']
            labeled_mask_on_disk = get_labeled_mask_on_disk(
                pre_processed_map_data, ch_mask, ch_label_list,
                mask_idx, allow_unlabeled=True
            )

            # Quarter-step padding centers the five values DISK_VAL ...
            # UNLABELED_VAL (spaced 0.5 apart) on the five LABEL_CMAP colors
            im = ax.imshow(
                labeled_mask_on_disk, LABEL_CMAP, interpolation='nearest',
                vmin=DISK_VAL - 0.25, vmax=UNLABELED_VAL + 0.25
            )
            cb = fig.colorbar(
                im, ax=ax, fraction=0.05,
                ticks=np.linspace(DISK_VAL, UNLABELED_VAL, 5)
            )
            cb.ax.set_yticklabels(
                ['Disk', '0: False', '0.5: Marginal', '1: True', 'Unlabeled']
            )

        ax.axes.xaxis.set_ticks([])
        ax.axes.yaxis.set_ticks([])
        ax.set_title(
            (f'{percent_of_peak_list[mask_idx]:d}% of Mode Threshold | '
             f'{morph_radius_list[mask_idx]:d}Mm SE Disk Radius')
        )


def save_ch_label_dict(pre_processed_map_data, he_date_str, ch_mask_list,
                       ch_label_dict):
    """Fill in the labeled arrays of a label dictionary and pickle it.

    For each of the m masks, the label list stored under
    ch_label_dict['label_list_dict']['mask_{i}'] is rendered into a 2D
    labeled array (every candidate must be labeled) and stored under
    ch_label_dict['labeled_data_dict']['mask_{i}']. The dictionary is
    modified in place, written to CH_LABEL_FILE_FORMAT for he_date_str, and
    returned.

    The dictionary contains 4 key, value pairs:
        'percent_of_peak_list': list of length m with mask percent_of_peak
            design variable values
        'morph_radius_list': list of length m with mask morph_radius
            design variable values
        'label_list_dict': dict with m key, value pairs:
            'mask_{i}': list of 0 (False), 0.5 (Marginal), or 1 (True)
        'labeled_data_dict': dict with m key, value pairs:
            'mask_{i}': 2D array with 0, 0.5, or 1 label values in candidate
            regions, -0.5 on disk background, and NaN off-disk.
    """
    # Render each mask's label list into its labeled 2D array
    for mask_idx, ch_mask in enumerate(ch_mask_list):
        mask_key = f'mask_{mask_idx}'
        ch_label_dict['labeled_data_dict'][mask_key] = get_labeled_mask_on_disk(
            pre_processed_map_data, ch_mask,
            ch_label_dict['label_list_dict'][mask_key],
            mask_idx, allow_unlabeled=False
        )

    # Write the dictionary to '<CH_LABEL_DIR><he_date_str>.dat'
    os.makedirs(CH_LABEL_DIR, exist_ok=True)
    ch_label_file_name = CH_LABEL_FILE_FORMAT.format(he_date_str=he_date_str)
    with open(ch_label_file_name, 'wb') as ch_label_file:
        pickle.dump(
            ch_label_dict, ch_label_file, protocol=pickle.HIGHEST_PROTOCOL
        )

    print(f'{he_date_str} Label Dictionary Saved')

    return ch_label_dict


def plot_3d_feature_space(labeled_cand_df, camera_eye, gray_out_inspected=False):
    """Display candidate regions in a 3D feature space, sized by area and colored
    by label or, optionally, greyed if it has been marked as inspected.

    One Scatter3d trace is added per label in LABEL_ID_LIST with axes
    'smooth_quantile' (x), 'unipolarity' (y) and 'cm_foreshort' (z). Marker
    sizes are scaled into MARKER_SIZE_RANGE using the min and max area of
    the candidates within that label's trace. Two reference lines mark the
    (1, 1) and (0, 0) corners of the x-y plane.

    Args:
        labeled_cand_df: DataFrame with columns 'label_id',
            'smooth_quantile', 'unipolarity', 'cm_foreshort', 'area', every
            column in CUSTOMDATA_COLS, and 'inspected?' when
            gray_out_inspected is True.
        camera_eye: value passed through as the scene's camera_eye setting.
        gray_out_inspected: if True, candidates whose 'inspected?' value is
            truthy are drawn gray instead of their label color.

    Returns:
        The plotly Figure.
    """
    fig = go.Figure()

    # Plot candidate region points ----------------------------------------------
    for label_id, label, color in zip(
        LABEL_ID_LIST, LABEL_NAME_LIST, LABEL_COLOR_LIST):

        label_id_df = labeled_cand_df[labeled_cand_df['label_id'] == label_id]
        label_id_df = label_id_df.reset_index(drop=True)

        if gray_out_inspected:
            marker_colors = ['gray' if inspected else color
                             for inspected in label_id_df['inspected?']]
        else:
            marker_colors = color

        area = label_id_df['area']
        fig.add_trace(
            go.Scatter3d(
                x=label_id_df['smooth_quantile'],
                y=label_id_df['unipolarity'],
                z=label_id_df['cm_foreshort'], name=label,
                mode='markers', marker_color=marker_colors,
                marker_size=np.interp(
                    area, [np.min(area), np.max(area)], MARKER_SIZE_RANGE
                ),
                customdata=label_id_df[CUSTOMDATA_COLS],
                hovertemplate=HOVER_TEMPLATE
            )
        )

    # Plot ideal T/F lines ------------------------------------------------------
    # Vertical lines span z beyond the displayed axis range at the two
    # extreme corners of the smoothness-unipolarity plane
    fig.add_trace(
        go.Scatter3d(
            x=[1, 1], y=[1, 1], z=[-1, 2],
            mode='lines', name='Smooth, Unipolar',
            line=dict(width=10, color='lightseagreen')
        )
    )
    fig.add_trace(
        go.Scatter3d(
            x=[0, 0], y=[0, 0], z=[-1, 2],
            mode='lines', name='Unsmooth, Bipolar',
            line=dict(width=10, color='crimson')
        )
    )

    # Update layout -------------------------------------------------------------
    axis_range = [-0.1, 1.1]
    fig.update_layout(
        width=800, height=700,
        margin=dict(l=20, r=0, b=20, t=20),
        scene=dict(
            xaxis=dict(title='Smoothness Quantile', range=axis_range),
            yaxis=dict(title='Unipolarity', range=axis_range),
            zaxis=dict(title='Near Disk Center', range=axis_range),
            camera_eye=camera_eye, aspectmode='cube',
        )
    )
    return fig


def get_roc_rates(confidence_series, y, confidence_thresh):
    """Compute one ROC point for a confidence threshold.

    Candidates whose confidence is strictly greater than confidence_thresh
    are predicted 1, all others 0; the rates come from sklearn's confusion
    matrix over the classes [0, 1].

    Args:
        confidence_series: sequence of confidence values, one per candidate.
        y: true class (0 or 1) per candidate.
        confidence_thresh: threshold applied to confidence_series.

    Returns:
        [true_pos_rate, false_pos_rate]
    """
    predictions = (np.asarray(confidence_series) > confidence_thresh).astype(int)
    # Rows are true classes [0, 1]; columns are predicted classes [0, 1]
    (true_neg, false_pos), (false_neg, true_pos) = confusion_matrix(
        y, predictions, labels=[0, 1]
    )
    return [
        true_pos/(true_pos + false_neg),
        false_pos/(false_pos + true_neg),
    ]

## Load Saved Outcomes

Load the saved outcomes of labeled candidates across all dates.

In [None]:
# Current outcomes table and model file name
LDA_FILE_NAME = 'v1_1_LDA_model.pkl'
labeled_cand_df_file = CH_LABEL_DIR + '2024_06_06__20_29_outcomes.csv'
labeled_cand_df = pd.read_csv(labeled_cand_df_file)

# Row labels of the candidates that have already been inspected
inspected_all_cand_idxs = [
    7, 11, 63, 66, 68, 70, 83, 101, 104, 120, 135, 142,
    151, 159, 189, 191, 192, 196, 211, 220, 244, 247, 261,
    258, 265, 274, 276, 277, 298, 309, 316, 323,
]

# Flag every row as not inspected, then mark the listed rows
labeled_cand_df['inspected?'] = False
labeled_cand_df.loc[inspected_all_cand_idxs, 'inspected?'] = True

# Smoothness quantile: one minus the rank fraction of the gradient median,
# so a lower gradient median gives a higher quantile
grad_median_rank = stats.rankdata(labeled_cand_df['grad_median'])
labeled_cand_df['smooth_quantile'] = 1 - grad_median_rank/len(labeled_cand_df)

# Classification uses every candidate except those labeled 0.5 (marginal)
classify_cand_df = labeled_cand_df[labeled_cand_df['label_id'] != 0.5]
num_cands = len(classify_cand_df)

# Number of candidates flagged as inspected
int(labeled_cand_df['inspected?'].sum())

Old Label Sets

In [None]:
# Older label set. Running this cell rebinds labeled_cand_df,
# classify_cand_df and num_cands, replacing whatever was loaded before it.
labeled_cand_df = pd.read_csv(
    CH_LABEL_DIR + '2024_05_30__12_51_outcomes.csv'
)

# Track candidates that have been inspected
# (values are row labels of this particular outcomes file)
inspected_all_cand_idxs = [
    11, 63, 66, 68, 70,
    83, 101, 104, 120, 135, 142,
    192,
    196, 211, 220, 244, 247,
    261, 274, 276, 277,
    309, 323,
]

# Flag every row as not inspected, then mark the listed rows
labeled_cand_df['inspected?'] = [False for _ in range(len(labeled_cand_df))]
labeled_cand_df.loc[inspected_all_cand_idxs,'inspected?'] = True

# Smoothness quantile: one minus the rank fraction of the gradient median
labeled_cand_df['smooth_quantile'] = (
    1 - stats.rankdata(labeled_cand_df['grad_median'])/len(labeled_cand_df)
)
# Classification uses every candidate except those labeled 0.5 (marginal)
classify_cand_df = labeled_cand_df[labeled_cand_df['label_id'] != 0.5]
num_cands = classify_cand_df.shape[0]

# Number of candidates flagged as inspected
len(labeled_cand_df[labeled_cand_df['inspected?']])

In [None]:
# Oldest label set. Running this cell rebinds labeled_cand_df.
# NOTE(review): unlike the other loading cells, this one does not add the
# 'smooth_quantile' column and does not refresh classify_cand_df/num_cands.
labeled_cand_df = pd.read_csv(
    CH_LABEL_DIR + '2024_05_05__07_45_outcomes.csv'
)

# Track candidates that have been inspected
# (values are row labels of this particular outcomes file)
inspected_all_cand_idxs = [
    11, 54, 65, 67,
    81, 100, 103, 119, 140, 195, 213, 220, 243,
    260, 269, 275,
    308, 322
]

# Flag every row as not inspected, then mark the listed rows
labeled_cand_df['inspected?'] = [False for _ in range(len(labeled_cand_df))]
labeled_cand_df.loc[inspected_all_cand_idxs, 'inspected?'] = True

## Assign Labels

1. Load pre-processed maps
2. Obtain preliminary segmentations

In [None]:
# 2012 period, aimed for every 15-20 days
# 0:  2012_04_01__17_03 18 days     15: 2012_04_19__17_56   17 days
# 30: 2012_05_06__18_33 16 days     45: 2012_05_22__18_04   19 days
# 60: 2012_06_10__17_44 16 days     75: 2012_06_26__16_51   11 days
# 80: 2012_07_07__17_24 23 days     90: 2012_07_30__20_19   8 days
# 95: 2012_08_07__21_40 17 days     100: 2012_08_24__22_54

# 2015 period, every 15-20 days
# 0: 2015_01_04__20_30  16 days     1: 2015_01_20__20_25    14 days
# 3: 2015_02_03__18_31  7 days      9: 2015_02_10__18_45    17 days
# 13: 2015_02_27__20_39 17 days     14: 2015_03_15__18_04   15 days
# 16: 2015_03_31__18_13 18 days     17: 2015_04_18__17_22   38 days
# 19: 2015_05_26__20_21 11 days     20: 2015_06_06__16_08

In [None]:
# Select the He I observation to label and its nearest magnetogram
# he_date_str = HE_DATE_LIST[0]
next_he_date = '2012_08_08__00_00'
he_date_str = prepare_data.get_latest_date_str(
    HE_DATE_LIST, selected_date_str=next_he_date
)

mag_date_str = prepare_data.get_nearest_date_str(
    MAG_DATE_LIST, selected_date_str=he_date_str
)

# Aggressive/Conservative Masks from v1.0 SOLIS Design
percent_of_peak_list = [70, 90]
morph_radius_list = [   15, 13] # Mm

# # v1.0 KPVT Design
# percent_of_peak_list = [85, 85, 95, 105]
# morph_radius_list = [   15, 17, 13, 13] # Mm

# Extract FITS file pre-processed map
pre_process_fits_file = (PREPROCESS_MAP_SAVE_DIR + he_date_str
                        + '_pre_processed_map.fits')
pre_processed_map = sunpy.map.Map(pre_process_fits_file)
# Flipped vertically for display with imshow
pre_processed_map_data = np.flipud(pre_processed_map.data)

# Extract saved processed magnetogram
reprojected_smooth_file = (f'{ROTATED_MAG_SAVE_DIR}Mag{mag_date_str}'
                           f'_He{he_date_str}_smooth.fits')
reprojected_smooth_map = sunpy.map.Map(reprojected_smooth_file)
smooth_mag_map_data = np.flipud(reprojected_smooth_map.data)

# One candidate mask per (percent_of_peak, morph_radius) pair
ch_mask_list = detect.get_ch_mask_list_v0_5_1(
    pre_processed_map, percent_of_peak_list, morph_radius_list
)

# See save_ch_label_dict docstring.
# One 'mask_{i}' entry is created per mask so that design options with more
# than two masks (e.g. the commented KPVT design) also get their keys.
ch_label_dict = {
    'percent_of_peak_list': percent_of_peak_list,
    'morph_radius_list': morph_radius_list,
    'label_list_dict': {
        f'mask_{mask_idx}': None for mask_idx in range(len(ch_mask_list))
    },
    'labeled_data_dict': {
        f'mask_{mask_idx}': None for mask_idx in range(len(ch_mask_list))
    }
}

1. Plot numbered CHs, specify labels, and plot labeled CHs to confirm correct labeling
   - Analyze Current Design Option: Label CHs from 2 of 4 masks
   - Characterize CHs in He I: Vary design variables to get rough boundaries for all present CHs?
2. Save ch_label_dict for a single date

In [None]:
# Option to plot CHs with discrete colors to indicate number.
#   False will plot CHs with False, Marginal, True, or Unlabeled colors
numbered_mode = True

ch_label_dict['label_list_dict']['mask_0'] = [
    2, 2, 2, 2, # blue, orange
    2, 2, 2, 2, # green, red
    2, 2, 2, 2, # purple, brown
    2, 2, 2, 2, # pink, grey
]
ch_label_dict['label_list_dict']['mask_1'] = [
    2, 2, 2, 2, # blue, orange
    2, 2, 2, 2, # green, red
    2, 2, 2, 2, # purple, brown
    2, 2, 2, 2, # pink, grey
]
# numbered_mode = False

plot_labeled_chs(
    he_date_str, pre_processed_map_data, smooth_mag_map_data,
    ch_label_dict, ch_mask_list, numbered_mode
)

In [None]:
# Clean slate
ch_label_dict['label_list_dict']['mask_0'] = [
    2, 2, 2, 2, # blue, orange
    2, 2, 2, 2, # green, red
    2, 2, 2, 2, # purple, brown
    2, 2, 2, 2, # pink, grey
]
ch_label_dict['label_list_dict']['mask_1'] = [
    2, 2, 2, 2, # blue, orange
    2, 2, 2, 2, # green, red
    2, 2, 2, 2, # purple, brown
    2, 2, 2, 2, # pink, grey
]
# numbered_mode = False

In [None]:
# Render the labeled arrays and pickle the dictionary for he_date_str.
# Fails with an AssertionError if candidates are still unlabeled.
ch_label_dict = save_ch_label_dict(
    pre_processed_map_data, he_date_str, ch_mask_list, ch_label_dict
)

## Compute All Date Outcomes

Requires all dates to be selected in settings.py

TODO: Switch inspected cand idxs to persistent identifier if new candidates will be added

In [None]:
# Retrieve dates for which candidate regions have been labeled
label_file_list = glob.glob(LABEL_FILE_GLOB_PATTERN)
LABEL_DATE_LIST = [label_file.split('/')[-1].split('.')[0]
                            for label_file in label_file_list]
LABEL_DATE_LIST.sort()

len(LABEL_DATE_LIST)

In [None]:
# Initialize dictionaries of candidate outcomes and metadata
outcomes_by_cand_dict = {}
for outcome_key in detect.OUTCOME_KEY_LIST:
    outcomes_by_cand_dict[outcome_key] = []

meta_by_cand_dict = {
    'he_date_str': [],
    'mask_idx': [],
    'label_id': []
}

print(f'Computed outcomes for candidate regions on:')

for he_date_str in LABEL_DATE_LIST:
    # Extract saved data for computing outcomes ------------------------------
    # Extract He I map data
    he_fits_file = DATA_FITS_FORMAT.format(
        data_dir=HE_DIR, date_str=he_date_str
    )
    he_map = prepare_data.get_nso_sunpy_map(he_fits_file)
    he_map_data = np.flipud(he_map.data)

    # Extract differentially rotated magnetogram map
    mag_date_str = prepare_data.get_nearest_date_str(
        MAG_DATE_LIST, selected_date_str=he_date_str
    )
    reprojected_fits_file = (f'{ROTATED_MAG_SAVE_DIR}'
                            f'Mag{mag_date_str}_He{he_date_str}.fits')
    reprojected_mag_map = sunpy.map.Map(reprojected_fits_file)
    
    # Extract label dictionary from file
    ch_label_file_name = CH_LABEL_FILE_FORMAT.format(
        he_date_str=he_date_str
    )
    with open(ch_label_file_name, 'rb') as ch_label_file:
        ch_label_dict = pickle.load(ch_label_file)
    labeled_data_dict = ch_label_dict['labeled_data_dict']
    
    # Compute candidate region outcomes for each mask and label ID ----------
    for mask_idx in range(NUM_MASKS):
        # Extract 2D array with labeled candidate regions
        labeled_mask_on_disk = labeled_data_dict[f'mask_{mask_idx}']
        
        for label_id in LABEL_ID_LIST:
            # 2D array with candidate regions of the specified label ID
            label_id_mask = np.where(labeled_mask_on_disk == label_id, 1, 0)

            label_id_map = sunpy.map.Map(
                np.flipud(label_id_mask), he_map.meta
            )
            mask_label_id_cand_outcomes_dict = detect.get_outcomes_by_ch(
                label_id_map, he_map_data, reprojected_mag_map,
                confidence_level=0
            )
            
            # Unsure how to get candidate number from ndimage.label(ch_mask)
            # in global mask for helping reference rainbow plots
            
            # Track metadata of candidates being iterated over
            num_candidates = len(
                mask_label_id_cand_outcomes_dict[detect.OUTCOME_KEY_LIST[0]]
            )
            meta_by_cand_dict['he_date_str'].extend(
                [he_date_str for _ in range(num_candidates)]
            )
            meta_by_cand_dict['mask_idx'].extend(
                [mask_idx for _ in range(num_candidates)]
            )
            meta_by_cand_dict['label_id'].extend(
                [label_id for _ in range(num_candidates)]
            )
            
            # Extend main dictionary with each outcome
            for outcome_key in detect.OUTCOME_KEY_LIST:
                outcomes_by_cand_dict[outcome_key].extend(
                    mask_label_id_cand_outcomes_dict[outcome_key]
                )
    
    print(he_date_str, end='\t')

labeled_cand_dict = outcomes_by_cand_dict.copy()
labeled_cand_dict.update(meta_by_cand_dict)
labeled_cand_df = pd.DataFrame(labeled_cand_dict)

labeled_cand_df['all_cand_idx'] = labeled_cand_df.index

In [None]:
# Inspect the per-candidate outcome table
labeled_cand_df

### Save Outcomes

In [None]:
# Write the table to LABEL_OUTCOME_FILE (its name embeds the time at which
# the constants cell was run); the DataFrame index is not written.
labeled_cand_df.to_csv(LABEL_OUTCOME_FILE, encoding='utf-8', index=False)

## Analyze Saved Labels

### Single Date

In [None]:
# Select the He I observation to review and its nearest magnetogram
# he_date_str = HE_DATE_LIST[19]
next_he_date = '2015_01_21__00_00'
he_date_str = prepare_data.get_latest_date_str(
    HE_DATE_LIST, selected_date_str=next_he_date
)

mag_date_str = prepare_data.get_nearest_date_str(
    MAG_DATE_LIST, selected_date_str=he_date_str
)

# Extract pre-processed He I map
pre_process_fits_file = (PREPROCESS_MAP_SAVE_DIR + he_date_str
                        + '_pre_processed_map.fits')
pre_processed_map = sunpy.map.Map(pre_process_fits_file)
pre_processed_map_data = np.flipud(pre_processed_map.data)

# Extract saved data for plotting labels -----------------------------

# Extract saved processed magnetogram
reprojected_smooth_file = (f'{ROTATED_MAG_SAVE_DIR}Mag{mag_date_str}'
                           f'_He{he_date_str}_smooth.fits')
reprojected_smooth_map = sunpy.map.Map(reprojected_smooth_file)
smooth_mag_map_data = np.flipud(reprojected_smooth_map.data)

# Extract label dictionary from file ---------------------------------
# CH_LABEL_FILE_FORMAT only has the {he_date_str} field, matching how the
# file name is built when the dictionary is saved.
ch_label_file_name = CH_LABEL_FILE_FORMAT.format(he_date_str=he_date_str)
with open(ch_label_file_name, 'rb') as ch_label_file:
    ch_label_dict = pickle.load(ch_label_file)

# Recreate masks which were labeled ----------------------------------
# The design variables are read back from the saved dictionary so the masks
# match the ones the labels were assigned to
percent_of_peak_list =  ch_label_dict['percent_of_peak_list']
morph_radius_list =  ch_label_dict['morph_radius_list']
ch_mask_list = detect.get_ch_mask_list_v0_5_1(
    pre_processed_map, percent_of_peak_list, morph_radius_list
)

plot_labeled_chs(
    he_date_str, pre_processed_map_data, smooth_mag_map_data,
    ch_label_dict, ch_mask_list, numbered_mode=False
)

Downselect candidates from a single mask to those with a label among True/Marginal/False

In [None]:
# Label ID (0, 0.5 or 1) and mask index to isolate
label_id = 1
mask_idx = 0

# 2D array with candidate regions labeled 0, 0.5, or 1 and a removed background
labeled_mask_on_disk = ch_label_dict['labeled_data_dict'][f'mask_{mask_idx}']

# 2D array with candidate regions of the specified label ID
# (1 inside matching regions, 0 everywhere else, including off-disk NaNs)
label_id_mask = np.where(labeled_mask_on_disk == label_id, 1, 0)

# Flip back before wrapping the mask with the pre-processed map's metadata
label_id_map = sunpy.map.Map(np.flipud(label_id_mask), pre_processed_map.meta)

fig = plt.figure(figsize=(4, 4))
ax = fig.add_subplot(111, projection=label_id_map)
label_id_map.plot(axes=ax)

Compute outcomes of down selected candidate regions

In [None]:
# Outcome key to sort candidates by (greatest first), or None to keep order
# sort_outcome = 'unipolarity'
sort_outcome = None

# Load the inputs needed to compute outcomes ------------------------------
# He I map data
he_fits_file = DATA_FITS_FORMAT.format(data_dir=HE_DIR, date_str=he_date_str)
he_map = prepare_data.get_nso_sunpy_map(he_fits_file)
he_map_data = np.flipud(he_map.data)

# Differentially rotated magnetogram map
reprojected_fits_file = (
    f'{ROTATED_MAG_SAVE_DIR}Mag{mag_date_str}_He{he_date_str}.fits'
)
reprojected_mag_map = sunpy.map.Map(reprojected_fits_file)

# Outcomes per region and one array per distinct region -------------------
outcome_by_ch_dict = detect.get_outcomes_by_ch(
    label_id_map, he_map_data, reprojected_mag_map, confidence_level=0
)
individual_ch_list = detect.get_map_data_by_ch(
    label_id_map.data, label_id_map.data
)

# Optionally reorder every outcome list and the region list, greatest first
if sort_outcome is not None:
    sorted_idxs = np.argsort(outcome_by_ch_dict[sort_outcome])[::-1]

    sorted_outcome_by_ch_dict = {
        key: [outcome_by_ch[i] for i in sorted_idxs]
        for key, outcome_by_ch in outcome_by_ch_dict.items()
    }
    outcome_by_ch_dict = sorted_outcome_by_ch_dict

    individual_ch_list = [individual_ch_list[i] for i in sorted_idxs]


# Print one tab-separated row per outcome, one column per region
outcome_print_formats = [
    ('grad_median', 'Grad: {:.4f}'),
    ('unipolarity', 'U: {:.4f}'),
    ('cm_foreshort', 'Foreshort: {:.2f}'),
    ('area', '{:.1e} Mm^2'),
    ('cm_lat', 'Lat: {:.1f} deg'),
    ('cm_lon', 'Lon: {:.1f} deg'),
    ('signed_flux', '{:.4e} Mx'),
]
for printed_key, print_format in outcome_print_formats:
    for outcome_value in outcome_by_ch_dict[printed_key]:
        print(print_format.format(outcome_value), end='\t')
    print()

In [None]:
# Position of the region to highlight within the outcome lists
ch_idx = 3

# Outcomes of the selected region, shown in the plot title
grad_median = outcome_by_ch_dict['grad_median'][ch_idx]
unipolarity = outcome_by_ch_dict['unipolarity'][ch_idx]
cm_foreshort = outcome_by_ch_dict['cm_foreshort'][ch_idx]

title = f'GM: {grad_median:.3f} | U: {unipolarity:.2f} | F: {cm_foreshort:.2f}'

# Replace NaNs with -100 before contouring at level 0
# (presumably so the contour traces the region outline -- confirm)
selected_ch_map_data = individual_ch_list[ch_idx]
selected_ch_map_data = np.where(
    np.isnan(selected_ch_map_data), -100,
    selected_ch_map_data
)
selected_ch_map = sunpy.map.Map(selected_ch_map_data, he_map.meta)

fig = plt.figure(figsize=(6, 6))
fig.suptitle(he_date_str)

# Plot all regions of the chosen label and outline the selected one in red
ax = fig.add_subplot(projection=label_id_map)
label_id_map.plot(axes=ax, title=title)
label_id_map.draw_grid(axes=ax)
for contour in selected_ch_map.contour(0):
    ax.plot_coord(contour, color='r')

#### Correct Saved Labels

In [None]:
# Replacement label for the region being corrected
new_label = 0
numbered_mode = True

# Comment to ID region number to correct/Uncomment to correct region label
# NOTE(review): region_idx is assigned in a cell that appears later in the
# notebook, so running top to bottom on a fresh kernel raises a NameError
# on the assignment below.
numbered_mode = False
ch_label_dict['label_list_dict'][f'mask_{mask_idx}'][region_idx] = new_label

# Only the label list is changed here; the stored labeled arrays are
# regenerated when save_ch_label_dict is called.
plot_labeled_chs(
    he_date_str, pre_processed_map_data, smooth_mag_map_data,
    ch_label_dict, ch_mask_list, numbered_mode
)

In [None]:
# Specify region number
region_num_on_colorbar = 6
region_idx = region_num_on_colorbar - 1

# Verify by checking original label
ch_label_dict['label_list_dict'][f'mask_{mask_idx}'][region_idx]

In [None]:
# Regenerate the labeled arrays from the corrected label lists and
# overwrite the saved dictionary for he_date_str
ch_label_dict = save_ch_label_dict(
    pre_processed_map_data, he_date_str, ch_mask_list, ch_label_dict
)

### All Dates with Outcomes

In [None]:
# Rows flagged as inspected. Requires the 'inspected?' column, which is
# added when outcomes are loaded from CSV; the table built directly in
# "Compute All Date Outcomes" does not have it.
labeled_cand_df[labeled_cand_df['inspected?']]

In [None]:
# Share of all labeled regions that carry the Marginal label (0.5).
# The denominator is the total number of regions; dividing by the
# non-marginal count would give a marginal-to-other ratio instead.
num_M = len(labeled_cand_df[labeled_cand_df['label_id'] == 0.5])
num_not_M = len(labeled_cand_df[labeled_cand_df['label_id'] != 0.5])
num_regions = num_M + num_not_M
# NaN instead of a ZeroDivisionError when the table is empty
percent_M = num_M/num_regions*100 if num_regions else float('nan')
f'Percent of Regions Labeled Marginal: {percent_M:.2f}%'

#### Visualize 

In [None]:
# Camera presets; keep exactly one uncommented. The trailing note names the
# pair of features each view is meant to show.
# camera_loc=dict(x=0.1, y=2.6, z=0.2) # Distance vs Grad Median
camera_loc=dict(x=0.1, y=-2.6, z=0.2) # Distance vs Smoothness

# camera_loc=dict(x=0.1, y=0, z=2.6) # Grad Median vs Unipolarity
# camera_loc=dict(x=0, y=-0.1, z=2.6) # Smoothness vs Unipolarity

# camera_loc=dict(x=2.6, y=-0.1, z=0.2) # Distance vs Unipolarity
# camera_loc=dict(x=1.4, y=2, z=0.3) # Decision

# Requires labeled_cand_df to contain the 'smooth_quantile' column
fig = plot_3d_feature_space(
    labeled_cand_df, camera_eye=camera_loc,
    gray_out_inspected=False
)
fig.show()

In [None]:
# Column of labeled_cand_df to histogram
outcome = 'smooth_quantile'

# Y-axis by area sum of regions in bin or by count
y_sum = 'area'
# y_sum = None

# Sort order of label IDs; controls which label's trace is drawn first
ascend = True

sorted_df = labeled_cand_df.sort_values(by='label_id', ascending=ascend)
# NOTE(review): color_list is also the name used by the autocorrelation
# plotting cells; running this cell rebinds it.
color_list = LABEL_COLOR_LIST.copy()

# Keep colors aligned with label IDs when the sort order is reversed
if not ascend:
    color_list.reverse()

fig = px.histogram(
    sorted_df, x=outcome, y=y_sum,
    color='label_id', marginal='box',
    hover_data=sorted_df.columns, barmode='overlay',
    color_discrete_sequence=color_list,
    nbins=20
)

fig.show()

## Classify

Leave candidates labeled as marginal out of training

### Varied Methods

In [None]:
# Fraction of candidates (in table order) used for training
train_fraction = 0.75
outcomes_for_features = ['unipolarity', 'smooth_quantile']

# Feature matrix with one column per outcome, and the label vector
X = np.column_stack(
    [classify_cand_df[outcome] for outcome in outcomes_for_features]
)
y = classify_cand_df['label_id'].to_numpy(copy=True)

# Ordered (unshuffled) split: leading rows train, trailing rows test
num_train_cands = int(train_fraction*num_cands)
X_train, X_test = X[:num_train_cands, :], X[num_train_cands:, :]
y_train, y_test = y[:num_train_cands], y[num_train_cands:]

In [None]:
# Candidate classifiers to compare on the two-feature training split
lda = LinearDiscriminantAnalysis()
qda = QuadraticDiscriminantAnalysis()
svc = svm.LinearSVC(C=0.05)
# NOTE(review): the variable name says C10 but the model uses C=50
# (the display name below also says C=50)
svc_C10 = svm.LinearSVC(C=50)

# Display names are matched to estimators by position
estimators = [lda, qda, svc, svc_C10]
estimator_names = ['LDA', 'QDA', 'SVM C=0.05', 'SVM C=50']

for estimator in estimators:
    estimator.fit(X_train, y_train)

In [None]:
# Level of the estimator response drawn as the decision boundary
decision_thresh = 0

# One panel per estimator
nrows = 2
ncols = 2
fig, ax_grid = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols*6, nrows*5))
axs = ax_grid.ravel()
# Class colors: label 0 -> red, label 1 -> green
cmap = colors.ListedColormap(['tab:red', 'tab:green'])

for estimator, estimator_name, ax in zip(estimators, estimator_names, axs):

    # Shaded response surface, then the boundary line at decision_thresh
    DecisionBoundaryDisplay.from_estimator(
        estimator, X, plot_method='contourf', ax=ax, cmap='RdYlGn',
        alpha=0.3,
    )
    DecisionBoundaryDisplay.from_estimator(
        estimator, X, plot_method='contour', ax=ax, alpha=1.0,
        levels=[decision_thresh],
    )
    # Training points are drawn faint, test points opaque
    for X_plot, y_plot, alpha in zip(
        [X_train, X_test], [y_train, y_test], [0.3, 1]):

        y_pred = estimator.predict(X_plot)
        X_right, y_right = X_plot[y_plot == y_pred], y_plot[y_plot == y_pred]
        X_wrong, y_wrong = X_plot[y_plot != y_pred], y_plot[y_plot != y_pred]

        # vmin/vmax pin label 0 to red and label 1 to green. Without them
        # the color scale is fit to each subset, so a subset containing a
        # single class is drawn entirely in the first color.
        ax.scatter(
            X_right[:, 0], X_right[:, 1], c=y_right, s=20,
            cmap=cmap, vmin=0, vmax=1, alpha=alpha
        )
        # Misclassified points are marked with an 'x'
        ax.scatter(
            X_wrong[:, 0], X_wrong[:, 1], c=y_wrong, s=30,
            cmap=cmap, vmin=0, vmax=1, marker='x', alpha=alpha
        )

    ax.set_title(estimator_name)
    ax.set_xlim([-0.1,1.1])
    ax.set_ylim([-0.1,1.1])
    ax.invert_yaxis()

# Axis labels only on the left column and bottom row
for i in range(nrows):
    ax_grid[i,0].set_ylabel('Smoothness Quantile')
for i in range(ncols):
    ax_grid[nrows-1,i].set_xlabel('Unipolarity')

### LDA

In [None]:
# Fresh, unfitted LDA estimator; rebinds the `lda` name fitted earlier
lda = LinearDiscriminantAnalysis()

Feature Importance & Sklearn Evaluation

In [None]:
# Feature set for the LDA fit.
# Alternative: every computed outcome plus the smoothness quantile. It is
# built as a new list because appending to detect.OUTCOME_KEY_LIST would
# modify the module's list in place, which other cells iterate over.
# outcomes_for_features = [*detect.OUTCOME_KEY_LIST, 'smooth_quantile']
outcomes_for_features = ['unipolarity', 'smooth_quantile', 'cm_foreshort']

# Feature matrix (one column per outcome) and label vector
X = np.array([classify_cand_df[outcome] for outcome in outcomes_for_features]).T
y = np.array(classify_cand_df['label_id'])

lda.fit(X, y)

In [None]:
# Bar chart of the absolute LDA coefficients, one bar per feature.
# NOTE(review): coefficient magnitudes are compared directly; confirm the
# features are on comparable scales before reading them as importance.
feature_names = np.array(outcomes_for_features)
importance = np.abs(lda.coef_).flatten()

fig = plt.figure(figsize=(9,5))
plt.bar(height=importance, x=feature_names)
plt.title('Feature importances via coefficients')
plt.xticks(rotation=45)
plt.show()

Sklearn Evaluation

In [None]:
outcomes_for_features = ['unipolarity', 'smooth_quantile', 'cm_foreshort']

# Feature matrix with one column per outcome, and the label vector
X = np.column_stack(
    [classify_cand_df[outcome] for outcome in outcomes_for_features]
)
y = classify_cand_df['label_id'].to_numpy(copy=True)

# Ordered split that reuses num_train_cands from the earlier split cell
X_train, X_test = X[:num_train_cands, :], X[num_train_cands:, :]
y_train, y_test = y[:num_train_cands], y[num_train_cands:]

# Class values treated as negative and positive by the scorers
neg_label = 0
pos_label = 1


def fpr_score(y, y_pred, neg_label, pos_label):
    """Return the false positive rate, FP / (TN + FP).

    The confusion matrix is built over [neg_label, pos_label], so its first
    row holds the counts for samples whose true class is neg_label.
    """
    counts = confusion_matrix(y, y_pred, labels=[neg_label, pos_label])
    true_neg, false_pos = counts[0]
    return false_pos / (true_neg + false_pos)

def fnr_score(y, y_pred, neg_label, pos_label):
    """Return the false negative rate, FN / (FN + TP).

    The confusion matrix is built over [neg_label, pos_label], so its second
    row holds the counts for samples whose true class is pos_label.
    """
    counts = confusion_matrix(y, y_pred, labels=[neg_label, pos_label])
    false_neg, true_pos = counts[1]
    return false_neg / (false_neg + true_pos)

tpr_score = recall_score  # TPR and recall are the same metric
# Scorer objects callable as scorer(estimator, X, y)
scoring = {
    'fpr': make_scorer(fpr_score, neg_label=neg_label, pos_label=pos_label),
    'tpr': make_scorer(tpr_score, pos_label=pos_label),
    'fnr': make_scorer(fnr_score, neg_label=neg_label, pos_label=pos_label),
}

# Baseline LDA fitted on the training split with its default decision rule
vanilla_model = LinearDiscriminantAnalysis()
vanilla_model.fit(X_train, y_train)

In [None]:
from sklearn.model_selection import TunedThresholdClassifierCV
# from sklearn.model_selection import FixedThresholdClassifier

# fixed_model = FixedThresholdClassifier(
#     estimator=vanilla_model, threshold=0.5
# ).fit(X_train, y_train)

# Wrap the baseline LDA in a cross-validated decision-threshold search.
# With `scoring` left commented out, the wrapper's default scoring is used.
tuned_model = TunedThresholdClassifierCV(
    estimator=vanilla_model,
    # scoring=scoring['fnr'],
    store_cv_results=True,  # necessary to inspect all results
)
tuned_model.fit(X_train, y_train)
# Report the decision threshold selected on the training split
print(f"{tuned_model.best_threshold_=:0.2f}")

In [None]:
scoring['fnr'](tuned_model, X_test, y_test)

In [None]:
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(7, 7))
color_list = ('tab:blue', 'tab:orange')

for idx, (model, color) in enumerate(zip([vanilla_model, tuned_model], color_list)):
    decision_threshold = getattr(model, 'best_threshold_', 0.5)
    if idx == 1:
        RocCurveDisplay.from_estimator(
            model, X_test, y_test, pos_label=pos_label, ax=ax, name='LDA',
            plot_chance_level=True,
        )
    ax.plot(
        scoring['fpr'](model, X_test, y_test),
        scoring['tpr'](model, X_test, y_test),
        marker='o',
        markersize=10,
        color=color,
        label=f'Cut-off point at probability of {decision_threshold:.2f}',
    )
ax.legend()

In [None]:
import matplotlib.pyplot as plt

from sklearn.metrics import DetCurveDisplay, RocCurveDisplay

# Side-by-side ROC and DET curves for the LDA, evaluated on the test split
fig, [ax_roc, ax_det] = plt.subplots(1, 2, figsize=(11, 5))

lda.fit(X_train, y_train)

RocCurveDisplay.from_estimator(
    lda, X_test, y_test, ax=ax_roc, name='LDA',
    plot_chance_level=True
)
# Read the threshold from the estimator plotted in this cell, not from `model`
# (a loop variable of another cell that need not exist on a fresh kernel).
# Falls back to 0.5 when the estimator exposes no `best_threshold_`.
decision_threshold = getattr(lda, 'best_threshold_', 0.5)
# ax_roc.plot(
#     scoring['fpr'](lda, X_test, y_test),
#     scoring['tpr'](lda, X_test, y_test),
#     marker='o',
#     markersize=10,
#     # color=color,
#     label=f'Cut-Off Probability of {decision_threshold:.2f}',
# )
ax_roc.legend()

DetCurveDisplay.from_estimator(lda, X_test, y_test, ax=ax_det, name='LDA',)
# ax_det.plot(
#     scoring['fpr'](lda, X_test, y_test),
#     scoring['fnr'](lda, X_test, y_test),
#     marker='o',
#     markersize=10,
#     # color=color,
#     label=f'Cut-Off Probability of {decision_threshold:.2f}',
# )
ax_det.legend()

ax_roc.set_title('Receiver Operating Characteristic (ROC) curves')
ax_det.set_title('Detection Error Tradeoff (DET) curves')

ax_roc.grid(linestyle='--')
ax_det.grid(linestyle='--')

# Both axes already carry their own legend, so no figure-level plt.legend()
plt.show()

Training Dataset Size

In [None]:
# Fraction of candidates, counted from the start of the table, used to train
train_fraction = 0.479 # 2012 only
# train_fraction = 0.75
# int() truncates, so the training count is rounded down
num_train_cands = int(train_fraction*num_cands)

# Display the rows that fall in the training split
classify_cand_df.iloc[:num_train_cands]

In [None]:
# outcomes_for_features = ['unipolarity', 'smooth_quantile']
# ylabel = 'Smoothness Quantile'
outcomes_for_features = ['unipolarity', 'grad_median']
ylabel = 'Gradient Median'

# outcomes_for_features = ['unipolarity', 'cm_foreshort']
# ylabel = 'Near Disk Center'

X = np.array([classify_cand_df[outcome] for outcome in outcomes_for_features]).T
y = np.array(classify_cand_df['label_id'])

In [None]:
# Contour level passed to the boundary plot below
decision_thresh = 0

nrows = 2
ncols = 2
fig, ax_grid = plt.subplots(nrows=nrows, ncols=ncols, figsize=(ncols*6, nrows*5))
axs = ax_grid.ravel()
# Two-colour map applied to the label ids passed as `c=` to the scatters
cmap = colors.ListedColormap(['tab:red', 'tab:green'])

# One panel per training fraction.
# NOTE: this loop rebinds the notebook-level `train_fraction`,
# `num_train_cands`, `X_train`, `y_train`, `X_test`, `y_test` and refits the
# shared `lda`; after the cell they hold the values of the last iteration.
for train_fraction, ax in zip(np.arange(0.15, 0.8, 0.2), axs):
    num_train_cands = int(train_fraction*num_cands)

    X_train = X[:num_train_cands, :]
    y_train = y[:num_train_cands]
    X_test = X[num_train_cands:, :]
    y_test = y[num_train_cands:]

    lda.fit(X_train, y_train)

    # Shaded response surface, then a single contour line at decision_thresh
    DecisionBoundaryDisplay.from_estimator(
        lda, X, plot_method='contourf', ax=ax, cmap='RdYlGn',
        alpha=0.3,
    )
    DecisionBoundaryDisplay.from_estimator(
        lda, X, plot_method='contour', ax=ax, alpha=1.0,
        levels=[decision_thresh],
    )
    # Training points are drawn faded (alpha 0.3), test points opaque
    for X_plot, y_plot, alpha in zip(
        [X_train, X_test], [y_train, y_test], [0.3, 1]):

        y_pred = lda.predict(X_plot)
        X_right, y_right = X_plot[y_plot == y_pred], y_plot[y_plot == y_pred]
        X_wrong, y_wrong = X_plot[y_plot != y_pred], y_plot[y_plot != y_pred]

        # Correct predictions use the default marker, wrong ones an 'x'
        ax.scatter(
            X_right[:, 0], X_right[:, 1], c=y_right, s=20,
            cmap=cmap, alpha=alpha
        )
        ax.scatter(
            X_wrong[:, 0], X_wrong[:, 1], c=y_wrong, s=30,
            cmap=cmap, marker='x', alpha=alpha
        )

    ax.set_title(f'{int(train_fraction*100)}% of Data for Training')
    ax.set_xlim([-0.1,1.1])
    # ax.set_ylim([-0.1,1.1])
    # NOTE(review): this y-range presumably suits the 'grad_median' feature;
    # the commented limits above look intended for a 0-1 feature - confirm.
    ax.set_ylim([0.8,6])
    ax.invert_yaxis()

# Label only the outer axes of the grid
for i in range(nrows):
    ax_grid[i,0].set_ylabel(ylabel)

for i in range(ncols):
    ax_grid[nrows-1,i].set_xlabel('Unipolarity')

#### Predict

In [None]:
train_fraction = 0.479 # 2012 only
# train_fraction = 0.9
# outcomes_for_features = ['unipolarity', 'smooth_quantile', 'cm_foreshort']
outcomes_for_features = ['unipolarity', 'grad_median', 'cm_foreshort']
# outcomes_for_features = ['unipolarity', 'grad_median']

X = np.array([classify_cand_df[outcome] for outcome in outcomes_for_features]).T
y = np.array(classify_cand_df['label_id'])

num_train_cands = int(train_fraction*num_cands)
X_train = X[:num_train_cands, :]
y_train = y[:num_train_cands]
X_test = X[num_train_cands:, :]
y_test = y[num_train_cands:]

lda = LinearDiscriminantAnalysis()
lda.fit(X_train, y_train)

y_pred = lda.predict(X)

Save LDA object for future predictions

In [None]:
# Output file for the fitted model; a bare file name, so it is written
# relative to the current working directory
LDA_FILE_NAME = 'v1_1_LDA_model.pkl'

# NOTE(review): `pickle` is not imported in this cell - confirm it is imported
# earlier in the notebook. Only unpickle this file from a trusted source.
with open(LDA_FILE_NAME, 'wb') as lda_file:
    pickle.dump(lda, lda_file)

Inspect predictions

In [None]:
# Predicted probability near 1 for perfectly unipolar and smooth CH,
# either on disk center or limb
# NOTE(review): the three inputs presumably follow the `outcomes_for_features`
# order of the Predict cell (unipolarity, grad_median, cm_foreshort) - confirm.
# `[:,1][0]` selects the second probability column for the single input row.
perfect_ch_on_limb = lda.predict_proba(np.array([[1, 0, 0]]))[:,1][0]
perfect_ch_on_disk_center = lda.predict_proba(np.array([[1, 0, 1]]))[:,1][0]
print(f'Limb {perfect_ch_on_limb:.10f}, Center {perfect_ch_on_disk_center:.10f}')

In [None]:
# Predicts probability 1 beyond U, GM, F bounds
extreme_val = 1e0
# extreme_val = 1e6
impossible_ch = lda.predict_proba(
    np.array([[extreme_val, -extreme_val, -extreme_val]])
)[:,1][0]
print(f'{impossible_ch - 1:.4e}')

Histogram

In [None]:
X_all_label_types = np.array(
    [labeled_cand_df[outcome] for outcome in outcomes_for_features]
).T
v1_1_pred_probabilities = lda.predict_proba(X_all_label_types)[:,1]
v1_1_log_probabilities = lda.predict_log_proba(X_all_label_types)[:,1]
labeled_cand_df['v1_1_pred_probability'] = v1_1_pred_probabilities
labeled_cand_df['v1_1_log_probability'] = v1_1_log_probabilities

In [None]:
# outcome = 'v1_1_pred_probability'
# outcome = 'v1_1_log_probability'
# outcome = 'unipolarity'
outcome = 'smooth_quantile'

# Y-axis by area sum of regions in bin or by count
y_sum = 'area'
# y_sum = None

ascend = True

sorted_df = labeled_cand_df.sort_values(by='label_id', ascending=ascend)
color_list = LABEL_COLOR_LIST.copy()

if not ascend:
    color_list.reverse()

fig = px.histogram(
    sorted_df, x=outcome, y=y_sum,
    color='label_id', marginal='box',
    hover_data=sorted_df.columns, barmode='overlay',
    color_discrete_sequence=color_list,
    nbins=20
)

fig.show()

Extract Plane

In [None]:
lda.coef_

In [None]:
lda.intercept_

In [None]:
from skspatial.objects import Plane

# LDA decision boundary: coeffs . x + intercept = 0
coeffs = lda.coef_.flatten()
intercept = lda.intercept_[0]

# Anchor the plane at the boundary point closest to the origin,
# -intercept * n / (n . n), which satisfies the boundary equation exactly.
# Dividing the intercept by each coefficient instead packs the three axis
# intercepts into one vector, and that vector does not lie on the plane.
plane_point = -intercept * coeffs / np.dot(coeffs, coeffs)

lda_plane = Plane(point=plane_point, normal=coeffs)
lda_plane.cartesian()

In [None]:
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
lda_plane.plot_3d(ax, alpha=0.5)
ax.set_xlim([0,1])
ax.set_ylim([1,6])
ax.set_zlim([0,1])

In [None]:
# Feature vector of the first candidate, used to sanity-check the plane maths
new_cand_features = X[0]
# Unpack into dedicated names: unpacking into `x, y, z` would overwrite the
# notebook-level label array `y` that the ROC cells further down still read.
feat_x, feat_y, feat_z = new_cand_features

In [None]:
np.dot(new_cand_features, lda.coef_.flatten())

In [None]:
# Manual dot product of the candidate with the LDA coefficients.
# Dedicated names are used so the notebook-level label array `y` is not
# overwritten by the unpacking.
feat_x, feat_y, feat_z = new_cand_features
a, b, c = lda.coef_.flatten()
a*feat_x + b*feat_y + c*feat_z

In [None]:
def shortest_distance(new_cand_features, lda, a, b, c, d):
    """Print and return the perpendicular distance from a candidate to a plane.

    The plane is ``n . x + d = 0`` where ``n`` is the flattened ``lda.coef_``.

    Args:
        new_cand_features: Sequence of feature values, one per coefficient.
        lda: Object exposing ``coef_``; its flattened values form the normal.
        a, b, c: Kept for backward compatibility. As before, they are ignored
            in favour of the coefficients read from ``lda``.
        d: Plane offset (scalar or single-element array).

    Returns:
        The distance ``|n . x + d| / ||n||`` as a float.
    """
    normal = np.asarray(lda.coef_, dtype=float).ravel()
    point = np.asarray(new_cand_features, dtype=float)

    # Keep the offset `d` intact instead of reusing its name for the numerator
    numerator = np.abs(np.dot(normal, point) + d)
    distance = float(np.squeeze(numerator / np.linalg.norm(normal)))

    print("Perpendicular distance is", distance)
    return distance

### ROC Curve

#### v1.0

Predict

In [None]:
# No threshold for confidence
v1_0_confidence_series = classify_cand_df['unipolarity'].copy()

# # Threshold for confidence
# unipolarity_threshold = 0.5
# v1_0_confidence_series = (
#     (classify_cand_df['unipolarity'] - unipolarity_threshold)
#     /(1 - unipolarity_threshold)
# )
# v1_0_confidence_series[v1_0_confidence_series < 0] = 0

# Descriptive statistics of STRIDE v1.0 detected regions at any confidence level
classify_cand_df['unipolarity'][v1_0_confidence_series > 0].describe()

Inspect properties of T/N P/F categories with Pandas Series

In [None]:
# Unipolarity above which STRIDE v1.0 counts a candidate as a detected CH.
# Defined here because the only nearby definition, in the Predict cell, is
# commented out; 0.5 is the value used there.
unipolarity_threshold = 0.5

v1_0_ch_detected_series = v1_0_confidence_series > unipolarity_threshold
# True where the v1.0 decision agrees with the label (label_id > 0 means CH)
v1_0_true_detect_series = (
    v1_0_ch_detected_series == (classify_cand_df['label_id'] > 0)
)

# Confusion-matrix categories as boolean Series over the candidates
true_pos_series = (v1_0_true_detect_series & v1_0_ch_detected_series)
true_neg_series = (v1_0_true_detect_series & ~v1_0_ch_detected_series)
false_pos_series = (~v1_0_true_detect_series & v1_0_ch_detected_series)
false_neg_series = (~v1_0_true_detect_series & ~v1_0_ch_detected_series)

# true_pos_rate = true_pos_series.sum()/(true_pos_series.sum() + false_neg_series.sum())
# false_pos_rate = false_pos_series.sum()/(false_pos_series.sum() + true_neg_series.sum())

In [None]:
# Descriptive statistics of correctly STRIDE v1.0 detected regions
classify_cand_df['unipolarity'][v1_0_true_detect_series].describe()

In [None]:
filt = true_pos_series
# filt = true_neg_series
# filt = false_pos_series
# filt = false_neg_series
classify_cand_df['unipolarity'][filt].describe()

#### Plot Curve

Run the LDA and v1.0 Predict sections before this one. The plot shows:
- COSPAR dataset scatter
- STRIDE U in 0-0.5, STRIDE U in 0.5-1, STRIDE v1.1 lines

In [None]:
# Thresholds at which individual ROC points are marked and annotated
scatter_probabilities = np.arange(0,1.05,0.1)

# Labels are read straight from the DataFrame rather than from the
# notebook-level `y`, which other cells rebind (for example by unpacking a
# feature vector into `x, y, z`).
v1_0_labels = np.array(classify_cand_df['label_id'])


def _v1_0_roc_rates(confidence_thresholds):
    """Return the v1.0 ROC rates at each threshold, stacked into one array."""
    return np.array(
        [get_roc_rates(v1_0_confidence_series, v1_0_labels, confidence_thresh)
         for confidence_thresh in confidence_thresholds]
    )


# Unipolarity thresholds below and above 0.5, sampled densely for the curves
v1_0_sub_thresh_roc_rates = _v1_0_roc_rates(np.linspace(0,0.5,500))
v1_0_thresh_roc_rates = _v1_0_roc_rates(np.linspace(0.5,1,500))
v1_0_scatter_roc_rates = _v1_0_roc_rates(scatter_probabilities)
# true_pos_rates = roc_rates_array[:,0]
# false_pos_rates = roc_rates_array[:,1]

In [None]:
v1_1_probabilities = np.linspace(0,1,500)

v1_1_test_probabilities = lda.predict_proba(X_test)[:,1]
v1_1_test_roc_rates = np.array(
    [get_roc_rates(v1_1_test_probabilities, y_test, probability_thresh)
     for probability_thresh in v1_1_probabilities]
)
v1_1_scatter_roc_rates = np.array(
    [get_roc_rates(v1_1_test_probabilities, y_test, probability_thresh)
     for probability_thresh in scatter_probabilities]
)

v1_1_train_probabilities = lda.predict_proba(X_train)[:,1]
v1_1_train_roc_rates = np.array(
    [get_roc_rates(v1_1_train_probabilities, y_train, probability_thresh)
     for probability_thresh in v1_1_probabilities]
)

In [None]:
# scatter_roc_rates = v1_0_scatter_roc_rates
scatter_roc_rates = v1_1_scatter_roc_rates

In [None]:
plt.figure(figsize=(6.3,6.3), dpi=150)
# plt.figure(figsize=(7,7), dpi=150)

plt.title(
    f'2012 Train: {y_train.size} Candidates | 2015 Test: {y_test.size} Candidates'
)
# plt.title(f'{train_fraction} Training Fraction | {y_train.size}-{y_test.size} Train-Test')
# plt.title('Using Gradient Median')


# plt.plot([0,1], [0,1], linestyle='--', color='k', label='Random Classifier')

# v1.1 -------------------------------------------------------------------------
plt.plot(
    v1_1_train_roc_rates[:,1], v1_1_train_roc_rates[:,0], 
    label=r'STRIDE v1.1 Training Data', color='peru', 
    linewidth=1
)
plt.plot(
    v1_1_test_roc_rates[:,1], v1_1_test_roc_rates[:,0], 
    label=r'STRIDE v1.1 Test Data', color='peru', 
    linewidth=2
)
# # v1.0 ---------------------------------------------------------------------------
# plt.plot(
#     v1_0_thresh_roc_rates[:,1], v1_0_thresh_roc_rates[:,0], 
#     label=r'STRIDE v1.0 $U \in [0.5,1]$', color='peru',
#     linewidth=2, linestyle='--'
# )
# plt.plot(
#     v1_0_sub_thresh_roc_rates[:,1], v1_0_sub_thresh_roc_rates[:,0], 
#     label=r'STRIDE v1.0 $U \in [0,0.5)$', color='peru',
#     linewidth=2, linestyle=':'
# )

# Scatter ------------------------------------------------------------------------
plt.scatter(
    scatter_roc_rates[:,1], scatter_roc_rates[:,0],
    color='peru', marker='s'
)
for prob, xi, yi in zip(
        scatter_probabilities, scatter_roc_rates[:,1], scatter_roc_rates[:,0]
    ):
    
    plt.annotate(
        f'{prob:.1f}', xy=(xi, yi), xycoords='data', 
        xytext=(3, -12),    # Corner zoom
        # xytext=(-20, 8),  # Unzoomed
        textcoords='offset points'
    )

# Corner COSPAR ------------------------------------------------------------------
# Hard-coded (false positive rate, true positive rate) points for other
# detection methods, plotted on the same axes as the STRIDE curves.
# NOTE(review): the source of these values is not shown here - TODO cite it.
# Unlabelled '^' markers are the second variant of the method labelled just
# before them; their explicit colours presumably match that entry - confirm.
plt.scatter(0.21,0.77,label='35% EUV Threshold', color='k', marker='d')
plt.scatter(0,0.78,label='CHIMERA')
plt.scatter(0.04,0.84,label='SPOCA (o:Base | ^:HEK)')
plt.scatter(0.25,0.87,marker='^',color='tab:orange')    # SPOCA-HEK
plt.scatter(0.07,0.83,label='ACWE  (o:03     | ^:  04)')
plt.scatter(0.29,0.86,marker='^',color='tab:green')     # ACWE-04
plt.scatter(0.13,0.85,label='CRONNOS')
plt.scatter(0.21,0.79,label='CHARM')
plt.scatter(0.35,0.82,label='CNN193')
plt.scatter(0.38,0.98,label='WWWBCS')

# Corner Zoom ---------------------------------------------------------------------
plt.xlim([-0.01,0.45])
plt.ylim([0.55,1.01])

# # Unzoomed ------------------------------------------------------------------------
# plt.xlim([-0.05,1.05])
# plt.ylim([-0.05,1.05])

# Unzoomed with scatter------------------------------------------------------------------------
# plt.xlim([-0.08,1.08])
# plt.ylim([-0.08,1.08])

# # All COSPAR ---------------------------------------------------------------------
# plt.scatter(0.49,0.99,label='CHMAP')
# plt.scatter(0.54,0.86,label='CHIPS')
# plt.scatter(0.62,0.87,label='SYNCH')
# plt.scatter(0.85,0.92,label='CHORTLE', marker='s')


plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc='lower right', reverse=True)

# Optimization

In [None]:
# Runtime estimate: 4s/map * 30 maps/step = 120s/step

In [None]:
percent_of_peak = 90
morph_radius = 10

he_map = prepare_data.get_nso_sunpy_map(HE_DIR + HE_DATE_LIST[0] + '.fts')
he = detect.pre_process_v0_4(he_map.data)
ch_mask_data = detect.get_ch_mask(
    he, percent_of_peak, morph_radius
)
plt.imshow(ch_mask_data)

Eval Functions

In [None]:
import functools

class Optimizer:

    @functools.lru_cache(maxsize=None)
    def get_area_list(self, design_vars):
        """Retrieve detected area percentages for the specified dates.

        One value is computed per entry of ``HE_DATE_LIST``. Results are
        memoized per ``(self, design_vars)`` pair, so ``design_vars`` must be
        hashable (e.g. a tuple of ``(percent_of_peak, morph_radius)``).
        """
        percent_of_peak, morph_radius = design_vars

        def area_percent_for(he_date_str):
            # Load and pre-process the He I observation of this date
            he_map = prepare_data.get_nso_sunpy_map(
                HE_DIR + he_date_str + '.fts'
            )
            he = detect.pre_process_v0_4(he_map.data)

            # Detect the CH mask and wrap it in a map sharing the He I metadata
            ch_mask_data = detect.get_ch_mask(he, percent_of_peak, morph_radius)
            ch_mask_map = sunpy.map.Map(np.flipud(ch_mask_data), he_map.meta)

            return detect.get_open_area(ch_mask_map, 0)[0]

        return [area_percent_for(he_date_str) for he_date_str in HE_DATE_LIST]

# Initiate optimizer object with cached area percent list
OPTIMIZER = Optimizer()

In [None]:
# Design point used when exercising the evaluation functions by hand
percent_of_peak = 90
morph_radius = 10
design_vars = (percent_of_peak, morph_radius)

MORPH_RADIUS = 10
# Allowed band for the mean detected area percentage
MIN_AREA_PERCENT = 1
MAX_AREA_PERCENT = 4


def obj_func():
    """Objective function for persistence optimization (placeholder).

    Meant to penalize the normalized MAD of the detected area. It is not
    implemented yet: it takes no arguments and returns None.
    """
    return None


def constraints(design_vars):
    """Inequality constraints for persistence optimization.

    Returns the pair ``(mean - MIN_AREA_PERCENT, MAX_AREA_PERCENT - mean)``
    for the mean area percentage. The area list is currently a hard-coded
    placeholder, so ``design_vars`` does not influence the result.
    """
    # Placeholder standing in for OPTIMIZER.get_area_list(design_vars)
    placeholder_area_percents = [6, 5, 4, 9]
    mean_area = np.mean(placeholder_area_percents)

    lower_slack = mean_area - MIN_AREA_PERCENT
    upper_slack = MAX_AREA_PERCENT - mean_area
    return (lower_slack, upper_slack)

Test Functions

In [None]:
# The optimizer instance is bound to the upper-case name OPTIMIZER
OPTIMIZER.get_area_list(design_vars)

In [None]:
# get_area_list is a method of the Optimizer instance, not a free function;
# a repeated call with the same design_vars is served from the method's cache
OPTIMIZER.get_area_list(design_vars)

In [None]:
obj_func(90, 10)

In [None]:
constraints(design_vars)

## Visual

## Minimize

### SLSQP

Scipy options

In [None]:
optimize.show_options(solver='minimize', method='SLSQP')

In [None]:
optimize.rosen([0.5, 0, 10])

Execute Minimize

In [None]:
design_vars = (percent_of_peak, morph_radius)
# Inequality constraint spec; only referenced by the commented-out
# constrained call below
ineq_cons = ({
    'type': 'ineq',
    'fun' : constraints,
    # 'args': (design_vars,)
})
# Unconstrained
# NOTE(review): `obj_func` as defined in the Eval Functions cell takes no
# arguments and returns None, so this call presumably cannot evaluate the
# objective until it is implemented - confirm.
res = optimize.minimize(
    obj_func, percent_of_peak, method='SLSQP',
    options={'disp': True, 'finite_diff_rel_step': [0.1]},
)
# Constrained
# res = optimize.minimize(
#     obj_func, design_vars, args=(optimizer,), method='SLSQP', constraints=ineq_cons,
#     options={'ftol': 1e-9, 'disp': True}
# )

### BFGS

In [None]:
percent_of_peak = 100
design_vars = (percent_of_peak, morph_radius)
res = optimize.minimize(
    obj_func, percent_of_peak, method='BFGS',
    options={'disp': True, 'xrtol': 0.01, 'eps': 0.1},
)

In [None]:
res1 = res
res1

# NSO CH Estimates

## Input Level 2 Products: **Stage/Level2/**

10830i equivalent width: **svsm_e3100_S2_yyyymmdd_hhmm.fts.gz**

10830i intensity: **svsm_i3000_S2_yyyymmdd_hhmm.fts.gz**

6302l magnetogram: **svsm_m1100_S2_yyyymmdd_hhmm.fts.gz**

6302l intensity: **svsm_i1000_S2_yyyymmdd_hhmm.fts.gz**

In [None]:
raw_he_eqw_fits_path = NSO_INPUT_DIR + 'svsm_e3100_S2_20140626_1419.fts'

raw_he_intensity_fits_path = NSO_INPUT_DIR + 'svsm_i3000_S2_20140626_1419.fts'

raw_magnetogram_fits_path = NSO_INPUT_DIR + 'svsm_m1100_S2_20140626_1444.fts'

raw_mag_intensity_fits_path = NSO_INPUT_DIR + 'svsm_i1000_S2_20140626_1444.fts'

im_list = plot_detection.plot_raw_fits_content(
    raw_he_eqw_fits_path, header_list=['IMTYPE'],
    # print_header=True
)
plot_detection.plot_raw_fits_content(
    raw_he_intensity_fits_path, header_list=['IMTYPE'],
    # print_header=True
)
plot_detection.plot_raw_fits_content(
    raw_magnetogram_fits_path, header_list=['IMTYPE'],
    print_header=True
)
plot_detection.plot_raw_fits_content(
    raw_mag_intensity_fits_path, header_list=['IMTYPE'],
    # print_header=True
)

## Output Level 3 Products: **Stage/Level3/**
He I EqW Maps List: **hDataList.txt**
- 'Text list of available 10830 He EqW low-res sine-latlon heliographic maps'
- Produced by **mk_datalist**
  - Args: heliographic He I EqW maps

Magnetogram Maps List: **mDataList.txt**
- 'Text list of available 6301.5 low-res sine-latlon heliographic maps'
- Produced by **mk_datalist**
  - Args: heliographic magnetograms

CH Maps List: **oDataList.txt**
- 'Text list of available 10830 solar wind source sine-latlon heliographic maps'
- Produced by **mk_datalist**
  - Args: heliographic CH maps

Carrington Rotation CH Images
- '10830 solar wind sine-latlon daily synoptic map plots'
- High-Res: **chsh.jpg**
- Med-Res: **chsm.jpg**
- Low-Res: **chsl.jpg**
- Produced by **plot_lev3_map**
  - Args: 1 carrington rotation CH map, CH maps list

Unincluded He I Observation Image?: **svsm_o10mr_S3_{yyyymmdd}_{hhmm}.jpg**
- Produced by **mk_obsimg**
  - Args: 1 sky frame disk CH map, CH maps list

#### Single Maps: **../single/{yyyy}/**

Sine-LatLon He I EQW: **svsm_e31hr_B3_{yyyymmdd}_{hhmm}.fts.gz**
- '10830 He EqW high-res sine-latlon heliographic map'
- Produced by **mk_synimg**
  - Args: L2 He I EQW file, L2 He I continuum intensity file

LatLon He I EQW: **svsm_e31lr_L3_{yyyymmdd}_{hhmm}.fts.gz**
- '10830 He EqW low-res latlon heliographic map'
- Produced by **mk_synimg**
  - Args: L2 He I EQW file, L2 He I continuum intensity file

Sine-LatLon Magnetogram: **svsm_m11hr_B3_{yyyymmdd}_{hhmm}.fts.gz**
- '6301.5 high-res sine-latlon heliographic map'
- Produced by **mk_synimg**
  - Args: L2 magnetogram file, L2 6302l magnetogram intensity file

LatLon Magnetogram: **svsm_m11lr_L3_{yyyymmdd}_{hhmm}.fts.gz**
- '6301.5 low-res latlon heliographic map'
- Produced by **mk_synimg**
  - Args: L2 magnetogram file, L2 6302l magnetogram intensity file

Sine-LatLon CH Map: **svsm_o1083_B3_{yyyymmdd}_{hhmm}.fts.gz**
- '10830 solar wind source sine-latlon heliographic map'
- Produced by **mk_holeimg**
  - Args: processed He I EqW, He I EqW maps list, processed magnetogram, magnetogram maps list

Sky Frame Disk CH Map: **svsmgo1083_B3_{yyyymmdd}_{hhmm}.fts.gz**
- '10830 solar wind source sky frame heliocentric map'
- Produced by **mk_dchimg**
  - Args: 1 disk CH map, disk CH maps list

In [None]:
# he_eqw_fits_path = NSO_SINGLE_DIR + 'svsm_e31hr_B3_20140606_1746.fts'
he_eqw_fits_path = NSO_SINGLE_DIR + 'svsm_e31hr_B3_20140626_1419.fts'
# he_eqw_fits_path = NSO_SINGLE_DIR + 'svsm_e31lr_L3_20140626_1419.fts'

# mag_fits_path = NSO_SINGLE_DIR + 'svsm_m11hr_B3_20140606_1605.fts'
mag_fits_path = NSO_SINGLE_DIR + 'svsm_m11hr_B3_20140626_1444.fts'

# E: Empty file
# E ch_fits_map = NSO_SINGLE_DIR + 'svsm_o1083_B3_20140601_1836.fts'
# E ch_fits_map = NSO_SINGLE_DIR + 'svsm_o1083_B3_20140603_1704.fts'
# E ch_fits_map = NSO_SINGLE_DIR + 'svsm_o1083_B3_20140606_1755.fts'
# E ch_fits_map = NSO_SINGLE_DIR + 'svsm_o1083_B3_20140612_1438.fts'
# E ch_fits_map = NSO_SINGLE_DIR + 'svsm_o1083_B3_20140620_1711.fts'
ch_fits_map = NSO_SINGLE_DIR + 'svsm_o1083_B3_20140626_1428.fts'

# E sky_ch_fits_path = NSO_SINGLE_DIR + 'svsmgo1083_B3_20140606_1755.fts'
sky_ch_fits_path = NSO_SINGLE_DIR + 'svsmgo1083_B3_20140626_1428.fts'

plot_detection.plot_raw_fits_content(
    he_eqw_fits_path, header_list=['IMTYPE', 'COMMENT2'],
    cmaps=[plt.cm.gray, plt.cm.gray],
    # print_header=True
)
plot_detection.plot_raw_fits_content(
    mag_fits_path, header_list=['IMTYPE', 'COMMENT1', 'COMMENT2'],
    cmaps=[plt.cm.gray, plt.cm.gray, plt.cm.gray],
    # print_header=True
)
plot_detection.plot_raw_fits_content(
    ch_fits_map, header_list=['IMGTYP01', 'IMGTYP02', 'IMGTYP03'],
    print_header=True
)
im_list = plot_detection.plot_raw_fits_content(
    sky_ch_fits_path, header_list=['COMMENT2'],
    # print_header=True
)

##### NSO Processed EQW

In [None]:
# Keep the path in its own name: binding it to `fits` would shadow the
# `astropy.io.fits` module imported at the top of the notebook, which
# `rename_all_gong` relies on for `fits.open`.
nso_eqw_fits_path = '~/Desktop/out_solarstrm_data/' + 'svsm_e31lr_L3_20140626_1419.fts'
im_list = plot_detection.plot_raw_fits_content(
    nso_eqw_fits_path, header_list=['IMTYPE', 'COMMENT2'],
    cmaps=[plt.cm.gray, plt.cm.gray, plt.cm.gray],
    print_header=True
)

In [None]:
date_str = '2014_06_26__00_00'

raw_nso_eqw = NSO_EQW_DICT[date_str]
nso_eqw_nan = detect.pre_process_eqw_v0_1(raw_nso_eqw)[2]

titles = ['EQW', 'EQW NaN']
plot_detection.plot_hists(
    [raw_nso_eqw, nso_eqw_nan], titles, semilogy=True
)

lower_bounds = [-1600,  0,  0]
upper_bounds = [0,      250, 500]
plot_detection.plot_thresholds(
    nso_eqw_nan, bounds=[lower_bounds, upper_bounds], 
    bounds_as_percent=False, threshold_type='band'
)
plot_detection.plot_thresholds(
    nso_eqw_nan, bounds=[75, 85, 100], bounds_as_percent=True
)

### Merged Maps: **../merged/carr-daily/**
Carrington Rotation CH Map: **svsm_o31hr_B3_cr{RRRR}_{DDD}.fts.gz**
- 'Solar wind source high-res sine-latlon daily synoptic map'
- Produced by **create_crmap**
  - Args: 27 most recent disk CH maps, disk CH maps list

In [None]:
synoptic_ch_fits_path = NSO_MERGED_DIR + 'svsm_o31hr_B3_cr2152_275.fts'

plot_detection.plot_raw_fits_content(
    synoptic_ch_fits_path,
    # header_list=['DATE', 'CARR01', 'IMTYPE'],
    header_list=['IMGTYP01', 'IMGTYP02', 'IMGTYP03', 'IMGTYP04'],
    # print_header=True
)

## Algorithm on Pre-Processed EQW

In [None]:
date_str = '2014_06_26__00_00'

raw_nso_eqw = NSO_EQW_DICT[date_str]
nso_eqw_nan = detect.pre_process_eqw_v0_1(raw_nso_eqw)[2]

percent_of_peak_list = [80,85,90]
radius_list = [6]

ensemble_map, holes_mask_list, confidence_list = detect.get_ensemble_v0_3(
    nso_eqw_nan, percent_of_peak_list, radius_list)

plot_detection.plot_ensemble(
    nso_eqw_nan, ensemble_map, confidence_list, holes_mask_list
)

## NSO Carrington Map Comparison

In [None]:
# Does not work
percent_of_peak_list = [80,90,100]
radius_list = [11,13,15]

# Compare the ensemble detection with EUV for each He I date, newest first
for he_date_str in list(reversed(HE_DATE_LIST)):
    # Index with the loop variable; the stale notebook-level `date_str` would
    # load the same observation on every iteration
    raw_he = HE_FITS_DICT[he_date_str][0]
    he = detect.pre_process_v0_1(raw_he)[0]

    ensemble_map = detect.get_ensemble(
        he, percent_of_peak_list, radius_list
    )[0]

    euv = EUV_DICT[he_date_str]

    plot_ensemble_comparison(he, he_date_str, ensemble_map, euv)

In [None]:
def plot_ch_map(date_str_list, cr_str, ch_map_dict):
    """Plot an NSO detected CH Carrington map with the selected dates marked.

    Args:
        date_str_list: Date strings in ``DICT_DATE_STR_FORMAT``.
        cr_str: Key of the map in ``ch_map_dict``; underscore-separated
            numbers whose first and last values bound the plotted span.
        ch_map_dict: Mapping from ``cr_str`` keys to 2D map arrays.
    """
    # Carrington rotation number of every selected date
    parsed_datetimes = [
        datetime.strptime(date_str, DICT_DATE_STR_FORMAT)
        for date_str in date_str_list
    ]
    selected_cr_list = [
        carrington_rotation_number(parsed) for parsed in parsed_datetimes
    ]

    # Rotation span covered by the map, parsed from its key
    cr_bounds = [float(bound_str) for bound_str in cr_str.split('_')]
    cr_start = cr_bounds[0]
    cr_range = cr_bounds[-1] - cr_start

    # Fractional position of each date within the span
    cr_percent_list = [
        (selected_cr - cr_start)/cr_range
        for selected_cr in selected_cr_list
    ]

    ch_map = ch_map_dict[cr_str]
    rows, cols = ch_map.shape

    # Pixel column of each date, counted back from the right edge of the map
    selected_col_list = [
        cols - cr_percent*cols
        for cr_percent in cr_percent_list
    ]

    print('Selected Date Columns:')
    for date_str, selected_col in zip(date_str_list, selected_col_list):
        print(f'{date_str}: {selected_col:.1f}px \t', end='')

    # Draw the map and mark each selected date with a dashed vertical line
    fig = plt.figure(figsize=(10, 10))
    ax = fig.add_subplot()
    ax.set_title(f'CR{cr_str}', fontsize=20)

    ax.imshow(ch_map, extent=[0,cols, rows, 0])
    ax.vlines(x=selected_col_list, ymin=rows, ymax=0, linestyles='dashed',
              colors='black')


def rename_all_gong(gong_dir):
    """Rename all GONG magnetogram FITS files after their Carrington rotation.

    Every ``*.fits`` file matched by ``gong_dir + '*.fits'`` whose primary
    header has a ``CAR_ROT`` card is renamed to ``CR<CAR_ROT>.fits`` under the
    same prefix. Files without that card are left untouched.

    Args:
        gong_dir: Directory prefix. It is concatenated directly with the file
            names, so it must end with a path separator.
    """
    for fits_path in glob.glob(gong_dir + '*.fits'):
        # The context manager closes the file on every path, including the
        # skip below, and before the file is renamed
        with fits.open(fits_path) as gong_fits:
            header = gong_fits[0].header

            # Pass to next FITS file if header information is missing
            if 'CAR_ROT' not in header:
                continue

            # Carrington Rotation
            CR_str = f'CR{header["CAR_ROT"]}'

        # NOTE(review): files sharing a CAR_ROT value map to the same target
        # name - confirm that collisions cannot occur in this directory.
        os.rename(fits_path, gong_dir + CR_str + '.fits')

In [None]:
cr_str = '2151.0342_2152.1035'

plot_ch_map(list(reversed(HE_DATE_LIST)), cr_str, CH_MAP_DICT)