In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.interpolate import interp1d
from scipy.optimize import curve_fit
import os

import pickle

## Functions

In [2]:
def assign_source_names(df, mapping_dict, x_col='x_pos', y_col='y_pos',
                       new_col='source_name', default_value='Unknown'):
    """
    Assigns source names to a DataFrame based on position mapping and places the new column first.

    Parameters:
    - df (pd.DataFrame): The input DataFrame containing position columns.
    - mapping_dict (dict): Dictionary mapping source names to position info.
                            Expected format:
                            {
                                'source_name1': {'pos': (x1, y1), ...},
                                'source_name2': {'pos': (x2, y2), ...},
                                ...
                            }
    - x_col (str): Name of the x-coordinate column in df. Default is 'x_pos'.
    - y_col (str): Name of the y-coordinate column in df. Default is 'y_pos'.
    - new_col (str): Name of the new column to be added for source names.
                      Default is 'source_name'.
    - default_value (str): Value to assign if position is not found in mapping.
                           Default is 'Unknown'.

    Returns:
    - pd.DataFrame: The DataFrame with the new source name column added as the first column.
    """
    # Step 1: Create a mapping from (x_pos, y_pos) to source name using dictionary comprehension
    pos_to_source = {tuple(info['pos']): source for source, info in mapping_dict.items()
                    if 'pos' in info and isinstance(info['pos'], (tuple, list)) and len(info['pos']) == 2}
    
    # Optional: Check for any sources with invalid positions and handle them
    invalid_sources = [source for source, info in mapping_dict.items()
                       if 'pos' not in info or not isinstance(info['pos'], (tuple, list)) or len(info['pos']) != 2]
    if invalid_sources:
        raise ValueError(f"Invalid position format for sources: {invalid_sources}")

    # Step 2: Create a Series of position tuples from the DataFrame
    positions = list(zip(df[x_col], df[y_col]))
    
    # Step 3: Map the positions to source names using the pos_to_source dictionary
    source_names = pd.Series(positions).map(pos_to_source).fillna(default_value)
    
    # Step 4: Assign the mapped source names to the new column in the DataFrame
    df[new_col] = source_names
    
    # Step 5: Reorder the columns to place 'source_name' first
    # Only perform reordering if 'source_name' exists
    if new_col in df.columns:
        cols = df.columns.tolist()
        cols.remove(new_col)
        df = df[[new_col] + cols]
    else:
        raise KeyError(f"Column '{new_col}' was not added to the DataFrame.")
    
    return df


In [3]:
def modify_vlsr(df, source_dict, value):
    """
    Modify the 'VLSR' column of the DataFrame based on the 'source' column and dictionary values.

    Parameters:
    df (pd.DataFrame): DataFrame with 'source' and 'VLSR' columns.
    source_dict (dict): Dictionary where keys match df['source'] values and each has a 'vel' attribute.
    value (float): The float value to use for the calculation.
    
    Returns:
    pd.DataFrame: The modified DataFrame.
    """
    # Ensure the 'source' column values exist in the dictionary
    if 'source' not in df.columns or 'VLSR' not in df.columns:
        raise KeyError("'source' or 'VLSR' columns are missing in the DataFrame")
    
    # Apply the modification
    df['VLSR'] = df.apply(lambda row: row['VLSR'] + (source_dict[row['source']]['vel'] - value ), axis=1)
    
    return df


In [4]:
def remove_rows_by_tuples(tuples_list, df, x_col='x_pos', y_col='y_pos'):
    """
    Remove rows from a DataFrame based on a list of (x, y) tuples.

    Parameters:
    tuples_list (list of tuples): A list of (x, y) pairs to match and remove.
    df (pd.DataFrame): The input DataFrame.
    x_col (str): Column name for the 'x' values. Default is 'x_pos'.
    y_col (str): Column name for the 'y' values. Default is 'y_pos'.

    Returns:
    pd.DataFrame: A new DataFrame with the specified rows removed.
    """
    for x, y in tuples_list:
        df = df[~((df[x_col] == x) & (df[y_col] == y))]
    
    return df


In [5]:
def update_fixed_columns(df_main, df_update, common_column):
    """
    Update 'ra (deg)', 'dec (deg)', 'ra (hms)', and 'dec (dms)' columns in df_main with values from df_update 
    based on a common column.
    
    Args:
    df_main (pd.DataFrame): The main DataFrame to be updated.
    df_update (pd.DataFrame): The DataFrame with the updated values.
    common_column (str): The column that both DataFrames have in common to match rows.
    
    Returns:
    pd.DataFrame: Updated DataFrame with values from df_update where available.
    """
    columns_to_update = ['ra (deg)', 'dec (deg)', 'ra (hms)', 'dec (dms)']
    
    # Step 1: Perform a left merge based on the common column
    df_merged = df_main.merge(df_update[[common_column] + columns_to_update], 
                              on=common_column, 
                              how='left', 
                              suffixes=('_main', '_update'))
    
    # Step 2: For each column to update, combine values from df_update where they exist
    for col in columns_to_update:
        df_merged[f'{col}_main'] = df_merged[f'{col}_update'].combine_first(df_merged[f'{col}_main'])
    
    # Step 3: Drop the extra columns from df_update and rename the main columns back
    columns_to_drop = [f'{col}_update' for col in columns_to_update]
    df_merged.drop(columns=columns_to_drop, inplace=True)
    
    # Rename columns back to their original names
    df_merged.rename(columns={f'{col}_main': col for col in columns_to_update}, inplace=True)
    
    return df_merged



In [6]:
def tau_calculation(velocity, amplitude):
        # Measure variability (RMS)
    rms_spect = np.sqrt(np.mean(amplitude**2))
    
    # Mask signal channels
    spectrum = np.copy(amplitude)
    spectrum[spectrum < rms_spect - (np.max(spectrum) - rms_spect)] = rms_spect
    
    # Normalization
    y = spectrum
    x = velocity
    pars = np.polyfit(x, y, 1)
    p = np.poly1d(pars)
    tauhi = np.array(amplitude / p(x))
    # Calculate tau (optical depth)
    tau = np.log(tauhi) * (-1)
    
    # Check for NaNs in tau and interpolate missing values
    if np.isnan(tau).any():
        # Get indices of valid and invalid (NaN) values
        valid_idx = np.where(~np.isnan(tau))[0]
        invalid_idx = np.where(np.isnan(tau))[0]
        
        # Interpolate only over the valid points
        interpolator = interp1d(x[valid_idx], tau[valid_idx], kind='linear', fill_value='extrapolate')
        
        # Apply interpolation to the NaN values
        tau[invalid_idx] = interpolator(x[invalid_idx])

    # Return the tau values with NaNs handled
    return tau

In [7]:
# Define the Gaussian function
def gaussian(x, amp, mean, vel_disp):
    return amp * np.exp(-0.5 * ((x - mean) / vel_disp)**2)

# Function to apply the Gaussian fitting and plot results
def restore_amplitudes_df(df, directory, category, plotting = True):
    # Group by 'source'
    grouped_df = df.groupby('source')
    
    # Loop over each group (each 'source')
    for source, group in grouped_df:
        # Load the ATCA data for this source
        if category == "atca_4k":
            file_path = os.path.join(directory, f'{source}_cube_4k_atca_4k_spectrum.txt')
        elif category == "atca_v2":
            file_path = os.path.join(directory, f'{source}_cube_v2_atca_spectrum.txt')
        elif category == "askap":
            file_path = os.path.join(directory, f'{source}_askap_spectrum.txt')
        
        data = np.loadtxt(file_path, skiprows=1)
        velocity = data[:, 1]  # velocity column
        amplitude = data[:, 2]  # amplitude column
        
        # Apply tau calculation (you should provide this function)
        amplitude = tau_calculation(velocity, amplitude)
        
        # Initialize plot
        if plotting == True:
            plt.figure(figsize=(10, 4))
            plt.plot(velocity, amplitude, label='Signal', color='k', lw=0.4)
            total_fit = np.zeros_like(velocity)
            ncomps = 0
        # Loop through each row in the group (each component)
        for idx, row in group.iterrows():
            amp, mean, vel_disp = row['amp'], row['VLSR'], row['vel_disp']
            # Perform Gaussian fitting
            popt, pcov = curve_fit(gaussian, velocity, amplitude, p0=[amp, mean, vel_disp])
            # Store the fitted parameters in the DataFrame
            df.at[idx, 'amp'] = popt[0]
            df.at[idx, 'VLSR'] = popt[1]
            df.at[idx, 'vel_disp'] = popt[2]
            # Plot the fitted Gaussian curve
            if plotting == True:
                fitted_curve = gaussian(velocity, *popt)
                total_fit += fitted_curve
                plt.plot(velocity, fitted_curve, color='red', lw = 0.4)
                ncomps += 1
        if plotting == True:
            # Add labels and legend
            plt.plot(velocity, total_fit, color='r', lw=1.8)
            plt.axhline(0, color='k', linestyle='-', linewidth=0.8)
            plt.title(f'Gaussian Fit for Source: {source}' r'$\quad N_{comp}=$' f'{ncomps}')
            plt.xlabel('VELO-LSR [km / s]')
            plt.ylabel('Intensity')
            plt.legend()
            plt.show()
    
    return df


## Data I/O

In [8]:
#ASKAP
file_path_hydra_askap = "decomposition_results/ASKAP/decomposition_hydra/gpy_decomposed/spectra_hydra_raw_g+_fit_fin_finalized.dat"
file_path_norma_askap = "decomposition_results/ASKAP/decomposition_norma/gpy_decomposed/spectra_norma_raw_g+_fit_fin_finalized.dat"

#ATCA

file_path_hydra4k_atca = "decomposition_results/ATCA/decomposition_hydra4k/gpy_decomposed/spectra_hydra_4k_g1_g+_fit_fin_finalized.dat"
file_path_hydra1000_atca = "decomposition_results/ATCA/decomposition_hydraV21000/gpy_decomposed/spectra_hydra_v2-1000_g1_g+_fit_fin_finalized.dat"
file_path_hydra1500_atca = "decomposition_results/ATCA/decomposition_hydraV21500/gpy_decomposed/spectra_hydra_v2-1500_g1_g+_fit_fin_finalized.dat"

file_path_norma4k_atca= "decomposition_results/ATCA/decomposition_norma4k/gpy_decomposed/spectra_norma_4k_raw_g+_fit_fin_finalized.dat"
file_path_norma1000_atca = "decomposition_results/ATCA/decomposition_normaV21000/gpy_decomposed/spectra_norma_v2-1000_g1_g+_fit_fin_finalized.dat"
file_path_norma1500_atca = "decomposition_results/ATCA/decomposition_normaV21500/gpy_decomposed/spectra_norma_v2-1500_g1_g+_fit_fin_finalized.dat"

In [9]:
# Read the data from the file into a DataFrame
df_hydra_askap = pd.read_csv(file_path_hydra_askap, delim_whitespace=True)
df_norma_askap = pd.read_csv(file_path_norma_askap, delim_whitespace=True)


df_hydra4k_atca = pd.read_csv(file_path_hydra4k_atca, delim_whitespace=True)
df_h1000_atca = pd.read_csv(file_path_hydra1000_atca, delim_whitespace=True)
df_h1500_atca = pd.read_csv(file_path_hydra1500_atca, delim_whitespace=True)

df_norma4k_atca = pd.read_csv(file_path_norma4k_atca, delim_whitespace=True)
df_n1000_atca = pd.read_csv(file_path_norma1000_atca, delim_whitespace=True)
df_n1500_atca = pd.read_csv(file_path_norma1500_atca, delim_whitespace=True)


In [10]:
with open('data_cubes/ATCA/norma/coordiantes_norma_4k.pkl', 'rb') as f:
    dict_4k_n = pickle.load(f)
with open('data_cubes/ATCA/hydra/coordiantes_hydra_4k.pkl', 'rb') as f:
    dict_4k_h = pickle.load(f)

with open('data_cubes/ATCA/norma/coordiantes_norma_v2_1000.pkl', 'rb') as f:
    dict_v21000_n = pickle.load(f)
with open('data_cubes/ATCA/hydra/coordiantes_hydra_v2_1000.pkl', 'rb') as f:
    dict_v21000_h = pickle.load(f)

with open('data_cubes/ATCA/norma/coordiantes_norma_v2_1500.pkl', 'rb') as f:
    dict_v21500_n = pickle.load(f)
with open('data_cubes/ATCA/hydra/coordiantes_hydra_v2_1500.pkl', 'rb') as f:
    dict_v21500_h = pickle.load(f)


#ASKAP
with open('data_cubes/ASKAP/coordinates_norma.pkl', 'rb') as f:
    dict_n_askap = pickle.load(f)
with open('data_cubes/ASKAP/coordinates_hydra.pkl', 'rb') as f:
    dict_h_askap = pickle.load(f)

## Name assignation 

In [11]:
# Assign source names using the function
df_h1000_atca = assign_source_names(df_h1000_atca, dict_v21000_h,
                                x_col='x_pos', y_col='y_pos',
                                new_col='source', default_value='Unknown')
# Assign source names using the function
df_h1500_atca = assign_source_names(df_h1500_atca, dict_v21500_h,
                                x_col='x_pos', y_col='y_pos',
                                new_col='source', default_value='Unknown')

# Assign source names using the function
df_hydra4k_atca = assign_source_names(df_hydra4k_atca, dict_4k_h,
                                x_col='x_pos', y_col='y_pos',
                                new_col='source', default_value='Unknown')

In [12]:
# Assign source names using the function
df_norma4k_atca = assign_source_names(df_norma4k_atca, dict_4k_n,
                                x_col='x_pos', y_col='y_pos',
                                new_col='source', default_value='Unknown')
df_n1000_atca = assign_source_names(df_n1000_atca, dict_v21000_n,
                                x_col='x_pos', y_col='y_pos',
                                new_col='source', default_value='Unknown')
df_n1500_atca = assign_source_names(df_n1500_atca, dict_v21500_n,
                                x_col='x_pos', y_col='y_pos',
                                new_col='source', default_value='Unknown')

In [13]:
df_hydra_askap = assign_source_names(df_hydra_askap, dict_h_askap,
                                x_col='x_pos', y_col='y_pos',
                                new_col='source', default_value='Unknown')
df_norma_askap = assign_source_names(df_norma_askap, dict_n_askap,
                                x_col='x_pos', y_col='y_pos',
                                new_col='source', default_value='Unknown')

## VLSR and Amplitude correction

In [14]:
df_h1000_atca = modify_vlsr(df_h1000_atca, dict_v21000_h, -112.9290692)
df_h1500_atca = modify_vlsr(df_h1500_atca, dict_v21500_h, -140.6637217)
df_hydra4k_atca = modify_vlsr(df_hydra4k_atca, dict_4k_h, -185.6866523)

df_norma4k_atca = modify_vlsr(df_norma4k_atca, dict_4k_n, -189.3374977)
df_n1000_atca = modify_vlsr(df_n1000_atca, dict_v21000_n, -119.4194126)
df_n1500_atca = modify_vlsr(df_n1500_atca, dict_v21500_n, -146.5113794)

In [15]:
df_n1000_atca[(df_n1000_atca['x_pos']==2) & (df_n1000_atca['y_pos']==1)]

Unnamed: 0,source,x_pos,y_pos,RA,DEC,amp,e_amp,VLSR,e_VLSR,vel_disp,...,pvalue,aicc,rchi2,ncomps,ncomp_wmedian,ncomp_jumps,flag_blended,flag_neg_res_peak,flag_broad,flag_centroid
6,j164039-6004,2,1,,,2.6017,0.2529,-18.599987,0.0496,0.4431,...,0.0,-829.9998,2.5672,2,0,0,0,0,0,0
7,j164039-6004,2,1,,,1.4722,0.061,-1.500887,0.3658,7.6317,...,0.0,-829.9998,2.5672,2,0,0,0,1,1,0


In [16]:
remove_norma_v2_1000 = [(2,1)]
remove_norma_v2_1500 = [(0,5),(2,5)]

remove_hydra_v2_1000 = [(2,0),(1,1)]
remove_hydra_4k = [(2,0)]


In [17]:
df_n1000_atca = remove_rows_by_tuples(remove_norma_v2_1000, df_n1000_atca)
df_n1500_atca = remove_rows_by_tuples(remove_norma_v2_1500, df_n1500_atca)
df_h1000_atca = remove_rows_by_tuples(remove_norma_v2_1000, df_h1000_atca)
df_hydra4k_atca = remove_rows_by_tuples(remove_hydra_4k, df_hydra4k_atca)

In [18]:
df_hydraV2_atca = pd.concat([df_h1000_atca, df_h1500_atca ], axis = 0, ignore_index = True)

df_normaV2_atca = pd.concat([df_n1000_atca, df_n1500_atca], axis = 0, ignore_index = True)

In [None]:
df_hydra4k_atca = restore_amplitudes_df(df_hydra4k_atca, 
             'data/ATCA_HI_spectra/Hydra/', 
             category = "atca_4k",
             plotting = False)

df_hydraV2_atca = restore_amplitudes_df(df_hydraV2_atca, 
             'data/ATCA_HI_spectra/Hydra/', 
             category = "atca_v2",
             plotting = False)

df_normaV2_atca = restore_amplitudes_df(df_normaV2_atca, 
             'data/ATCA_HI_spectra/Norma/', 
             category = "atca_v2",
             plotting = False)

We didn't apply a gaussian smoothing for norma 4km/s from ATCA neither for ASKAP data

In [19]:
print('ASKAP - Hydra - number of spectral signals: ',len(df_hydra_askap))
print('ASKAP - Normal - number of spectral signals: ',len(df_norma_askap))
print('ATCA - Hydra - number of spectral signals v=4: ',len(df_hydra4k_atca))
print('ATCA - Hydra - number of spectral signals v=0.2: ',len(df_hydraV2_atca))
print('ATCA - Norma - number of spectral signals v=4: ',len(df_norma4k_atca))
print('ATCA - Norma - number of spectral signals v=0.2: ',len(df_normaV2_atca))

ASKAP - Hydra - number of spectral signals:  43
ASKAP - Normal - number of spectral signals:  325
ATCA - Hydra - number of spectral signals v=4:  15
ATCA - Hydra - number of spectral signals v=0.2:  26
ATCA - Norma - number of spectral signals v=4:  59
ATCA - Norma - number of spectral signals v=0.2:  41


In [20]:
print('Sources decomposed from ASKAP-Norma: ',df_norma_askap['source'].nunique())
print('Sources decomposed from ASKAP-Hydra: ',df_hydra_askap['source'].nunique())
print('Sources decomposed from ATCA-Norma 4km/s: ',df_norma4k_atca['source'].nunique())
print('Sources decomposed from ATCA-Norma 0.2km/s: ',df_normaV2_atca['source'].nunique())
print('Sources decomposed from ATCA-Hydra 4km/s: ',df_hydra4k_atca['source'].nunique())
print('Sources decomposed from ATCA-Hydra 0.2km/s: ',df_hydraV2_atca['source'].nunique())

Sources decomposed from ASKAP-Norma:  259
Sources decomposed from ASKAP-Hydra:  42
Sources decomposed from ATCA-Norma 4km/s:  40
Sources decomposed from ATCA-Norma 0.2km/s:  18
Sources decomposed from ATCA-Hydra 4km/s:  13
Sources decomposed from ATCA-Hydra 0.2km/s:  17


## Coordinates assignation

In [21]:
# Renaming the columns
df_hydraV2_atca.rename(columns={
    'x_pos': 'ra (deg)',
    'y_pos': 'dec (deg)',
    'RA': 'ra (hms)',
    'DEC': 'dec (dms)'
}, inplace=True)

df_hydra4k_atca.rename(columns={
    'x_pos': 'ra (deg)',
    'y_pos': 'dec (deg)',
    'RA': 'ra (hms)',
    'DEC': 'dec (dms)'
}, inplace=True)

# Renaming the columns
df_normaV2_atca.rename(columns={
    'x_pos': 'ra (deg)',
    'y_pos': 'dec (deg)',
    'RA': 'ra (hms)',
    'DEC': 'dec (dms)'
}, inplace=True)

df_norma4k_atca.rename(columns={
    'x_pos': 'ra (deg)',
    'y_pos': 'dec (deg)',
    'RA': 'ra (hms)',
    'DEC': 'dec (dms)'
}, inplace=True)

In [22]:
sources_h = pd.read_csv(f'data/ATCA_HI_spectra/hydra_coords.csv', index_col=0)

In [23]:
sources_n = pd.read_csv(f'data/ATCA_HI_spectra/norma_coords.csv', index_col=0)

In [24]:
# Example usage
df_normaV2_atca = update_fixed_columns(
    df_main=df_normaV2_atca,   # The main DataFrame
    df_update=sources_n,       # The DataFrame with updated values
    common_column='source'     # The common column to match rows
)
df_norma4k_atca = update_fixed_columns(
    df_main=df_norma4k_atca,   # The main DataFrame
    df_update=sources_n,       # The DataFrame with updated values
    common_column='source'     # The common column to match rows
)

In [25]:
df_hydraV2_atca = update_fixed_columns(
    df_main=df_hydraV2_atca,   # The main DataFrame
    df_update=sources_h,       # The DataFrame with updated values
    common_column='source'     # The common column to match rows
)

df_hydra4k_atca = update_fixed_columns(
    df_main=df_hydra4k_atca,   # The main DataFrame
    df_update=sources_h,       # The DataFrame with updated values
    common_column='source'     # The common column to match rows
)

In [26]:
df_normaV2_atca['source'].nunique()

18

## Coordinates ASKAP

In [27]:
df_hydra_askap = modify_vlsr(df_hydra_askap, dict_h_askap, -446.171913620222)
df_norma_askap = modify_vlsr(df_norma_askap, dict_n_askap, -437.311662453059)

In [28]:
df_norma_askap[df_norma_askap['VLSR'] > 150]

Unnamed: 0,source,x_pos,y_pos,RA,DEC,amp,e_amp,VLSR,e_VLSR,vel_disp,...,pvalue,aicc,rchi2,ncomps,ncomp_wmedian,ncomp_jumps,flag_blended,flag_neg_res_peak,flag_broad,flag_centroid
13,161717.9-584807,8,0,,,0.0122,0.0073,214.601544,2.2893,2.4252,...,0.0,-1549.3005,5.4834,5,1,7,0,0,0,0


In [29]:
remove_norma=[(8,0)]

In [30]:
df_norma_askap = remove_rows_by_tuples(remove_norma, df_norma_askap)

In [31]:
# Renaming the columns
df_hydra_askap.rename(columns={
    'x_pos': 'ra (deg)',
    'y_pos': 'dec (deg)',
    'RA': 'ra (hms)',
    'DEC': 'dec (dms)'
}, inplace=True)

df_norma_askap.rename(columns={
    'x_pos': 'ra (deg)',
    'y_pos': 'dec (deg)',
    'RA': 'ra (hms)',
    'DEC': 'dec (dms)'
}, inplace=True)


In [32]:
# Assuming the 'ASKAP_spectra/Hydra/' directory path
directory_path_hydra = 'data/ASKAP_spectra/Hydra/'
directory_path_norma = 'data/ASKAP_spectra/Norma/'

# List all filenames in the directory
filenames_hydra = [f for f in os.listdir(directory_path_hydra) if f.endswith('_askap_spectrum.txt')]
filenames_norma = [f for f in os.listdir(directory_path_norma) if f.endswith('_askap_spectrum.txt')]

# Function to convert RA in HMS to degrees
def ra_hms_to_deg(h, m, s):
    return 15 * (int(h) + int(m)/60 + float(s)/3600)

# Function to convert Dec in DMS to degrees
def dec_dms_to_deg(sign, d, m, s):
    dec_deg = int(d) + int(m)/60 + float(s)/3600
    return -dec_deg if sign == '-' else dec_deg

# Function to extract the source, RA, and Dec from the filename
def extract_coordinates(filename):
    # Extract the RA and Dec from the filename
    name = filename.split('_')[0]
    
    # Source name (optional)
    source = name[:15]  # first 6 characters as example source
    
    # Right Ascension (RA)
    ra_hours = name[:2]
    ra_minutes = name[2:4]
    ra_seconds = name[4:8]
    ra_hms = f"{ra_hours}:{ra_minutes}:{ra_seconds}"
    ra_deg = ra_hms_to_deg(ra_hours, ra_minutes, ra_seconds)
    
    # Declination (Dec)
    sign = name[8]
    dec_degrees = name[9:11]
    dec_arcminutes = name[11:13]
    dec_arcseconds = name[13:15]
    dec_dms = f"{sign}{dec_degrees}:{dec_arcminutes}:{dec_arcseconds}"
    dec_deg = dec_dms_to_deg(sign, dec_degrees, dec_arcminutes, dec_arcseconds)
    
    return source, ra_deg, dec_deg, ra_hms, dec_dms


In [33]:
# Create a list of tuples with source, RA in deg, Dec in deg, RA in HMS, Dec in DMS
coordinates_hydra = [extract_coordinates(filename) for filename in filenames_hydra]
coordinates_norma = [extract_coordinates(filename) for filename in filenames_norma]


# Create the DataFrame with the desired columns
df_hydra_askap_names = pd.DataFrame(coordinates_hydra, columns=['source', 'ra (deg)', 'dec (deg)', 'ra (hms)', 'dec (dms)'])
df_norma_askap_names = pd.DataFrame(coordinates_norma, columns=['source', 'ra (deg)', 'dec (deg)', 'ra (hms)', 'dec (dms)'])

In [34]:
df_hydra_askap = update_fixed_columns(
    df_main=df_hydra_askap,   # The main DataFrame
    df_update=df_hydra_askap_names,       # The DataFrame with updated values
    common_column='source'     # The common column to match rows
)

df_norma_askap = update_fixed_columns(
    df_main=df_norma_askap,   # The main DataFrame
    df_update=df_norma_askap_names,       # The DataFrame with updated values
    common_column='source'     # The common column to match rows
)

## Save data

In [35]:
# Save DataFrames as pickle files with appropriate names
df_norma_askap.to_pickle('decomposition_results/ASKAP/norma_askap_final.pkl')
df_hydra_askap.to_pickle('decomposition_results/ASKAP/hydra_askap_final.pkl')
df_normaV2_atca.to_pickle('decomposition_results/ATCA/norma_atca_0.2kms_final.pkl')
df_norma4k_atca.to_pickle('decomposition_results/ATCA/norma_atca_4kms_final.pkl')
df_hydraV2_atca.to_pickle('decomposition_results/ATCA/hydra_atca_0.2kms_final.pkl')
df_hydra4k_atca.to_pickle('decomposition_results/ATCA/hydra_atca_4kms_final.pkl')
