In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from spectral_cube import SpectralCube
from matplotlib.backends.backend_pdf import PdfPages
import os
from scipy.interpolate import interp1d


In [2]:
# Load the final decomposition result tables, one DataFrame per field and data set.
# NOTE(review): pd.read_pickle executes whatever the pickle contains while loading;
# only open files that come from a trusted source.

# ASKAP results for the Norma and Hydra fields.
df_norma_askap = pd.read_pickle('decomposition_results/ASKAP/norma_askap_final.pkl')
df_hydra_askap = pd.read_pickle('decomposition_results/ASKAP/hydra_askap_final.pkl')
# ATCA results for Norma; the file names distinguish a "0.2kms" and a "4kms" version
# (presumably the channel width — TODO confirm).
df_normaV2_atca = pd.read_pickle('decomposition_results/ATCA/norma_atca_0.2kms_final.pkl')
df_norma4k_atca = pd.read_pickle('decomposition_results/ATCA/norma_atca_4kms_final.pkl')
# ATCA results for Hydra, same two versions.
df_hydraV2_atca = pd.read_pickle('decomposition_results/ATCA/hydra_atca_0.2kms_final.pkl')
df_hydra4k_atca = pd.read_pickle('decomposition_results/ATCA/hydra_atca_4kms_final.pkl')

### Functions

In [3]:
def match_sources(df1, df2):
    """Pair up sources from two tables whose rounded sky positions coincide.

    Both tables must provide the columns 'source', 'ra (deg)' and 'dec (deg)'.
    The coordinates are rounded to two decimal places on copies of the inputs
    (the inputs themselves are left untouched) and the copies are inner-joined
    on the rounded coordinates.

    Returns:
        A list of ``(source_from_df1, source_from_df2)`` tuples, one per row of
        the joined table. A pair therefore appears once for every combination
        of matching rows, so callers that need unique pairs must de-duplicate.
    """
    coord_columns = ['ra (deg)', 'dec (deg)']

    def _rounded_copy(frame):
        # Work on a copy so the caller's DataFrame keeps its full precision.
        snapped = frame.copy()
        for column in coord_columns:
            snapped[column] = snapped[column].round(2)
        return snapped

    left = _rounded_copy(df1)
    right = _rounded_copy(df2)

    # Inner join: only positions present in both tables survive.
    joined = left.merge(
        right,
        how='inner',
        on=coord_columns,
        suffixes=('_df1', '_df2'),
    )

    return [
        (name_1, name_2)
        for name_1, name_2 in zip(joined['source_df1'], joined['source_df2'])
    ]


In [4]:
def gaussian(x, amplitude, mean, stddev):
    """Evaluate a single Gaussian component at ``x``.

    Computes ``amplitude * exp(-0.5 * ((x - mean) / stddev)**2)``; ``x`` may be
    a scalar or a NumPy array, in which case the result has the same shape.
    """
    # Distance from the centre expressed in units of the standard deviation.
    scaled_offset = (x - mean) / stddev
    return amplitude * np.exp(-0.5 * scaled_offset**2)


In [5]:
def tau_calculation(velocity, amplitude):
    """Convert a spectrum into ``tau = -ln(amplitude / baseline)``.

    The baseline is a first-order polynomial fitted to the spectrum after its
    deepest channels have been replaced by the RMS level, so that strong
    negative excursions do not drag the fit down.

    Channels where the logarithm is undefined (ratio <= 0, giving NaN or
    infinity) are filled by linear interpolation / extrapolation over the
    remaining finite channels.

    Args:
        velocity: 1-D array-like of velocities (the abscissa of the fit).
        amplitude: 1-D array-like of spectrum values, same length as ``velocity``.

    Returns:
        A new float ``numpy.ndarray`` of tau values; the inputs are not modified.

    Raises:
        ValueError: if fewer than two channels yield a finite tau, because the
            gaps cannot be interpolated in that case.
    """
    # Work in floating point so the RMS fill value below is never truncated.
    x = np.asarray(velocity, dtype=float)
    raw = np.asarray(amplitude, dtype=float)

    # Measure variability (RMS)
    rms_spect = np.sqrt(np.mean(raw**2))

    # Mask signal channels: anything lying further below the RMS than the
    # maximum lies above it is replaced by the RMS level before fitting.
    lower_cut = rms_spect - (np.max(raw) - rms_spect)
    masked = np.where(raw < lower_cut, rms_spect, raw)

    # Normalization by a straight-line baseline fitted to the masked spectrum
    baseline = np.poly1d(np.polyfit(x, masked, 1))

    # Non-positive ratios are expected here and repaired just below, so the
    # corresponding floating-point warnings are silenced on purpose.
    with np.errstate(divide='ignore', invalid='ignore'):
        tau = -np.log(raw / baseline(x))

    # Repair every non-finite channel: NaN (negative ratio) as well as
    # +/-inf (zero ratio), which a plain NaN check would let through.
    bad = ~np.isfinite(tau)
    if bad.any():
        good = ~bad
        if np.count_nonzero(good) < 2:
            raise ValueError(
                'tau_calculation needs at least two channels with a finite '
                'tau value to interpolate over invalid channels'
            )

        # Interpolate only over the valid points
        interpolator = interp1d(x[good], tau[good], kind='linear', fill_value='extrapolate')
        tau[bad] = interpolator(x[bad])

    return tau

In [16]:
def _load_tau_spectrum(path):
    """Read one spectrum text file and return ``(velocity, tau)``.

    The first row of the file is skipped; column index 1 is used as the
    velocity axis and column index 2 as the amplitude, which is converted
    with ``tau_calculation``.
    """
    data = np.loadtxt(path, skiprows=1)
    velocity = data[:, 1]
    return velocity, tau_calculation(velocity, data[:, 2])


def _plot_fit_panel(ax, velocity, tau, components, prefix, source_name,
                    velocity_range, xlabel=None):
    """Draw one panel: individual Gaussian components, the data and the summed fit.

    ``components`` is the slice of a decomposition table belonging to this
    source; its 'amp', 'VLSR' and 'vel_disp' columns are used as amplitude,
    centre and standard deviation of each Gaussian (assuming 'vel_disp' is the
    velocity dispersion sigma).
    """
    total_fit = np.zeros_like(velocity)
    for amp, centre, sigma in zip(components['amp'], components['VLSR'], components['vel_disp']):
        profile = gaussian(velocity, amp, centre, sigma)
        total_fit += profile
        ax.plot(velocity, profile, linestyle='-', lw=0.8, color='red')
    n_components = len(components)

    # Data in black, summed model in thick red on top of the thin components.
    ax.plot(velocity, tau, label=f'{prefix}: {source_name}', color='k', lw=0.8)
    ax.plot(velocity, total_fit, color='r', lw=1.5)
    ax.axhline(0, color='k', linestyle='--', linewidth=1)
    ax.set_xlim(*velocity_range)
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    ax.set_ylabel(r'$\tau$')
    ax.set_title(f'{prefix} Spectrum - {source_name}' r'$\quad N_{comp}=$' f'{n_components}')
    ax.grid(True)


def plot_spectra(file_pairs, region, df_1, df_2, df_3, save_as_pdf=False, pdf_filename='test.pdf',
                 velocity_range=(-70, 70)):
    """Plot tau spectra with their Gaussian decompositions for matched sources.

    For every ``(askap_file, atca_file)`` pair one figure is produced with an
    ASKAP panel, an ATCA panel and, when the corresponding "v2" spectrum file
    exists on disk, a third ATCA v2 panel.

    Args:
        file_pairs: iterable of ``(askap_source, atca_source)`` name tuples.
        region: sub-directory name under the ASKAP / ATCA spectra directories
            (the notebook calls this with 'Norma' or 'Hydra').
        df_1: decomposition table used for the ASKAP panel.
        df_2: decomposition table used for the ATCA panel.
        df_3: decomposition table used for the ATCA v2 panel.
            All three need the columns 'source', 'amp', 'VLSR' and 'vel_disp'.
        save_as_pdf: if True, append every figure as a page to ``pdf_filename``;
            otherwise show each figure interactively.
        pdf_filename: output path of the PDF when ``save_as_pdf`` is True.
        velocity_range: ``(min, max)`` x-axis limits applied to every panel.

    The PDF and every figure are closed even if loading or plotting a source
    raises, so no file handle or figure is leaked on errors.
    """
    # Base directories for the ASKAP and ATCA files
    askap_dir = f'data/ASKAP_spectra/{region}/'
    atca_dir = f'data/ATCA_HI_spectra/{region}/'

    pdf = PdfPages(pdf_filename) if save_as_pdf else None
    try:
        for askap_file, atca_file in file_pairs:
            askap_path = os.path.join(askap_dir, f'{askap_file}_askap_spectrum.txt')
            atca_path = os.path.join(atca_dir, f'{atca_file}_cube_4k_atca_4k_spectrum.txt')
            atca_path_v2 = os.path.join(atca_dir, f'{atca_file}_cube_v2_atca_spectrum.txt')

            # One entry per panel: (title/label prefix, source name, spectrum
            # file, decomposition rows of that source, x-axis label or None).
            panels = [
                ('ASKAP', askap_file, askap_path, df_1[df_1['source'] == askap_file], None),
                ('ATCA', atca_file, atca_path, df_2[df_2['source'] == atca_file], 'Velocity (km/s)'),
            ]
            # The v2 spectrum is optional; add the third panel only if it exists.
            if os.path.exists(atca_path_v2):
                panels.append(
                    ('ATCA v2', atca_file, atca_path_v2, df_3[df_3['source'] == atca_file], 'Velocity (km/s)')
                )

            # Load everything before creating the figure so a missing file
            # does not leave a half-built figure behind.
            spectra = [_load_tau_spectrum(panel[2]) for panel in panels]

            # 3 inches of height per panel: (8, 6) for two panels, (8, 9) for three.
            fig, axes = plt.subplots(len(panels), 1, figsize=(8, 3 * len(panels)))
            try:
                for ax, (prefix, name, _, components, xlabel), (velocity, tau) in zip(axes, panels, spectra):
                    _plot_fit_panel(ax, velocity, tau, components, prefix, name,
                                    velocity_range, xlabel=xlabel)

                fig.tight_layout()
                if pdf is not None:
                    pdf.savefig(fig)  # Save the current figure to the PDF
                else:
                    plt.show()
            finally:
                # Always release the figure; many sources would otherwise pile up in memory.
                plt.close(fig)
    finally:
        # Close the PdfPages object if saving as PDF, also when an error occurred.
        if pdf is not None:
            pdf.close()


### Data

In [7]:
# Sources found in both the ASKAP and the ATCA (4 km/s) decomposition tables.
# match_sources yields one pair per matching row combination, so repeated pairs
# are dropped here. dict.fromkeys keeps the first-seen order, which makes the
# resulting list (and hence the page order of the PDFs) identical on every run;
# iterating a set of strings does not guarantee that across kernel restarts.
list_common_norma = list(dict.fromkeys(match_sources(df_norma_askap, df_norma4k_atca)))

list_common_hydra = list(dict.fromkeys(match_sources(df_hydra_askap, df_hydra4k_atca)))

In [17]:
# Write one figure per matched Norma source into 'norma_field.pdf'.
plot_spectra(
    list_common_norma, 
    'Norma',  # region: 'Norma' or 'Hydra'
    df_norma_askap, 
    df_norma4k_atca,
    df_normaV2_atca, 
    save_as_pdf = True,
    pdf_filename='norma_field.pdf')

  tau = np.log(tauhi) * (-1)


In [18]:
# Write one figure per matched Hydra source into 'hydra_field.pdf'.
plot_spectra(
    list_common_hydra, 
    'Hydra',  # region: 'Norma' or 'Hydra'
    df_hydra_askap, 
    df_hydra4k_atca,
    df_hydraV2_atca, 
    save_as_pdf = True,
    pdf_filename='hydra_field.pdf')