In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from spectral_cube import SpectralCube
from matplotlib.backends.backend_pdf import PdfPages

In [2]:
# Load the pickled result tables, one per field (Norma/Hydra) and data set
# (ASKAP, ATCA 0.2 km/s, ATCA 4 km/s, going by the file names).
# Later cells read the columns 'source', 'ra (deg)', 'dec (deg)', 'amp', 'VLSR'
# and 'vel_disp' from these frames.
# NOTE(review): pd.read_pickle can execute arbitrary code; only load trusted files.
df_norma_askap = pd.read_pickle('RESULTS/norma_askap.pkl')
df_hydra_askap = pd.read_pickle('RESULTS/hydra_askap.pkl')
df_normaV2_atca = pd.read_pickle('RESULTS/norma_atca_0.2kms.pkl')
df_norma4k_atca = pd.read_pickle('RESULTS/norma_atca_4kms.pkl')
df_hydraV2_atca = pd.read_pickle('RESULTS/hydra_atca_0.2kms.pkl')
df_hydra4k_atca = pd.read_pickle('RESULTS/hydra_atca_4kms.pkl')

### Functions

In [3]:
def match_sources(df1, df2, decimals=2):
    """Pair up sources of two tables that share the same rounded sky position.

    Both tables must provide the columns 'source', 'ra (deg)' and 'dec (deg)'.
    The coordinates are rounded to `decimals` decimal places and the tables are
    inner-joined on the rounded values. The inputs are not modified.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Tables to match.
    decimals : int, optional
        Number of decimal places kept before comparing coordinates (default 2,
        the tolerance originally hard-coded here).

    Returns
    -------
    list of tuple
        One ``(source_from_df1, source_from_df2)`` tuple per matching ROW pair.
        Tables holding several rows per source therefore yield repeated
        tuples; de-duplicate at the call site if needed.

    Notes
    -----
    Rounding is not a true angular-separation match: two positions closer than
    the tolerance can still round to different values and be missed.
    """
    key_cols = ['ra (deg)', 'dec (deg)']
    needed_cols = ['source'] + key_cols

    # Only the join keys and the source name are used, so copy just those
    # columns instead of duplicating the full tables.
    left = df1[needed_cols].copy()
    right = df2[needed_cols].copy()
    left[key_cols] = left[key_cols].round(decimals)
    right[key_cols] = right[key_cols].round(decimals)

    merged_df = pd.merge(left, right,
                         on=key_cols,
                         suffixes=('_df1', '_df2'),
                         how='inner')

    return list(zip(merged_df['source_df1'], merged_df['source_df2']))


In [4]:
def gaussian(x, amplitude, mean, stddev):
    """Evaluate one Gaussian component at `x`.

    Returns ``amplitude * exp(-0.5 * ((x - mean) / stddev)**2)``; `x` may be a
    scalar or a NumPy array.
    """
    z = (x - mean) / stddev
    exponent = -0.5 * z**2
    return amplitude * np.exp(exponent)


In [5]:
def tau_calculation(velocity, amplitude):
    """Turn a spectrum into tau = -ln(amplitude / linear baseline).

    Steps:
      1. Compute the RMS of `amplitude`.
      2. On a copy, replace channels lying below ``rms - (max - rms)`` with the
         RMS, so that deep features do not drag the baseline fit.
      3. Fit a first-order polynomial in `velocity` to the masked copy.
      4. Divide the ORIGINAL amplitudes by that baseline and return minus the
         natural logarithm.

    Parameters
    ----------
    velocity, amplitude : array-like
        Spectral axis and spectrum, same length. They are converted to float
        arrays, so plain lists are accepted; the inputs are not modified.

    Returns
    -------
    numpy.ndarray
        tau per channel. Channels whose normalised value is negative come out
        as NaN and channels where it is exactly zero as +inf. This used to
        raise a RuntimeWarning on every call; the values are unchanged but the
        warning is now suppressed because the outcome is expected.
    """
    velocity = np.asarray(velocity, dtype=float)
    amplitude = np.asarray(amplitude, dtype=float)

    # Measure variability (RMS)
    rms_spect = np.sqrt(np.mean(amplitude**2))

    # Mask signal channels (on a copy, so the caller's data stay untouched)
    spectrum = np.copy(amplitude)
    spectrum[spectrum < rms_spect - (np.max(spectrum) - rms_spect)] = rms_spect

    # Linear baseline fitted to the masked spectrum
    baseline = np.poly1d(np.polyfit(velocity, spectrum, 1))

    # Division by a zero baseline and log of non-positive ratios are expected
    # for noisy/saturated channels; keep the NaN/inf results, drop the warnings.
    with np.errstate(divide='ignore', invalid='ignore'):
        tauhi = np.array(amplitude / baseline(velocity))
        return np.log(tauhi) * (-1)

In [6]:
import numpy as np
import matplotlib.pyplot as plt
import os

def _load_tau_spectrum(path):
    """Read a spectrum text file and return ``(velocity, tau)``.

    The first row is skipped; column 1 is used as velocity and column 2 as
    amplitude, which is converted with `tau_calculation`.
    """
    data = np.loadtxt(path, skiprows=1)
    velocity = data[:, 1]
    return velocity, tau_calculation(velocity, data[:, 2])


def _plot_tau_panel(ax, velocity, tau, components, instrument, source_name, show_xlabel=True):
    """Draw one panel: individual Gaussian components, the spectrum and the summed fit.

    `components` is a DataFrame with one row per Gaussian ('amp', 'VLSR',
    'vel_disp'); 'vel_disp' is used directly as the Gaussian sigma.
    """
    total_fit = np.zeros_like(velocity)
    n_components = 0
    for _, row in components.iterrows():
        component = gaussian(velocity, row['amp'], row['VLSR'], row['vel_disp'])
        total_fit += component
        n_components += 1
        ax.plot(velocity, component, linestyle='-', lw=0.8, color='red')

    ax.plot(velocity, tau, label=f'{instrument}: {source_name}', color='k', lw=0.8)
    ax.plot(velocity, total_fit, color='r', lw=1.5)
    ax.axhline(0, color='k', linestyle='--', linewidth=1)
    ax.set_xlim(-150, 150)
    if show_xlabel:
        ax.set_xlabel('Velocity (km/s)')
    ax.set_ylabel(r'$\tau$')
    ax.set_title(f'{instrument} Spectrum - {source_name}' r'$\quad N_{comp}=$' f'{n_components}')
    ax.grid(True)


def plot_spectra(file_pairs, region, df_1, df_2, df_3, save_as_pdf=False, pdf_filename='test.pdf'):
    """Plot ASKAP and ATCA tau spectra with their Gaussian fits, one figure per source pair.

    For each ``(askap_file, atca_file)`` pair the following files are read:
      * ``ASKAP_spectra/<region>/<askap_file>_askap_spectrum.txt``
      * ``ATCA_HI_spectra/<region>/<atca_file>_cube_4k_atca_4k_spectrum.txt``
      * ``ATCA_HI_spectra/<region>/<atca_file>_cube_v2_atca_spectrum.txt``
        (optional; a third panel is added only when this file exists)

    Parameters
    ----------
    file_pairs : iterable of (str, str)
        ASKAP / ATCA source names.
    region : str
        Sub-directory name of the field (the notebook uses 'Norma' and 'Hydra').
    df_1, df_2, df_3 : pandas.DataFrame
        Gaussian components for the ASKAP, ATCA 4k and ATCA v2 panels. Rows are
        selected with ``df['source'] == name``; the columns 'amp', 'VLSR' and
        'vel_disp' are read.
    save_as_pdf : bool, optional
        If True, every figure becomes one page of `pdf_filename`; otherwise
        each figure is shown with ``plt.show()``.
    pdf_filename : str, optional
        Output path used when `save_as_pdf` is True.

    Notes
    -----
    A missing ASKAP or ATCA 4k file still raises (from ``np.loadtxt``). The PDF
    is now finalised and the current figure closed even in that case, so pages
    written before the failure are kept and no figure is leaked.
    """
    # Base directories for the ASKAP and ATCA files
    askap_dir = f'ASKAP_spectra/{region}/'
    atca_dir = f'ATCA_HI_spectra/{region}/'

    pdf = PdfPages(pdf_filename) if save_as_pdf else None
    try:
        for askap_file, atca_file in file_pairs:
            askap_path = os.path.join(askap_dir, f'{askap_file}_askap_spectrum.txt')
            atca_path = os.path.join(atca_dir, f'{atca_file}_cube_4k_atca_4k_spectrum.txt')
            atca_path_v2 = os.path.join(atca_dir, f'{atca_file}_cube_v2_atca_spectrum.txt')

            # Load everything before creating the figure so that a bad file
            # cannot leave an empty figure behind.
            askap_velocity, askap_tau = _load_tau_spectrum(askap_path)
            atca_velocity, atca_tau = _load_tau_spectrum(atca_path)

            # Each entry: (velocity, tau, components, instrument, source name, show x label)
            panels = [
                (askap_velocity, askap_tau, df_1[df_1['source'] == askap_file],
                 'ASKAP', askap_file, False),
                (atca_velocity, atca_tau, df_2[df_2['source'] == atca_file],
                 'ATCA', atca_file, True),
            ]
            if os.path.exists(atca_path_v2):
                v2_velocity, v2_tau = _load_tau_spectrum(atca_path_v2)
                panels.append((v2_velocity, v2_tau, df_3[df_3['source'] == atca_file],
                               'ATCA v2', atca_file, True))

            # 3 inches of height per panel: (8, 6) for two panels, (8, 9) for three.
            fig, axes = plt.subplots(len(panels), 1, figsize=(8, 3 * len(panels)))
            try:
                for ax, panel in zip(axes, panels):
                    _plot_tau_panel(ax, *panel)

                plt.tight_layout()
                if pdf is not None:
                    pdf.savefig(fig)  # one page per source pair
                else:
                    plt.show()
            finally:
                plt.close(fig)
    finally:
        # Always finalise the PDF, even if a spectrum failed to load.
        if pdf is not None:
            pdf.close()


In [7]:
def get_spectral_data(file_path, y_pixel, x_pixel):
    """
    Return the spectral axis and the spectrum of a single spatial pixel of a cube.

    Parameters:
    file_path (str): Path of the cube file, opened with SpectralCube.read.
    y_pixel (int): Index along the cube's second axis (spatial y).
    x_pixel (int): Index along the cube's third axis (spatial x).

    Returns:
    x: Values of ``cube.spectral_axis`` with the unit stripped. They are in the
       cube's own spectral unit (the original note suggests km/s - TODO confirm).
    amplitudes: ``cube.unmasked_data[:, y_pixel, x_pixel]``, i.e. every channel
       of that pixel with the cube mask ignored.
    """
    cube = SpectralCube.read(file_path)

    # Spectrum first, axis second: the two reads are independent of each other.
    pixel_spectrum = cube.unmasked_data[:, y_pixel, x_pixel]
    spectral_values = cube.spectral_axis.value

    return spectral_values, pixel_spectrum


In [8]:
import numpy as np
import os
from astropy.convolution import convolve, Gaussian1DKernel

def process_spectrum(file_path, gkernel):
    """Load a spectrum file, convert it to tau and smooth it with a Gaussian kernel.

    Parameters
    ----------
    file_path : str
        Text file read with ``np.loadtxt`` (first row skipped). Column 1 is
        used as velocity and column 2 as amplitude.
    gkernel : float
        Passed to ``astropy.convolution.Gaussian1DKernel`` (the kernel's
        standard deviation, in channels according to the astropy docs).

    Returns
    -------
    tau_smooth : numpy.ndarray
        Smoothed tau spectrum (``boundary='extend'``).
    velocity : numpy.ndarray
        Velocity axis taken from the file.
    """
    # Load data from the file, skipping the first row
    data = np.loadtxt(file_path, skiprows=1)
    velocity = data[:, 1]
    amplitude = data[:, 2]

    # The baseline normalisation used to be duplicated here line for line;
    # reuse the shared helper so both code paths cannot drift apart.
    tau = tau_calculation(velocity, amplitude)

    # Smoothing with Gaussian Kernel
    tau_smooth = convolve(tau, Gaussian1DKernel(gkernel), boundary='extend')

    return tau_smooth, velocity

def process_files_by_common_name(directory, common_name, common_name2=None, direcotry2=None, gkernel_4k=1., gkernel_v2=1.):
    """Locate the two spectrum files of a source and return their smoothed tau spectra.

    Parameters
    ----------
    directory : str
        Directory searched for the ``_cube_4k`` file (a ``.txt`` file whose
        name starts with `common_name`).
    common_name : str
        File-name prefix of the first spectrum.
    common_name2 : str, optional
        File-name prefix of the second spectrum; defaults to `common_name`.
    direcotry2 : str, optional
        Directory of the second spectrum. (The misspelling is kept so that
        existing keyword callers keep working.) When given, the second file is
        any ``.txt`` file there starting with `common_name2`. When omitted,
        the second file is looked up in `directory` and must contain ``_v2``.
    gkernel_4k, gkernel_v2 : float, optional
        Smoothing widths forwarded to `process_spectrum` for each file.

    Returns
    -------
    tuple
        ``(tau_smooth_4k, x_4k, tau_smooth_v2, x_v2)``.

    Raises
    ------
    ValueError
        If either file cannot be found.

    Notes
    -----
    Fixes relative to the previous version: the body read an undefined name
    ``directory2`` (working only when a notebook global of that name existed),
    ``files2`` was unbound when `common_name2` was omitted, and in
    single-directory mode the ``_cube_4k`` file was returned twice instead of
    the ``_v2`` file.
    """
    directory2 = direcotry2
    if common_name2 is None:
        common_name2 = common_name

    def _txt_files(folder, prefix):
        # Sorted so that the pick is deterministic if several files match.
        return sorted(f for f in os.listdir(folder)
                      if f.startswith(prefix) and f.endswith(".txt"))

    file_4k = None
    for file in _txt_files(directory, common_name):
        if '_cube_4k' in file:
            file_4k = os.path.join(directory, file)

    file_v2 = None
    if directory2 is None:
        # Same directory: the second spectrum is identified by its '_v2' tag.
        for file in _txt_files(directory, common_name2):
            if '_v2' in file:
                file_v2 = os.path.join(directory, file)
    else:
        # Separate directory: any matching file there is the second spectrum.
        for file in _txt_files(directory2, common_name2):
            file_v2 = os.path.join(directory2, file)

    if file_4k is None or file_v2 is None:
        raise ValueError("Could not find both _cube_4k and _v2 files.")

    # Process the files
    tau_smooth_4k, x_4k = process_spectrum(file_4k, gkernel_4k)
    tau_smooth_v2, x_v2 = process_spectrum(file_v2, gkernel_v2)

    return tau_smooth_4k, x_4k, tau_smooth_v2, x_v2



### Data

In [9]:
# Pair every ASKAP source with the ATCA (4 km/s table) source found at the same
# rounded position. match_sources returns one tuple per matching row pair, so
# duplicates are collapsed with set(). Sorting makes the order - and with it the
# page order of the PDFs produced below - identical on every kernel restart,
# which a bare list(set(...)) of strings does not guarantee.
list_common_norma = sorted(set(match_sources(df_norma_askap, df_norma4k_atca)))

list_common_hydra = sorted(set(match_sources(df_hydra_askap, df_hydra4k_atca)))

In [10]:
# Write one PDF page per matched Norma source pair.
# The second argument is the region ('Norma' or 'Hydra'); it selects the
# ASKAP_spectra/<region>/ and ATCA_HI_spectra/<region>/ input directories.
plot_spectra(
    list_common_norma, 
    'Norma',
    df_norma_askap, 
    df_norma4k_atca,
    df_normaV2_atca, 
    save_as_pdf = True,
    pdf_filename='norma_field.pdf')

  return np.log(tauhi) * (-1)


In [11]:
# Write one PDF page per matched Hydra source pair.
# The second argument is the region ('Norma' or 'Hydra'); it selects the
# ASKAP_spectra/<region>/ and ATCA_HI_spectra/<region>/ input directories.
plot_spectra(
    list_common_hydra, 
    'Hydra',
    df_hydra_askap, 
    df_hydra4k_atca,
    df_hydraV2_atca, 
    save_as_pdf = True,
    pdf_filename='hydra_field.pdf')

## Extra

In [None]:
def _component_rows(df, key):
    """Return the rows of `df` whose 'source' (or, if present, 'source_new') equals `key`."""
    mask = df['source'] == key
    if 'source_new' in df.columns:
        mask = mask | (df['source_new'] == key)
    return df[mask]


def _plot_decomposition_panel(ax, x, tau, components, title_prefix, xlim=None):
    """Draw the Gaussian components, the spectrum and the summed fit on `ax`."""
    total_fit = np.zeros_like(x)
    n_components = 0
    for _, row in components.iterrows():
        # 'vel_disp' is used directly as the Gaussian sigma.
        component = gaussian(x, row['amp'], row['VLSR'], row['vel_disp'])
        total_fit += component
        n_components += 1
        ax.plot(x, component, linestyle='-', lw=0.8, color='red')

    ax.plot(x, tau, color='k', label='Original Spectrum', lw=0.8)
    ax.plot(x, total_fit, color='red', label='Total Fit', lw=1.2)
    ax.set_xlabel('Velocity (km/s)')
    ax.set_ylabel('Amplitude')
    ax.set_title(f'{title_prefix} ' r'$\quad N_{comp}=$' f'{n_components}')
    if xlim is not None:
        ax.set_xlim(*xlim)


def plot_all_spectra(directory, common_sources, df_1, df_2, output_pdf, direcotry2=None):
    """Plot every common source on one figure, three sources per row, two panels each.

    Parameters
    ----------
    directory : str
        Directory holding the first (``_cube_4k``) spectrum of each source.
    common_sources : iterable of str
        Source keys. A key is matched against 'source_new' when the table has
        that column and against 'source' otherwise/as well, so the function
        works both with normalised keys and with raw source names.
    df_1, df_2 : pandas.DataFrame
        Gaussian components ('source', 'amp', 'VLSR', 'vel_disp') for the
        upper and lower panel respectively.
    output_pdf : str or None
        Output path; '.pdf' is appended only if it is not already the suffix.
        None skips saving.
    direcotry2 : str, optional
        Directory of the second spectrum, forwarded to
        `process_files_by_common_name`. (The misspelling is kept for backward
        compatibility with keyword callers.)

    Raises
    ------
    ValueError
        If `common_sources` is empty.

    Notes
    -----
    Fixes relative to the previous version: the body read an undefined name
    ``directory2``; the loop variable was overwritten with the df_1 source
    name before it was used to look up df_2; and '.pdf' was appended to file
    names that already ended in '.pdf'.
    NOTE(review): the panel titles are hard-coded to 'ATCA' (upper) and
    'ASKAP' (lower) and the y label to 'Amplitude' although tau is plotted -
    confirm this is intended for every call site.
    """
    directory2 = direcotry2

    # Sorted so that the panel order is reproducible when a set is passed in.
    sources = sorted(common_sources)
    num_files = len(sources)
    if num_files == 0:
        raise ValueError("common_sources is empty; nothing to plot.")

    num_rows = int(np.ceil(num_files / 3))  # 3 subplots per row
    fig, axes = plt.subplots(num_rows, 3, figsize=(18, 8*num_rows))
    axes = axes.flatten()  # Flatten the array of axes for easier iteration

    for i, source_key in enumerate(sources):
        source_df_1 = _component_rows(df_1, source_key)
        source_df_2 = _component_rows(df_2, source_key)
        # File-name prefixes are the raw 'source' names of each table.
        common_name = source_df_1['source'].values[0]
        common_name2 = source_df_2['source'].values[0]

        tau_1, x_1, tau_2, x_2 = process_files_by_common_name(directory, common_name, common_name2, directory2)

        # Replace the placeholder axes by a nested 2x1 grid in the same cell
        outer_ax = axes[i]
        gs = outer_ax.get_gridspec()
        outer_ax.remove()

        inner_gs = gs[i].subgridspec(2, 1, height_ratios=[1, 1], hspace=0.28)
        ax1 = fig.add_subplot(inner_gs[0])
        ax2 = fig.add_subplot(inner_gs[1])

        # Upper panel: first spectrum with the df_1 components
        _plot_decomposition_panel(ax1, x_1, tau_1, source_df_1, f'{common_name} - ATCA')
        # Lower panel: second spectrum with the df_2 components
        _plot_decomposition_panel(ax2, x_2, tau_2, source_df_2, f'{common_name} - ASKAP',
                                  xlim=(-200, 300))

    # Hide any remaining empty subplots, if any
    for j in range(num_files, num_rows * 3):
        fig.delaxes(axes[j])

    # Adjust layout, save and display the figure
    plt.tight_layout()
    if output_pdf is not None:
        pdf_path = str(output_pdf)
        if not pdf_path.lower().endswith('.pdf'):
            pdf_path = f'{pdf_path}.pdf'
        fig.savefig(pdf_path, format='pdf')

    plt.show()

In [None]:
# Sources present in both Norma ATCA tables, compared on the exact 'source' name.
# NOTE: `sources_df1`, `sources_df2` and `common_sources` are reassigned by
# several later cells, so their value depends on which cell ran last.
# 1. Extract the 'source' column from both DataFrames
sources_df1 = set(df_norma4k_atca['source'])
sources_df2 = set(df_normaV2_atca['source'])

# 2. Find the common sources between the two DataFrames
common_sources = sources_df1.intersection(sources_df2)

# 3. Display the common sources
print("Common source names:", common_sources)

In [None]:
# Plot the two Norma ATCA decompositions for the sources in `common_sources`.
# Uses whatever `common_sources` currently holds, so run the matching cell
# directly above first.
directory = 'ATCA_HI_spectra/Norma/'
output_pdf = 'norma_atca_gaussian_decompositions.pdf'

plot_all_spectra(directory, common_sources, df_norma4k_atca, df_normaV2_atca,  output_pdf)

In [None]:
# Sources present in both Hydra ATCA tables, compared on the exact 'source' name.
# NOTE: this overwrites `sources_df1`, `sources_df2` and `common_sources`.
# 1. Extract the 'source' column from both DataFrames
sources_df1 = set(df_hydra4k_atca['source'])
sources_df2 = set(df_hydraV2_atca['source'])

# 2. Find the common sources between the two DataFrames
common_sources = sources_df1.intersection(sources_df2)

# 3. Display the common sources
print("Common source names:", common_sources)

In [None]:
# Plot the two Hydra ATCA decompositions for the sources in `common_sources`.
# Uses whatever `common_sources` currently holds, so run the matching cell
# directly above first.
directory = 'ATCA_HI_spectra/Hydra/'
output_pdf = 'hydra_atca_gaussian_decompositions.pdf'

plot_all_spectra(directory, common_sources, df_hydra4k_atca, df_hydraV2_atca,  output_pdf)

In [None]:
# Unique raw source names of the Hydra ATCA (4 km/s table) and ASKAP tables.
# NOTE: this overwrites `sources_df1` and `sources_df2` from earlier cells.
sources_df1 = set(df_hydra4k_atca['source'])
sources_df2 = set(df_hydra_askap['source'])
def normalize_source_format(source):
    """
    Rewrite a source name into a shortened 'j<first part>-<second part>' key.

    Names that already start with 'j' keep their first seven characters,
    followed by '-' and characters 8 to 12. All other names get a 'j' prefix,
    their first six characters, '-' and the characters from index 9 up to (but
    excluding) the last two.
    """
    if not source.startswith('j'):
        # No leading 'j': add it, keep 6 leading characters and drop the last two.
        return f'j{source[:6]}-{source[9:-2]}'
    # Leading 'j' present: fixed-width slices on both sides of position 7.
    return f'{source[0:7]}-{source[8:13]}'

# Compare the two Hydra catalogues on the normalised key instead of the raw name.
# 1. Normalize the 'source' column from both DataFrames
normalized_sources_df1 = {normalize_source_format(source) for source in sources_df1}
normalized_sources_df2 = {normalize_source_format(source) for source in sources_df2}
# 2. Find the common sources between the two DataFrames
common_sources = normalized_sources_df1.intersection(normalized_sources_df2)

# 3. Display the common sources
print("Common source names:", common_sources)

In [None]:
# Add a 'source_new' key column (in place) to both Hydra tables so that ASKAP
# and ATCA names can be compared.
# ASKAP: 'j' + characters 0-5 + '-' + characters 9 up to (excluding) the last two.
# ATCA:  characters 0-6 + '-' + characters 8-12.
# NOTE(review): the slices hard-code each catalogue's name layout - confirm
# against the actual 'source' values.
df_hydra_askap['source_new'] = df_hydra_askap['source'].apply(lambda source: 'j' + source[:6] + '-' + source[9:-2])
df_hydra4k_atca['source_new'] = df_hydra4k_atca['source'].apply(lambda source: source[0:7] + '-' + source[8:13])



In [None]:
# Hydra sources present in both the ATCA and the ASKAP table, compared on the
# 'source_new' key (requires the cell that adds that column to have run).
# 1. Extract the 'source_new' column from both DataFrames
sources_df1 = set(df_hydra4k_atca['source_new'])
sources_df2 = set(df_hydra_askap['source_new'])

# 2. Find the common sources between the two DataFrames
common_sources = sources_df1.intersection(sources_df2)

# 3. Display the common sources
print("Common source names:", common_sources)

In [None]:
# Plot the Hydra ATCA and ASKAP decompositions for the sources in `common_sources`.
# `directory` holds the ATCA spectra, `directory2` the ASKAP spectra.
directory = 'ATCA_HI_spectra/Hydra/'
directory2 = 'ASKAP_spectra/Hydra/'
output_pdf = 'hydra_gaussian_decompositions.pdf'

plot_all_spectra(directory, common_sources, df_hydra4k_atca, df_hydra_askap,  output_pdf, directory2)

In [None]:
# Add a 'source_new' key column (in place) to both Norma tables so that ASKAP
# and ATCA names can be compared.
# ASKAP: 'j' + characters 0-5 + '-' + characters 9 up to (excluding) the last two.
# ATCA:  characters 0-6 + '-' + characters 8-12.
# NOTE(review): the slices hard-code each catalogue's name layout - confirm
# against the actual 'source' values.
df_norma_askap['source_new'] = df_norma_askap['source'].apply(lambda source: 'j' + source[:6] + '-' + source[9:-2])
df_norma4k_atca['source_new'] = df_norma4k_atca['source'].apply(lambda source: source[0:7] + '-' + source[8:13])



In [None]:
# Norma sources present in both the ATCA and the ASKAP table, compared on the
# 'source_new' key (requires the cell that adds that column to have run).
# 1. Extract the 'source_new' column from both DataFrames
sources_df1 = set(df_norma4k_atca['source_new'])
sources_df2 = set(df_norma_askap['source_new'])

# 2. Find the common sources between the two DataFrames
common_sources = sources_df1.intersection(sources_df2)

# 3. Display the common sources
print("Common source names:", common_sources)

In [None]:
# Plot the Norma ATCA and ASKAP decompositions for the sources in `common_sources`.
# `directory` holds the ATCA spectra, `directory2` the ASKAP spectra.
directory = 'ATCA_HI_spectra/Norma/'
directory2 = 'ASKAP_spectra/Norma/'
output_pdf = 'norma_gaussian_decompositions.pdf'

plot_all_spectra(directory, common_sources, df_norma4k_atca, df_norma_askap,  output_pdf, directory2)