In [11]:
import pandas as pd
import numpy as np

# Specify the file path
file_path = 'redshifts.txt'

# Load the DataFrame from the CSV file
df = pd.read_table(file_path, delimiter='\t')  # Assuming the file is tab-delimited, adjust the delimiter if needed

In [12]:
import re

def filter_redshift_by_grb_names(index_file_path, redshift_file_path, output_file_with_redshift, output_file_without_redshift):
    # Read the GRB names from index.txt
    with open(index_file_path, 'r') as index_file:
        grb_names_to_select = [line.strip() for line in index_file if line.strip()]

    # Read the redshift.txt file
    redshift_df = pd.read_csv(redshift_file_path, delimiter='\t')

    # Filter rows based on the selected GRB names
    filtered_redshift_df = redshift_df[redshift_df['GRB'].isin(grb_names_to_select)]

    # Extract numerical values from 'PhotonIndex' column
    filtered_redshift_df['PhotonIndex'] = filtered_redshift_df['PhotonIndex'].apply(
        lambda x: float(x.split(',')[0]) if isinstance(x, str) and ',' in x else 'NaN'
    )

    # Sort the DataFrame by the 'GRB' column
    filtered_redshift_df = filtered_redshift_df.sort_values(by='GRB')

    # Reset the indices
    filtered_redshift_df = filtered_redshift_df.reset_index(drop=True)

    # Select certain rows based on a condition
    selected_columns = ['GRB','Redshift_crosscheck', 'T90', 'Gamma', 'Fluence', 'PhotonIndex', 'NH', 'PeakFlux']  # Adjust the condition as needed

    filtered_redshift_df = filtered_redshift_df[selected_columns]

    # Input 'NaN' string
    filtered_redshift_df = filtered_redshift_df.fillna('NaN')

    # Add 'log10Ta', 'log10Fa', 'alpha', and respective error columns
    # GRB , Ta, TaErr, Fa, FaErr, alpha, alphaErr
    fit_params = [['220611A', '220521A', '210323A', '210517A', '201014A', '200411A', '221226B', '221110A', '210726A', '210610A', '210222B', '210104A', '210222B', '210104A', '201104B', '201024A', '201020A', '200205B', '191011A', '21722A', '210731A', '211024B', '230414B', '230325A', '191031D', '190829A', '201221A'],
                  [1.12971557, 3.30131796, 4.32838093, 3.05765100, 5.54700512, 1.96654768, 0.47461698, 2.20438137, 2.72682434, 3.15453451, 3.24642494, 3.19233419, 3.24642494, 3.19233419, 2.72159651, 3.01817341, 3.96908872, 4.53640048, 3.15321333, 3.94283864, 4.80200435, 3.43921565, 4.87925740, 3.78230810, 2.322042249, 3.40640655, 4.54931529],
                  [0.81046481, 0.13206168, 0.09651687, 0.32132806, 0.16137784, 0.25685294, 0.14697180, 0.68618385, 0.14755536, 1.28215864, 0.04927183, 0.00889405, 0.04927183, 0.00889405, 0.02972805, 0.63285167, 0.04826492, 0.08886983, 0.06597695, 0.06056870, 0.11699725, 0.01167528, 0.06954900, 0.20042492, 0.06348750, 0.00974555, 0.02804575], 
                  [-12.4941008, -10.6276115,  -11.9389232, -10.8415194, -12.43719871, -10.9488278, -7.35134395, -9.52540051, -11.1684430, -10.4044334, -10.4520599, -10.4044334, -10.4520599, -9.42347066, -9.38944702, -10.4667906, -11.0900559, -11.7337376, -10.8132081, -14.4834179, -11.9404653, -8.56429146, -11.9505263, -10.9066853, -9.63606913, -8.58538291, -12.0789806], 
                  [0.22640221, 0.16334814, 0.31176236, 0.25872455, 0.33104417, 1.04551800, 0.11871169, 0.67651915, 0.10610593, 0.85146569, 0.04184217, 0.85146569, 0.04184217, 0.00753937, 0.02110237, 0.58074222, 0.04915322, 0.10035241, 0.005719327, 0.267760622, 0.16085961, 0.01916437, 0.11074106, 0.18754550, 0.11503072, 0.00773590, 0.04385192], 
                  [2.26249671, 1.71067512, 3.40011162, 1.21043545, 2.73545391, 0.84579687, 1.32089449, 1.02202171, 0.62132497, 1.08167850, 1.37828029, 1.08167850, 1.37828029, 1.19649204, 1.19537497, 0.99913283, 1.55158826, 1.56884903, 1.30602449, 1.60481787, 1.74153741, 1.93633006, 2.06160972, 0.931411844, 1.90294890, 1.18850034, 2.93923332], 
                  [0.50304248, 0.12291833, 0.68301762, 0.08060357, 0.94410954, 0.30665794, 0.04333399, 0.03483546, 0.14755536, 0.07850685, 0.005516005, 0.07850685, 0.05516005, 0.01279355, 0.02348264, 0.03486451, 0.08306159, 0.19267266, 0.05756962, 0.06764959, 0.27863334, 0.00965772, 0.21988187, 0.05714513, 0.05586376, 0.00361642, 0.004540267]]

    # Loop through each set of parameters in fit_params
    for i in range(len(fit_params[0])):
        grb_name = fit_params[0][i]
        ta_value = fit_params[1][i]
        ta_err_value = fit_params[2][i]
        fa_value = fit_params[3][i]
        fa_err_value = fit_params[4][i]
        alpha_value = fit_params[5][i]
        alpha_err_value = fit_params[6][i]

        # Check if GRB name is in the filtered DataFrame
        if grb_name in filtered_redshift_df['GRB'].values:

            # Add values to the DataFrame
            filtered_redshift_df.loc[filtered_redshift_df['GRB'] == grb_name, 'log10Ta'] = ta_value
            filtered_redshift_df.loc[filtered_redshift_df['GRB'] == grb_name, 'log10TaErr'] = ta_err_value
            filtered_redshift_df.loc[filtered_redshift_df['GRB'] == grb_name, 'log10Fa'] = fa_err_value
            filtered_redshift_df.loc[filtered_redshift_df['GRB'] == grb_name, 'log10FaErr'] = fa_value
            filtered_redshift_df.loc[filtered_redshift_df['GRB'] == grb_name, 'alpha'] = alpha_value
            filtered_redshift_df.loc[filtered_redshift_df['GRB'] == grb_name, 'alphaErr'] = alpha_err_value

    # Drop rows with missing values
    filtered_redshift_df = filtered_redshift_df.dropna()

    #Init an empty array
    filtered_redshift_df['beta'] = ['NaN']*len(filtered_redshift_df)

    # Add 'GRB' to the start of the 'GRB' column entries
    filtered_redshift_df['GRB'] = 'GRB' + filtered_redshift_df['GRB'].astype(str)

    # Split data into two DataFrames based on the presence of redshift
    with_redshift = filtered_redshift_df[filtered_redshift_df['Redshift_crosscheck']!='NaN']
    without_redshift = filtered_redshift_df[filtered_redshift_df['Redshift_crosscheck']=='NaN']

    # Save DataFrames to separate files
    with_redshift.to_csv(output_file_with_redshift, sep='\t', index=False)
    without_redshift.to_csv(output_file_without_redshift, sep='\t', index=False)

    return with_redshift, without_redshift


    # Save the DataFrame to a new text file
    filtered_redshift_df.to_csv(output_file_path, sep='\t', index=False)

    return filtered_redshift_df



# Example usage
index_file_path = 'indexes.txt'  # Replace with the actual path
redshift_file_path = file_path  # Replace with the actual path
output_file_with_redshift = 'filtered_data_with_redshift.txt'
output_file_without_redshift = 'filtered_data_without_redshift.txt'

result_df = filter_redshift_by_grb_names(index_file_path, redshift_file_path, output_file_with_redshift, output_file_without_redshift)

# Print the resulting DataFrame
print(result_df)

(           GRB Redshift_crosscheck    T90    Gamma Fluence PhotonIndex  \
1   GRB190829A              0.0785   58.2  2.09130      64        2.56   
3   GRB191011A               1.722   7.37  1.81479     3.3        1.94   
7   GRB200205B               1.465  458.0  1.90881      54        1.36   
12  GRB201014A                4.56   36.2  1.97336     3.1        2.55   
14  GRB201020A               2.903  14.17  2.07610     9.2        2.25   
16  GRB201024A               0.999   5.00  2.07522     8.1         2.1   
17  GRB201104B               1.954   8.66  1.55046      18        1.47   
19  GRB201221A                5.70   44.5  1.45068      18         1.4   
23  GRB210222B               2.198  12.82  1.98000     3.4        2.37   
29  GRB210517A               2.486   3.06  1.71079     1.5        1.85   
30  GRB210610A                3.54  13.62  1.83562      10        1.41   
36  GRB210731A              1.2525  22.51  2.06317      22       -0.25   
41  GRB211024B              1.1137  6

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_redshift_df['PhotonIndex'] = filtered_redshift_df['PhotonIndex'].apply(
