In [1]:
import pandas as pd
import numpy as np

# Specify the file path
file_path = 'redshifts.txt'

# Load the DataFrame from the CSV file
df = pd.read_table(file_path, delimiter='\t')  # Assuming the file is tab-delimited, adjust the delimiter if needed

In [2]:
import re

def filter_redshift_by_grb_names(index_file_path, redshift_file_path, output_file_with_redshift, output_file_without_redshift):
    # Read the GRB names from index.txt
    with open(index_file_path, 'r') as index_file:
        grb_names_to_select = [line.strip() for line in index_file if line.strip()]

    # Read the redshift.txt file
    redshift_df = pd.read_csv(redshift_file_path, delimiter='\t')

    # Filter rows based on the selected GRB names
    filtered_redshift_df = redshift_df[redshift_df['GRB'].isin(grb_names_to_select)]

    # Extract numerical values from 'PhotonIndex' column
    filtered_redshift_df['PhotonIndex'] = filtered_redshift_df['PhotonIndex'].apply(
        lambda x: float(x.split(',')[0]) if isinstance(x, str) and ',' in x else 'NaN'
    )

    # Sort the DataFrame by the 'GRB' column
    filtered_redshift_df = filtered_redshift_df.sort_values(by='GRB')

    # Reset the indices
    filtered_redshift_df = filtered_redshift_df.reset_index(drop=True)

    # Select certain rows based on a condition
    selected_columns = ['GRB','Redshift_crosscheck', 'T90', 'Gamma', 'Fluence', 'FluenceError', 'PhotonIndex', 'PhotonIndexError', 'NH', 'PeakFlux', 'PeakFluxError']  # Adjust the condition as needed

    filtered_redshift_df = filtered_redshift_df[selected_columns]

    # Input 'NaN' string
    filtered_redshift_df = filtered_redshift_df.fillna('NaN')

    # Add 'log10Ta', 'log10Fa', 'alpha', and respective error columns
    # GRB , Ta, TaErr, Fa, FaErr, alpha, alphaErr
    fit_params = [['220611A', '220521A', '210323A', '210517A', '201014A', '200411A', '221226B', '221110A', '210726A', '210610A', '210222B', '210104A', '201104B', '201024A', '201020A', '200205B', '191011A', '21722A', '210731A', '211024B', '230414B', '230325A', '191031D', '190829A', '201221A', '190719C', '230506C', '230818A', '231117A', '230328B', '230116D', '220101A', '220117A', '210411C', '210210A', '210619B', '191221B', '210722A', '231118A'],
                  [1.12971557, 3.30131796, 4.32838093, 3.05765100, 5.54700512, 1.96654768, 0.47461698, 2.20438137, 2.72682434, 3.15453451, 3.24642494, 3.19233419, 2.72159651, 3.01817341, 3.96908872, 4.53640048, 3.15321333, 3.94283864, 4.80200435, 3.43921565, 4.87925740, 3.78230810, 2.322042249, 3.40640655, 4.54931529, 4.88123, 4.25859, 3.9564, 3.78095, 3.66388, 3.79761, 3.04134881, 3.43803362, 2.93128506, 3.72413045, 2.91049117, 1.95518066, 3.94283863, 2.20746],
                  [0.81046481, 0.13206168, 0.09651687, 0.32132806, 0.16137784, 0.25685294, 0.14697180, 0.68618385, 0.14755536, 1.28215864, 0.04927183, 0.00889405, 0.02972805, 0.63285167, 0.04826492, 0.08886983, 0.06597695, 0.06056870, 0.11699725, 0.01167528, 0.06954900, 0.20042492, 0.06348750, 0.00974555, 0.02804575, 0.194300, (4.25859-4.14018), (3.9564-3.82846), (3.78095-3.76714), (3.66388-3.63157), (3.79761-3.65937), 0.02317526, 0.04500955, 0.05901459, 0.08857348, 0.01099582, 0.05366691, 0.06056870, (2.20746-2.06798)], 
                  [-12.4941008, -10.6276115,  -11.9389232, -10.8415194, -12.43719871, -10.9488278, -7.35134395, -9.52540051, -11.1684430, -10.4044334, -10.4520599, -9.42347066, -9.38944702, -10.4667906, -11.0900559, -11.7337376, -10.8132081, -14.4834179, -11.9404653, -8.56429146, -11.9505263, -10.9066853, -9.63606913, -8.58538291, -12.0789806, -11.257, -11.4531, -11.0883, -6.90879, -10.1038, -11.1278,  -8.69262974, -10.1070569, -9.72925773,  -10.1403778, -8.55006175, -8.11145684, -14.483417911682453, -8.78983], 
                  [0.22640221, 0.16334814, 0.31176236, 0.25872455, 0.33104417, 1.04551800, 0.11871169, 0.67651915, 0.10610593, 0.85146569, 0.04184217, 0.00753937, 0.02110237, 0.58074222, 0.04915322, 0.10035241, 0.005719327, 0.267760622, 0.16085961, 0.01916437, 0.11074106, 0.18754550, 0.11503072, 0.00773590, 0.04385192, 0.2175, (-11.4531+11.5554), (-11.0883+11.2927), (-6.90879+6.96363), (-10.1038+10.1305), (-11.1278+11.2292), 0.01431841, 0.05084186, 0.05048963, 0.05799791, 0.01027813, 0.06184721, 0.26776062241322135, (-8.78983+8.91085)], 
                  [2.26249671, 1.71067512, 3.40011162, 1.21043545, 2.73545391, 0.84579687, 1.32089449, 1.02202171, 0.62132497, 1.08167850, 1.37828029, 1.19649204, 1.19537497, 0.99913283, 1.55158826, 1.56884903, 1.30602449, 1.60481787, 1.74153741, 1.93633006, 2.06160972, 0.931411844, 1.90294890, 1.18850034, 2.93923332, 1.55953, 1.14181, 1.39195, 0.683779, 1.20938, 1.57289, 1.20052929, 1.68612083, 1.16829405, 1.54518510, 1.12578835, 1.15734724, 1.60481787, 1.00111], 
                  [0.50304248, 0.12291833, 0.68301762, 0.08060357, 0.94410954, 0.30665794, 0.04333399, 0.03483546, 0.14755536, 0.07850685, 0.005516005, 0.01279355, 0.02348264, 0.03486451, 0.08306159, 0.19267266, 0.05756962, 0.06764959, 0.27863334, 0.00965772, 0.21988187, 0.05714513, 0.05586376, 0.00361642, 0.004540267, 0.21085, (1.14181-1.02882), (1.39195-1.1948), (0.683779-0.658857), (1.20938-1.18517), (1.57289-1.33798), 0.01255481, 0.06200239, 0.03209979, 0.10645081, 0.00437126, 0.00551831, 0.06764959, (1.00111-0.959664)]]

    # Loop through each set of parameters in fit_params
    for i in range(len(fit_params[0])):
        grb_name = fit_params[0][i]
        ta_value = fit_params[1][i]
        ta_err_value = fit_params[2][i]
        fa_value = fit_params[3][i]
        fa_err_value = fit_params[4][i]
        alpha_value = fit_params[5][i]
        alpha_err_value = fit_params[6][i]

        # Check if GRB name is in the filtered DataFrame
        if grb_name in filtered_redshift_df['GRB'].values:

            # Add values to the DataFrame
            filtered_redshift_df.loc[filtered_redshift_df['GRB'] == grb_name, 'log10Ta'] = ta_value
            filtered_redshift_df.loc[filtered_redshift_df['GRB'] == grb_name, 'log10TaErr'] = ta_err_value
            filtered_redshift_df.loc[filtered_redshift_df['GRB'] == grb_name, 'log10Fa'] = fa_value
            filtered_redshift_df.loc[filtered_redshift_df['GRB'] == grb_name, 'log10FaErr'] = fa_err_value
            filtered_redshift_df.loc[filtered_redshift_df['GRB'] == grb_name, 'alpha'] = alpha_value
            filtered_redshift_df.loc[filtered_redshift_df['GRB'] == grb_name, 'alphaErr'] = alpha_err_value

    # Drop rows with missing values
    # filtered_redshift_df = filtered_redshift_df.dropna()

    beta_params = [[2.09, 'NaN', 1.9, 2.39, 1.53, 3.1, 1.9, 1.61, 1.3, 'NaN', 'NaN', 2.5, 1.56, 1.71, 1.93, 1.846, 'NaN', 1.86, 'NaN', 'NaN', 'NaN', 'Nan', 1.32, 2.2, 1.8, 1.8, 2, 1.9, 'NaN', 1.9, 'NaN', 'NaN', 1.4],
                   [0.28, 'NaN', 0.5, 0.18, 1.05, 3.3, 0.6, 1.17, 0.6, 'NaN', 'NaN', 0.6, 0.3, 0.26, 0.17, 0.102, 'NaN', 0.39, 'NaN', 'NaN','NaN', 'Nan', 0.43, 0.5, 0.4, 0.36, 0.6, 0.6, 'NaN', 0.6, 'NaN', 'NaN', 0.14],
                   [0.26, 'NaN', 0.4, 0.17, 0.51, 1.2, 0.4, 0.6, 0.4, 'NaN', 'NaN', 0.5, 0.16, 0.16, 0.17, 0.099, 'NaN', 0.27, 'NaN', 'NaN', 'NaN', 'Nan', 0.27, 0.4, 0.4, 0.23, 0.3, 0.5, 'NaN', 0.5, 'Nan', 'NaN', 0.14]]

    #Calculate beta
    filtered_redshift_df['beta'] = [beta_params[0][i] if type(beta_params[0][i])==str else beta_params[0][i]-1 for i in range(len(beta_params[0]))]

    #Calculate beta error
    filtered_redshift_df['betaErr'] = [beta_params[1][i] if type(beta_params[1][i])==str else (beta_params[1][i]+beta_params[2][i])/2 for i in range(len(beta_params[0]))]

    # Add 'GRB' to the start of the 'GRB' column entries
    filtered_redshift_df['GRB'] = 'GRB' + filtered_redshift_df['GRB'].astype(str)

    # Split data into two DataFrames based on the presence of redshift
    with_redshift = filtered_redshift_df[filtered_redshift_df['Redshift_crosscheck']!='NaN']
    without_redshift = filtered_redshift_df[filtered_redshift_df['Redshift_crosscheck']=='NaN']

    # Save DataFrames to separate files
    with_redshift.to_csv(output_file_with_redshift, sep='\t', index=False)
    without_redshift.to_csv(output_file_without_redshift, sep='\t', index=False)

    print(str(len(with_redshift)) + ' GRBs with redshift.')
    print(str(len(without_redshift)) + ' GRBs without redshift.')
    return with_redshift, without_redshift



# Example usage
index_file_path = 'indexes.txt'  # Replace with the actual path
redshift_file_path = file_path  # Replace with the actual path
output_file_with_redshift = 'filtered_data_with_redshift.txt'
output_file_without_redshift = 'filtered_data_without_redshift.txt'

result_df = filter_redshift_by_grb_names(index_file_path, redshift_file_path, output_file_with_redshift, output_file_without_redshift)

33 GRBs with redshift.
0 GRBs without redshift.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_redshift_df['PhotonIndex'] = filtered_redshift_df['PhotonIndex'].apply(


In [3]:
missing_grbs = set(result_df[0]['GRB'])-set(pd.read_csv('redshifts.txt', delimiter='\t')['GRB'])
print(str(len(missing_grbs)) + " Missing GRBs:")
for grb in missing_grbs:
    print(grb)

33 Missing GRBs:
GRB211024B
GRB210210A
GRB210411C
GRB220521A
GRB201104B
GRB230325A
GRB190719C
GRB210222B
GRB230328B
GRB201221A
GRB191011A
GRB201014A
GRB190829A
GRB230818A
GRB201020A
GRB191221B
GRB210517A
GRB231117A
GRB210619B
GRB210722A
GRB230414B
GRB230506C
GRB220101A
GRB210731A
GRB220117A
GRB231118A
GRB210610A
GRB230116D
GRB200205B
GRB201024A
GRB220611A
GRB221110A
GRB221226B


In [4]:
# input_file_path = 'indexes.txt'
# output_file_path = 'indexes.txt'

# # Read the data from the input file
# with open(input_file_path, 'r') as input_file:
#     data = input_file.readlines()

# # Process the data to remove quotes, commas, and plus signs
# processed_data = [line.replace('"', '').replace(',', '').replace('+', '').strip() for line in data]

# # Write the processed data to the output file
# with open(output_file_path, 'w') as output_file:
#     output_file.write('\n'.join(processed_data))