In [1]:
#IMPORTS
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import warnings



In [2]:
warnings.filterwarnings('ignore')

In [6]:
# Load the dataset
data = pd.read_csv('../../../datasets/summary_general-2.txt', sep=r'\s*\|\s*', engine='python')
# Display the first few rows of the dataset
print(data.head())
print(data['T90'])

   ## GRBname    Trig_ID  Trig_time_met               Trig_time_UTC  \
0  GRB231215A  1202522.0   7.243265e+08  2023-12-15T09:47:25.273080   
1  GRB231214A  1202386.0   7.242783e+08  2023-12-14T20:24:28.316380   
2  GRB231210B  1201696.0   7.239366e+08  2023-12-10T21:29:04.499760   
3  GRB231205B  1200812.0   7.234875e+08  2023-12-05T16:43:59.506460   
4  GRB231129A  1199764.0   7.229272e+08  2023-11-29T05:05:59.096760   

   RA_ground  DEC_ground  Image_position_err  Image_SNR      T90    T90_err  \
0    9.72982    57.63370            1.296588   20.98807   22.096   3.472525   
1  305.66880   -72.43144            0.917670   34.38932   27.636   4.568637   
2   95.80293   -48.33463            1.761250   13.55020    7.472   0.647951   
3   54.15053    27.14630            0.978140   31.39288   64.000  16.000000   
4  317.54100    41.53033            0.960403   32.22439  106.324   2.018324   

      T50    T50_err  Evt_start_sincetrig  Evt_stop_sincetrig   pcode  \
0   7.040   1.432603     

In [26]:
# Assuming 'T90' is the column for duration in the dataset
threshold = 2  # typically, GRBs with T90 < 2 seconds are considered short

# Create a new column 'GRB_class' where 0 = short, 1 = long
data['GRB_class'] = data['T90'].apply(lambda x: 0 if x < threshold else 1)

# Select features (you may need to adjust these depending on the dataset)
features = ['T50']  # example feature columns
X = data[features]
y = data['GRB_class']

In [27]:
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the feature columns
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [28]:
# Initialize the Random Forest classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the classifier
clf.fit(X_train, y_train)

# Predict the test set results
y_pred = clf.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.99


In [29]:
# Display the classification report
print(classification_report(y_test, y_pred, target_names=['Short GRB', 'Long GRB']))

              precision    recall  f1-score   support

   Short GRB       0.94      1.00      0.97        33
    Long GRB       1.00      0.99      1.00       285

    accuracy                           0.99       318
   macro avg       0.97      1.00      0.98       318
weighted avg       0.99      0.99      0.99       318



In [31]:
import numpy as np
#Basic statistical features
mean_t50 = np.mean(data['T50'])
median_t50 = np.median(data['T50'])
std_t50 = np.std(data['T50'])
skew_t50 = data['T50'].skew()
kurt_t50 = data['T50'].kurtosis()

# Normalized T50
data['Normalized_T50'] = data['T50'] / data['T90']

# Logarithm of T50
data['Log_T50'] = np.log(data['T50'])

# T50 to T90 ratio
data['T50_to_T90'] = data['T50'] / data['T90']

# Display results
print("Mean T50:", mean_t50)
print("Median T50:", median_t50)
print("Standard Deviation of T50:", std_t50)
print("Skewness of T50:", skew_t50)
print("Kurtosis of T50:", kurt_t50)
print(data.head())

Mean T50: 32.90323930791257
Median T50: nan
Standard Deviation of T50: 61.37670318053531
Skewness of T50: 5.7574674039619715
Kurtosis of T50: 47.01474340900766
   ## GRBname    Trig_ID  Trig_time_met               Trig_time_UTC  \
0  GRB231215A  1202522.0   7.243265e+08  2023-12-15T09:47:25.273080   
1  GRB231214A  1202386.0   7.242783e+08  2023-12-14T20:24:28.316380   
2  GRB231210B  1201696.0   7.239366e+08  2023-12-10T21:29:04.499760   
3  GRB231205B  1200812.0   7.234875e+08  2023-12-05T16:43:59.506460   
4  GRB231129A  1199764.0   7.229272e+08  2023-11-29T05:05:59.096760   

   RA_ground  DEC_ground  Image_position_err  Image_SNR      T90    T90_err  \
0    9.72982    57.63370            1.296588   20.98807   22.096   3.472525   
1  305.66880   -72.43144            0.917670   34.38932   27.636   4.568637   
2   95.80293   -48.33463            1.761250   13.55020    7.472   0.647951   
3   54.15053    27.14630            0.978140   31.39288   64.000  16.000000   
4  317.54100    41