In [2]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder, OrdinalEncoder, MinMaxScaler, RobustScaler, MaxAbsScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.decomposition import PCA
from sklearn.utils.class_weight import compute_class_weight

from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

from xgboost import XGBClassifier, XGBRegressor
import xgboost as xgb

  from pandas import MultiIndex, Int64Index


In [3]:
features_num = [
    'Total_flux', 'Peak_flux', 
       'NUV_flux_corr', 'u_flux_corr', 'Bw_flux_corr', 'R_flux_corr',
       'I_flux_corr', 'z_flux_corr', 'y_flux_corr',
       'J_flux_corr', 'H_flux_corr', 'K_flux_corr', 'Ks_flux_corr',
       'ch1_flux_corr', 'ch2_flux_corr', 'ch3_flux_corr', 'ch4_flux_corr',
       'F_MIPS_24', 'F_PACS_100', 'F_PACS_160', 'F_SPIRE_250', 'F_SPIRE_350',
       'F_SPIRE_500', 'Z_BEST', 'r_rcs_flux_corr',
       'g_flux_corr', 'nb921_hsc_flux_corr'
    ]
y_column = "Classification"

classes = ['jet-mode radio AGN/low-excitation radio galaxy', 'quasar-like radio AGN / high-excitation radio galaxy', 
           'radio-quiet AGN', 'star-forming galaxy']

In [4]:
data = pd.read_csv("../../../Data/Fangyou_data/Cleaned/combined_using_similar_columns.csv")

In [5]:
X = data[features_num]
y = data[y_column]

In [6]:
le = LabelEncoder()
labels = np.unique(y.astype(str))
y = le.fit_transform(y)

## BH data

In [7]:
Best_Heckman_data = pd.read_csv("../../../Data/Best&Heckman/BestHeckman+SDSS+wise+LOFAR.csv")

In [8]:
# Only selecting data with a classification
Best_Heckman_data = Best_Heckman_data[Best_Heckman_data['Classification'] != 'Radio-loud AGN'] 

In [9]:
Best_Heckman_X = Best_Heckman_data[[c for c in Best_Heckman_data.columns if c != 'Classification']]
Best_Heckman_y = Best_Heckman_data['Classification']

In [10]:
Best_Heckman_y = le.transform(Best_Heckman_y)

In [11]:

y = np.append(y, Best_Heckman_y)

In [12]:
Best_Heckman_X = Best_Heckman_X[['Z_BEST', 'u_flux_corr',
       'g_flux_corr', 'R_flux_corr', 'I_flux_corr', 'z_flux_corr', 'ch1_flux_corr', 'ch2_flux_corr',
       'J_flux_corr', 'H_flux_corr', 'Ks_flux_corr', 'Peak_flux', 'Total_flux']]

# Copying R column
Best_Heckman_X['r_rcs_flux_corr'] = Best_Heckman_X['R_flux_corr']

# Adding nans to missing columns
Best_Heckman_X[['NUV_flux_corr', 'Bw_flux_corr', 'y_flux_corr', 'K_flux_corr', 
                'F_MIPS_24', 'F_PACS_100', 'F_PACS_160', 'F_SPIRE_250', 'F_SPIRE_350',
                'F_SPIRE_500', 'nb921_hsc_flux_corr', 'ch3_flux_corr', 'ch4_flux_corr']] = np.nan

In [13]:
X = pd.concat([X, Best_Heckman_X]).reset_index(drop=True)

In [14]:
# Creating new features
columns = X.columns
for i in range(len(columns)):
    if columns[i]!='Z_BEST':
        for j in range(i+1, len(columns)):
            if columns[j]!='Z_BEST':
                X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
                X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]

  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[co

  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[co

  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[co

  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[co

  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[co

  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[co

  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[co

  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[co

  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[co

  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[co

  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[co

  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[co

  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[co

  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[co

  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[co

  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[co

  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[co

  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[co

  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[co

  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[co

  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]
  X[f"{columns[i]}/{columns[j]}"] = X[columns[i]]/X[columns[j]]
  X[f"{columns[i]}-{columns[j]}"] = X[columns[i]]-X[columns[j]]


## Running the model

In [17]:
# Creating training and testing set
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, stratify=y, random_state=42)
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, train_size=0.8, stratify=y_test, random_state=42)

from sklearn.utils import class_weight
classes_weights = class_weight.compute_sample_weight(
    class_weight='balanced',
    y=y_train
)

In [16]:
model = XGBClassifier(use_label_encoder=False, 
                      max_depth=12, 
                      #reg_alpha=1,   
                      #min_child_weight=0.5, 
                      #reg_lambda=5,
                      #subsample=0.5,
                      eta=0.3, # Remember to reduce to 0.1 or 0.05 for better results
                      tree_method='gpu_hist', # exact is more precies, but this is much faster
                      gpu_id=0,
                      objective='multi:softprob',
                      eval_metric =['merror'],
                      nthread=8,
                      n_estimators=10000,
                      )

In [17]:
bst = model.fit(X_train, y_train, 
                eval_set=[
                    (X_val, y_val)
                ], 
                early_stopping_rounds=100,
                verbose=True, 
                sample_weight=classes_weights,
               ) #

  from pandas import MultiIndex, Int64Index


[0]	validation_0-merror:0.15918
[1]	validation_0-merror:0.14266
[2]	validation_0-merror:0.13550
[3]	validation_0-merror:0.13275
[4]	validation_0-merror:0.12806
[5]	validation_0-merror:0.12669
[6]	validation_0-merror:0.12173
[7]	validation_0-merror:0.11787
[8]	validation_0-merror:0.11292
[9]	validation_0-merror:0.11347
[10]	validation_0-merror:0.11209
[11]	validation_0-merror:0.10934
[12]	validation_0-merror:0.10741
[13]	validation_0-merror:0.10796
[14]	validation_0-merror:0.10741
[15]	validation_0-merror:0.10548
[16]	validation_0-merror:0.10493
[17]	validation_0-merror:0.10218
[18]	validation_0-merror:0.10107
[19]	validation_0-merror:0.10052
[20]	validation_0-merror:0.09915
[21]	validation_0-merror:0.09887
[22]	validation_0-merror:0.09804
[23]	validation_0-merror:0.09749
[24]	validation_0-merror:0.09832
[25]	validation_0-merror:0.09887
[26]	validation_0-merror:0.09887
[27]	validation_0-merror:0.09915
[28]	validation_0-merror:0.09887
[29]	validation_0-merror:0.09860
[30]	validation_0-me

[245]	validation_0-merror:0.09171
[246]	validation_0-merror:0.09199
[247]	validation_0-merror:0.09226
[248]	validation_0-merror:0.09226
[249]	validation_0-merror:0.09226
[250]	validation_0-merror:0.09226
[251]	validation_0-merror:0.09226
[252]	validation_0-merror:0.09199
[253]	validation_0-merror:0.09254
[254]	validation_0-merror:0.09199
[255]	validation_0-merror:0.09226
[256]	validation_0-merror:0.09226
[257]	validation_0-merror:0.09226


In [18]:
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, target_names=labels, digits=4))

                                                      precision    recall  f1-score   support

      jet-mode radio AGN/low-excitation radio galaxy     0.9053    0.8882    0.8967      3606
quasar-like radio AGN / high-excitation radio galaxy     0.6096    0.4793    0.5367       290
                                     radio-quiet AGN     0.7875    0.7589    0.7729      1099
                                 star-forming galaxy     0.9364    0.9531    0.9447      9529

                                            accuracy                         0.9128     14524
                                           macro avg     0.8097    0.7699    0.7877     14524
                                        weighted avg     0.9109    0.9128    0.9116     14524



### Accuracy report per data set

#### Original

In [19]:
y_pred = model.predict(X_test[X_test.index<77609])
print(classification_report(y_test[X_test.index<77609], y_pred, target_names=labels, digits=4))

  from pandas import MultiIndex, Int64Index


                                                      precision    recall  f1-score   support

      jet-mode radio AGN/low-excitation radio galaxy     0.8713    0.8271    0.8486      2013
quasar-like radio AGN / high-excitation radio galaxy     0.6190    0.5253    0.5683       198
                                     radio-quiet AGN     0.7875    0.7589    0.7729      1099
                                 star-forming galaxy     0.9382    0.9561    0.9471      9057

                                            accuracy                         0.9106     12367
                                           macro avg     0.8040    0.7668    0.7842     12367
                                        weighted avg     0.9088    0.9106    0.9095     12367



#### B&H

In [20]:
y_pred = model.predict(X_test[X_test.index>=77609])
print(classification_report(y_test[X_test.index>=77609], y_pred, target_names=labels[[0,1,3]], digits=4))

                                                      precision    recall  f1-score   support

      jet-mode radio AGN/low-excitation radio galaxy     0.9453    0.9655    0.9553      1593
quasar-like radio AGN / high-excitation radio galaxy     0.5833    0.3804    0.4605        92
                                 star-forming galaxy     0.9000    0.8962    0.8981       472

                                            accuracy                         0.9254      2157
                                           macro avg     0.8095    0.7474    0.7713      2157
                                        weighted avg     0.9199    0.9254    0.9217      2157



  from pandas import MultiIndex, Int64Index


### Bayesian optimisation

In [104]:
from bayes_opt import BayesianOptimization

In [126]:
def optimise_xgboost(max_depth):
    max_depth = int(max_depth)
    
    model = XGBClassifier(use_label_encoder=False, 
                      max_depth=6, 
                      #reg_alpha=reg_alpha,   
                      #min_child_weight=min_child_weight, 
                      #reg_lambda=reg_lambda,
                      eta=0.1, # Remember to reduce to 0.1 or 0.05 for better results
                      tree_method='gpu_hist', # exact is more precies, but this is much faster
                      gpu_id=0,
                      objective='multi:softprob',
                      eval_metric =['merror'],
                      nthread=8,
                      n_estimators=10000,
                      )
    bst = model.fit(X_train, y_train, 
                eval_set=[
                    (X_val, y_val)
                ], 
                early_stopping_rounds=250,
                verbose=False, 
                sample_weight=classes_weights,
               )
    y_pred = model.predict(X_val)
    return accuracy_score(y_val, y_pred)

In [127]:
# Bounded region of parameter space
pbounds = {
    'max_depth': (5, 30),
    #'reg_alpha': (0, 5),
    #'reg_lambda': (0, 10),
    #'#min_child_weight': (0, 5),
}

In [128]:
optimizer = BayesianOptimization(
    f=optimise_xgboost,
    pbounds=pbounds,
    verbose=2, # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent
    random_state=42,
)

In [129]:
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    optimizer.maximize(
        init_points=5,
        n_iter=100,
    )

|   iter    |  target   | max_depth |
-------------------------------------
| [0m 1       [0m | [0m 0.8967  [0m | [0m 14.36   [0m |
| [95m 2       [0m | [95m 0.8992  [0m | [95m 28.77   [0m |
| [95m 3       [0m | [95m 0.8998  [0m | [95m 23.3    [0m |
| [95m 4       [0m | [95m 0.9009  [0m | [95m 19.97   [0m |
| [0m 5       [0m | [0m 0.8978  [0m | [0m 8.9     [0m |
| [0m 6       [0m | [0m 0.8995  [0m | [0m 18.29   [0m |
| [0m 7       [0m | [0m 0.8998  [0m | [0m 5.0     [0m |
| [0m 8       [0m | [0m 0.8984  [0m | [0m 21.32   [0m |
| [0m 9       [0m | [0m 0.9009  [0m | [0m 5.0     [0m |
| [0m 10      [0m | [0m 0.8995  [0m | [0m 9.548   [0m |
| [0m 11      [0m | [0m 0.8953  [0m | [0m 19.97   [0m |
| [95m 12      [0m | [95m 0.9011  [0m | [95m 19.97   [0m |
| [0m 13      [0m | [0m 0.8992  [0m | [0m 19.97   [0m |
| [0m 14      [0m | [0m 0.9009  [0m | [0m 18.29   [0m |



KeyboardInterrupt



### Automl

In [3]:
from flaml import AutoML
automl = AutoML()

  from ray.tune.suggest import Searcher
  from ray.tune.suggest.optuna import OptunaSearch as GlobalSearch
  from ray.tune.sample import _BackwardsCompatibleNumpyRng
  from ray.tune.suggest.variant_generator import generate_variants
  from pandas import MultiIndex, Int64Index


In [4]:
import warnings

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, stratify=y, random_state=42)

estimators = ['xgb_limitdepth']
#estimators = ['extra_tree']

with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    automl.fit(X_train, y_train, task="classification", metric='z',
                                #X_val=X_val , y_val=y_val,
                                #ensemble=True,
                                #estimator_list=estimators, 
                                time_budget=7200, n_jobs=8,
                                eval_method='cv',
                                #log_file_name='general_extra_features3.log',
                                #starting_points=automl.best_config_per_estimator
              )

NameError: name 'X_train' is not defined