In [None]:
import h5py
from sklearn.metrics import mean_squared_error as mse
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import lightgbm as lgb
import warnings
import os
import xgboost
from sklearn.svm import SVR
from sklearn.linear_model import Ridge
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import ParameterGrid

from DNN import *
from processing_results import * 
from stacking_models import *

# Warning because some galaxies (those are not of orur interest have mass/ halo concentration 0, which we logarithmize)
warnings.filterwarnings("ignore")

# Render matplotlib figures directly in the notebook output.
%matplotlib inline
# Load the autoreload extension and reload imported modules before each cell
# runs, so edits to the imported helper modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

# Select dataset

In [None]:
# Root folder of the datasets. They have to be saved there first
# (run 'data_preprocessing/main.py').
path = "../data/datasets"

# Example selection: one ELG file and one LRG file.
elg_csv = os.path.join(path, "ELG", "n5e-4_ssfr908.csv")
lrg_csv = os.path.join(path, "LRG", "n2e-4_ssfr908.csv")

ELGs = pd.read_csv(elg_csv)
LRGs = pd.read_csv(lrg_csv)

In [None]:
# Split the selected sample with the project helper; the three returned frames
# are bound, in order, to `train`, `test` and `val`.
# Pass `LRGs` instead of `ELGs` to run the notebook on the other sample.
splits = train_test(ELGs, val=True)
train, test, val = splits

In [None]:
# Input columns used by every model in this notebook, grouped by name prefix.
neigh_columns = ["neigh0.5", "neigh1", "neigh2", "neigh3", "neigh4", "neigh5"]
lum_columns = ["lum_z", "lum_r", "lum_g"]
other_columns = ["mass", "sum_m", "sum_V", "anisotropy"]
features = neigh_columns + lum_columns + other_columns

# Column the models are fitted to predict.
target = "b_phi"

# XGBoost

In [None]:
# Exhaustive grid search over the XGBoost hyper-parameters.
# The full Cartesian product below is very large, so search a broad range first
# and then refine around the best values (trim the lists accordingly).
xgboost_params = {
    "n_estimators": [100, 250, 500, 750, 1000, 2000],
    "learning_rate": [0.1, 0.01, 0.015, 0.005],
    "gamma": [0, 0.5, 1, 2, 5],
    # Sampling ratios must lie in (0, 1]; a value of 0 is rejected by XGBoost,
    # so it is not part of the grid.
    "colsample_bylevel": [0.3, 0.5, 1],
    "subsample": [0.3, 0.5, 1],
    "max_depth": [2, 3, 5, 6, 8, 10, 11],
    "min_child_weight": [0, 1, 2, 5],
    "reg_lambda": [0, 0.3, 0.5, 1, 5, 10],
    # L1 regularisation; `reg_alpha` is the scikit-learn wrapper name for `alpha`.
    "reg_alpha": [0, 0.5, 1, 5, 10],
}

# The objective is always the squared error. `n_estimators` given here is only
# a placeholder: the grid overrides it for every candidate.
model = xgboost.XGBRegressor(
    n_estimators=1000,
    n_jobs=-1,
    objective='reg:squarederror',
    early_stopping_rounds=20,
)
grid_search = GridSearchCV(
    estimator=model,
    param_grid=xgboost_params,
    cv=3,
)

# The validation split is forwarded to every `fit` call so that early stopping
# has data to monitor.
eval_set = [(val[features], val[target])]
grid_search.fit(train[features], train[target], eval_set=eval_set)

print(grid_search.best_params_)
print(grid_search.scorer_)
print(grid_search.best_score_)

# Keep the refitted best estimator and display it as the cell output.
model = grid_search.best_estimator_
model

# SVR

In [None]:
# Grid search over the SVR regularisation strength.
# Search a broad range first, then refine around the best value (because of
# the time complexity).
svr_params = {
    # C must be strictly positive, so 0 is not a valid candidate.
    "C": [0.1, 0.5, 0.75, 1., 2, 5, 10],
}

model = SVR()
grid_search = GridSearchCV(
    estimator=model,
    param_grid=svr_params,
    cv=3,
)

# Unlike the XGBoost estimator, SVR.fit takes no `eval_set` argument;
# forwarding one makes every fit fail, so only the training data is passed.
grid_search.fit(train[features], train[target])

print(grid_search.best_params_)
print(grid_search.scorer_)
print(grid_search.best_score_)

# Keep the refitted best estimator and display it as the cell output.
model = grid_search.best_estimator_
model

# DNN

In [None]:
# use GPUs if available
if torch.cuda.is_available():
    print("CUDA Available")
    device = torch.device('cuda')
else:
    print('CUDA Not Available, using CPU')
    device = torch.device('cpu')

In [None]:
# Convert each split (feature columns + target column) with the project helper,
# using a batch size of 10. The splits are processed in the order
# train, test, val.
train_loader, test_loader, val_loader = (
    convert_to_tensors(split[features], split[target], batch=10)
    for split in (train, test, val)
)

In [None]:
# Manual grid search over the DNN hyper-parameters.
# NOTE(review): the dropout candidates go up to 1 -- confirm how `make_network`
# interprets this value (drop probability vs. keep probability).
dnn_params = {
    "n_epoch": [80, 100, 120, 150, 200],
    "hidden_layers": [2, 4, 6, 8, 10],
    "hidden_layer_size": [5, 10, 20, 30, 50],
    "dropout": [0.4, 0.5, 0.6, 0.8, 0.9, 0.95, 1],
    "activation": [torch.nn.LeakyReLU(), torch.nn.ReLU()],
}

# Learning rate and weight decay handed to `main_training` (Adam optimizer).
LR, WD = 1e-7, 1e-7

# Materialise the grid once so the index of the best score can be mapped back
# to the parameter combination that produced it.
parameter_grid = list(ParameterGrid(dnn_params))

performances = list()
for params in parameter_grid:
    # Build and train one network per combination, using the values of the
    # current combination (`params`), not the full candidate lists.
    model = make_network(
        params["hidden_layers"],
        params["hidden_layer_size"],
        len(features),
        dropout=params["dropout"],
        activation=params["activation"],
        bias=True,
    )
    train_losses, valid_losses = main_training(
        model, train_loader, val_loader, LR, WD, params["n_epoch"]
    )
    # Plot the learning curve here from train_losses / valid_losses if wanted.
    loss = test_(val_loader, model)
    performances.append(loss)

# Combination with the lowest loss on the validation loader.
best_params = parameter_grid[performances.index(min(performances))]