# **Demonstration: Generating multiclass redshift probability distribution**
# Import libraries

In [1]:
from pathlib import Path
from json import load
from PDF_MC import PDF_MC 

# Import default configurations

model_config.json has default hyperparameters for the PDF_MC model: 
* number of input features
* number of hidden neurons for each layer
* number of hidden layers
* number of training epochs
* learning rate
* batch size
* momentum

In [2]:
# Directory the notebook is run from; project-relative paths are resolved
# against it.
PACKAGE_PATH = Path.cwd()

# Read the default configuration from model_config.json.
# The path is anchored on PACKAGE_PATH (the same location "./model_config.json"
# resolves to) and the encoding is fixed to UTF-8 so the result does not
# depend on the platform's default text encoding.
with open(PACKAGE_PATH / "model_config.json", "r", encoding="utf-8") as f:
    json_dict = load(f)

# Keep only the PDF_MC entry of the "model" section; the bare expression on the
# last line displays it as the cell output.
PDF_MC_config = json_dict["model"]["PDF_MC"]
PDF_MC_config

{'num_input_features': 5,
 'num_hidden_neurons': 128,
 'num_hidden_layers': 5,
 'num_epochs': 300,
 'learning_rate': 0.001,
 'size_batch': 32,
 'momentum': 0.9}

# Update the config

Input/output paths and training-specific hyperparameters should be updated before training.

* Input/output paths
    * "input_csv_path": the path of the input csv file of photometric redshift
    * "output_dir_path": the path of the directory where the result files are saved
    * "model_dir_path": the path of the directory where the best model is saved

* Training-specific hyperparameters
    * CO_ratio
    * weights_list: written in the format of [[range...], [weights...]]
    * train_ratio: the ratio of the training set -- a value between 0 and 1
    * rebalance: perform rebalancing if set to True
    * rebalance_list: a list of ranges and weights to rebalance; written in the same format as weights_list
    * evaluation: perform base-evaluation if set to True
    * evaluation_ratio: the ratio of a base evaluation set -- a value in between 0 and 1

* Others
    * model_no: model number (not a hyperparameter) 

In [None]:
# Add the run-specific settings on top of the defaults loaded from
# model_config.json. Keyword arguments produce the same string keys as a
# dict literal would.
PDF_MC_config.update(
    # input/output locations, resolved against PACKAGE_PATH
    input_csv_path=PACKAGE_PATH / 'data/relz_LR.csv',
    output_dir_path=PACKAGE_PATH / 'results/',
    model_dir_path=PACKAGE_PATH / 'results/',

    # hyperparameters
    CO_ratio=0.25,
    weights_list=[[1, 2], [2, 15]],
    train_ratio=0.999,

    # rebalance -- the relz list is active; the hsc variant is kept for reference
    rebalance=True,
    # rebalance_list=[[1, 2, 3], [0.5, 0.5, 0.5]],  # for hsc
    rebalance_list=[[2, 3, 4], [0.5, 0.5, 0.5]],  # for relz

    # base-evaluation
    evaluation=True,
    evaluation_ratio=0.3,

    # model number (not a hyperparameter)
    model_no=0,
)

# Train a model

The training results are printed out and saved as PDF and CSV files during the `PDF_MC.save_results()` function call.

"model_no" is incremented by 1 on every training loop iteration and is included in the file names of the saved model and the result file, which can be found in `PZ_CO_ID/PZ_CO_ID/results`.

In [None]:
for run_idx in range(1):  # raise the count to train several models in a row
    # Bump the model number before building the model, so every pass through
    # the loop runs with its own "model_no".
    PDF_MC_config['model_no'] = PDF_MC_config['model_no'] + 1

    # A new PDF_MC object is created from the current config on each pass.
    PDF_MC_model = PDF_MC(**PDF_MC_config)

    # Pipeline order: preprocess -> build dl -> train -> evaluate -> save.
    data = PDF_MC_model.preprocess_data()
    dl = PDF_MC_model.build_dl(data)
    best_model = PDF_MC_model.train_model(dl)
    results = PDF_MC_model.evaluate_model(data, dl)
    PDF_MC_model.save_results(data, results)

Epoch [10/300], Loss: 1.663168787956

best model
Epoch [19/300], Loss: 0.844408035278

Epoch [20/300], Loss: 0.856896340847

best model
Epoch [22/300], Loss: 0.823677659035

best model
Epoch [23/300], Loss: 0.656186401844

Epoch [30/300], Loss: 0.923953235149

best model
Epoch [34/300], Loss: 0.557160913944

Epoch [40/300], Loss: 1.021317005157

Epoch [50/300], Loss: 0.706067442894

best model
Epoch [54/300], Loss: 0.548342347145

best model
Epoch [58/300], Loss: 0.481449127197

Epoch [60/300], Loss: 0.739801228046

best model
Epoch [63/300], Loss: 0.436113357544

Epoch [70/300], Loss: 0.606230854988

Epoch [80/300], Loss: 0.854492783546

best model
Epoch [88/300], Loss: 0.416743010283

Epoch [90/300], Loss: 0.670374095440

best model
Epoch [99/300], Loss: 0.395379841328

Epoch [100/300], Loss: 0.929262161255

Epoch [110/300], Loss: 0.848576188087

Epoch [120/300], Loss: 0.494164466858

best model
Epoch [129/300], Loss: 0.301378130913

Epoch [130/300], Loss: 0.800472795963

Epoch [140/