# In [None]:
"""
This example uses the official
Hugging Face transformers `hyperparameter_search` API.
"""
import os

import ray
from ray import tune
from ray.tune import CLIReporter
from ray.tune.schedulers import PopulationBasedTraining
from transformers import AutoConfig, \
    AutoModelForSequenceClassification, AutoTokenizer, Trainer, GlueDataset, \
    GlueDataTrainingArguments, TrainingArguments

from constant import *
from entity_prep import *
from metrics import * 


def tune_transformer(num_samples: int = 8,
                     gpus_per_trial: float = 0,
                     smoke_test: bool = False):
    """Tune a KLUE relation-extraction classifier with Ray Tune PBT.

    The function caches the tokenizer and model, builds train/dev splits
    from the local CSV and the pickled index file, tokenizes them, and then
    launches `Trainer.hyperparameter_search` with the Ray backend and a
    `PopulationBasedTraining` scheduler.

    Args:
        num_samples: Number of trials, forwarded as `n_trials`.
        gpus_per_trial: GPUs reserved for each trial. With a value <= 0 the
            trainer is told not to use CUDA and mixed precision is disabled.
        smoke_test: When True, use the small checkpoint, cap training at one
            step and stop each trial after one training iteration.

    Returns:
        Whatever `Trainer.hyperparameter_search` returns for the search.
    """
    # Local imports keep the function independent of names that would
    # otherwise have to leak in through the module's star-imports.
    import copy
    import pickle
    from functools import partial

    import pandas as pd
    from datasets import ClassLabel, Dataset, DatasetDict

    data_dir_name = "./hp_search_test" if smoke_test else "./hp_search"
    data_dir = os.path.abspath(os.path.join(os.getcwd(), data_dir_name))
    cache_dir = './cache'
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(data_dir, mode=0o755, exist_ok=True)

    # Change these as needed.
    model_name = "klue/roberta-small" if smoke_test else "klue/roberta-large"

    num_labels = len(CLASS_NAMES)

    # Model configuration. The label maps are attached to the config that is
    # actually handed to the model.
    # NOTE(review): assumes IDX2LABEL / LABEL2IDX have exactly `num_labels`
    # entries -- confirm against the `constant` module.
    config = AutoConfig.from_pretrained(
        model_name, num_labels=num_labels, cache_dir=cache_dir)
    config.id2label = IDX2LABEL
    config.label2id = LABEL2IDX

    # Download and cache tokenizer, model, and features
    print("Downloading and caching Tokenizer")
    tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
    tokenizer.add_special_tokens(
        {"additional_special_tokens": list(markers.values())}
    )
    # Captured as a plain int so the model factory does not need to carry
    # the tokenizer object around.
    vocab_size = len(tokenizer)

    # Bound to a new name on purpose: assigning to
    # `convert_example_to_features` here would make it a local variable and
    # the reference on the right-hand side would raise UnboundLocalError.
    encode_example = partial(convert_example_to_features, tokenizer=tokenizer)

    def get_model():
        """Build a fresh classifier whose embeddings cover the marker tokens."""
        # Each model gets its own config copy: resizing the embeddings
        # rewrites the vocabulary size on the model's config, and the shared
        # template must keep the checkpoint's size for later loads.
        model = AutoModelForSequenceClassification.from_pretrained(
            model_name,
            config=copy.deepcopy(config),
            cache_dir=cache_dir
        )
        # Tokens added to the tokenizer need matching embedding rows,
        # otherwise their ids index past the end of the embedding matrix.
        if vocab_size > model.get_input_embeddings().num_embeddings:
            model.resize_token_embeddings(vocab_size)
        return model

    # Triggers model download to cache
    print("Downloading and caching pre-trained model")
    get_model()

    # Load data from the local CSV (the `jinmang2/load_klue_re` hub script
    # is an alternative source that is not used here).
    dataset = pd.read_csv("../dataset/train/train.csv")
    dataset['id'] = dataset['id'].astype(str)
    # NOTE(review): eval() executes whatever expression the CSV cell holds.
    # If the cells are plain dict literals, ast.literal_eval is the safe
    # replacement -- confirm the file format before switching.
    for column in ('subject_entity', 'object_entity'):
        dataset[column] = dataset[column].apply(lambda x: eval(x))
    dataset['label'] = dataset['label'].map(label2id)

    # NOTE(review): pickle.load must only ever be used on trusted files.
    with open('train_eval_idx.pkl', 'rb') as f:
        train_eval_idx = pickle.load(f)

    splits = {}
    for split_name, index_key in (('train', 'train'), ('dev', 'eval')):
        frame = dataset.iloc[train_eval_idx[index_key]]
        split = Dataset.from_pandas(frame)
        # from_pandas keeps the non-trivial pandas index as an extra column.
        split = split.remove_columns('__index_level_0__')
        # NOTE(review): this only rewrites the feature metadata in place; it
        # does not cast the stored column -- confirm that is sufficient.
        split.features['label'] = ClassLabel(names=class_names)
        splits[split_name] = split
    klue_re = DatasetDict(splits)

    examples = klue_re.map(mark_entity_spans)
    tokenized_datasets = examples.map(encode_example)

    use_gpu = gpus_per_trial > 0

    # Many more options exist beyond the ones used here; see
    # https://huggingface.co/transformers/main_classes/trainer.html#trainingarguments
    training_args = TrainingArguments(
        # Checkpoint
        output_dir=".",
        save_strategy="epoch",
        # Run
        do_train=True,
        do_eval=True,
        # Training
        num_train_epochs=4,
        max_steps=-1,
        learning_rate=5e-5,  # config
        per_device_train_batch_size=32,  # config
        per_device_eval_batch_size=32,  # config
        # Learning rate scheduling
        warmup_steps=0,
        # Regularization
        weight_decay=0.01,  # config
        # Logging
        logging_dir='./logs',
        report_to="none",
        # Evaluation
        metric_for_best_model='auprc',
        evaluation_strategy='epoch',
        eval_steps=500,
        # ETC
        load_best_model_at_end=True,
        seed=42,
        skip_memory_metrics=True,
        # GPU: mixed precision needs CUDA, so it must follow the same
        # switch as `no_cuda` or CPU-only runs are rejected.
        fp16=use_gpu,
        no_cuda=not use_gpu,
    )

    trainer = Trainer(
        model_init=get_model,
        args=training_args,
        train_dataset=tokenized_datasets['train'],
        eval_dataset=tokenized_datasets['dev'],
        compute_metrics=build_compute_metrics_fn())

    tune_config = {
        "per_device_train_batch_size": 32,
        "per_device_eval_batch_size": 32,
        "num_train_epochs": tune.choice([2, 3, 4, 5]),
        "max_steps": 1 if smoke_test else -1,  # Used for smoke test.
    }

    # NOTE(review): the scheduler and reporter read "eval_acc" while the
    # trainer selects its best model by 'auprc' -- confirm that
    # build_compute_metrics_fn() really reports a metric named "acc".
    scheduler = PopulationBasedTraining(
        time_attr="training_iteration",
        metric="eval_acc",
        mode="max",
        perturbation_interval=1,
        hyperparam_mutations={
            "weight_decay": tune.uniform(0.0, 0.3),
            "learning_rate": tune.uniform(1e-5, 5e-5),
            "per_device_train_batch_size": [16, 32, 64],
        })

    reporter = CLIReporter(
        parameter_columns={
            "weight_decay": "w_decay",
            "learning_rate": "lr",
            "per_device_train_batch_size": "train_bs/gpu",
            "num_train_epochs": "num_epochs"
        },
        metric_columns=[
            "eval_acc", "eval_loss", "epoch", "training_iteration"
        ])

    return trainer.hyperparameter_search(
        hp_space=lambda _: tune_config,
        backend="ray",
        n_trials=num_samples,
        resources_per_trial={
            "cpu": 1,
            "gpu": gpus_per_trial
        },
        scheduler=scheduler,
        keep_checkpoints_num=1,
        checkpoint_score_attr="training_iteration",
        stop={"training_iteration": 1} if smoke_test else None,
        progress_reporter=reporter,
        local_dir="~/ray_results/",
        name="tune_transformer_pbt",
        log_to_file=True)


if __name__ == "__main__":
    import argparse

    # Command-line interface: a quick smoke-test switch plus two optional
    # ways of pointing at an existing Ray deployment.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--smoke-test",
        action="store_true",
        help="Finish quickly for testing",
    )
    cli.add_argument(
        "--ray-address",
        type=str,
        default=None,
        help='Address to use for Ray. '
             'Use "auto" for cluster. '
             'Defaults to None for local.',
    )
    cli.add_argument(
        "--server-address",
        type=str,
        default=None,
        required=False,
        help="The address of server to connect to if using Ray Client.",
    )

    # Unrecognised arguments are tolerated and discarded.
    options, _unknown = cli.parse_known_args()

    if options.smoke_test:
        # Smoke test: default ray.init() and a single CPU-only trial.
        ray.init()
        tune_transformer(num_samples=1, gpus_per_trial=0, smoke_test=True)
    else:
        if options.server_address:
            ray.init(f"ray://{options.server_address}")
        else:
            ray.init(options.ray_address)
        # You can change the number of GPUs here:
        tune_transformer(num_samples=8, gpus_per_trial=1)