In [30]:
from transformers import Trainer, TrainingArguments
import torch
from torch import nn
from torch.utils.data import Dataset
import numpy as np
import math


In [31]:
import numpy as np

data1 = np.loadtxt("ENDF_B-VIII.1_LI-6(N,T)HE-4.txt")
data2 = np.loadtxt("ENDF_B-VIII.1_AU-197(N,G)AU-198.yaml")



In [32]:
print(data1.shape)
print(data2.shape)

(395, 2)
(20330, 2)


In [54]:
from datasets import Dataset
dataset = Dataset.from_dict({
    "input": data2[:, 0].astype(np.float32).tolist(),
    "labels": data2[:, 1].astype(np.float32).tolist()
})
ds_split = dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = ds_split["train"]
eval_dataset = ds_split["test"]

In [55]:
ds_split

DatasetDict({
    train: Dataset({
        features: ['input', 'labels'],
        num_rows: 18297
    })
    test: Dataset({
        features: ['input', 'labels'],
        num_rows: 2033
    })
})

In [56]:
device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


In [57]:
class NeuralNetwork(nn.Module):
    def __init__(self,input_size=1,output_size=1):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.Tanh(),nn.Linear(512, 512),
            nn.Tanh(),nn.Linear(512, 512),
            nn.Tanh(),nn.Linear(512, 512),
            nn.Tanh(),nn.Linear(512, 512),
            nn.Tanh(),nn.Linear(512, 512),
            nn.Tanh(),nn.Linear(512, 512),
            nn.Tanh(),
            nn.Linear(512, output_size,bias=False),
        )
        self.loss_fn = nn.MSELoss()

    def forward(self, input: torch.Tensor, labels: torch.Tensor = None):
        logits = self.linear_relu_stack(input)
        if labels is not None:
            loss = self.loss_fn(logits, labels)
            return {"loss": loss, "logits": logits}
        return {"logits": logits}
        
model = NeuralNetwork().to(device)
print(model)

NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=1, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): Tanh()
    (4): Linear(in_features=512, out_features=512, bias=True)
    (5): Tanh()
    (6): Linear(in_features=512, out_features=512, bias=True)
    (7): Tanh()
    (8): Linear(in_features=512, out_features=512, bias=True)
    (9): Tanh()
    (10): Linear(in_features=512, out_features=512, bias=True)
    (11): Tanh()
    (12): Linear(in_features=512, out_features=512, bias=True)
    (13): Tanh()
    (14): Linear(in_features=512, out_features=512, bias=True)
    (15): Tanh()
    (16): Linear(in_features=512, out_features=1, bias=False)
  )
  (loss_fn): MSELoss()
)


In [58]:
def compute_metrics(eval_pred):
    preds = eval_pred.predictions
    if isinstance(preds, tuple):
        preds = preds[0]
    preds = np.array(preds).reshape(-1)
    labels = eval_pred.label_ids.reshape(-1)

    mse  = float(np.mean((preds - labels) ** 2))
    rmse = float(math.sqrt(mse))
    mae  = float(np.mean(np.abs(preds - labels)))
    ss_res = float(np.sum((labels - preds) ** 2))
    ss_tot = float(np.sum((labels - np.mean(labels)) ** 2))
    r2 = 1.0 - ss_res / ss_tot if ss_tot > 0 else 0.0
    return {"mse": mse, "rmse": rmse, "mae": mae, "r2": r2}


def collate_scalar_to_column(batch):
    inputs = torch.tensor([ex["input"] for ex in batch], dtype=torch.float32).unsqueeze(-1)
    labels = torch.tensor([ex["labels"] for ex in batch], dtype=torch.float32).unsqueeze(-1)
    return {"input": inputs, "labels": labels}


In [59]:
training_args = TrainingArguments(
            output_dir='./results',
            learning_rate=1e-4,
            per_device_train_batch_size=4,  
            per_device_eval_batch_size=4,
            max_steps=10000,  # Replace with your desired number of steps
            weight_decay=0.02,
            eval_strategy='steps', 
            eval_steps=1000,  #the save step should be a multiple of eval step, savestep=500 by default
            lr_scheduler_type="cosine",
            warmup_ratio=0.1    

        )
# ---------------- 7) Trainer ----------------
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    data_collator=collate_scalar_to_column
)

In [60]:
trainer.train()

Step,Training Loss,Validation Loss,Mse,Rmse,Mae,R2
1000,366358.272,197843.578125,197843.578125,444.796108,56.819332,-0.009396
2000,1502514.56,197791.796875,197791.765625,444.737862,57.095562,-0.009131
3000,497659.456,197753.515625,197753.515625,444.694857,57.271534,-0.008936
4000,2141834.752,197703.234375,197703.265625,444.638354,57.390121,-0.00868
5000,243753.376,197708.6875,197708.703125,444.644468,57.340412,-0.008708
6000,838428.864,197730.75,197730.75,444.669259,57.202141,-0.00882
7000,1657979.648,197738.359375,197738.375,444.677833,57.191605,-0.008859
8000,1115418.88,197717.625,197717.65625,444.654536,57.296505,-0.008753
9000,907720.704,197725.28125,197725.28125,444.66311,57.251106,-0.008792
10000,962070.464,197727.703125,197727.671875,444.665798,57.238884,-0.008804




TrainOutput(global_step=10000, training_loss=858406.0244, metrics={'train_runtime': 249.0441, 'train_samples_per_second': 160.614, 'train_steps_per_second': 40.154, 'total_flos': 0.0, 'train_loss': 858406.0244, 'epoch': 2.185792349726776})