### main_pretrain

In [2]:
import os
import random

import numpy as np
import torch
import torch.optim as optim

from data_provider import data_provider
from main_pretrains.main_gpt2 import main_pretrain
from models.llama import LlamaSOH

# GOTION loaders (the `source_*` names used by later cells). The train loader
# is requested with shuffle_flag=True, the test loader with shuffle_flag=False.
source_set_train, source_loader_train = data_provider(
    name='GOTION',
    flag='train',
    batch_size=10,
    shuffle_flag=True,
)
source_set_test, source_loader_test = data_provider(
    name='GOTION',
    flag='test',
    batch_size=10,
    shuffle_flag=False,
)

# --- Model parameters (both are passed to the LlamaSOH constructor) ---
llama_model_path = './llms/llama'
input_dim = 1

# --- Training parameters ---
LR = 1e-3          # learning rate handed to AdamW
num_epochs = 15    # forwarded to main_pretrain
mask_ratio = 0.3   # forwarded to main_pretrain

def set_seed(seed):
    """Seed the Python, NumPy and PyTorch random generators with ``seed``.

    Also switches the cuDNN backend flags to ``deterministic = True`` and
    ``benchmark = False``.

    Args:
        seed: Integer seed handed unchanged to every seeding function.

    Returns:
        None.
    """
    # Same order as always: stdlib, NumPy, torch (CPU), torch (all CUDA devices).
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_fn in seeders:
        seed_fn(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Seed BEFORE the model is built so that any parameters LlamaSOH initialises
# randomly are reproducible too. Previously the only seed call came after
# construction, so such parameters were not covered by the seed.
set_seed(0)
# NOTE(review): the recorded output of this cell ends in a CPU-side
# "not enough memory" RuntimeError; the traceback is not preserved, so which
# call raised it cannot be told from here.
f_g = LlamaSOH(input_dim, llama_model_path, PPA=True, soft_prompt_len=10).to(device)

# Optimise only the parameters that are still marked as trainable.
trainable_params = [p for p in f_g.parameters() if p.requires_grad]
optimizer = optim.AdamW(trainable_params, lr=LR)

# Make sure the checkpoint directory exists before the (long) training run,
# so a missing folder cannot cause the trained weights to be lost at save time.
checkpoint_path = 'saved_models/llama/f_g.pt'
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

# Re-seed immediately before training, exactly as the original cell did, so
# the random stream seen by main_pretrain does not depend on how many random
# draws model construction consumed.
set_seed(0)
main_pretrain(
    train_loader=source_loader_train,
    test_loader=source_loader_test,
    model=f_g,
    optimizer=optimizer,
    device=device,
    mask_ratio=mask_ratio,
    num_epochs=num_epochs,
)
torch.save(f_g.state_dict(), checkpoint_path)

GOTION train 2840
GOTION test 1419
cuda


RuntimeError: [enforce fail at ..\c10\core\impl\alloc_cpu.cpp:72] data. DefaultCPUAllocator: not enough memory: you tried to allocate 180355072 bytes.

### prob (PG-SSL)

In [None]:
from main_pretrains.main_gpt2_prob import main_prob
import torch.nn as nn

# CALCE loaders (the `target_*` names used by later cells); neither split is shuffled.
target_set_train, target_loader_train = data_provider(name='CALCE', shuffle_flag=False, batch_size=10, flag='train')
target_set_test, target_loader_test = data_provider(name='CALCE', shuffle_flag=False, batch_size=10, flag='test')

LR = 1e-3
EPOCH = 3

# NOTE(review): GPT2SOH and gpt2_model_path are not imported or defined in any
# cell visible in this notebook (the pretrain cell uses LlamaSOH /
# llama_model_path and saves to saved_models/llama/). Confirm where they come
# from -- on a fresh kernel these lines raise NameError as written.
f_g = GPT2SOH(input_dim, gpt2_model_path, PPA=True, soft_prompt_len=10).to(device)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
f_g.load_state_dict(torch.load("saved_models/gpt2+ppa/f_g.pt", map_location=device))

# Seed BEFORE creating the regression head: nn.Linear draws its initial
# weights from the global RNG, which the original seed call (placed after
# construction) did not cover.
set_seed(2)
regressor = nn.Linear(f_g.input_linear.out_features, 1).to(device)
# Only the regression head is handed to the optimizer.
optimizer = optim.AdamW(regressor.parameters(), lr=LR)

# Re-seed right before training, as the original cell did, so the random
# stream seen by main_prob is unchanged.
set_seed(2)
main_prob(source_loader_train, target_loader_train, combined_training=True, model=f_g, regressor=regressor, optimizer=optimizer, device=device, num_epochs=EPOCH)
torch.save(regressor.state_dict(), 'saved_models/gpt2+ppa/regressor.pt')

In [None]:
from drawings import drawDegradation
# Hand the current backbone and regression head, together with the CALCE test
# loader, to the project's drawing helper (presumably a degradation plot --
# see drawings.drawDegradation).
drawDegradation(
    model=f_g,
    regressor=regressor,
    target_loader_test=target_loader_test,
    device=device,
    model_name='gpt2',
)

### tta (PPA)

In [None]:
from main_pretrains.main_gpt2_tta import main_tta
from evaluator import evaluate

# Rebuild the backbone and regression head and restore both from disk.
# NOTE(review): GPT2SOH and gpt2_model_path are not imported or defined in any
# cell visible in this notebook -- confirm where they come from.
f_g = GPT2SOH(input_dim, gpt2_model_path, PPA=True, soft_prompt_len=10).to(device)
# map_location remaps the stored tensors onto the current device, so
# checkpoints written on a GPU machine still load on a CPU-only one.
f_g.load_state_dict(torch.load("saved_models/gpt2+ppa/f_g.pt", map_location=device))
regressor = nn.Linear(f_g.input_linear.out_features, 1).to(device)
regressor.load_state_dict(torch.load("saved_models/gpt2+ppa/regressor.pt", map_location=device))

# Baseline metrics on the target test loader before any adaptation.
mae, rmse = evaluate(model=f_g, regressor=regressor, target_loader=target_loader_test, device=device, model_name='gpt2')
print(f"(Before TTA) MAE: {mae}, RMSE: {rmse}")

# Selectively freeze the GPT-2 backbone: a parameter keeps requires_grad
# untouched when its lower-cased name contains 'ln', 'wpe' or 'wte'; every
# other backbone parameter is frozen.
# (To freeze the whole backbone instead, set requires_grad = False on every
# parameter of f_g.gpt2.)
for name, param in f_g.gpt2.named_parameters():
    if any(layer in name.lower() for layer in ('ln', 'wpe', 'wte')):
        continue
    param.requires_grad = False

# --- Report which parameters are trainable and which are frozen ---

# Pass 1: list trainable parameter names and add up their element counts.
# The size of the parameter named exactly 'soft_prompt' is printed when it is
# encountered, whether or not it is trainable.
print("Trainable parameters:")
trainable_params_count = 0
for name, param in f_g.named_parameters():
    if name == 'soft_prompt':
        print('PPA params:', param.numel())
    if not param.requires_grad:
        continue
    print(name)
    trainable_params_count += param.numel()

# Pass 2: same listing for the frozen parameters.
print("\nFrozen parameters:")
frozen_params_count = 0
for name, param in f_g.named_parameters():
    if param.requires_grad:
        continue
    print(name)
    frozen_params_count += param.numel()

# Summary; the total is the sum of the two counts above.
print(f"\nNumber of trainable parameters: {trainable_params_count}")
print(f"Total number of parameters: {trainable_params_count + frozen_params_count}")
print(f"Number of frozen parameters: {frozen_params_count}")


In [None]:
# Only the soft prompt is handed to the optimizer for test-time adaptation.
# (Disabled alternative: plain SGD with lr=1e-2 over every parameter of f_g
# that has requires_grad=True.)
optimizer = optim.SGD([f_g.soft_prompt], momentum=0.9, lr=1e-1)
set_seed(2)

# Metrics on the target test loader before adaptation.
mae, rmse = evaluate(
    model=f_g,
    regressor=regressor,
    target_loader=target_loader_test,
    device=device,
    model_name='gpt2',
)
print(f"(Before TTA) MAE: {mae}, RMSE: {rmse}")

# Adapt on the same target test loader.
main_tta(
    target_loader=target_loader_test,
    model=f_g,
    regressor=regressor,
    optimizer=optimizer,
    device=device,
    mask_ratio=0.9,
    num_epochs=10,
)

# Metrics on the same loader after adaptation.
mae, rmse = evaluate(
    model=f_g,
    regressor=regressor,
    target_loader=target_loader_test,
    device=device,
    model_name='gpt2',
)
print(f"(After TTA) MAE: {mae}, RMSE: {rmse}")