In [55]:
import numpy as np
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from models.other_models import MLP_SOH, RandomForestSOH, LightGBMSOH
from evaluator import evaluate
from sklearn.metrics import mean_absolute_error, mean_squared_error
import lightgbm as lgb

def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)


def train_and_evaluate(model, train_loader, target_loader, combined_training, device, model_type='mlp', num_epochs=100, lr=1e-3):
    if model_type == 'mlp':
        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        target_data_iter = iter(target_loader)
        target_batch = next(target_data_iter)
        src_tar = target_batch[0].to(device)
        labels_tar = target_batch[1].to(device)

        for epoch in range(num_epochs):
            model.train()
            epoch_loss = 0
            for i, batch in enumerate(train_loader):

                if combined_training:
                    src = batch[0].to(device)
                    src = torch.cat((src, src_tar), dim=0)
                    labels = batch[1].to(device)
                    labels = torch.cat((labels, labels_tar), dim=0)
                    optimizer.zero_grad()
                    outputs = model(src).squeeze()
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()
                    epoch_loss += loss.item()
                else:
                    src = batch[0].to(device)
                    labels = batch[1].to(device)
                    optimizer.zero_grad()
                    outputs = model(src).squeeze()
                    loss = criterion(outputs, labels)
                    loss.backward()
                    optimizer.step()
                    epoch_loss += loss.item()
            epoch_loss /= len(train_loader)
            print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss:.8f}')

        model.eval()
        test_labels = []
        test_predictions = []
        with torch.no_grad():
            for i, batch in enumerate(target_loader):
                src = batch[0].to(device)
                labels = batch[1].to(device)
                outputs = model(src).squeeze()
                test_labels.extend(labels.cpu().numpy())
                test_predictions.extend(outputs.cpu().numpy())
        
        
    elif model_type == 'random_forest':
        train_data = np.concatenate([batch[0].cpu().numpy().reshape(batch[0].size(0), -1) for batch in train_loader])
        train_labels = np.concatenate([batch[1].cpu().numpy() for batch in train_loader])

        model.fit(train_data, train_labels)

        test_data = np.concatenate([batch[0].cpu().numpy().reshape(batch[0].size(0), -1) for batch in target_loader])
        test_labels = np.concatenate([batch[1].cpu().numpy() for batch in target_loader])
        test_predictions = model.predict(test_data)

    elif model_type == 'lightgbm':
        train_data = np.concatenate([batch[0].cpu().numpy().reshape(batch[0].size(0), -1) for batch in train_loader])
        train_labels = np.concatenate([batch[1].cpu().numpy() for batch in train_loader])

        train_dataset = lgb.Dataset(train_data, train_labels)
        params = {
            'objective': 'regression',
            'metric': 'rmse',
            'boosting_type': 'gbdt'
        }
        model = lgb.train(params, train_dataset, num_boost_round=100)

        test_data = np.concatenate([batch[0].cpu().numpy().reshape(batch[0].size(0), -1) for batch in target_loader])
        test_labels = np.concatenate([batch[1].cpu().numpy() for batch in target_loader])
        test_predictions = model.predict(test_data, num_iteration=model.best_iteration)
 
    test_labels = np.array(test_labels)
    test_predictions = np.array(test_predictions)
    rmse = np.sqrt(mean_squared_error(test_labels, test_predictions))
    mae = mean_absolute_error(test_labels, test_predictions)
    print(f'Test RMSE: {rmse:.4f}')
    print(f'Test MAE: {mae:.4f}')
    return rmse, mae
        

In [89]:
from data_provider import data_provider

if __name__ == "__main__":
    # Generate some dummy data
    np.random.seed(42)
    source_set_train, source_loader_train = data_provider(name = 'GOTION', shuffle_flag=True, batch_size=10, flag = 'train')
    source_set_test, source_loader_test = data_provider(name = 'GOTION', shuffle_flag=False, batch_size=10, flag = 'test')
    target_set_train, target_loader_train = data_provider(name = 'SANYO', shuffle_flag=False, batch_size=10, flag = 'train')
    target_set_test, target_loader_test = data_provider(name = 'SANYO', shuffle_flag=False, batch_size=10, flag = 'test')

    # Train MLP model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)
    mlp_model = MLP_SOH(input_dim=1, hidden_dim=64, num_layers=2).to(device)
    train_and_evaluate(mlp_model, source_loader_train, target_loader_train, combined_training=False,  device=device, model_type='mlp', num_epochs=10,  lr=1e-3)

    # # Train Random Forest model
    # rf_model = RandomForestSOH(n_estimators=100, random_state=42)
    # train_and_evaluate(rf_model, X, y, model_type='rf')

    # # Train LightGBM model
    # lgb_model = LightGBMSOH(num_leaves=31, learning_rate=0.05, n_estimators=100)
    # train_and_evaluate(lgb_model, X, y, model_type='lgb')


GOTION train 2840
GOTION test 1419
SANYO train 404
SANYO test 8
cuda
Epoch 1/10, Loss: 0.02863636
Epoch 2/10, Loss: 0.00230913
Epoch 3/10, Loss: 0.00211875
Epoch 4/10, Loss: 0.00175806
Epoch 5/10, Loss: 0.00136597
Epoch 6/10, Loss: 0.00087639
Epoch 7/10, Loss: 0.00043379
Epoch 8/10, Loss: 0.00016542
Epoch 9/10, Loss: 0.00005383
Epoch 10/10, Loss: 0.00002177
Test RMSE: 0.0610
Test MAE: 0.0593


In [57]:
count_parameters(mlp_model)

4353

In [85]:
rf_model = RandomForestSOH(n_estimators=100)
print("Training Random Forest...")
train_and_evaluate(rf_model, source_loader_train, target_loader_test, combined_training=False, device=device, model_type='random_forest')


Training Random Forest...
Test RMSE: 0.0787
Test MAE: 0.0673


(0.07869677744070395, 0.06732322864234447)

In [86]:
# Define LightGBM model
lgb_model = None  # Placeholder
print("Training LightGBM...")
train_and_evaluate(lgb_model, source_loader_train, source_loader_test, combined_training=False, device=device, model_type='lightgbm')

Training LightGBM...
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001079 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 12240
[LightGBM] [Info] Number of data points in the train set: 2840, number of used features: 48
[LightGBM] [Info] Start training from score 0.875610
Test RMSE: 0.0631
Test MAE: 0.0562


(0.06311573870314201, 0.05624137769777611)

In [63]:
import lightgbm as lgb

def count_lgb_tree_params(tree_structure):
    """
    Recursively count the number of nodes in a tree.
    Each node in the tree represents a parameter.
    """
    if isinstance(tree_structure, dict):
        num_nodes = 1  # Count the current node
        num_nodes += count_lgb_tree_params(tree_structure.get('left_child', {}))
        num_nodes += count_lgb_tree_params(tree_structure.get('right_child', {}))
        return num_nodes
    return 0

def count_lgb_params(lgb_model):
    """
    Count the total number of parameters in a LightGBM model.
    """
    booster = lgb_model.booster_
    model_info = booster.dump_model()
    total_params = 0
    num_trees = len(model_info['tree_info'])
    for tree in model_info['tree_info']:
        total_params += count_lgb_tree_params(tree['tree_structure'])
    return total_params


# Calculate parameters
params_count = count_lgb_params(lgb_model)
print(f"LightGBM model parameter count: {params_count}")


AttributeError: 'NoneType' object has no attribute 'booster_'