In [1]:
import torch
import torch.nn as nn 

class gps_encoder(nn.Module):
    
    def __init__(self):
        super().__init__()

        self.layer1 = nn.Sequential(
        nn.Conv1d(2, 20, 3, padding = 1),
        nn.ReLU(inplace=True)
        )

        self.layer2 = nn.Sequential(
        nn.Conv1d(20, 40, 3, padding = 1),
        nn.ReLU(inplace=True),
        nn.MaxPool1d(2,padding = 1)
        )

        self.layer3 = nn.Sequential(
        nn.Conv1d(40, 80, 3, padding = 1),
        nn.ReLU(inplace=True)
        )

        self.layer4 = nn.Sequential(
        nn.Conv1d(80, 40, 3, padding = 1),
        nn.ReLU(inplace=True),
        nn.MaxPool1d(2,padding = 1),
        nn.Flatten()
        )

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        return x

## lidar input: [bsz, 20, 20, 20]
class lidar_encoder(nn.Module):
    def __init__(self):
        super().__init__()

        self.channel = 32
        self.dropProb = 0.3


        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels=20, out_channels=32, kernel_size = (3,3), padding = (1,1)),
            nn.ReLU(inplace = True)
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size = (3,3), padding = (1,1)),
            nn.ReLU(inplace = True),
        
            nn.Conv2d(64, 128, kernel_size = (3,3), padding = (1,1)),
            nn.ReLU(inplace = True),

            nn.Conv2d(128, 32, kernel_size = (3,3), padding = (1,1)),
            nn.ReLU(inplace = True)

        )

        self.maxpool = nn.Sequential(
            nn.MaxPool2d(kernel_size=(2, 3)),
            nn.Dropout(p = self.dropProb)
        )

        self.maxpool_ = nn.Sequential(
            nn.MaxPool2d(kernel_size=(1, 2)),
            nn.Dropout(p = self.dropProb)
        )

        self.flatten_layer = nn.Sequential(
            nn.Flatten()
        )



    def forward(self, x):
        a = self.layer1(x)
        x = a + self.layer2(a)
        x = self.maxpool(x) # b

        b = x
        x = self.layer2(x) + b
        x = self.maxpool(x) #c

        c = x 
        x = self.layer2(x) + c 
        x = self.maxpool_(x) #d

        d = x 
        x = self.layer2(x) + d 

        x = self.flatten_layer(x)

        return x


## image input: [bsz, 3, 112, 112]
class image_encoder(nn.Module):
    def __init__(self):
        super().__init__()

        self.channel = 32
        self.dropProb = 0.25


        self.layer0 = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=self.channel, kernel_size = (7,7), padding = (1,1)),
            nn.ReLU(inplace = True)
        )

        self.layer1 = nn.Sequential(
            nn.Conv2d(self.channel, 32, kernel_size = (3,3), padding = (1,1)),
            nn.ReLU(inplace = True)
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size = (3,3), padding = (1,1)),
            nn.ReLU(inplace = True),
        
            nn.Conv2d(64, 128, kernel_size = (3,3), padding = (1,1)),
            nn.ReLU(inplace = True),

            nn.Conv2d(128, 64, kernel_size = (3,3), padding = (1,1)),
            nn.ReLU(inplace = True),

            nn.Conv2d(64, 32, kernel_size = (3,3), padding = (1,1)),
            nn.ReLU(inplace = True),

        )

        self.maxpool = nn.Sequential(
            nn.MaxPool2d(kernel_size=(6, 6)),
            nn.Dropout(p = self.dropProb)
        )

        self.maxpool_ = nn.Sequential(
            nn.MaxPool2d(kernel_size=(6, 6)),
            nn.Dropout(p = self.dropProb),
            nn.Flatten()
        )


    def forward(self, x):
        x = self.layer0(x)
        x = self.layer1(x)
        b = x 
        x = self.layer2(x) + b
        x = self.maxpool(x)
        c = x 
        x = self.layer2(x) + c 
        x = self.maxpool_(x)


        return x
    
              

class MySingleModel(nn.Module):

    def __init__(self, num_classes, modality):
        super().__init__()

        # print("DEBUG: modality is: ", modality)

        if modality == 'lidar':
            self.encoder = lidar_encoder()
            self.classifier = nn.Sequential(
                nn.Linear(160, num_classes),
                nn.Softmax()
                )
        elif modality == 'image':
            self.encoder = image_encoder()
            self.classifier = nn.Sequential(
                nn.Linear(288, num_classes),
                nn.Softmax()
                )        
        elif modality == 'gps':
            self.encoder = gps_encoder()
            self.classifier = nn.Sequential(
            nn.Linear(40, num_classes),
            nn.Softmax()
            )

    def forward(self, x):
        # print(x.shape)
        feature = self.encoder(x)
        output = self.classifier(feature)

        return output


class Encoder2_1(nn.Module):
    def __init__(self):
        super().__init__()

        self.encoder_1 = gps_encoder()
        self.encoder_2 = lidar_encoder()

    def forward(self, x1, x2):

        feature_1 = self.encoder_1(x1)
        feature_2 = self.encoder_2(x2)

        return feature_1, feature_2

class Encoder2_2(nn.Module):
    def __init__(self):
        super().__init__()

        self.encoder_1 = gps_encoder()
        self.encoder_2 = image_encoder()

    def forward(self, x1, x2):

        feature_1 = self.encoder_1(x1)
        feature_2 = self.encoder_2(x2)

        return feature_1, feature_2

class Encoder2_3(nn.Module):
    def __init__(self):
        super().__init__()

        self.encoder_1 = lidar_encoder()
        self.encoder_2 = image_encoder()

    def forward(self, x1, x2):

        feature_1 = self.encoder_1(x1)
        feature_2 = self.encoder_2(x2)

        return feature_1, feature_2


class My2Model(nn.Module):

    def __init__(self, num_classes, modality):
        super().__init__()

        # print("DEBUG: modality is: ", modality)

        if modality == 'g+l':
            self.encoder = Encoder2_1()
            self.classifier = nn.Sequential(
            nn.Linear(200, num_classes),
            nn.Softmax()
            )

        elif modality == 'g+i':
            self.encoder = Encoder2_2()
            self.classifier = nn.Sequential(
                nn.Linear(328, num_classes),
                nn.Softmax()
                ) 

        elif modality == 'l+i':
            self.encoder = Encoder2_3()
            self.classifier = nn.Sequential(
                nn.Linear(448, num_classes),
                nn.Softmax()
                )
     


    def forward(self, x1, x2):
        # print(x.shape)

        feature_1, feature_2 = self.encoder(x1, x2)

        feature = torch.cat((feature_1, feature_2), dim=1)
        output = self.classifier(feature)

        return output


class Encoder3(nn.Module):
    def __init__(self):
        super().__init__()

        self.encoder_1 = gps_encoder()
        self.encoder_2 = lidar_encoder()
        self.encoder_3 = image_encoder()

    def forward(self, x1, x2, x3):  
        feature_1 = self.encoder_1(x1)
        feature_2 = self.encoder_2(x2)
        feature_3 = self.encoder_3(x3)

        return feature_1, feature_2, feature_3


class My3Model(nn.Module):

    def __init__(self, num_classes):
        super().__init__()

        self.encoder = Encoder3()

        self.classifier = nn.Sequential(
        nn.Linear(488, num_classes),
        nn.Softmax(dim=1)
        )
     
    def forward(self, x1, x2, x3):
        feature_1, feature_2, feature_3 = self.encoder(x1, x2, x3)

        feature = torch.cat((feature_1, feature_2, feature_3), dim=1)
        output = self.classifier(feature)

        return output

In [2]:
import os
from torch.utils.data import DataLoader, Dataset
import numpy as np
import sys
import psutil
import os

# 获取当前进程的内存使用情况
def get_memory_usage():
    process = psutil.Process(os.getpid())
    mem = process.memory_info()
    return mem.rss  # 返回常驻内存集的大小（以字节为单位）
# def get_total_size(obj):
#     if isinstance(obj, (list, tuple)):
#         return sys.getsizeof(obj) + sum(get_total_size(i) for i in obj)
#     return sys.getsizeof(obj)

class rdata:
    def __init__(self, data_dir):
        data_list_1 = []
        data_list_2 = []
        data_list_3 = []
        
        labels_list = []
        print(111, get_memory_usage())
        for root, dirs, _ in os.walk(data_dir):
            for dir in dirs:
                d = os.path.join(root, dir)
                if self.count_files_in_directory(d) == 4:
                    # print(os.path.join(data_dir, d))
                    x_tr_1 = np.load(os.path.join(data_dir, d, 'gps.npz'))
                    x_tr_2 = np.load(os.path.join(data_dir, d, 'lidar.npz'))
                    x_tr_3 = np.load(os.path.join(data_dir, d, 'image.npz'))
                    y_tr = np.load(os.path.join(data_dir, d, 'rf.npz'))
                    print(222, get_memory_usage())
                    # print(get_memory_usage())
                    for i in range(len(x_tr_1['gps'])):
                        # print(get_memory_usage())
                        # print(len(x_tr_3['image'][i]))
                        # print(len(x_tr_3['image'][i][0]))
                        # print(len(x_tr_3['image'][i][0][0]))
                        
                        data_list_1.append(x_tr_1['gps'][i])
                        labels_list.append(y_tr['rf'][i])
                        data_list_2.append(x_tr_2['lidar'][i])
                        data_list_3.append(x_tr_3['image'][i])
                print(333, get_memory_usage())
            #         l += 1
            #     if l > 0:
            #         break;
            # if l > 0:
            #     break;
        print(len(data_list_1))
        
        self.data_1 = np.array(data_list_1)
        self.data_2 = np.array(data_list_2)
        self.data_3 = np.array(data_list_3)

        self.data_1 = self.data_1.astype("float")
        self.data_2 = self.data_2.astype("float")
        self.data_3 = self.data_3.astype("float")
        
        self.labels = np.array(labels_list)
        
    def count_files_in_directory(self, dir_path):
        file_count = sum(1 for item in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, item)))
        return file_count

class data_set(Dataset):
    def __init__(self, data_1, data_2, data_3, data_labels):
        super().__init__()
        self.data_1 = data_1
        self.data_2 = data_2
        self.data_3 = data_3
        self.labels = data_labels

    def __len__(self):
        return len(self.data_1)
    
    def __getitem__(self, index):  
        return self.data_1[index], self.data_2[index], self.data_3[index], self.labels[index]
    
class data_factory:
    def __init__(self, data_dir, config):
        self.rdata = rdata(data_dir)
        self.config = config
    def get_dataset(self):
        board_0 = round(len(self.rdata.data_1) * 0.7)
        board_1 = round(len(self.rdata.data_1) * 0.7)+round(len(self.rdata.data_1) * 0.15)
        
        train_data_1 = torch.tensor(self.rdata.data_1[:board_0])
        train_data_2 = torch.tensor(self.rdata.data_2[:board_0])
        train_data_3 = torch.tensor(self.rdata.data_3[:board_0])  
        train_labels = torch.tensor(self.rdata.labels[:board_0])
         
        test_data_1 = torch.tensor(self.rdata.data_1[board_0 : board_1])
        test_data_2 = torch.tensor(self.rdata.data_2[board_0 : board_1])
        test_data_3 = torch.tensor(self.rdata.data_3[board_0 : board_1])
        
        test_labels = torch.tensor(self.rdata.labels[board_0 : board_1])  
              
        valid_data_1 = torch.tensor(self.rdata.data_1[board_1:])
        valid_data_2 = torch.tensor(self.rdata.data_2[board_1:])
        valid_data_3 = torch.tensor(self.rdata.data_3[board_1:])
        
        valid_labels = torch.tensor(self.rdata.labels[board_1:])  
        datasets = [data_set(train_data_1, train_data_2, train_data_3, train_labels), data_set(test_data_1, test_data_2, test_data_3, test_labels), data_set(valid_data_1, valid_data_2, valid_data_3, valid_labels)]
        dataloaders = [DataLoader(datasets[0], shuffle=True, batch_size=self.config.batch_size), DataLoader(datasets[1], shuffle=True, batch_size=self.config.batch_size), DataLoader(datasets[2], batch_size=self.config.batch_size)]
        # return datasets, dataloaders
        return dataloaders


In [3]:
import math
import torch
import numpy as np
import torch.optim as optim
from __future__ import print_function

class TwoCropTransform:
    """Create two crops of the same image"""
    def __init__(self, transform):
        self.transform = transform

    def __call__(self, x):
        return [self.transform(x), self.transform(x)]

def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        # print(correct)

        res = []
        for k in topk:
            correct_k = correct[:k].contiguous().view(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
'''作用: 计算模型预测的准确率。支持计算多个 top-k 的准确率，比如 top-1 或 top-5。

output 是模型的输出，通常是未经处理的 logits。
target 是真实标签。
topk 指定需要计算的 k 个准确率，例如 topk=(1, 5) 会计算 top-1 和 top-5 准确率。
函数返回一个列表，包含每个 k 对应的准确率（以百分比表示）。'''


class train_tools:
    def __init__(self, model, config):
        self.model = model
        self.config = config
        self.set_optimizer()
        
    def set_optimizer(self):
        self.optimizer = optim.SGD(self.model.parameters(),
                        lr=self.config.learning_rate,
                        momentum=self.config.momentum,
                        weight_decay=self.config.weight_decay)

    
    def adjust_learning_rate(self, epoch):
        lr = self.config.learning_rate
        if self.config.cosine:
            eta_min = lr * (self.config.lr_decay_rate ** 3)
            lr = eta_min + (lr - eta_min) * (
                    1 + math.cos(math.pi * epoch / self.config.epochs)) / 2
        else:
            steps = np.sum(epoch > np.asarray(self.config.lr_decay_epochs))
            if steps > 0:
                lr = lr * (self.config.lr_decay_rate ** steps)

        for param_group in self.optimizer.param_groups:
            param_group['lr'] = lr


    def warmup_learning_rate(self, epoch, batch_id, total_batches):
        if self.config.warm and epoch <= self.config.warm_epochs:
            p = (batch_id + (epoch - 1) * total_batches) / \
                (self.config.warm_epochs * total_batches)
            lr = self.config.warmup_from + p * (self.config.warmup_to - self.config.warmup_from)

            for param_group in self.optimizer.param_groups:
                param_group['lr'] = lr


    def save_model(self, epoch, save_file):
        print('==> Saving...')
        state = {
            'opt': self.config,
            'model': self.model.state_dict(),
            'optimizer': self.optimizer.state_dict(),
            'epoch': epoch,
        }
        torch.save(state, save_file)
        del state


class AverageMeter:
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [4]:
import time

class Trainer:
    def __init__(self, config, train_loader, valid_loader, device):
        self.config = config
        self.device = device
        self.model = My3Model(self.config.num_classes).to(device)
        self.model.train()
        self.criterion = torch.nn.CrossEntropyLoss().to(device)
        self.train_tools = train_tools(self.model, config)
        self.train_loader = train_loader
        self.validater = Validater(self.model, valid_loader, config, self.criterion, device)
        self.best_acc = -100
    def every_epoch_train(self):
        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()
        
        end = time.time()
        for data_1, data_2, data_3, labels in self.train_loader:
            data_time.update(time.time() - end)
            bsz = data_1.shape[0]
  
            data_1 = data_1.to(self.device)
            data_2 = data_2.to(self.device)
            data_3 = data_3.to(self.device)

            labels = labels.to(self.device)
            
            output = self.model(data_1, data_2, data_3)
            loss = self.criterion(output, labels)

            acc, _ = accuracy(output, labels, topk=(1, 5))

            # update metric
            losses.update(loss.item(), bsz)
            top1.update(acc[0], bsz)

            # SGD
            self.train_tools.optimizer.zero_grad()
            loss.backward()
            self.train_tools.optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
        print("loss: %f", loss.item())
        return losses.avg
    
    def train(self):
        record_loss = np.zeros(self.config.epochs)
        record_acc = np.zeros(self.config.epochs)
        for epoch in range(0, self.config.epochs + 1):
            self.train_tools.adjust_learning_rate(epoch)
            time1 = time.time()
            loss = self.every_epoch_train()
            time2 = time.time()
            print('epoch {}, total time {:.2f}'.format(epoch, time2 - time1))
            record_loss[epoch-1] = loss
            # evaluation
            loss, val_acc, _ = self.validater.validate()
            record_acc[epoch-1] = val_acc
            if val_acc > self.best_acc:
                self.best_acc = val_acc
                # best_confusion = confusion
            if self.best_acc > 65.01:
                self.train_tools.save_model(epoch, os.path.join(os.getcwd(), 'model/best.pth'))
                break;
        print(record_acc)
class Validater:
    def __init__(self, model, valid_loader, config, criterion, device):
        self.model = model
        self.config = config
        self.criterion = criterion
        self.valid_loader = valid_loader
        self.device = device
    def validate(self):
        self.model.eval()
        batch_time = AverageMeter()
        losses = AverageMeter()
        top1 = AverageMeter()

        confusion = np.zeros((self.config.num_classes, self.config.num_classes))

        with torch.no_grad():
            end = time.time()
            for data_1, data_2, data_3, labels in self.valid_loader:
                
                bsz = labels.shape[0]
                data_1 = data_1.to(self.device)
                data_2 = data_2.to(self.device)
                data_3 = data_3.to(self.device)
                
                labels = labels.to(self.device)
                # forward
                output = self.model(data_1, data_2, data_3)
                loss = self.criterion(output, labels)

                # update metric
                acc, _ = accuracy(output, labels, topk=(1, 5))
                losses.update(loss.item(), bsz)
                top1.update(acc[0], bsz)

                # calculate and store confusion matrix
                # rows = labels.cpu().numpy()
                # cols = output.max(1)[1].cpu().numpy()
                # for label_index in range(labels.shape[0][0]):
                #     confusion[rows[label_index], cols[label_index]] += 1

                # measure elapsed time
                batch_time.update(time.time() - end)
                end = time.time()

        return losses.avg, top1.avg, confusion

class Tester:
    def __init__(self, model, test_loader, device):
        self.model = model
        self.test_loader = test_loader
        self.device = device
        
    def test(self):
        self.model.eval()
        accs = AverageMeter()

        with torch.no_grad():
            for data_1, data_2, data_3, labels in self.test_loader:
                data_1 = data_1.to(self.device)
                data_2 = data_2.to(self.device)
                data_3 = data_3.to(self.device)
                
                labels = labels.to(self.device)
                output = self.model(data_1, data_2, data_3)
                acc, _ = accuracy(output, labels, topk=(1, 5))

                # calculate and store confusion matrix
                accs.update(acc, data_1.size(0))

        return accs.avg

In [5]:
import json

class Config:
    def __init__(self, config_path) -> None:
        self.config_path = config_path
        self.load_config()

    def load_config(self) -> None:
        with open(self.config_path, 'r') as f:
            config_data = json.load(f)

        self.print_freq = config_data.get('print_freq', 5)
        self.save_freq = config_data.get('save_freq', 20)
        self.batch_size = config_data.get('batch_size', 16)
        self.num_workers = config_data.get('num_workers', 16)
        self.epochs = config_data.get('epochs', 99)
        self.learning_rate = config_data.get('learning_rate', 0.001)
        self.lr_decay_epochs = config_data.get('lr_decay_epochs', '50,100,150')
        self.lr_decay_rate = config_data.get('lr_decay_rate', 0.9)
        self.weight_decay = config_data.get('weight_decay', 0.0001)
        self.momentum = config_data.get('momentum', 0.9)
        self.cosine = config_data.get('cosine', True)
        self.num_classes = config_data.get('num_classes', 12)

    def __repr__(self) -> str:
        return f"Config({self.__dict__})"

: 

In [None]:
# Copyright 2024 ichibanmikan
# 
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# 
#     https://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

if __name__ == "__main__":
    print(111, get_memory_usage())
    if torch.backends.mps.is_available():
        device = torch.device("mps")
    elif torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")
    config = Config(os.path.join(os.getcwd(), 'config.json'))
    print(222, get_memory_usage())
    data_f = data_factory(os.path.join(os.getcwd(), 'datasets'), config)
    train_loader, valid_loader, test_loader = data_f.get_dataset()
    tr = Trainer(config, train_loader, valid_loader, device)

    tr.train()
    print(tr.best_acc)
    te = Tester(tr.model, test_loader, device)
    acc = te.test()
    
    print(acc)
    

111 221659136
222 225247232
111 225247232
222 225263616
333 1185873920
222 1187725312
