In [1]:
import torch
import torch.nn as nn
import torchvision

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import time

from torch.utils.data import Dataset, DataLoader
# from torch.utils.data.sampler import Sampler
import torch.optim as optim
import sys
sys.path.append('../')

from dataset import LbpDataset, train_transforms, val_transforms, test_transforms, collate_fn, get_data
from visualize import visualize
from model import fasterrcnn_resnet201_fpn, FastRCNNPredictor
from engine import evaluate
import utils
from train_lbp import get_train_test_list

In [2]:
from torchvision.models.detection.faster_rcnn import fasterrcnn_mobilenet_v3_large_fpn

In [3]:
# model = fasterrcnn_mobilenet_v3_large_fpn(pretrained=False, min_size=1600, max_size=1600)

In [4]:
import os

# Restrict this process to CUDA device index 0.
# NOTE(review): the `args` config below sets 'gpu': 5, which does not match
# this value — confirm which one is intended.
os.environ.update({"CUDA_VISIBLE_DEVICES": "0"})

In [5]:
import easydict

# Run configuration, exposed with attribute access (args.batch_size, ...).
# Only batch_size, workers and output_dir are read by the cells in this
# notebook; the whole object is also stored inside each saved checkpoint.
# NOTE(review): lr / momentum / weight_decay / epochs here are NOT what the
# optimizer and training loop below use (they hard-code lr=0.001 and
# range(120)) — confirm whether these fields are still meaningful.
_run_config = dict(
    batch_size=12,
    epochs=50,
    data=0,
    lr=0.1,
    momentum=0.9,
    weight_decay=1e-4,
    start_epoch=0,
    gpu=5,
    workers=6,
    output_dir='../trained_models/mobilenet-rcnn/',
)
args = easydict.EasyDict(_run_config)

In [6]:
# Data loading code
# NOTE: despite its name, `data_dir` holds the path of the annotation CSV file.
data_dir = '../../data/df.csv'

df = pd.read_csv(data_dir)
df.head()  # not the last expression of the cell, so nothing is displayed

# Split the annotations into train / test lists, then wrap each list in a
# dataset with its own transform pipeline (train first, then test).
train_list, test_list = get_train_test_list(data_dir)
train_dataset, test_dataset = (
    LbpDataset(samples, transform=pipeline)
    for samples, pipeline in (
        (train_list, train_transforms),
        (test_list, val_transforms),
    )
)

total 4019 train 3014 test 1005
3014
1005


In [7]:
# Settings shared by both loaders: batch size and worker count come from
# `args`, and batches are assembled by the project's utils.collate_fn.
_loader_kwargs = dict(
    batch_size=args.batch_size,
    num_workers=args.workers,
    collate_fn=utils.collate_fn,
)

# Training data is visited in random order.
train_sampler = torch.utils.data.RandomSampler(train_dataset)
train_loader = DataLoader(train_dataset, sampler=train_sampler, **_loader_kwargs)

# Test data is visited in a fixed, sequential order.
test_sampler = torch.utils.data.SequentialSampler(test_dataset)
test_loader = DataLoader(test_dataset, sampler=test_sampler, **_loader_kwargs)

In [8]:
# Number of outputs of the replacement box predictor.
# presumably background + one foreground class — TODO confirm against the dataset labels
num_classes = 2
device = torch.device('cuda')

# Start from the pretrained MobileNetV3-Large FPN Faster R-CNN, with both
# min_size and max_size set to 2048.
model = fasterrcnn_mobilenet_v3_large_fpn(
    pretrained=True,
    min_size=2048,
    max_size=2048,
)

# Replace the box predictor with a fresh one sized for `num_classes`,
# keeping the input width of the existing classification layer.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

model.to(device)
print('model is loaded to gpu')

model is loaded to gpu


In [9]:
# Optimise only the parameters that require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))

# Alternative that was tried / left disabled:
# optimizer = torch.optim.Adam(params, lr=0.0005)
optimizer = optim.SGD(
    params,
    lr=0.001,
    momentum=0.9,
    weight_decay=1e-4,
)

# Halve the learning rate at each listed epoch milestone.
lr_scheduler = optim.lr_scheduler.MultiStepLR(
    optimizer,
    milestones=[20, 40, 60, 80, 100],
    gamma=0.5,
)

In [10]:
# checkpoint = torch.load('../trained_model/model.pt')
# model.load_state_dict(checkpoint['model'])
# optimizer.load_state_dict(checkpoint['optimizer'])
# lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
# epoch = checkpoint['epoch']
# print(epoch)

In [11]:
# evaluate(model, test_loader, device=device) 
# evaluate(model, test_loader, device=device)  
# checkpoint = torch.load(args.output_dir + 'checkpoint.pth')
# state_dict = checkpoint['model']
# model.load_state_dict(state_dict)

In [None]:
from train_lbp import train_one_epoch

# Create the checkpoint directory before training starts. The first save only
# happens at epoch 25, so a missing directory would otherwise surface as a
# failure many epochs into the run (utils.save_on_master is not shown here;
# this guards the case where it writes straight to the given path).
if args.output_dir:
    os.makedirs(args.output_dir, exist_ok=True)

start_time = time.time()
for epoch in range(120):
    # Last argument is presumably the logging frequency: the recorded output
    # shows one log line every 100 iterations — TODO confirm in train_lbp.
    train_one_epoch(model, optimizer, train_loader, device, epoch, 100)
    lr_scheduler.step()

    # Every 5th epoch after epoch 20 (i.e. 25, 30, ..., 115): checkpoint and evaluate.
    if epoch > 20 and epoch % 5 == 0:
        if args.output_dir:
            checkpoint = {
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'args': args,
                'epoch': epoch
            }
            # Keep a per-epoch snapshot ...
            utils.save_on_master(
                checkpoint,
                os.path.join(args.output_dir, 'model_{}.pth'.format(epoch)))
            # ... and overwrite the rolling "latest" checkpoint.
            utils.save_on_master(
                checkpoint,
                os.path.join(args.output_dir, 'checkpoint.pth'))

        # Evaluation runs only on these checkpoint epochs, NOT after every
        # epoch: this call sits inside the `epoch % 5` branch above.
        evaluate(model, test_loader, device=device)
print('total time is {}'.format(time.time() - start_time))

Epoch: [0]  [  0/252]  eta: 0:29:54  lr: 0.001000  loss: 2.4119 (2.4119)  loss_classifier: 0.4932 (0.4932)  loss_box_reg: 1.5796 (1.5796)  loss_objectness: 0.3243 (0.3243)  loss_rpn_box_reg: 0.0147 (0.0147)  time: 7.1225  data: 5.4570  max mem: 13928
Epoch: [0]  [100/252]  eta: 0:02:36  lr: 0.001000  loss: 0.1793 (0.6168)  loss_classifier: 0.0383 (0.1476)  loss_box_reg: 0.0164 (0.0780)  loss_objectness: 0.0868 (0.3427)  loss_rpn_box_reg: 0.0084 (0.0485)  time: 0.9814  data: 0.1082  max mem: 14391
Epoch: [0]  [200/252]  eta: 0:00:52  lr: 0.001000  loss: 0.1920 (0.4110)  loss_classifier: 0.0448 (0.1005)  loss_box_reg: 0.0220 (0.0526)  loss_objectness: 0.0836 (0.2195)  loss_rpn_box_reg: 0.0335 (0.0384)  time: 0.9817  data: 0.1064  max mem: 14391
Epoch: [0]  [251/252]  eta: 0:00:00  lr: 0.001000  loss: 0.1826 (0.3702)  loss_classifier: 0.0482 (0.0911)  loss_box_reg: 0.0232 (0.0476)  loss_objectness: 0.0841 (0.1943)  loss_rpn_box_reg: 0.0315 (0.0372)  time: 0.9163  data: 0.0968  max mem: 14

In [None]:
# Final evaluation of the current in-memory model on the test loader.
# Being the last expression of the cell, its return value (if any) is displayed.
evaluate(model, test_loader, device=device) 