In [1]:

import copy
import os

import matplotlib.pyplot as plt
import pandas as pd
import torch
import torchvision
from PIL import Image
from torch import nn
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import models

In [2]:
# Run-wide configuration flags and hyperparameters.
LOCAL_HOST = 1  # truthy -> use the local data/ paths; falsy -> use the /kaggle/input paths
CPU_TEST = 1  # NOTE(review): presumably a quick CPU smoke-test switch — confirm intended use
lr = 1e-3  # learning rate (presumably for the optimizer — confirm)
wd = 1e-5  # weight decay (presumably for the optimizer — confirm)
lr_period = 2  # passed to StepLR as step_size: scheduler steps between two decays
lr_decay = 0.9 # passed to StepLR as gamma: the learning rate becomes 0.9x its previous value

In [3]:
# Data paths: pick the dataset root for the current environment, then derive
# every file/directory location from it so the two environments cannot drift
# apart.
if LOCAL_HOST:
    data_root = 'data/dog-breed-identification/'
else:
    # The Kaggle branch used to point at the unrelated "classify-leaves"
    # input directory, which is not the dataset the rest of this notebook
    # expects (it reads a 'breed' column and reports 120 breeds).
    data_root = '/kaggle/input/dog-breed-identification/'

train_csv = data_root + 'labels.csv'
test_csv = data_root + 'sample_submission.csv'
train_images_dir = data_root + 'train/'
test_images_dir = data_root + 'test/'

In [4]:
# Read the training labels and build the alphabetically sorted list of
# distinct breed names.
train_data = pd.read_csv(train_csv)
breed_names = train_data['breed'].unique()
categories = sorted(breed_names.tolist())
print(len(categories))

120


In [5]:
# Load the sample submission; its 'id' column supplies the test image ids used below.
test_data = pd.read_csv(test_csv)

In [30]:
# Number of rows in the sample-submission table.
len(test_data)

10357

In [6]:
class dogDataset(Dataset):
    """Image dataset that pairs ``<root_dir>/<id>.jpg`` files with labels.

    Args:
        root_dir: Directory that contains the ``.jpg`` images.
        x: Sequence of image ids (file names without the ``.jpg`` suffix).
        y: Indexable collection of labels; ``y[i]`` is returned with ``x[i]``.
        transform: Optional callable applied to the decoded PIL image. When
            omitted, the random-augmentation pipeline below is used (the
            original default), so pass a deterministic transform for
            validation or inference.
    """

    def __init__(self, root_dir, x, y, transform=None):
        self.id_list = x
        self.breed = y
        self.root_dir = root_dir
        if transform is None:
            # Default: random crop / flip / colour jitter, then tensor
            # conversion and per-channel normalisation.
            transform = torchvision.transforms.Compose([
                torchvision.transforms.RandomResizedCrop(224, scale=(0.2, 1.0),
                                                 ratio=(3.0 / 4.0, 4.0 / 3.0)),
                torchvision.transforms.RandomHorizontalFlip(),
                torchvision.transforms.ColorJitter(brightness=0.3, contrast=0.3,
                                           saturation=0.3),
                torchvision.transforms.ToTensor(),
                torchvision.transforms.Normalize([0.485, 0.456, 0.406],
                                         [0.229, 0.224, 0.225])
            ])
        self.transform = transform

    def __len__(self):
        """Return the number of samples, which is the length of the label collection."""
        return len(self.breed)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for sample ``idx``."""
        # os.path.join tolerates a root_dir with or without a trailing slash.
        image_path = os.path.join(self.root_dir, self.id_list[idx] + '.jpg')
        # Force 3 channels: a grayscale, RGBA or CMYK file would otherwise not
        # match the 3-channel Normalize step. The context manager closes the
        # underlying file as soon as the pixels have been copied.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert('RGB')
        return self.transform(image), self.breed[idx]

In [7]:
# One-hot encode the breed column (one float32 column per distinct breed) and
# wrap the resulting matrix in a tensor.
breed_one_hot = pd.get_dummies(train_data['breed']).astype('float32')
train_labels = torch.tensor(breed_one_hot.values)

In [8]:
# Build the full labelled dataset and hold out 20% of it for validation.
base_dataset = dogDataset(train_images_dir, train_data['id'].tolist(), train_labels)
train_size = int(0.8 * len(base_dataset))

val_size = len(base_dataset) - train_size
print(train_size, val_size)

# Seed the split so the same images land in train/validation on every run;
# with an unseeded split, validation losses from different kernel sessions
# are measured on different images and cannot be compared.
split_generator = torch.Generator().manual_seed(42)
train_subset, val_subset = random_split(
    base_dataset, [train_size, val_size], generator=split_generator
)
batch_size = 128

train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True, num_workers=4)
# Validation only accumulates a summed loss, so sample order is irrelevant and
# shuffling is wasted work.
# NOTE(review): both subsets share base_dataset and therefore its transform,
# so validation images go through the same random augmentations as training.
val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False, num_workers=4)

8177 2045


In [14]:
# dogDataset takes its length from the label collection, so passing
# train_labels here would cap the test set at len(train_labels) samples and
# silently drop the remaining test images. Use an all-zero placeholder with
# exactly one row per test id instead (the values are never read).
placeholder_labels = torch.zeros(len(test_data), train_labels.shape[1])
test_dataset = dogDataset(test_images_dir, test_data['id'].tolist(), placeholder_labels)

# Inference should be deterministic: replace the random training
# augmentations with a fixed resize + center crop, keeping the same
# normalisation statistics.
test_dataset.transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(256),
    torchvision.transforms.CenterCrop(224),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
])

# shuffle=False keeps batches in test_data order; the prediction loop pairs
# each output row with test_data['id'] purely by position, so a shuffled
# loader would attach predictions to the wrong ids.
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, num_workers=4)

In [10]:
class base_resnet(nn.Module):
    """Pretrained torchvision ResNet-34 with a frozen backbone and a new trainable head.

    Args:
        num_classes: Number of output scores (defaults to the original 120).
        hidden_dim: Width of the hidden layer in the head (defaults to the
            original 256).
    """

    def __init__(self, num_classes=120, hidden_dim=256):
        super().__init__()
        self.model = models.resnet34(pretrained=True)

        # Freeze every pretrained parameter first; the head created below is
        # brand new, so its parameters default to requires_grad=True.
        for param in self.model.parameters():
            param.requires_grad = False

        # Read the feature width from the backbone instead of hard-coding it.
        in_features = self.model.fc.in_features
        self.model.fc = nn.Sequential(
            nn.Linear(in_features, hidden_dim),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_dim),
            nn.Linear(hidden_dim, num_classes)
        )

    def forward(self, x):
        """Return the raw (pre-softmax) class scores for the image batch ``x``."""
        return self.model(x)

In [11]:

def training(dataloader, model, loss_fn, optimizer, devices):
    """Run one optimisation pass over ``dataloader`` and return the mean loss.

    Args:
        dataloader: Iterable of ``(inputs, targets)`` tensor batches.
        model: Module to train; it is switched to train mode.
        loss_fn: Callable ``loss_fn(output, targets)`` returning a scalar
            tensor. The accumulated value is divided by the sample count, so
            a sum-reduced loss yields a per-sample average.
        optimizer: Optimizer that updates the model's trainable parameters.
        devices: Sequence of devices; every batch is moved to ``devices[0]``.

    Returns:
        float: Accumulated loss divided by the number of samples seen.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    model.train()

    total_loss = 0.0
    sample_count = 0

    for X, y in dataloader:
        X, y = X.to(devices[0]), y.to(devices[0])
        output = model(X)

        cur_loss = loss_fn(output, y)

        optimizer.zero_grad()
        cur_loss.backward()
        optimizer.step()

        # .item() extracts a plain Python float. Adding the tensor itself
        # would chain every batch's autograd history onto the running total
        # and hand the caller a device tensor instead of a number.
        total_loss += cur_loss.item()
        sample_count += len(y)

    return total_loss / sample_count

def val(dataloader, model, loss_fn, devices):
    """Evaluate ``model`` on ``dataloader`` without gradients and return the mean loss.

    Args:
        dataloader: Iterable of ``(inputs, targets)`` tensor batches.
        model: Module to evaluate; it is switched to eval mode.
        loss_fn: Callable ``loss_fn(output, targets)`` returning a scalar
            tensor. The accumulated value is divided by the sample count, so
            a sum-reduced loss yields a per-sample average.
        devices: Sequence of devices; every batch is moved to ``devices[0]``.

    Returns:
        float: Accumulated loss divided by the number of samples seen.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no samples.
    """
    model.eval()

    total_loss = 0.0
    sample_count = 0

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(devices[0]), y.to(devices[0])
            # .item() keeps the running total a plain float, matching what
            # training() returns.
            total_loss += loss_fn(model(X), y).item()
            sample_count += len(y)

    return total_loss / sample_count


In [12]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = base_resnet().to(device)
# reduction='sum' because training()/val() divide the accumulated loss by the
# number of samples themselves.
loss_fn = nn.CrossEntropyLoss(reduction='sum')
# Only parameters that still require gradients are optimised. The learning
# rate and weight decay come from the configuration cell rather than from a
# second hard-coded copy that silently ignored it.
optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=lr,
    weight_decay=wd,
)

devices = [device]
if device != 'cpu' and torch.cuda.device_count() > 1:
    devices = list(range(torch.cuda.device_count()))
    # DataParallel returns a wrapper module; it has to be assigned back,
    # otherwise the wrapper is discarded and only one GPU is ever used.
    model = nn.DataParallel(model, device_ids=devices).to(devices[0])

# Multiply the learning rate by lr_decay every lr_period scheduler steps.
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, lr_period, lr_decay)

# NOTE(review): with epochs = 0 the loop below never executes, so the model
# keeps its freshly initialised head; raise this value to actually train.
epochs = 0
min_loss = 1e10
# state_dict() returns references to the live parameter tensors, which the
# optimizer updates in place. A deep copy is needed for a real snapshot;
# without it the "best" weights would always equal the latest weights.
params = copy.deepcopy(model.state_dict())

for epoch in range(epochs):

    loss = training(train_loader, model, loss_fn, optimizer, devices)
    print(f'epoch {epoch + 1}/{epochs}: loss = {loss} ')
    loss = val(val_loader, model, loss_fn, devices)
    print(f'Validation: loss = {loss} \n')

    # Keep the weights from the epoch with the lowest validation loss.
    if loss < min_loss:
        min_loss = loss
        params = copy.deepcopy(model.state_dict())
        print('saved local best')

    scheduler.step()


print(min_loss)
# Restore the best snapshot before running inference.
model.load_state_dict(params)



10000000000.0


<All keys matched successfully>

In [24]:
# Softmax over the last (class) dimension. An explicit dim avoids the
# implicit-dimension deprecation warning and lets a whole batch be converted
# in one call.
fun = nn.Softmax(dim=-1)

# Each entry becomes one output row: [image id, probability per class...].
data = []

cnt = 0
# eval() is required for inference: in train mode BatchNorm normalises with
# the statistics of each tiny inference batch instead of its running averages.
model.eval()
with torch.no_grad():
    for X, _ in test_loader:

        X = X.to(devices[0])
        batch_probs = fun(model(X)).to('cpu').tolist()

        # Ids are read from test_data by position, so this pairing is only
        # valid while test_loader yields samples in test_data order.
        for probs in batch_probs:
            data.append([test_data['id'].iloc[cnt]] + probs)
            cnt += 1

        # Smoke-test shortcut: stop after the first few batches only when
        # CPU_TEST is set, so a full run produces a row for every sample.
        if CPU_TEST and cnt > 20:
            break

In [25]:
# Prepend the 'id' column name exactly once. The guard keeps this cell
# idempotent: re-running it no longer stacks additional 'id' entries in front
# of the breed names.
if categories[:1] != ['id']:
    categories.insert(0, 'id')


In [26]:
# Display the list that is used as the column names of the output frame.
categories

['id',
 'affenpinscher',
 'afghan_hound',
 'african_hunting_dog',
 'airedale',
 'american_staffordshire_terrier',
 'appenzeller',
 'australian_terrier',
 'basenji',
 'basset',
 'beagle',
 'bedlington_terrier',
 'bernese_mountain_dog',
 'black-and-tan_coonhound',
 'blenheim_spaniel',
 'bloodhound',
 'bluetick',
 'border_collie',
 'border_terrier',
 'borzoi',
 'boston_bull',
 'bouvier_des_flandres',
 'boxer',
 'brabancon_griffon',
 'briard',
 'brittany_spaniel',
 'bull_mastiff',
 'cairn',
 'cardigan',
 'chesapeake_bay_retriever',
 'chihuahua',
 'chow',
 'clumber',
 'cocker_spaniel',
 'collie',
 'curly-coated_retriever',
 'dandie_dinmont',
 'dhole',
 'dingo',
 'doberman',
 'english_foxhound',
 'english_setter',
 'english_springer',
 'entlebucher',
 'eskimo_dog',
 'flat-coated_retriever',
 'french_bulldog',
 'german_shepherd',
 'german_short-haired_pointer',
 'giant_schnauzer',
 'golden_retriever',
 'gordon_setter',
 'great_dane',
 'great_pyrenees',
 'greater_swiss_mountain_dog',
 'groenen

In [27]:
# pandas is already imported in the first cell; this repeated import is harmless.
import pandas as pd

# Assemble the collected prediction rows into a frame whose column names come
# from `categories` ('id' followed by the breed names).
df = pd.DataFrame(data, columns=categories)

In [29]:
# Write the predictions to output.csv, omitting the DataFrame index column.
df.to_csv("output.csv", index=False)