In [7]:
%env JOBLIB_TEMP_FOLDER=/tmp
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import models
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import os
import cv2
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import time


class ImgDataset(Dataset):
    """In-memory dataset of samples with optional integer labels.

    Args:
        x: Indexable collection of samples; must support ``len()``.
        y: Optional labels aligned with ``x``. When provided they are
            converted once, up front, with ``torch.LongTensor``.
        transform: Optional callable applied to each sample on access.
    """

    def __init__(self, x, y=None, transform=None):
        self.x = x
        # Labels must be a LongTensor; None marks an unlabeled dataset.
        self.y = None if y is None else torch.LongTensor(y)
        self.transform = transform

    def __len__(self):
        """Number of samples held in ``x``."""
        return len(self.x)

    def __getitem__(self, index):
        """Return ``(sample, label)`` when labels exist, otherwise only the sample."""
        sample = self.x[index]
        if self.transform is not None:
            sample = self.transform(sample)
        if self.y is None:
            return sample
        return sample, self.y[index]


env: JOBLIB_TEMP_FOLDER=/tmp


In [8]:
# Use the first CUDA device when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [None]:
# # Read Files
# df = pd.read_csv('/kaggle/input/shopee-product-detection-open/train.csv',encoding="utf-8", dtype=str, nrows=20000)	
# x = np.zeros((len(df), 128, 128, 3),dtype=np.uint8)
# y = np.zeros((len(df)),dtype=np.uint8)
# filename = df['filename']
# category = df['category']
# for i in range(len(df)):
#     if len(category[i]) == 1:
#         category[i] = '0'+category[i]
#     if i % 100 == 0:
#         print(i, '/kaggle/input/shopee-product-detection-open/train/train/train/'+category[i]+'/'+filename[i])
#     img = cv2.imread('/kaggle/input/shopee-product-detection-open/train/train/train/'+category[i]+'/'+filename[i])
#     x[i, :, :] = cv2.resize(img,(128, 128))
#     y[i] = category[i]

In [None]:
# Read the training manifest and load every image into memory, resized to 128x128.
TRAIN_DIR = '/kaggle/input/shopee-product-detection-open/train/train/train/'
SPLIT_SEED = 42  # fixed seed so the train/validation split is the same on every run

df = pd.read_csv('/kaggle/input/shopee-product-detection-open/train.csv', encoding="utf-8", dtype=str)
x = np.zeros((len(df), 128, 128, 3), dtype=np.uint8)
y = np.zeros((len(df)), dtype=np.uint8)
filename = df['filename']
category = df['category']
for i in range(len(df)):
    # Single-character category ids are zero-padded to form the folder name.
    # Padding a local copy avoids writing back into the pandas Series.
    folder = category[i]
    if len(folder) == 1:
        folder = '0' + folder
    path = TRAIN_DIR + folder + '/' + filename[i]
    if i % 100 == 0:
        print(i, path)
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None instead of
        # raising; fail here with the offending path rather than inside cv2.resize.
        raise FileNotFoundError('Could not read training image: ' + path)
    x[i, :, :] = cv2.resize(img, (128, 128))
    # Explicit conversion instead of relying on numpy casting a str into uint8.
    y[i] = int(folder)

# Stratify on the labels so both splits keep the same class proportions.
x_train, x_val, y_train, y_val = train_test_split(
    x, y, test_size=0.2, stratify=y, random_state=SPLIT_SEED
)



# Training pipeline: apply data augmentation before converting to a tensor.
train_transform = transforms.Compose(
    [
        transforms.ToPILImage(),
        # Randomly flip the image horizontally.
        transforms.RandomHorizontalFlip(),
        # Randomly rotate the image.
        transforms.RandomRotation(15),
        # Convert the image to a Tensor and normalize the values to [0, 1].
        transforms.ToTensor(),
    ]
)

# Testing pipeline: no data augmentation is needed.
test_transform = transforms.Compose(
    [transforms.ToPILImage(), transforms.ToTensor()]
)


batch_size = 128

# Wrap the in-memory arrays; training uses the augmenting transform,
# validation uses the plain conversion-only transform.
train_set = ImgDataset(x=x_train, y=y_train, transform=train_transform)
val_set = ImgDataset(x=x_val, y=y_val, transform=test_transform)

# Only the training batches are shuffled.
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_set, batch_size=batch_size, shuffle=False)



# Pretrained ResNet-101, placed on the `device` selected in the earlier cell
# (instead of a hard-coded .cuda()) so this cell also runs without a GPU.
model = models.resnet101(pretrained=True).to(device)
loss = nn.CrossEntropyLoss()  # classification task, so the criterion is cross-entropy
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer
num_epoch = 30

# Train for `num_epoch` epochs, validating after each one, then save the weights.
# `device` is the torch.device chosen in the earlier device-selection cell.
for epoch in range(num_epoch):
    epoch_start_time = time.time()
    # Running totals over the epoch: number of correct predictions and the
    # sum of per-sample losses (so dividing by the dataset size gives means).
    train_acc = 0.0
    train_loss = 0.0
    val_acc = 0.0
    val_loss = 0.0

    model.train()  # make sure the model is in training mode (enables Dropout etc.)
    for images, labels in train_loader:
        # Predictions and labels must live on the same device.
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()  # reset the gradients of the model parameters
        train_pred = model(images)  # forward pass
        batch_loss = loss(train_pred, labels)
        batch_loss.backward()  # back-propagate to get each parameter's gradient
        optimizer.step()  # update the parameters from the gradients

        train_acc += (train_pred.argmax(dim=1) == labels).sum().item()
        # nn.CrossEntropyLoss() with its default reduction returns the batch mean.
        # Weight it by the batch size so the epoch total is a per-sample sum;
        # summing the raw means and dividing by the sample count under-reports
        # the loss by roughly a factor of the batch size.
        train_loss += batch_loss.item() * labels.size(0)

    model.eval()
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            val_pred = model(images)
            batch_loss = loss(val_pred, labels)

            val_acc += (val_pred.argmax(dim=1) == labels).sum().item()
            val_loss += batch_loss.item() * labels.size(0)

    # Print the epoch summary.
    print('[%03d/%03d] %2.2f sec(s) Train Acc: %3.6f Loss: %3.6f | Val Acc: %3.6f loss: %3.6f' % (
        epoch + 1, num_epoch, time.time() - epoch_start_time,
        train_acc / len(train_set), train_loss / len(train_set),
        val_acc / len(val_set), val_loss / len(val_set)))


# Save only the learned parameters (state_dict).
PATH = './resNet.pth'
torch.save(model.state_dict(), PATH)

In [14]:
# Do Prediction

# Read the test manifest and load every test image into memory, resized to 128x128.
TEST_DIR = '/kaggle/input/shopee-product-detection-open/test/test/test/'
df_test = pd.read_csv('/kaggle/input/shopee-product-detection-open/test.csv', encoding="utf-8", dtype=str)
test_x = np.zeros((len(df_test), 128, 128, 3), dtype=np.uint8)
filename = df_test['filename']
for i in range(len(df_test)):
    path = TEST_DIR + filename[i]
    if i % 100 == 0:
        print(i, path)
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None instead of
        # raising; fail here with the offending path rather than inside cv2.resize.
        raise FileNotFoundError('Could not read test image: ' + path)
    test_x[i, :, :] = cv2.resize(img, (128, 128))

# No data augmentation is needed at test time.
test_transform = transforms.Compose(
    [transforms.ToPILImage(), transforms.ToTensor()]
)

batch_size = 128
# No labels are passed, so each item is just the transformed image.
# shuffle=False keeps the batches in the same order as `test_x`.
test_set = ImgDataset(x=test_x, transform=test_transform)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)


# Rebuild the architecture and restore the saved weights.
# pretrained=False: load_state_dict replaces every parameter and buffer anyway, so
# downloading the pretrained weights first is wasted work (and fails offline).
model = models.resnet101(pretrained=False)
# map_location lets a checkpoint saved on a GPU load onto whichever `device`
# the earlier device-selection cell picked.
model.load_state_dict(torch.load('/kaggle/input/resnet-model/resNet.pth', map_location=device))
model = model.to(device)
model.eval()


# Predict a class index for every test image, in the same order as `filename`.
prediction = []
with torch.no_grad():
    for images in test_loader:
        test_pred = model(images.to(device))
        test_label = test_pred.argmax(dim=1).cpu().numpy()
        # Store every label as a zero-padded string so the column has one type.
        # `label` (not `y`) is used so the training label array is not overwritten.
        prediction.extend('%02d' % label for label in test_label)

# zip() would silently drop rows if the two lengths ever disagreed.
assert len(prediction) == len(filename), "expected one prediction per test file"

# Write the results to a CSV file. A dedicated name is used so the training
# manifest `df` is not replaced by the submission table.
submission = pd.DataFrame(list(zip(filename, prediction)),
                          columns=['filename', 'category'])
submission.to_csv("predict.csv", encoding="utf_8_sig", index=False)
print('finished')

        

0 /kaggle/input/shopee-product-detection-open/test/test/test/fd663cf2b6e1d7b02938c6aaae0a32d2.jpg
100 /kaggle/input/shopee-product-detection-open/test/test/test/65e04aeb8a47dfb6ddace83ac36eb066.jpg
200 /kaggle/input/shopee-product-detection-open/test/test/test/c4fb5395ccbb386eefbe96aae6d5e060.jpg
300 /kaggle/input/shopee-product-detection-open/test/test/test/8eaca4ab9a2b78af894f70c1f7a2efe7.jpg
400 /kaggle/input/shopee-product-detection-open/test/test/test/b9909277fb2c07de52f22d7f41745d4a.jpg
500 /kaggle/input/shopee-product-detection-open/test/test/test/3a23ef555ced0eab4428434db1e21b37.jpg
600 /kaggle/input/shopee-product-detection-open/test/test/test/331d907e9083c66aa835a63914c0bcb3.jpg
700 /kaggle/input/shopee-product-detection-open/test/test/test/1146cab54204f63db4ad698ce908975f.jpg
800 /kaggle/input/shopee-product-detection-open/test/test/test/f450adad3c829529a37051012941d3a3.jpg
900 /kaggle/input/shopee-product-detection-open/test/test/test/094def548a66dc564108ddb5cb9af59b.jpg
10

8200 /kaggle/input/shopee-product-detection-open/test/test/test/bdc9df6d03eca62b4ed8d57c35559b83.jpg
8300 /kaggle/input/shopee-product-detection-open/test/test/test/0d5e6140384125002c898ad71fa5e6cd.jpg
8400 /kaggle/input/shopee-product-detection-open/test/test/test/88dde68cd623f2842272e1afa6c8c50d.jpg
8500 /kaggle/input/shopee-product-detection-open/test/test/test/ddc39951c81d6468fae55e7b1d2d74e7.jpg
8600 /kaggle/input/shopee-product-detection-open/test/test/test/12a3d9502554802bb94316390a7a0c28.jpg
8700 /kaggle/input/shopee-product-detection-open/test/test/test/421cb2d08c708500d2d1dcfc806a4b33.jpg
8800 /kaggle/input/shopee-product-detection-open/test/test/test/75a0e83eea484e8b959d02c1c75ffc63.jpg
8900 /kaggle/input/shopee-product-detection-open/test/test/test/24f025fa01997060b962121d82b84083.jpg
9000 /kaggle/input/shopee-product-detection-open/test/test/test/d4ed2944e79fb588b575115d51412888.jpg
9100 /kaggle/input/shopee-product-detection-open/test/test/test/1da7d3b921676026281e29a03e7

In [None]:
# from torchvision import models
# import torch

# dir(models)

In [None]:
# num_ftrs = res_mod.fc.in_features
# res_mod.fc = nn.Linear(num_ftrs, 2)

# res_mod = res_mod.to(device)
# criterion = nn.CrossEntropyLoss()

# # Observe that all parameters are being optimized
# optimizer_ft = optim.SGD(res_mod.parameters(), lr=0.001, momentum=0.9)

# # Decay LR by a factor of 0.1 every 7 epochs
# exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

# train_model(res_mod,criterion,exp_lr_scheduler,10)
