In [None]:
# Inject a CSS rule that stretches `.container` elements to the full page width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container{width:100%}</style>"))

In [None]:
import glob
import hashlib
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import PIL
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models
import tqdm
import tqdm.notebook  # `import tqdm` alone does not load the notebook submodule
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import ReduceLROnPlateau,StepLR
from torch.utils.data import Dataset,DataLoader
from torchvision import transforms
# Use the first CUDA GPU when one is available, otherwise run on the CPU.
device = torch.device(
    "cuda:0" if torch.cuda.is_available() else "cpu"
)

In [None]:
# Build the train / test file tables (columns: id, addr[, label]).
addr = '../'  # dataset root, relative to the notebook
df = pd.read_csv(addr+'defect_area.csv')

# sorted() makes the row order independent of the file system's listing order.
train_good = sorted(glob.glob(addr+'train/train_contest/good_all/*.bmp'))
train_bad = sorted(glob.glob(addr+'train/train_contest/defect/*.bmp'))
train_ = train_good + train_bad
labels = [0]*len(train_good) + [1]*len(train_bad)  # 0 = good, 1 = defect
# basename/splitext cope with the platform's path separator and only strip the
# final extension, so ids containing dots are not truncated.
names = [os.path.splitext(os.path.basename(p))[0] for p in train_]
train_data = pd.DataFrame({'id': names, 'addr':train_, 'label':labels})
del train_good, train_bad,names,labels
sample_sub = pd.read_csv(addr+'submission_sample.csv')

# Test paths follow the row order of the sample submission, so predictions
# made in this order line up with `sample_sub` row by row.
test_all = [addr+ 'test/test_contest/test/'+x+'.bmp' for x in sample_sub['id']]

names = [os.path.splitext(os.path.basename(p))[0] for p in test_all]
test_data = pd.DataFrame({'id': names, 'addr': test_all})
#del names, test_all

In [None]:
def getImageMetaData(p):
    """Return the path together with the MD5 hex digest of the file's bytes.

    Args:
        p: Path of the file to hash.

    Returns:
        Tuple ``(p, md5_hex)``. ``md5_hex`` is the empty string when the file
        cannot be opened or read (best effort, no exception is raised for that).
    """
    md5 = hashlib.md5()
    try:
        with open(p, "rb") as file:
            # Hash in fixed-size chunks so the file is never fully in memory.
            for chunk in iter(lambda: file.read(8096), b""):
                md5.update(chunk)
    except (OSError, TypeError, ValueError):
        # Unreadable file or unusable path: report "no digest" instead of raising.
        return p, ""
    return p, md5.hexdigest()

In [None]:
def read_image(p,size=(224,224)):
    """Load an image file as an RGB array resized to ``size``.

    Args:
        p: Path of the image file.
        size: Target size handed to ``cv2.resize`` (``(width, height)`` order).

    Returns:
        The resized RGB image as a new array.

    Raises:
        OSError: If OpenCV cannot read ``p``.
    """
    img = cv2.imread(p)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here with the path instead of a cryptic cvtColor error.
        raise OSError(f"cv2.imread could not read image: {p!r}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads images as BGR
    return cv2.resize(img, size)

def visual(n=12, data=train_data):
    """Show ``n`` randomly chosen images from ``data`` in a 4-column grid.

    Args:
        n: Number of images to draw (rows are sampled with replacement).
        data: DataFrame with an ``addr`` column of image paths and, optionally,
            a ``label`` column (0 = good, 1 = bad) used for the panel titles.
    """
    if n <= 0:
        return
    c = 4
    r = -(-n // c)  # ceil(n / c): 3 rows for the default n=12
    fig,axes = plt.subplots(r,c,figsize=(12,10*r/3),squeeze=False)
    panels = axes.ravel()
    # Positional row numbers (used with .iloc below) so frames with a
    # non-default index, e.g. a train/validation split, work as well.
    indx = np.random.randint(0,data.shape[0],n)
    has_label = 'label' in data.columns
    for ax, row in zip(panels, indx):
        ax.imshow(read_image(data['addr'].iloc[row]))
        if has_label:
            ax.set_title(f"{['Good','Bad'][int(data['label'].iloc[row])]}")
    for ax in panels:
        ax.axis('off')  # also blanks unused panels of a partial last row

visual()

In [None]:
class Nexperia_data(Dataset):
    """Dataset serving images (plus labels for training data) listed in a DataFrame.

    ``dataset`` must provide an ``addr`` column with image paths; when ``typ``
    is ``'train'`` it must also provide a ``label`` column.
    """

    def __init__(self,dataset = None,typ='train',transform = None):
        self.data = dataset
        self.type = typ
        self.transform = transform
        # Pull the columns out once as arrays for cheap per-item access.
        self.img_addrs = dataset['addr'].values
        if typ == 'train':
            self.img_labels = dataset['label'].values

    def __len__(self):
        """Number of rows in the underlying frame."""
        return self.data.shape[0]

    def __getitem__(self,idx):
        """Return ``(image, label)`` for training data, otherwise only the image."""
        image = read_image(self.img_addrs[idx])
        if self.transform:
            # The transform is applied to a PIL image built from the array.
            image = self.transform(PIL.Image.fromarray(image))

        if self.type != 'train':
            return image
        return image, self.img_labels[idx]
       

In [None]:
class Nexmodel(nn.Module):
    """Pretrained VGG19 whose last classifier layer is replaced by a 2-class head.

    ``forward`` returns raw class scores (logits). ``nn.CrossEntropyLoss``
    applies log-softmax itself, so feeding it already-softmaxed outputs would
    normalise twice and flatten the gradients. ``argmax`` over the logits gives
    the same class as ``argmax`` over the probabilities; call
    ``predict_proba`` when actual probabilities are needed.
    """

    def __init__(self):
        super(Nexmodel,self).__init__()
        self.backbone = models.vgg19(pretrained=True)
        # Swap the final fully connected layer for a two-output one.
        in_features = self.backbone.classifier[6].in_features
        self.backbone.classifier[6] = nn.Linear(in_features,2)
        # Not used by forward(); kept so existing attribute access keeps working.
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax(dim=1)  # normalise over the class dimension

    def forward(self,x):
        """Return the unnormalised class scores, one row of 2 values per input."""
        return self.backbone(x)

    def predict_proba(self,x):
        """Return class probabilities (softmax over the class dimension)."""
        return self.softmax(self.forward(x))

In [None]:

# Split the labelled data and build the training / validation loaders.
torch.cuda.empty_cache()
BATCH_SIZE = 8
SPLIT_SEED = 42  # fixed so the train/validation split is reproducible
tr,vl = train_test_split(
    train_data,
    test_size=0.2,
    random_state=SPLIT_SEED,
    stratify=train_data['label'],  # same good/defect ratio in both parts
)
transform = transforms.Compose(
            [
                transforms.ToTensor(),
                # The pretrained torchvision VGG weights expect inputs
                # normalised with the ImageNet channel statistics.
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),
            ]
    )
tr_data_loader = DataLoader(Nexperia_data(dataset=tr,transform=transform),batch_size=BATCH_SIZE,shuffle=True)
# Validation only measures the model, so a fixed order is sufficient.
vl_data_loader = DataLoader(Nexperia_data(dataset=vl,transform=transform),batch_size=BATCH_SIZE,shuffle=False)

In [None]:

# Model on the selected device, plain SGD optimiser and cross-entropy loss.
model = Nexmodel().to(device)
optimizer = optim.SGD(params=model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [None]:

# Fine-tune the model for five epochs and report the mean loss of each one,
# so a stalled or diverging run is visible.
for ep in range(5):
    print(f'ep: {ep}')
    model.train()
    running_loss, seen = 0.0, 0
    for img,labels in tqdm.notebook.tqdm(tr_data_loader):
        img = img.to(device)
        labels = labels.to(device).long()

        optimizer.zero_grad()
        otpt = model(img)
        loss = criterion(otpt,labels)
        loss.backward()
        optimizer.step()

        # Weight by batch size so a smaller last batch does not skew the mean.
        running_loss += loss.item() * img.size(0)
        seen += img.size(0)
    if seen:
        print(f'train loss: {running_loss / seen:.4f}')
        
    

In [None]:
# Run the model over the validation split and collect labels / predictions.
model.eval()  # inference mode: the VGG classifier's dropout layers are disabled
lbl_chunks, pred_chunks = [], []
with torch.no_grad():  # no gradients needed, saves memory
    for img,labels in tqdm.notebook.tqdm(vl_data_loader):
        lbl_chunks.append(labels.numpy().reshape(-1,))
        otpt = model(img.to(device))
        pred_chunks.append(np.argmax(otpt.cpu().numpy(),1))

# 1-D arrays of true / predicted class ids; None when the loader yields no batches.
lbls = np.hstack(lbl_chunks) if lbl_chunks else None
op = np.hstack(pred_chunks) if pred_chunks else None
if lbls is not None:
    print(f'validation accuracy: {(op == lbls).mean():.4f}')
    
    

In [None]:
# Loader for the unlabelled test images: one image per batch, in the row order
# of `test_data`.
tst_data_loader = DataLoader(
    Nexperia_data(dataset=test_data, typ='test', transform=transform),
    batch_size=1,
    shuffle=False,
)

In [None]:
# Predict a class id (0 or 1) for every test image.
# eval() is required here: the model is otherwise still in training mode,
# where dropout makes the predictions random from run to run.
model.eval()
tst_chunks = []
with torch.no_grad():  # no gradients needed, saves memory
    for img in tqdm.notebook.tqdm(tst_data_loader):
        otpt = model(img.to(device))
        tst_chunks.append(np.argmax(otpt.cpu().numpy(),1))
# 1-D array of predicted class ids; None when the loader yields no batches.
tst_op = np.hstack(tst_chunks) if tst_chunks else None

In [None]:
# Store the predicted class ids as the submission's `defect_score` column.
sample_sub["defect_score"] = tst_op

In [None]:
# Write the submission table to disk, leaving out the DataFrame index.
sample_sub.to_csv(
    'vgg19_0.96791_submission.csv',
    index=False,
)