<a href="https://colab.research.google.com/github/moonsh2050/test/blob/main/vision2_copying.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
from google.colab import drive
drive.mount('/content/gdrive')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
from tqdm import tqdm
import imutils
import zipfile
import os
from PIL import Image
from typing import Tuple, Sequence, Callable
import glob
from sklearn.model_selection import KFold
import time
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.models as models
from torchvision import transforms
import torchvision.transforms as T
from torch.utils.data import DataLoader, Dataset
from google.colab import output

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
labels_df = pd.read_csv('/content/gdrive/MyDrive/vision2/dirty_mnist_2nd_answer.csv')[:]
imgs_dir = np.array(sorted(glob.glob('/content/gdrive/MyDrive/vision2/dirty_mnist/*')))[:]
labels = np.array(labels_df.values[:,1:])

test_imgs_dir = np.array(sorted(glob.glob('/content/gdrive/MyDrive/vision2/test_dirty_mnist/*')))

labels_df
imgs_dir

labels.shape
# (50000, 26)
class MnistDataset(Dataset):
    def __init__(self, imgs_dir=None, labels=None, transform=None, train=True):
        self.imgs_dir = imgs_dir
        self.labels = labels
        self.transform = transform
        self.train = train
        pass
    
    def __len__(self):
        # 데이터 총 샘플 수
        return len(self.imgs_dir)
    
    def __getitem__(self, idx):
        # 1개 샘플 get
        img = cv2.imread(self.imgs_dir[idx], cv2.IMREAD_COLOR)
        img = self.transform(img)
        if self.train==True:
            label = self.labels[idx]
            return img, label
        else:
            return img
        
        pass

resnext = torch.hub.load('pytorch/vision:v0.6.0', 'resnext50_32x4d', pretrained=True)

class Resnext(nn.Module):
    def __init__(self):
        super(Resnext, self).__init__()
        self.resnext = resnext 
        self.FC = nn.Linear(1000, 26)
        nn.init.xavier_normal_(self.FC.weight)
      
        

    def forward(self, x):
        x = self.resnext(x)
        x = torch.sigmoid(self.FC(x))
        return x

model = Resnext()
model


kf = KFold(n_splits=5, shuffle=True)
folds=[]
for train_idx, valid_idx in kf.split(labels_df):
    folds.append((train_idx, valid_idx))

train_idx
valid_idx
folds[1][1]
len(folds[1][0])

def seed_everything(seed: int = 42):
    random.seed(seed)
    np.random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)  # type: ignore
    torch.backends.cudnn.deterministic = True  # type: ignore
    torch.backends.cudnn.benchmark = True

#seed_everything(42)
for fold in range(1):
    model = Resnext().to(device)
#     model = nn.DataParallel(model)
    train_idx = folds[fold][0]
    valid_idx = folds[fold][1]



    train_transform = transforms.Compose([                                
        transforms.ToTensor(),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
    )
        ])
    valid_transform = transforms.Compose([                                 
        transforms.ToTensor(),
        transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225]
    )
        ])


    epochs=30
    batch_size=64        
    
    
    
    # Data Loader
train_dataset = MnistDataset(imgs_dir=imgs_dir[train_idx], labels=labels[train_idx], transform=train_transform)
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

valid_dataset = MnistDataset(imgs_dir=imgs_dir[valid_idx], labels=labels[valid_idx], transform=valid_transform)
valid_loader = DataLoader(dataset=valid_dataset, batch_size=32, shuffle=False)  
    
optimizer = torch.optim.Adam(model.parameters(), lr = 0.001)
scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones= [23,29], gamma=0.5)

criterion = torch.nn.BCELoss()
    
epoch_accuracy = []
valid_accuracy = []
valid_losses=[]
valid_best_accuracy=0

best_models=[]

for epoch in range(epochs):
  with tqdm(train_loader,total=train_loader.__len__(),unit='batch') as train_bar:
    model.train()
    batch_accuracy_list = []
    batch_loss_list = []
    start=time.time()
    for n, (X, y) in enumerate((train_bar)):
        train_bar.set_description(f"Train Epoch {epoch}")
        X = torch.tensor(X, device=device, dtype=torch.float32)
        y = torch.tensor(y, device=device, dtype=torch.float32)
        y_hat = model(X)
            
            
        optimizer.zero_grad()
        loss = criterion(y_hat, y)
        loss.backward()
        optimizer.step()
      
        y_hat  = y_hat.cpu().detach().numpy()
        y_hat = y_hat>0.5
        y = y.cpu().detach().numpy()

        batch_accuracy = (y_hat == y).mean()
        batch_accuracy_list.append(batch_accuracy)
        batch_loss_list.append(loss.item())
        train_acc = np.mean(batch_accuracy_list)
            
        train_bar.set_postfix(train_loss= loss.item(),train_acc = train_acc)

    model.eval()
    valid_batch_accuracy=[]
    valid_batch_loss = []

    with torch.no_grad():
      with tqdm(valid_loader,total=valid_loader.__len__(),unit="batch") as valid_bar:
        for n_valid, (X_valid, y_valid) in enumerate((valid_bar)):
            valid_bar.set_description(f"Valid Epoch {epoch}")
            X_valid = torch.tensor(X_valid, device=device)#, dtype=torch.float32)
            y_valid = torch.tensor(y_valid, device=device, dtype=torch.float32)
            y_valid_hat = model(X_valid)
                
            valid_loss = criterion(y_valid_hat, y_valid).item()
            
            y_valid_hat = y_valid_hat.cpu().detach().numpy()>0.5
                
                
            valid_batch_loss.append(valid_loss)
            valid_batch_accuracy.append((y_valid_hat == y_valid.cpu().detach().numpy()).mean())
            val_acc=np.mean(valid_batch_accuracy)
            valid_bar.set_postfix(valid_loss = valid_loss,valid_acc = val_acc)
              
        valid_losses.append(np.mean(valid_batch_loss))
        valid_accuracy.append(np.mean(valid_batch_accuracy))

    scheduler.step()

    if np.mean(valid_batch_accuracy) > 0.87:
        path = "/content/gdrive/MyDrive/Colab Notebooks/models/"
        MODEL = "resnext"
        torch.save(model, f'{path}_{MODEL}_{valid_loss:2.4f}_epoch_{epoch}.pth')

    if np.mean(valid_batch_accuracy)>valid_best_accuracy:
        best_model=model
        valid_best_accuracy = np.mean(valid_batch_accuracy)

best_models.append(best_model)

best_modelss=[]
model1 = torch.load('/content/gdrive/MyDrive/Colab Notebooks/models/resnext_0.2259_epoch_25.pth')
best_modelss.append(model1)

test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(
        [0.485, 0.456, 0.406],
        [0.229, 0.224, 0.225])
        ])

submission = pd.read_csv("sample_submission.csv")

for model in best_modelss:
    with torch.no_grad():
        model.eval()

        test_dataset = MnistDataset(imgs_dir=test_imgs_dir, transform=test_transform, train=False)
        test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

        for n, X_test in enumerate(tqdm(test_loader)):
            X_test = torch.tensor(X_test, device=device, dtype=torch.float32)
            with torch.no_grad():
                model.eval()  
                pred_test = model(X_test).cpu().detach().numpy()
                submission.iloc[n*32:(n+1)*32,1:] += pred_test
submission.iloc[:,1:] = np.where(submission.values[:,1:]>=0.5, 1,0)
submission

!pip install ttach
import ttach as tta


best_modelss=[]
model1 = torch.load('/content/gdrive/MyDrive/Colab Notebooks/models/624_resnext_0.2651_epoch_24.pth')
best_modelss.append(model1)

## tta

submission = pd.read_csv("sample_submission.csv")

for model in best_modelss:
    with torch.no_grad():
        model.eval()

        test_dataset = MnistDataset(imgs_dir=test_imgs_dir, transform=test_transform, train=False)
        test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

        tta_model=tta.ClassificationTTAWrapper(model,tta.aliases.ten_crop_transform(224,224),merge_mode='mean')

        for n, X_test in enumerate(tqdm(test_loader)):
            X_test = torch.tensor(X_test, device=device, dtype=torch.float32)
            with torch.no_grad():
                model.eval()  
                pred_test = tta_model(X_test).cpu().detach().numpy()
                submission.iloc[n*32:(n+1)*32,1:] += pred_test

submission.iloc[:,1:] = np.where(submission.values[:,1:]>=0.5, 1,0)
submission

submission.to_csv('0223tta2651.csv', index=False)

pred1=pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/02212094.csv')
pred1.head()

pred2=pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/02212151.csv')
pred2.head()

pred3=pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/02212259.csv')
pred3.head()

pred4=pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/best2651.csv')
pred4.head()


pred5=pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/best2951.csv')
pred5.head()


final=((pred1 + pred2 + pred3+pred4+pred5)/5 > 0.5)*1
final['index']=pred1['index']
final

final.to_csv('ensemble4.csv', index=False)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.6.0


IndexError: ignored