In [1]:
from sklearn.model_selection import StratifiedKFold
import pandas as pd 
import numpy as np
import glob
from tqdm import tqdm
import cv2
from sklearn.model_selection import train_test_split
import os
!pip install torchinfo
from torchinfo import summary
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
from sklearn.metrics import recall_score
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset
from torchvision import datasets, models, transforms
from torchvision.models import resnet18

from PIL import Image
import matplotlib.pyplot as plt
from IPython.display import clear_output
!pip install git+https://github.com/openai/CLIP.git
from albumentations.core.composition import Compose, OneOf
from albumentations.pytorch import ToTensorV2
import albumentations as A    
    
import clip
from torch.utils.data import DataLoader

device = "cuda" if torch.cuda.is_available() else "cpu"


import pytorch_lightning as pl
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning import Callback
from pytorch_lightning.loggers import CSVLogger
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import WandbLogger
import wandb
 

Collecting torchinfo
  Downloading torchinfo-1.7.1-py3-none-any.whl (22 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.7.1
[0mCollecting git+https://github.com/openai/CLIP.git
  Cloning https://github.com/openai/CLIP.git to /tmp/pip-req-build-ht9_hwap
  Running command git clone --filter=blob:none --quiet https://github.com/openai/CLIP.git /tmp/pip-req-build-ht9_hwap
  Resolved https://github.com/openai/CLIP.git to commit d50d76daa670286dd6cacf3bcd80b5e4823fc8e1
  Preparing metadata (setup.py) ... [?25l- done
[?25hCollecting ftfy
  Downloading ftfy-6.1.1-py3-none-any.whl (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.1/53.1 kB[0m [31m427.7 kB/s[0m eta [36m0:00:00[0m
Building wheels for collected packages: clip
  Building wheel for clip (setup.py) ... [?25l- \ done
[?25h  Created wheel for clip: filename=clip-1.0-py3-none-any.whl size=1369409 sha256=edf10ca25ee0ca9553e784c2e9ea979898abc849aafd73cd2

## Настрока блокнота

In [2]:
class CFG:
    batch_size=256
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_epochs = 1
    wandb_project = "construction_equipment"
    wandb_entity = "vladsmirn_"
    img_size = 256
    lr = 0.001
    max_lr = 0.005

In [3]:
def get_transform(phase: str, img_size: int):
    if phase == 'train':
        return Compose([
#             A.Resize(height=img_size, width=img_size),
            A.RandomResizedCrop(height=img_size, width=img_size, scale=(0.97, 1.0)),
            A.Flip(p=0.5),
            A.RandomRotate90(p=0.5),
            A.ShiftScaleRotate(p=0.5),
            A.HueSaturationValue(p=0.1),
            A.OneOf([
                A.RandomBrightnessContrast(p=0.2),
                A.RandomGamma(p=0.2),
            ], p=0.5),
            A.OneOf([
                A.Blur(p=0.1),
                A.GaussianBlur(p=0.1),
                A.MotionBlur(p=0.1),
            ], p=0.1),
            A.OneOf([
                A.GaussNoise(p=0.1),
                A.ISONoise(p=0.1),
            ], p=0.2),
#              ToTensorV2(),
        ])
    else:
        return Compose([
            A.Resize(height=img_size, width=img_size),
#             ToTensorV2(),
        ])

In [4]:
DIR_TRAIN = "../input/construction-equipment/train/"
DIR_TEST = "../input/construction-equipment-test/test/"

PATH_TRAIN = "../input/construction-equipment/train.csv"
PATH_TEST = "../input/construction-equipment-test/test.csv"

def KFOLD(df, n_fold=5 , k_fold=1, seed=43, column_x='path', column_y='class'):
    skf = StratifiedKFold(n_splits=n_fold, shuffle=True, random_state=seed)
    
    for i_fold , (train_idx, valid_idx) in enumerate(skf.split(df[column_x], df[column_y])):
        if(i_fold+1 == k_fold):
            df_train = df.iloc[train_idx]
            df_valid = df.iloc[valid_idx]
            break

    print(f"train size: {len(df_train)}")
    print(f"valid size: {len(df_valid)}")
    
    return df_train, df_valid

class ImageDataset(Dataset):
    def __init__(self, data_df, transform=None):

        self.data_df = data_df
        self.transform = transform

    def __getitem__(self, idx):
        image_path, label = self.data_df.iloc[idx]['path'], self.data_df.iloc[idx]['class']
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = self.transform(image=image)['image'] 
        image = Image.fromarray(image)
        image = preprocess(image).to(device)  
        return image, torch.tensor(label).long()
    
    def __len__(self):
        return len(self.data_df)
    
model_clip, preprocess = clip.load('ViT-B/32', CFG.device)
model_clip.to(CFG.device)

data_df = pd.read_csv(PATH_TRAIN)
data_df["path"] = data_df["ID_img"].apply(lambda image_name : f'{DIR_TRAIN}{image_name}')
train_df, valid_df = KFOLD(data_df)
train_dataset = ImageDataset(train_df, get_transform("train",224))
valid_dataset = ImageDataset(valid_df, get_transform("valid",224))
 

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=256,
                                           shuffle=True)

valid_loader = torch.utils.data.DataLoader(dataset=valid_dataset,
                                           batch_size=256)
                                            
# CFG.steps_per_epoch = len(train_loader)
 
images, labels =  next(iter(train_loader))
model_clip.encode_image(images[:1])

100%|███████████████████████████████████████| 338M/338M [00:05<00:00, 67.8MiB/s]


train size: 3992
valid size: 998


tensor([[-4.7455e-02,  1.2756e-01, -2.2632e-01, -2.8955e-01,  1.9141e-01,
         -6.6553e-01,  3.9062e-01,  5.8691e-01, -4.0253e-02,  1.9141e-01,
          1.5088e-01, -1.4587e-01,  4.9170e-01,  2.8149e-01, -2.5098e-01,
          3.2623e-02, -5.6299e-01,  5.7227e-01, -1.4404e-01, -1.7749e-01,
          1.0352e-01, -1.1726e-02,  1.0376e-01,  2.1411e-01,  2.4429e-02,
          3.5352e-01, -2.0728e-01,  6.2042e-02, -1.2756e-01, -1.1401e-01,
         -2.3315e-02, -4.3213e-01, -8.8562e-02,  3.3984e-01, -5.2832e-01,
         -3.2153e-01, -4.0747e-01,  1.0962e-01, -2.0251e-01,  1.4082e+00,
         -2.5635e-01, -2.6392e-01,  6.5674e-02,  7.0129e-02,  1.7883e-01,
          2.6001e-01,  6.9629e-01,  4.1699e-01, -1.8030e-01,  2.7686e-01,
          3.2007e-01,  3.6377e-01,  4.1772e-01,  4.7729e-02,  5.1074e-01,
          2.8198e-01, -4.0454e-01,  1.0382e-01,  2.3950e-01, -2.9541e-01,
          2.9541e-01,  1.4392e-01,  8.3496e-02, -2.3547e-01, -1.4380e-01,
          3.1799e-02, -5.9473e-01,  4.

##   Объявление модели


In [5]:
class classification_construction(nn.Module):
    def __init__(self, CLIP_model):
        super().__init__()
        self.CLIP_model  =  CLIP_model
        self.classifier = nn.Sequential(
            nn.Linear(512, 128),
            nn.ReLU(inplace=True),
            nn.Dropout(0.1),
            nn.Linear(128, 8)
        )
 
    def forward(self, x):
        x = self.CLIP_model.encode_image(x.float())
        x = self.classifier(x.float())
        return x
    
for param in model_clip.parameters():
    param.requires_grad = False
    
# if torch.cuda.is_available(): model_clip = model_clip.cuda()   
     
model_clf = classification_construction(model_clip)

model_clf.to(CFG.device)

 
summary(model_clf)

Layer (type:depth-idx)                                                      Param #
classification_construction                                                 --
├─CLIP: 1-1                                                                 301,569
│    └─VisionTransformer: 2-1                                               432,384
│    │    └─Conv2d: 3-1                                                     (2,359,296)
│    │    └─LayerNorm: 3-2                                                  (1,536)
│    │    └─Transformer: 3-3                                                (85,054,464)
│    │    └─LayerNorm: 3-4                                                  (1,536)
│    └─Transformer: 2-2                                                     --
│    │    └─Sequential: 3-5                                                 (37,828,608)
│    └─Embedding: 2-3                                                       (25,296,896)
│    └─LayerNorm: 2-4                                              

In [6]:
def compute_recall_score(imgs,labels,model):
    model.eval()
    imgs = imgs.to(CFG.device)
    pred = model(imgs)

    pred_numpy = pred.cpu().detach().numpy()
    valid_predicts=[]
    for class_obj in pred_numpy:
          index, max_value = max(enumerate(class_obj), key=lambda i_v: i_v[1])
          valid_predicts.append(index)

    return recall_score(labels.cpu().detach().numpy(), valid_predicts, average = "macro")


class LightNPairModel(pl.LightningModule):
    def __init__(self, model,  lr=0.0001):
        super(LightNPairModel, self).__init__()
        self.model = model
        self.criterion = torch.nn.CrossEntropyLoss()
        self.lr = lr

    def forward(self, x, label=False, *args, **kwargs):
        return self.model(x[0])

    def configure_optimizers(self):
        
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=CFG.lr)
        
        self.scheduler = torch.optim.lr_scheduler.OneCycleLR(self.optimizer, 
                                                             epochs=CFG.num_epochs, steps_per_epoch=len(train_loader),
                                                             max_lr=CFG.max_lr , 
                                                             )
        scheduler = {'scheduler': self.scheduler, 'interval': 'step',}

        return [self.optimizer], [scheduler]


#     def configure_optimizers(self):
        
#         self.optimizer = torch.optim.Adam(self.model.parameters(), lr=CFG.lr)

#         return [self.optimizer] 
    
    def training_step(self, batch, batch_idx):
        images = batch[0]
        labels = batch[1]
        embedding = self.model(images)
        loss = self.criterion(embedding, labels)
        self.log('train_loss', loss)
        self.log('lr', self.optimizer.param_groups[0]['lr'])
        return loss
    
    def validation_step(self, batch, batch_idx):
        images = batch[0]
        labels = batch[1]
        embedding = self.model(images)
        loss = self.criterion(embedding, labels)
        score = compute_recall_score(images,labels,self.model)
        self.log('valid_loss', loss)    
        self.log('valid_score', score)   
        return loss
    
#     def validation_epoch_end(self, validation_step_outputs):
        
# #         image_array = image_map(self.path_images,self.model,show=False, save=False) 
# #         self.logger.experiment.log({"image": [wandb.Image(image_array)]})
#         self.log('valid_score',  compute_recall_score(valid_loader,model_clf))    
        
lit_model = LightNPairModel(model_clf, lr = CFG.lr)

In [7]:
def run_wandb(wandb_api_key, cfg):
    wandb.require(experiment="service")
    wandb.login(key=wandb_api_key)
    _cfg = cfg() 
    cfg_wandb = dict((name, getattr(_cfg, name)) for name in dir(_cfg) if not name.startswith('__'))
    
    if hasattr(cfg, 'wandb_resume_id') and cfg.wandb_resume_id :
        run = wandb.init(project=cfg.wandb_project, 
                   entity=cfg.wandb_entity, 
                   id=cfg.wandb_resume_id, 
                   resume="allow", 
                   config=cfg_wandb)
    else: 
        run = wandb.init(project=cfg.wandb_project, 
                   entity=cfg.wandb_entity, 
                   settings=wandb.Settings(start_method="fork"), 
                   config=cfg_wandb,
                   resume="allow",
                   id=wandb.util.generate_id())
        
    model_logger = WandbLogger(project=cfg.wandb_project, log_model='all')
    
    return model_logger,run

from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
model_logger, _ = run_wandb(user_secrets.get_secret("WANDB_API_KEY"), CFG)

[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mvladsmirn[0m ([33mvladsmirn_[0m). Use [1m`wandb login --relogin`[0m to force relogin


  "There is a wandb run already in progress and newly created instances of `WandbLogger` will reuse"


In [8]:
checkpoint_callback = ModelCheckpoint(monitor='valid_loss',
#                                       save_top_k=1,
                                      save_weights_only=True,
                                      verbose=False,
                                      mode='min')

if torch.cuda.is_available() : 
    trainer = Trainer(
        max_epochs=CFG.num_epochs,
        gpus=[0],
        callbacks=[checkpoint_callback], 
        logger=model_logger,
        log_every_n_steps = 1 
    )
else :
    trainer = Trainer(
        max_epochs=CFG.num_epochs,
        callbacks=[checkpoint_callback], 
        logger=model_logger,
        log_every_n_steps = 1 
    )

  f"Setting `Trainer(gpus={gpus!r})` is deprecated in v1.7 and will be removed"


In [9]:
trainer.fit(lit_model, train_dataloaders=train_loader, val_dataloaders=valid_loader)

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

## Предсказание 

In [10]:
# _, run = run_wandb(user_secrets.get_secret("WANDB_API_KEY"), CFG)
# model_clf_best = classification_construction(model_clip)
# artifact = run.use_artifact('vladsmirn_/construction_equipment/model-3qeaibwz:v14', type='model')
# artifact_dir = artifact.download()
# checkpoint =   os.path.join(artifact_dir, "model.ckpt" )

# from collections import OrderedDict
# state_dict = torch.load(checkpoint,map_location ='cpu')['state_dict']
# new_state_dict = OrderedDict()
# for k, v in state_dict.items():
#     name = k[6:]  
#     new_state_dict[name] = v

# model_clf_best.load_state_dict(new_state_dict)

 

In [11]:
test_df = pd.read_csv(PATH_TEST)
test_df = test_df.drop(["class"], axis = 1)

class TestImageDataset(Dataset):
    def __init__(self, data_df, transform=None):
        self.data_df = data_df
        self.transform = transform

    def __getitem__(self, idx):
        image_name = self.data_df.iloc[idx]['ID_img']
        
        # читаем картинку
        image = cv2.imread(DIR_TEST+ f"{image_name}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = Image.fromarray(image)
        image = preprocess(image).to(device)
        # преобразуем, если нужно
        if self.transform:
            image = self.transform(image)
        
        return image
    
    def __len__(self):
        return len(self.data_df)
    
test_dataset = TestImageDataset(test_df)

In [12]:
test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                           batch_size=32,
                                            )

In [13]:
model_clf.eval()
predicts = []
model_clf.to(CFG.device)
for imgs in tqdm(test_loader):
    
    imgs = imgs.to(CFG.device)
    pred = model_clf(imgs)
    for class_obj in pred:
      index, max_value = max(enumerate(class_obj), key=lambda i_v: i_v[1])
      predicts.append(index)

100%|██████████| 67/67 [00:23<00:00,  2.82it/s]


In [14]:
test_df["class"] = predicts

test_df.head()

test_df.to_csv("submit.csv", index=False)