In [38]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import os
import pandas as pd
import sklearn 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer
from torch.utils.data import DataLoader
import torchvision.models as models
import torchvision.transforms as transforms
from sklearn.metrics import accuracy_score, confusion_matrix ,roc_curve,roc_auc_score
from tqdm import tqdm
from PIL import Image
import warnings
from einops import rearrange
import logging

In [None]:
warnings.filterwarnings('ignore')

In [39]:
class args:
    train_path = 'input/train.csv'
    test_path = 'input/test.csv'
    MAX_EPOCH = 200
    BATCH_SIZE = 128
    weight_decay = 1e-3
    LR=0.0001
    img_size = 140
    sd_features = ['X4_sd', 'X11_sd','X18_sd', 'X26_sd', 'X50_sd', 'X3112_sd']
    label_features = ['X4_mean', 'X11_mean', 'X18_mean', 'X26_mean', 'X50_mean', 'X3112_mean']


In [40]:
train_df = pd.read_csv(args.train_path)
test_df = pd.read_csv(args.test_path)
train_df['id'] = train_df['id'].map(lambda x: args.base_path + '/train_images/' + str(x)+ ".jpeg")
test_df['id'] = test_df['id'].map(lambda x: args.base_path + '/test_images/' + str(x)+ ".jpeg")
FEATURE_COLS = test_df.columns[1:].tolist()

**Test sd?**

### Delete abnormal values

In [41]:
train_df

Unnamed: 0,id,WORLDCLIM_BIO1_annual_mean_temperature,WORLDCLIM_BIO12_annual_precipitation,WORLDCLIM_BIO13.BIO14_delta_precipitation_of_wettest_and_dryest_month,WORLDCLIM_BIO15_precipitation_seasonality,WORLDCLIM_BIO4_temperature_seasonality,WORLDCLIM_BIO7_temperature_annual_range,SOIL_bdod_0.5cm_mean_0.01_deg,SOIL_bdod_100.200cm_mean_0.01_deg,SOIL_bdod_15.30cm_mean_0.01_deg,...,X18_mean,X26_mean,X50_mean,X3112_mean,X4_sd,X11_sd,X18_sd,X26_sd,X50_sd,X3112_sd
0,train_images/192027691.jpeg,12.235703,374.466675,62.524445,72.256844,773.592041,33.277779,125,149,136,...,0.117484,1.243779,1.849375,50.216034,0.008921,1.601473,0.025441,0.153608,0.279610,15.045054
1,train_images/195542235.jpeg,17.270556,90.239998,10.351111,38.220940,859.193298,40.009777,124,144,138,...,0.389315,0.642940,1.353468,574.098472,0.003102,0.258078,0.000866,0.034630,0.010165,11.004477
2,train_images/196639184.jpeg,14.254504,902.071411,49.642857,17.873655,387.977753,22.807142,107,133,119,...,8.552908,0.395241,2.343153,1130.096731,,,,,,
3,train_images/195728812.jpeg,18.680834,1473.933350,163.100006,45.009758,381.053986,20.436666,120,131,125,...,1.083629,0.154200,1.155308,1042.686546,0.011692,2.818356,0.110673,0.011334,0.229224,141.857187
4,train_images/195251545.jpeg,0.673204,530.088867,50.857777,38.230709,1323.526855,45.891998,91,146,120,...,0.657585,10.919966,2.246226,2386.467180,0.006157,1.128000,0.026996,0.553815,0.107092,87.146899
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55484,train_images/190558785.jpeg,19.472172,244.795914,39.127552,67.074493,472.710358,27.758673,118,140,131,...,0.233690,1.783193,1.608341,969.547831,,,,,,
55485,train_images/194523231.jpeg,13.724150,1450.000000,162.260208,43.139324,652.716858,26.694387,125,144,135,...,1.017099,12.713048,2.418300,1630.015481,0.005474,0.128133,0.117010,3.164520,0.082212,136.503697
55486,train_images/195888987.jpeg,14.741204,581.866638,109.231110,89.272148,507.273010,26.874668,118,155,136,...,2.717395,10.206478,2.722599,602.229880,0.019727,0.215040,0.156309,0.919139,0.079395,26.159626
55487,train_images/135487319.jpeg,16.094763,1180.838135,80.176193,22.909716,342.184021,17.346190,109,130,117,...,4.429659,9.372170,3.251739,244.387170,,,,,,


In [42]:
quntilelist=np.array(train_df[args.label_features].quantile([0,0.95]))
IQR=quntilelist[1]-quntilelist[0]
low_bound=quntilelist[0]-1.5*IQR
upper_bound=quntilelist[1]+1.5*IQR
for i in range(len(args.label_features)):
    train_df = train_df[(train_df[args.label_features[i]] < upper_bound[i]) & (train_df[args.label_features[i]] > low_bound[i]) | train_df[args.label_features[i]].isna()]

In [43]:
quntilelist=np.array(train_df[args.sd_features].quantile([0,0.95]))
IQR=quntilelist[1]-quntilelist[0]
low_bound=quntilelist[0]-1.5*IQR
upper_bound=quntilelist[1]+1.5*IQR
for i in range(len(args.sd_features)):
    train_df = train_df[(train_df[args.sd_features[i]] < upper_bound[i]) & (train_df[args.sd_features[i]] > low_bound[i]) | train_df[args.sd_features[i]].isna()]

In [44]:
label_df = train_df[args.label_features+args.sd_features]
train_df = train_df.drop(columns = args.label_features+args.sd_features)

In [45]:
x_train,x_val,y_train,y_val = train_test_split(train_df, label_df, test_size=0.1)
x_train_images = np.array(x_train['id']) 
x_train = x_train.drop(columns = 'id')
x_val_images = np.array(x_val['id']) 
x_val = x_val.drop(columns = 'id')
y_train,y_val = np.array(y_train).astype(np.float32), np.array(y_val).astype(np.float32)

In [46]:
scaler = Normalizer()
train_features = scaler.fit_transform(x_train[FEATURE_COLS].values).astype(np.float32)
valid_features = scaler.transform(x_val[FEATURE_COLS].values).astype(np.float32)

train_labels = y_train[:,:6]
train_labels_aux = y_train[:,6:]
val_labels = y_val[:,:6]
val_labels_aux = y_val[:,6:]

train_features[np.isnan(train_features)] = -1
valid_features[np.isnan(valid_features)] = -1
train_labels[np.isnan(train_labels)] = -1
train_labels_aux[np.isnan(train_labels_aux)] = -1
val_labels[np.isnan(val_labels)] = -1
val_labels_aux[np.isnan(val_labels_aux)] = -1

In [47]:
train_dataset = [(x_train_images[i], train_features[i],train_labels[i], train_labels_aux[i] ) for i in range(len(x_train_images))]
val_dataset = [(x_val_images[i], valid_features[i],val_labels[i],val_labels_aux[i]) for i in range(len(x_val_images))]
train_loader = DataLoader(dataset = train_dataset, batch_size=args.BATCH_SIZE , shuffle= True, num_workers = 2)
val_loader = DataLoader(dataset = val_dataset, batch_size=args.BATCH_SIZE , shuffle= False, num_workers = 2)

In [49]:
def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p

class Conv(nn.Module):
    # Standard convolution
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1,
                 act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super().__init__()
        self.conv = nn.Conv1d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm1d(c2)
        self.act = nn.ReLU()

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

In [50]:
class DinoV2(nn.Module):
    def __init__(self,args) ->None:
        super(DinoV2,self).__init__()
        self.backbone = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')
        # autocast_ctx = partial(torch.cuda.amp.autocast, enabled=True, dtype=torch.float16)
        if args.pretrain_choice == 'frozen':
            for param in self.backbone.parameters():
                param.requires_grad = False
        # self.feature_model = ModelWithIntermediateLayers(self.backbone, n_last_blocks=1, autocast_ctx=autocast_ctx)
        # self.classifier = LinearClassifier(self.in_planes*2, use_n_blocks=1, use_avgpool=True, num_classes=10)
        self.in_planes = self.backbone.embed_dim
        self.consize = int((args.img_size/args.patch_size)*(args.img_size/args.patch_size))
        if args.decoder == 'Conv':
            self.decoder = ConvDecoder(self.consize,self.in_planes)
        elif args.decoder == 'Linear':
            self.decoder = LinearDecoder(self.consize,self.in_planes)
       

    def forward(self,x):
        x = self.backbone.get_intermediate_layers(x, n=1, reshape=False, norm=True, return_class_token=True)[0]
        x = self.decoder(x)
        return x
    
class LinearDecoder(nn.Module):
    def __init__(self,in_dim,emb_dim) ->None:
        super().__init__()
        self.head = nn.Linear(in_dim,1) 
        self.relu1 = nn.ReLU()
        self.fc = nn.Linear(emb_dim*2,128)
        self.relu2 = nn.ReLU()
        self.head.weight.data.normal_(mean=0.0, std=0.01)
        self.head.bias.data.zero_()
        self.fc.weight.data.normal_(mean=0.0, std=0.01)
        self.fc.bias.data.zero_()

    def forward(self,x):
        feature, class_token = x
        feature = rearrange(feature,'b h c -> b c h') #[batch_size,(img_size/14)**2,384] --> [batch_size,384,1]
        feature = self.head(feature)
        feature = self.relu1(feature)
        feature = torch.squeeze(feature) #[batch_size,384]
        x0 = torch.cat([feature, class_token],dim=1)
        x0 = self.fc(x0)
        x0 = self.relu2(x0)
        return x0
    

class ConvDecoder(nn.Module):
    def __init__(self,c0,c1,dropout=0.5):
        super().__init__()
        self.conv1 = Conv(c0, c0)
        self.conv2 = Conv(c0, c0)
        self.drop = nn.Dropout(p= dropout)

        self.linear = nn.Linear(c1 * 2, 128)
        self.linear.weight.data.normal_(mean=0.0, std=0.01)
        self.linear.bias.data.zero_()

    def forward(self, x):
        feature, class_token = x
        feature, class_token = torch.cat([feature.detach()], dim=-1), torch.cat([class_token.detach()], dim=-1)
        feature = self.drop(self.conv2(self.conv1(feature)))
        x0 = torch.cat((torch.mean(feature, dim=1), class_token), dim=-1) #concate features and cls_token
        return self.linear(x0)

In [52]:
class MLP(nn.Module):
    def __init__(self) -> None:
        super(MLP,self).__init__()
        self.linear1 = nn.Sequential(nn.Linear(163, 326), nn.SELU(), nn.Linear(326, 256), nn.SELU())
        self.linear2 = nn.Sequential(nn.Linear(256, 128), nn.SELU(), nn.Linear(128, 64), nn.SELU())
        self.dropout = nn.Dropout()
    def forward(self, x):
        x = self.linear1(x)
        x = self.linear2(x)
        x = self.dropout(x)
        return x  
    
class Combine_model(nn.Module):
    def __init__(self,args) -> None:
        super(Combine_model,self).__init__()
        # self.model1 = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
        self.model1 = DinoV2(args) #output size [batch,(H/patch_size)*(W/patch_size),768]
        self.model2 = MLP()
        self.out_fc1 = nn.Sequential(
            nn.Linear(128+64,24), 
            nn.ReLU(),
            nn.Linear(24,6)
        )
        self.out_fc2 = nn.Sequential(
            nn.Linear(128+64,24), 
            nn.ReLU(),
            nn.Linear(24,6)
        )

    def forward(self,x,y):
        x = self.model1(x)
        y = self.model2(y)
        x = torch.cat([x,y], dim = 1)
        return self.out_fc1(x), self.out_fc2(x)

In [53]:
def transform(imgs,img_size,type='train',backbone='ResNet'):
        
    if type == 'train':
         trans = transforms.Compose([
                    transforms.Resize((img_size, img_size)),
                    transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=(0.45, 0.55), hue=0.1),
                    transforms.RandomHorizontalFlip(p=0.5),
                    transforms.RandomRotation((-10,10)),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5])
                ])
    elif type == 'validate':
        trans = transforms.Compose([
                transforms.Resize((img_size, img_size)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.5,0.5,0.5], std=[0.5,0.5,0.5])
            ])
        
    return trans(imgs)


In [54]:
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

In [55]:
class R2Loss(nn.Module):
    def __init__(self, use_mask=False):
        super(R2Loss, self).__init__()
        self.use_mask = use_mask

    def forward(self,y_pred , y_true):
        if self.use_mask:
            mask = (y_true != -1)
            y_true = torch.where(mask, y_true, torch.zeros_like(y_true))
            y_pred = torch.where(mask, y_pred, torch.zeros_like(y_pred))
        SS_res = torch.sum((y_true - y_pred) ** 2,axis=0)
        SS_tot = torch.sum((y_true - torch.mean(y_true, 1, True)) ** 2,axis=0)
        r2_loss = SS_res / (SS_tot + 1e-6)
        return torch.mean(r2_loss)
    

class MSELoss(nn.Module):
    def __init__(self, use_mask=False):
        super(MSELoss, self).__init__()
        self.use_mask = use_mask

    def forward(self, y_pred, y_true):
        if self.use_mask:
            mask = (y_true != -1)
            y_true = torch.where(mask, y_true, torch.zeros_like(y_true))
            y_pred = torch.where(mask, y_pred, torch.zeros_like(y_pred))
        mse_loss = torch.mean((y_true - y_pred) ** 2)
        return mse_loss

In [56]:
def train_epoch(train_loader,model, criterion1, criterion2, optimizer, scheduler, device,args):
    args.run_type = 'train'
    model.train()
    total_loss = 0
    for i, data in tqdm(enumerate(train_loader)):
        # Unpack data
        img_path, inputs, labels, labels_aux = data
        
        # Prepare image paths
        # # Prepare inputs and labels
        inputs = inputs.to(device)
        labels = labels.to(device)
        labels_aux = labels_aux.to(device)

        # Load and transform images
        batch_size = len(img_path)
        images = torch.zeros(batch_size, 3, args.img_size, args.img_size, dtype=torch.float32)
        for i in range(batch_size):
            image = Image.open(img_path[i]).convert("RGB")
            images[i] = transform(image,args.img_size,args.run_type,args.backbone)  # Assuming train_transform is defined elsewhere
        # Forward pass
        images = images.to(device)
        output1,output2 = model(images, inputs)
        # Backward and optimize
        optimizer.zero_grad()
        loss1 = criterion1[0](output1, labels)       
        loss2 = criterion2[0](output2,labels_aux) 
        loss = loss1 + 0.2*loss2
        loss.backward()
        optimizer.step()
        scheduler.step()
        total_loss += loss.item()
    avg_loss = total_loss / len(train_loader)

    return avg_loss


In [57]:
def evaluate_epoch(test_loader,model,criterion1,criterion2,device,args):
    model.eval()
    args.run_type = 'validate'
    total_loss = 0
    total_r2_loss = 0
    output = []
    total_label = []
    with torch.no_grad():
        for i, data in tqdm(enumerate(test_loader)):
            # Unpack data
            img_path, inputs, labels, labels_aux = data
            # Prepare image paths
            # # Prepare inputs and labels
            inputs = inputs.to(device)
            labels = labels.to(device)
            labels_aux = labels_aux.to(device)

            batch_size = len(img_path)
            images = torch.zeros(batch_size, 3, args.img_size, args.img_size, dtype=torch.float32)

            for i in range(batch_size):
                image = Image.open(img_path[i]).convert("RGB")
                images[i] = transform(image,args.img_size,args.run_type,args.backbone)   # Assuming train_transform is defined elsewhere
            # Forward pass
            images = images.to(device)
            output1,output2 = model(images, inputs)
            output.append(output1)
            total_label.append(labels)
            loss1 = criterion1[0](output1, labels) + 0.2*criterion2[0](output2,labels_aux)     
            loss2 = criterion1[0](output1, labels)
            total_r2_loss += loss2.item()
            total_loss += loss1.item()
            total_r2 = criterion1[0](torch.cat(output, dim = 0), torch.cat(total_label, dim = 0))
        avg_loss = total_loss/len(test_loader)
        avg_r2_loss = total_r2_loss/len(test_loader)
    return avg_loss, avg_r2_loss, total_r2

In [58]:
args.backbone = 'DinoV2'
args.BATCH_SIZE = 128
args.patch_size = 14
args.pretrain_choice = 'frozen'
args.decoder = 'Linear'
loses = []
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = Combine_model(args).to(device)
optimizer = torch.optim.Adam(model.parameters(), weight_decay=args.weight_decay, lr=args.LR)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=args.MAX_EPOCH, eta_min=0, last_epoch=-1)
criterion1 = [R2Loss(use_mask=False), MSELoss(use_mask=False)]
criterion2 = [R2Loss(use_mask=True), MSELoss(use_mask=True)]
best_r2 = -5
args.output_path = '/kaggle/working/'
# set log
logging.basicConfig(filename=f'{args.output_path}/train_{args.backbone}_log.txt', level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
for epoch in range(args.MAX_EPOCH):
    print('epoch',epoch)
    train_loss = train_epoch(train_loader,model,criterion1,criterion2,optimizer,scheduler,device,args)
    test_loss, test_r2_loss, total_R2 = evaluate_epoch(val_loader,model, criterion1,criterion2, device, args)
    best_r2 = max(best_r2, 1 - total_R2)
    print("Train_loss is: {}, test loss is: {}, test R2 loss is: {}, test R2 is: {}, current best result: {}".format(train_loss, test_loss, test_r2_loss, 1 - total_R2, best_r2))
    logging.info("Epoch %s", epoch)
    logging.info("Train loss: %s", train_loss)
    logging.info("Test loss: %s", test_loss)
    logging.info("Test R2 loss: %s", test_r2_loss)
    logging.info("Test R2: %s", 1 - total_R2)
    loses.append([train_loss, test_r2_loss, test_r2_loss, total_R2])
    if best_r2 == 1 - total_R2:
        torch.save(model, args.output_path + "/Dino.pkl")

Using cache found in C:\Users\A/.cache\torch\hub\facebookresearch_dinov2_main


epoch 0


173it [05:45,  2.03s/it]

In [None]:
losses = [train_losses, test_losses]
point = pd.DataFrame(losses,columns=np.arange(1,MAX_EPOCH+1),index=['train','test']).T
ax=sns.lineplot(data=point)
ax.set(xlabel='epoch',ylabel='R2')