In [None]:
!pip install timm

In [None]:
import numpy as np
import pandas as pd

import torch
import os

from PIL import Image
from PIL import ImageFile


from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score,accuracy_score,recall_score, precision_score,classification_report

import random
import timm
import torch.nn as nn
import torch.nn.functional as F
import albumentations
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import warnings
warnings.filterwarnings("ignore")

In [None]:
def set_seed(seed=42):
    '''Seed every random number generator this notebook relies on.

    Seeds Python's `random`, NumPy and PyTorch (CPU and all CUDA devices) and
    puts cuDNN into deterministic mode so repeated runs give the same results.

    Args:
        seed: integer seed shared by all generators.
    '''
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current device. This is a no-op
    # when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    # cuDNN: force deterministic kernels and switch the auto-tuner off.
    # With benchmark enabled cuDNN may select different algorithms from run
    # to run, which defeats the purpose of seeding.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Only interpreters started after this point (e.g. subprocesses) pick up
    # the hash seed; the running interpreter's hash seed is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)


set_seed(42)

In [None]:
# Article metadata. Every column is read as a string (dtype=str), presumably so
# that article ids keep their exact textual form for building file names.
articles = pd.read_csv(
    '../input/h-and-m-personalized-fashion-recommendations/articles.csv',
    dtype=str,
)
articles['filename'] = articles['article_id'].astype(str) + '.jpg'

# Restrict to the eight product types used as classification targets.
articles_subset = articles.loc[
    articles['product_type_name'].isin(
        ['Trousers', 'Dress', 'Sweater', 'T-shirt', 'Shorts', 'Shirt', 'Underwear bottom', 'Skirt']
    )
].copy()

# Images are stored in sub-folders named after the first three characters of the id.
articles_subset['image_path'] = (
    "../input/h-and-m-personalized-fashion-recommendations/images/"
    + articles_subset['article_id'].astype(str).str.slice(0, 3)
    + '/'
    + articles_subset['filename']
)
# Flag rows whose image file is actually present on disk.
articles_subset['image_exists'] = articles_subset['image_path'].apply(os.path.isfile)

# Keep only the needed columns; reset_index() also keeps the previous index
# as an 'index' column.
articles_subset = articles_subset[
    ['article_id', 'product_type_name', 'filename', 'image_path', 'image_exists']
].reset_index()
articles_subset.head(5)

In [None]:
# Keep only articles whose image file exists. .copy() makes the result an
# independent frame, so adding columns to it later does not write into a slice
# of the unfiltered frame (chained-assignment hazard).
articles_subset = articles_subset[articles_subset['image_exists']].copy()

In [None]:
class HnMadata(Dataset):
    """Map-style dataset that loads images from disk and pairs them with labels.

    Args:
        image_paths: iterable of image file paths.
        targets: iterable of integer class labels, aligned with `image_paths`.
        resize: optional (height, width) pair; when given, each image is
            resized with bilinear resampling before augmentation.
        augementations: optional callable invoked as ``f(image=ndarray)`` and
            expected to return a mapping with an ``"image"`` entry. The
            misspelled parameter name is kept so that existing keyword callers
            keep working.
    """

    def __init__(self,image_paths, targets, resize=None,augementations=None):
        # Materialise as plain lists so __getitem__ indexes by position. A
        # pandas Series would otherwise be indexed by label and fail (or return
        # the wrong row) whenever its index is not exactly 0..n-1.
        self.image_paths = list(image_paths)
        self.targets = list(targets)
        self.resize = resize
        self.augmentations = augementations

    def __len__(self):
        """Return the number of samples."""
        return len(self.image_paths)

    def __getitem__(self, item):
        """Load sample `item` and return it as a dict of tensors.

        Returns:
            dict with ``"image"`` (float tensor, channels first) and
            ``"targets"`` (long tensor holding the label).
        """
        # The context manager releases the underlying file handle once the
        # pixel data has been converted.
        with Image.open(self.image_paths[item]) as handle:
            image = handle.convert("RGB")

        if self.resize is not None:
            # PIL expects (width, height) while `resize` is (height, width).
            image = image.resize((self.resize[1], self.resize[0]), resample=Image.BILINEAR)
        image = np.array(image)

        if self.augmentations is not None:
            image = self.augmentations(image=image)['image']

        # HWC -> CHW, the layout the model consumes.
        image = np.transpose(image, (2, 0, 1)).astype(np.float32)

        return {
            "image": torch.tensor(image, dtype=torch.float),
            "targets": torch.tensor(self.targets[item], dtype=torch.long),
        }

In [None]:
# Map each product-type string to an integer class id.
le = preprocessing.LabelEncoder()
articles_subset['labels'] = le.fit_transform(articles_subset['product_type_name'])

# Stratified hold-out split on the class ids, using train_test_split's default
# test fraction and a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(
    articles_subset['image_path'],
    articles_subset['labels'],
    stratify=articles_subset['labels'],
    random_state=42,
)


In [None]:
# Renumber every split from 0 so that the integer indices produced by the
# DataLoader line up with the rows. Rebinding (instead of a tuple of in-place
# calls) keeps the cell from echoing a meaningless `(None, None)`.
X_train = X_train.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)

In [None]:
# Per-channel normalisation statistics (the standard ImageNet values).
mean = (0.485, 0.456, 0.406)
std = (0.229, 0.224, 0.225)

# Training pipeline: normalise, then a random shift/scale/rotate and a random flip.
# NOTE(review): Normalize runs before the geometric transforms here; it is more
# commonly placed last - confirm the ordering is intentional.
train_aug = albumentations.Compose([
    albumentations.Normalize(mean=mean, std=std, max_pixel_value=255.0, always_apply=True),
    albumentations.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=15),
    albumentations.Flip(p=0.5),
])

# Validation pipeline: normalisation only, no random transforms.
valid_aug = albumentations.Compose([
    albumentations.Normalize(mean=mean, std=std, max_pixel_value=255.0, always_apply=True),
])

In [None]:
# Training data: augmented samples, reshuffled every epoch.
trainset = HnMadata(
    image_paths=X_train,
    targets=y_train,
    resize=(227, 227),
    augementations=train_aug,
)
trainloader = DataLoader(
    trainset,
    batch_size=32,
    shuffle=True,
    num_workers=2,
)

In [None]:
# Validation data: normalisation only, iterated in a fixed order.
valset = HnMadata(
    image_paths=X_test,
    targets=y_test,
    resize=(227, 227),
    augementations=valid_aug,
)
valloader = DataLoader(
    valset,
    batch_size=32,
    shuffle=False,
    num_workers=2,
)

In [None]:
class Efficientnet(nn.Module):
    """timm backbone with its classification head replaced by a small MLP.

    Args:
        model_name: timm model identifier passed to `timm.create_model`.
        pretrained: whether to load pretrained backbone weights.
        num_classes: number of output logits (defaults to the 8 product types
            used in this notebook).
    """

    def __init__(self, model_name, pretrained=True, num_classes=8):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        n_features = self.model.classifier.in_features
        # The backbone's forward pass goes through `classifier` (the same
        # attribute read above for in_features). Assigning the new head to any
        # other attribute, such as `fc`, leaves it unused and the network keeps
        # emitting the original pretrained logits.
        self.model.classifier = nn.Sequential(
            nn.Linear(in_features=n_features, out_features=256),
            nn.ReLU(),
            nn.BatchNorm1d(256, eps=1e-05, momentum=0.1),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=256, out_features=num_classes),
        )

    def forward(self, x):
        """Return the class logits for a batch of images."""
        return self.model(x)

In [None]:
from tqdm import tqdm
# Pretrained EfficientNet-B0 wrapped with the custom head defined above.
model = Efficientnet(
    model_name='tf_efficientnet_b0',
    pretrained=True,
)
# Multi-class cross-entropy on raw logits.
criterion = nn.CrossEntropyLoss()
# Adam over every parameter of the wrapped model.
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

In [None]:
# Dump the module tree to inspect the layers of the assembled network.
print(model)

In [None]:
def train(data_loader, model, optimizer, device, criterion=None):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        data_loader: iterable of batches, each a dict with an "image" tensor
            and a "targets" tensor; must support len().
        model: module being optimised (put into train mode here).
        optimizer: optimizer already bound to the model's parameters.
        device: device the batches are moved to before the forward pass.
        criterion: loss callable ``(logits, targets) -> scalar tensor``.
            Defaults to ``nn.CrossEntropyLoss()`` so the function no longer
            depends on a module-level variable. Pass a loss explicitly to use
            anything else.

    Returns:
        Mean of the per-batch losses as a float, or NaN when the loader
        yields no batches.
    """
    loss_fn = criterion if criterion is not None else nn.CrossEntropyLoss()
    model.train()
    running_loss = 0.0
    for data in tqdm(data_loader):
        inputs = data["image"].to(device, dtype=torch.float)
        targets = data["targets"].to(device, dtype=torch.long)

        optimizer.zero_grad()
        loss = loss_fn(model(inputs), targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    n_batches = len(data_loader)
    # Avoid a ZeroDivisionError when the loader is empty.
    epoch_loss = running_loss / n_batches if n_batches else float('nan')
    print('Train Loss: {:.4f}'.format(epoch_loss))
    return epoch_loss

In [None]:
def evaluate(data_loader, model, device, criterion=None):
    """Evaluate the model on a loader and collect predictions and labels.

    Args:
        data_loader: iterable of batches, each a dict with an "image" tensor
            and a "targets" tensor; must support len().
        model: module to evaluate (put into eval mode here).
        device: device the batches are moved to before the forward pass.
        criterion: loss callable ``(logits, targets) -> scalar tensor``.
            Defaults to ``nn.CrossEntropyLoss()`` so the function no longer
            depends on a module-level variable.

    Returns:
        ``(final_outputs, final_targets)``: two flat lists of ints holding the
        predicted class ids and the true class ids, in loader order.
    """
    loss_fn = criterion if criterion is not None else nn.CrossEntropyLoss()
    model.eval()

    final_targets = []
    final_outputs = []
    running_loss = 0.0

    with torch.no_grad():
        for data in data_loader:
            inputs = data["image"].to(device, dtype=torch.float)
            targets = data["targets"].to(device, dtype=torch.long)

            output = model(inputs)
            running_loss += loss_fn(output, targets).item()

            # Softmax is monotonic, so the arg-max of the raw logits is the
            # predicted class. This also avoids the deprecated implicit-dim
            # softmax call.
            preds = output.argmax(dim=1)

            final_targets.extend(targets.cpu().tolist())
            final_outputs.extend(preds.cpu().tolist())

    n_batches = len(data_loader)
    # Avoid a ZeroDivisionError when the loader is empty.
    epoch_loss = running_loss / n_batches if n_batches else float('nan')
    print('Valid Loss: {:.4f}'.format(epoch_loss))
    return final_outputs, final_targets

In [None]:
# Use the GPU when one is present, otherwise fall back to the CPU instead of
# failing on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)
epochs = 10
for epoch in range(epochs):
    # Show 1-based epoch numbers so the final banner reads "10/10".
    print('Epoch {}/{}'.format(epoch + 1, epochs))
    print('-' * 100)
    train(trainloader, model, optimizer, device=device)
    print('-' * 100)
    predictions, valid_targets = evaluate(valloader, model, device=device)

    # NOTE(review): F1 is macro-averaged while precision and recall are
    # weighted - confirm that this mix is intentional.
    acc = accuracy_score(valid_targets, predictions)
    f1 = f1_score(valid_targets, predictions, average='macro')
    recall = recall_score(valid_targets, predictions, average='weighted')
    precision = precision_score(valid_targets, predictions, average='weighted')

    print('Accuracy : {:.4f}'.format(acc*100))
    print('Precision : {:.4f}'.format(precision*100))
    print('Recall : {:.4f}'.format(recall*100))
    print('F1-score : {:.4f}'.format(f1*100))
    print(classification_report(valid_targets, predictions))
    print('-' * 100)
    

In [None]:
# Serialises the entire module object (pickled), not just its weights, so
# loading the file requires the Efficientnet class definition to be importable.
# NOTE(review): saving model.state_dict() is the more portable convention -
# confirm what downstream loaders expect before changing it.
torch.save(model,"./Efficientnetb0.pt")