In [1]:
import os
import pandas as pd
from PIL import Image
import random
import wandb
import copy
import timm

from imblearn.over_sampling import SMOTE

import torch
import torch.nn as nn
import torch.optim as optm
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

import torchvision
from torchvision import transforms
from torchvision.transforms import Resize, ToTensor, Normalize

from sklearn.metrics import f1_score
import numpy as np

import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from matplotlib import gridspec

from torchinfo import summary
from tqdm.auto import tqdm

%matplotlib inline
device = 'cuda' if torch.cuda.is_available() else 'cpu'
cwd=os.path.dirname(os.getcwd())

In [2]:
def make_images(meta,img_dir,train):
    images=[]
    labels=[]
    if train:
        for idx in range(len(meta)):
            folder_path=os.path.join(img_dir, meta.path[idx])
            for img in os.listdir(folder_path):
                if '._' in img:
                    continue
                images.append(os.path.join(folder_path,img))
                labels.append((('incorrect' in img)+('normal' in img)*2)*6+(meta.gender[idx]=='female')*3+(30<=meta.age[idx])+(60<=meta.age[idx]))
    else:
        for img_id in meta.ImageID:
            images.append(os.path.join(img_dir, img_id))
    return images,labels

class ImageDataset(Dataset):
    def __init__(self,train=True):
        self.train=train
        self.md=['info','train']
        self.path=[os.path.join(cwd,'input/data/eval'),os.path.join(cwd,'input/data/train')]
        self.meta=pd.read_csv(os.path.join(self.path[train], f'{self.md[train]}.csv'))
        self.img_dir=os.path.join(self.path[train],'images')
        self.classes=[('Wear','Incorrect','Not Wear'),('남','여'),('<30','>=30 and <60','>=60')]
        
        self.images,self.labels=make_images(self.meta,self.img_dir,train)

    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        image=ToTensor()(Image.open(self.images[idx]))
        if self.train:
            label=torch.tensor(self.labels[idx])
        else:
            label=0
        return image,label

In [4]:
data=ImageDataset()
print(len(data))
with tqdm(enumerate(data)) as pbar:
    for n,(image,label) in pbar:
        if n==0:
            images=image.view(-1)
            continue
        images=torch.vstack((images,image.view(-1)))
        if n%300==0:
            print(n)
        
labels=torch.tensor(data.labels)

print(images.shape,labels.shape)
print(pd.Series(data.labels).value_counts())

18900


HBox(children=(HTML(value=''), FloatProgress(value=1.0, bar_style='info', layout=Layout(width='20px'), max=1.0…

300
600
900
1200
1500
1800
2100
2400
2700
3000
3300



KeyboardInterrupt: 

In [None]:
smote=SMOTE(random_state=0)
smote_images,smote_labels=smote.fit_sample(images,labels)

print(smote_images.shape,smote_labels.shape)
print(pd.Series(smote_labels).value_counts())

In [None]:
smote_images