In [13]:
#age 예측을 위한 데이터셋
from torch.utils.data import Dataset, DataLoader
from torchvision.datasets import ImageFolder

from PIL import Image
import os
import os.path
from typing import Any, Callable, cast, Dict, List, Optional, Tuple
from torchvision.datasets.folder import default_loader, IMG_EXTENSIONS

class TrainDatasetAge(ImageFolder):
    def __init__(
            self,
            root: str,
            transform: Optional[Callable] = None,
            target_transform: Optional[Callable] = None,
            loader: Callable[[str], Any] = default_loader,
            is_valid_file: Optional[Callable[[str], bool]] = None,
    ):
        super(ImageFolder, self).__init__(root, loader, IMG_EXTENSIONS if is_valid_file is None else None,
                                          transform=transform,
                                          target_transform=target_transform,
                                          is_valid_file=is_valid_file)
        self.imgs = self.samples
        ##위는 그냥 기존 코드
        ##아래는 직접 작성
        gender_dic = {'male':0, 'female':3 }
        mask_dic = {'mask':0, 'incor':6, 'norma':12} ##이는 전처리의 편의에 의함
        def feature_extract(f):
            ff = f[0].split('/')[-1].split('.')[0].replace('incorrect_mask','incorr').split('_')
            return gender_dic[ff[1]], mask_dic[ff[-1][:-1]]
        self.features = list(map(feature_extract, self.samples))
        

    def __getitem__(self, index: int) -> Tuple[Any, Any]:

        path, target = self.samples[index]
        sample = self.loader(path)
        if self.transform is not None:
            sample = self.transform(sample)
        if self.target_transform is not None:
            target = self.target_transform(target)
        #아래 한 줄과 리턴에 하나 추가
        features = self.features[index]
        return sample, target, features

In [16]:
dataset = TrainDatasetAge(os.path.join(f'/opt/ml/input/cropped_v2/train/images_classified_age/'), transforms)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=2, shuffle=True, num_workers=8)
# next(iter(dataloader))
dataset.classes

['0', '1', '2']

In [12]:
import matplotlib
import matplotlib.pyplot as plt

import os

import torch
import torch.nn as nn
import torch.optim as optim

import torchvision
from torchvision import datasets, models, transforms

import numpy as np
import time
from tqdm import tqdm

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") # device 객체
# 데이터셋을 불러올 때 사용할 변형(transformation) 객체 정의
transforms = transforms.Compose([
#     transforms.Resize((224, 248)),
    transforms.Resize((224, 224)),
#     transforms.Resize((112, 112)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]) # 정규화(normalization)
])

In [17]:
import random

import matplotlib
import matplotlib.pyplot as plt

import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Subset

import torchvision
from torchvision import datasets, models, transforms

from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

import pandas as pd
import numpy as np
import time
from tqdm import tqdm

from PIL import Image

In [18]:
model = models.resnet34(pretrained=True)

In [23]:
model.fc.in_features

512

In [64]:
# print(model)

In [94]:
import itertools

iterable1 = ['0','6','12']
iterable2 = ['0','3']
iterable3 = ['0','1','2']
# iterable1 = [0,6,12]
# iterable2 = [0,3]
# iterable3 = [0,1,2]
iters = list(itertools.product(iterable1, iterable2, iterable3))
iters

[('0', '0', '0'),
 ('0', '0', '1'),
 ('0', '0', '2'),
 ('0', '3', '0'),
 ('0', '3', '1'),
 ('0', '3', '2'),
 ('6', '0', '0'),
 ('6', '0', '1'),
 ('6', '0', '2'),
 ('6', '3', '0'),
 ('6', '3', '1'),
 ('6', '3', '2'),
 ('12', '0', '0'),
 ('12', '0', '1'),
 ('12', '0', '2'),
 ('12', '3', '0'),
 ('12', '3', '1'),
 ('12', '3', '2')]

In [85]:
import pandas as pd
import random

In [86]:
df = pd.DataFrame(range(18)).T
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17


In [87]:
df_mask = pd.Series({0:0.5, 6:0.3, 12:0.2}).to_frame().T
df_mask = pd.concat([df_mask,df_mask], ignore_index=True)
df_mask

Unnamed: 0,0,6,12
0,0.5,0.3,0.2
1,0.5,0.3,0.2


In [88]:
df_gender = pd.Series({0:0.4, 3:0.6}).to_frame().T
df_gender = pd.concat([df_gender,df_gender], ignore_index=True)
df_gender

Unnamed: 0,0,3
0,0.4,0.6
1,0.4,0.6


In [89]:
df_age = pd.Series({0:0.5, 1:0.3, 2:0.2}).to_frame().T
df_age = pd.concat([df_age,df_age], ignore_index=True)
df_age

Unnamed: 0,0,1,2
0,0.5,0.3,0.2
1,0.5,0.3,0.2


In [90]:
df = pd.DataFrame()
for m,g,a in iters:
    print(m)
    df[m+g+a] = df_mask[m]*df_gender[g]*df_age[a]
df

0
0
0
0
0
0
6
6
6
6
6
6
12
12
12
12
12
12


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,0.1,0.06,0.04,0.15,0.09,0.06,0.06,0.036,0.024,0.09,0.054,0.036,0.04,0.024,0.016,0.06,0.036,0.024
1,0.1,0.06,0.04,0.15,0.09,0.06,0.06,0.036,0.024,0.09,0.054,0.036,0.04,0.024,0.016,0.06,0.036,0.024


In [None]:
import itertools

iterable1 = ['0','6','12']
iterable2 = ['0','3']
iterable3 = ['0','1','2']

iters = list(itertools.product(iterable1, iterable2, iterable3))

mask = pd.read_csv('sub_mask.csv')
gender = pd.read_csv('sub_gender.csv')
age = pd.read_csv('sub_age.csv')

df = pd.DataFrame()
for m,g,a in iters: df[int(m)+int(g)+int(a)] = mask[m]*gender[g]*age[a]

In [114]:
df.to_csv('sub_all1.csv')


NameError: name 'df1' is not defined

In [132]:
df1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,4.849798e-11,1.664804e-06,2.716136e-06,4.214067e-15,1.446575e-10,2.360095e-10,2.658205e-11,9.124896e-07,1.488731e-06,2.309757e-15,7.928770e-11,1.293583e-10,1.106906e-05,3.799709e-01,6.199243e-01,9.618084e-10,3.301628e-05,5.386622e-05
1,4.937038e-08,2.077911e-03,9.909908e-01,3.408029e-10,1.434379e-05,6.840793e-03,3.729018e-12,1.569477e-07,7.485100e-05,2.574135e-14,1.083407e-09,5.166953e-07,3.331264e-14,1.402069e-09,6.686705e-07,2.299566e-16,9.678462e-12,4.615822e-09
2,1.135067e-10,1.868424e-06,3.313389e-06,5.898588e-13,9.709614e-09,1.721864e-08,1.020546e-08,1.679911e-04,2.979087e-04,5.303455e-11,8.729970e-07,1.548138e-06,2.178079e-05,3.585316e-01,6.358056e-01,1.131879e-07,1.863176e-03,3.304082e-03
3,1.143226e-12,5.389695e-08,1.026183e-06,3.879255e-16,1.828859e-11,3.482098e-10,1.375009e-11,6.482427e-07,1.234236e-05,4.665753e-15,2.199651e-10,4.188075e-09,1.058089e-06,4.988321e-02,9.497625e-01,3.590364e-10,1.692663e-05,3.222784e-04
4,9.624010e-07,9.835436e-10,8.188055e-14,3.044306e-09,3.111185e-12,2.590079e-16,9.084867e-06,9.284449e-09,7.729355e-13,2.873762e-08,2.936895e-11,2.444981e-15,9.958189e-01,1.017696e-03,8.472372e-08,3.150015e-03,3.219216e-06,2.680015e-10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12595,6.775961e-06,1.868053e-02,9.812942e-01,1.967633e-11,5.424532e-08,2.849524e-06,8.645241e-11,2.383392e-07,1.252003e-05,2.510442e-16,6.920995e-13,3.635621e-11,1.796417e-11,4.952512e-08,2.601570e-06,5.216514e-17,1.438132e-13,7.554552e-12
12596,2.921104e-07,5.904584e-04,1.971659e-04,3.703605e-04,7.486296e-01,2.499824e-01,2.930149e-11,5.922868e-08,1.977764e-08,3.715074e-08,7.509478e-05,2.507565e-05,3.741729e-11,7.563357e-08,2.525556e-08,4.744058e-08,9.589419e-05,3.202099e-05
12597,2.713797e-09,9.676308e-11,2.897470e-14,2.743209e-05,9.781178e-07,2.928872e-10,9.550800e-05,3.405431e-06,1.019721e-09,9.654310e-01,3.442338e-02,1.030773e-05,7.549913e-10,2.691995e-11,8.060900e-15,7.631737e-06,2.721170e-07,8.148262e-11
12598,3.902652e-05,9.916038e-01,3.761698e-03,1.799924e-07,4.573329e-03,1.734915e-05,1.145025e-10,2.909331e-06,1.103669e-08,5.280913e-13,1.341799e-08,5.090180e-11,5.771679e-11,1.466495e-06,5.563220e-09,2.661929e-13,6.763551e-09,2.565786e-11


In [140]:
np.array(np_sub_all).shape

(12600, 18)

In [156]:
df1 = pd.read_csv('sub_all1.csv').drop('Unnamed: 0',axis=1)
df2 = pd.read_csv('jdu_softmax_output.csv').drop('Unnamed: 0',axis=1)
df3 = pd.read_csv('ensemble.csv')
df4 = pd.read_csv('soft_vote_Efb4.csv').drop('Unnamed: 0',axis=1)

sub = pd.read_csv('sub0302_0731_AgeF1-0.7930_epo2.csv')#제출 형식에 맞는 파일 아무거나

np_sub_all = np.array(df1)+np.array(df2)+np.array(df3)+np.array(df4)
sub['ans'] = np.array(np_sub_all).argmax(axis=1)
sub.to_csv('softvoting1.csv')

In [152]:
df4

Unnamed: 0.1,Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,0,0.077989,0.027907,0.015263,0.064415,0.028795,0.028955,0.005540,0.011758,0.019243,0.009560,0.003342,0.030460,0.041165,0.401850,0.037496,0.121655,0.039166,0.035441
1,1,0.025803,0.181926,0.210697,0.278706,0.012799,0.034500,0.002701,0.021672,0.024301,0.051056,0.009710,0.020095,0.009394,0.014557,0.020907,0.022292,0.008844,0.050040
2,2,0.012135,0.005515,0.033127,0.023594,0.006104,0.018960,0.005125,0.000855,0.010752,0.021449,0.002424,0.007741,0.008077,0.004491,0.745106,0.025519,0.008379,0.060647
3,3,0.033059,0.018439,0.053689,0.033242,0.007354,0.011987,0.004352,0.004440,0.015228,0.037807,0.002358,0.014660,0.082033,0.013328,0.551290,0.063327,0.025713,0.027695
4,4,0.017781,0.019336,0.006267,0.019420,0.010419,0.006524,0.007373,0.016197,0.012392,0.010548,0.004056,0.006001,0.299210,0.016379,0.008847,0.528813,0.004537,0.005902
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12595,12595,0.030227,0.836366,0.010629,0.015772,0.011315,0.004406,0.001498,0.009897,0.003914,0.005716,0.001404,0.001933,0.004209,0.022590,0.011400,0.009489,0.015056,0.004180
12596,12596,0.044754,0.026729,0.007182,0.706607,0.102472,0.014916,0.001950,0.005340,0.005865,0.020594,0.011243,0.007943,0.001979,0.006034,0.002647,0.010732,0.008386,0.014626
12597,12597,0.003187,0.003755,0.003678,0.003834,0.005249,0.006277,0.004856,0.002570,0.006055,0.920560,0.009153,0.002954,0.003547,0.002502,0.003240,0.011157,0.004666,0.002760
12598,12598,0.009162,0.008102,0.004535,0.933705,0.003342,0.003250,0.002703,0.001445,0.002868,0.003260,0.001958,0.002975,0.007267,0.004409,0.003243,0.003944,0.001843,0.001989


In [158]:
from sklearn.svm import SVC
classifier = SVC(kernel = "rbf", C = 2, gamma = 0.5)
training_points = [[1, 2], [1, 5], [2, 2], [7, 5], [9, 4], [8, 2]]
labels = [1, 1, 1, 0, 0, 0]
classifier.fit(training_points, labels) 

SVC(C=2, gamma=0.5)