In [1]:
# -*- coding: utf-8 -*
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from train import resnext50d_32x4d
from train import tf_efficientnet_b3_ns
from data_loader import test_data_loader
from tqdm import tqdm
import numpy as np

In [2]:
def inference(model, test_loader, device):
    preds = []
    model.eval()
    test_tqdm = tqdm(test_loader, total=len(test_loader), position=0, leave=True)
    for images in test_tqdm:
        images = images.to(device)
        preds.extend(model(images).detach().cpu().numpy())
    return preds

def new_softmax(a):
    c = np.max(a) # 최댓값
    exp_a = np.exp(a-c) # 각각의 원소에 최댓값을 뺀 값에 exp를 취한다. (이를 통해 overflow 방지)
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a
    return y

In [3]:
def main(model_path):
    path = "/opt/ml/input/data/eval/images/"
    df = pd.read_csv("/opt/ml/input/data/eval/submission.csv")
    df["ImageID"] = df["ImageID"].apply(lambda x: path+x)
    test_loader = test_data_loader(df)

    device = torch.device('cuda')
    model = resnext50d_32x4d("resnext50d_32x4d", n_class=3, pretrained=True).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))

    preds = []
    with torch.no_grad():
        # for i in test_loader:
        preds += [inference(model, test_loader, device)]

    return preds

In [4]:
# mask 착용 유무 확인
pred_1 = main("/opt/ml/input/data/model/is_wear_mask/resnext50d_32x4d/0402_0441_acc_0.9966490299823633_epoch_2.pth")

100%|██████████| 197/197 [02:14<00:00,  1.46it/s]


In [5]:
df = pd.read_csv("/opt/ml/input/data/eval/submission.csv")
df["is_wear_mask"] = np.argmax(np.array(pred_1[0]), axis=1)
df.head()

Unnamed: 0,ImageID,ans,is_wear_mask
0,cbc5c6e168e63498590db46022617123f1fe1268.jpg,14,2
1,0e72482bf56b3581c081f7da2a6180b8792c7089.jpg,4,0
2,b549040c49190cedc41327748aeb197c1670f14d.jpg,1,2
3,4f9cb2a045c6d5b9e50ad3459ea7b791eb6e18bc.jpg,4,2
4,248428d9a4a5b6229a7081c32851b90cb8d38d0c.jpg,12,2


In [6]:
def main(model_path, resnet=False):
    temp_df = pd.read_csv("/opt/ml/input/data/eval/submission.csv")
    path = "/opt/ml/input/data/eval/images/"
    # df = pd.read_csv("/opt/ml/input/data/eval/submission.csv")
    temp_df["ImageID"] = temp_df["ImageID"].apply(lambda x: path+x)
    test_loader = test_data_loader(temp_df)

    device = torch.device('cuda')
    if resnet:
        model = resnext50d_32x4d("resnext50d_32x4d", n_class=2, pretrained=True).to(device)
    else:
        model = tf_efficientnet_b3_ns("tf_efficientnet_b3_ns", n_class=2, pretrained=True).to(device)

    model.load_state_dict(torch.load(model_path, map_location=device))

    preds = []
    with torch.no_grad():
        # for i in test_loader:
        preds += [inference(model, test_loader, device)]

    return preds

# 성별 확인
pred_age_and_gender_1 = main("/opt/ml/input/data/model/age_and_gender/tf_efficientnet_b3_ns/0408_0155_f1score_0.9820153398571806_epoch_2.pth")
pred_age_and_gender_2 = main("/opt/ml/input/data/model/age_and_gender/tf_efficientnet_b3_ns/0408_0227_f1score_0.9798888594866367_epoch_2.pth")
pred_age_and_gender_3 = main("/opt/ml/input/data/model/age_and_gender/tf_efficientnet_b3_ns/0408_0257_f1score_0.9891505689335803_epoch_2.pth")

new_pred = np.zeros_like(pred_age_and_gender_1[0])
new_pred.shape

new_pred += (np.array(pred_age_and_gender_1[0]))
new_pred += (np.array(pred_age_and_gender_2[0]))
new_pred += (np.array(pred_age_and_gender_3[0]))

df["age_and_gender"] = np.argmax(new_pred, axis=1)
df.head()

# pred_2 = main("/opt/ml/input/data/model/age_and_gender/resnext50d_32x4d/0402_0456_acc_0.9677248677248678_epoch_2.pth", resnet=True)

# df["age_and_gender"] = np.argmax(np.array(pred_2[0]), axis=1)
# df.head()

100%|██████████| 197/197 [01:51<00:00,  1.76it/s]
100%|██████████| 197/197 [01:55<00:00,  1.70it/s]
100%|██████████| 197/197 [01:55<00:00,  1.70it/s]


Unnamed: 0,ImageID,ans,is_wear_mask,age_and_gender
0,cbc5c6e168e63498590db46022617123f1fe1268.jpg,14,2,0
1,0e72482bf56b3581c081f7da2a6180b8792c7089.jpg,4,0,0
2,b549040c49190cedc41327748aeb197c1670f14d.jpg,1,2,0
3,4f9cb2a045c6d5b9e50ad3459ea7b791eb6e18bc.jpg,4,2,0
4,248428d9a4a5b6229a7081c32851b90cb8d38d0c.jpg,12,2,0


In [7]:
def main(model_path, resnet=False):
    temp_df = df[df["age_and_gender"] == 1]
    path = "/opt/ml/input/data/eval/images/"
    # df = pd.read_csv("/opt/ml/input/data/eval/submission.csv")
    temp_df["ImageID"] = temp_df["ImageID"].apply(lambda x: path+x)
    test_loader = test_data_loader(temp_df)

    device = torch.device('cuda')
    if resnet:
        model = resnext50d_32x4d("resnext50d_32x4d", n_class=5, pretrained=True).to(device)
    else:
        model = tf_efficientnet_b3_ns("tf_efficientnet_b3_ns", n_class=3, pretrained=True).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))

    preds = []
    with torch.no_grad():
        # for i in test_loader:
        preds += [inference(model, test_loader, device)]

    return preds

# # 여자 나이 확인
# pred_woman_age = main("/opt/ml/input/data/model/female_info/resnext50d_32x4d/0402_1753_acc_0.7429638139000574_epoch_2.pth", resnet=True)
# woman_df["age"] = np.argmax(np.array(pred_woman_age[0]), axis=1)
# woman_df.head()

pred_woman_age_1 = main("/opt/ml/input/data/model/female_info/tf_efficientnet_b3_ns/0408_0211_f1score_0.8905644118914261_epoch_2.pth")
pred_woman_age_2 = main("/opt/ml/input/data/model/female_info/tf_efficientnet_b3_ns/0408_0236_f1score_0.9069366652305041_epoch_0.pth")
pred_woman_age_3 = main("/opt/ml/input/data/model/female_info/tf_efficientnet_b3_ns/0408_0313_f1score_0.9026281775096942_epoch_2.pth")

new_pred = np.zeros_like(pred_woman_age_1[0])
new_pred += np.array(pred_woman_age_1[0])
new_pred += np.array(pred_woman_age_2[0])
new_pred += np.array(pred_woman_age_3[0])

woman_df = df[df["age_and_gender"] == 1]
woman_df["age"] = np.argmax(new_pred, axis=1)
woman_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
100%|██████████| 79/79 [00:44<00:00,  1.77it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
100%|██████████| 79/79 [00:42<00:00,  1.86it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
100%|██████████| 79/79 [00:45<00:00,  1.73it/s]
A value is trying to be set on a copy of a slice from a DataFrame.


Unnamed: 0,ImageID,ans,is_wear_mask,age_and_gender,age
7,441419a874f4d031cd576850b68539ca7d35bedf.jpg,4,0,1,1
8,388856cd1ef99b1918273a827a75f2aff2478321.jpg,4,0,1,1
9,795ba8ccc769a3f9da6a897f75df6706b729345b.jpg,4,0,1,0
11,78900cfc8a2bb5cb69c60bdef92b173a8a48c8ec.jpg,4,0,1,0
13,d3921eae9f9a45d89f0bd1ea0e71354178481245.jpg,4,0,1,0


In [8]:
def main(model_path, resnet=False):
    temp_df = df[df["age_and_gender"] == 0]
    path = "/opt/ml/input/data/eval/images/"
    # df = pd.read_csv("/opt/ml/input/data/eval/submission.csv")
    temp_df["ImageID"] = temp_df["ImageID"].apply(lambda x: path+x)
    test_loader = test_data_loader(temp_df)

    device = torch.device('cuda')
    if resnet:
        model = resnext50d_32x4d("resnext50d_32x4d", n_class=5, pretrained=True).to(device)
    else:
        model = tf_efficientnet_b3_ns("tf_efficientnet_b3_ns", n_class=3, pretrained=True).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))

    preds = []
    with torch.no_grad():
        # for i in test_loader:
        preds += [inference(model, test_loader, device)]

    return preds

# # 남자 나이 확인
# pred_man_age = main("/opt/ml/input/data/model/male_info/resnext50d_32x4d/0402_1728_acc_0.7138939670932358_epoch_0.pth", resnet=True)
# man_df = df[df["age_and_gender"] == 0]
# man_df["age"] = np.argmax(np.array(pred_man_age[0]), axis=1)
# man_df.head()

pred_man_age_1 = main("/opt/ml/input/data/model/male_info/tf_efficientnet_b3_ns/0408_0159_f1score_0.858122001370802_epoch_1.pth")
pred_man_age_2 = main("/opt/ml/input/data/model/male_info/tf_efficientnet_b3_ns/0408_0231_f1score_0.8662551440329218_epoch_1.pth")
pred_man_age_3 = main("/opt/ml/input/data/model/male_info/tf_efficientnet_b3_ns/0408_0301_f1score_0.8868312757201646_epoch_1.pth")

new_pred = np.zeros_like(pred_man_age_1[0])
new_pred += (np.array(pred_man_age_1[0]))
new_pred += (np.array(pred_man_age_2[0]))
new_pred += (np.array(pred_man_age_3[0]))

man_df = df[df["age_and_gender"] == 0]
man_df["age"] = np.argmax(new_pred, axis=1)
man_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
100%|██████████| 119/119 [01:10<00:00,  1.69it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
100%|██████████| 119/119 [01:10<00:00,  1.68it/s]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
100%|██████████| 119/119 [01:03<00:00,  1.87it/s]
A value is trying to be set on a copy of a slice from a DataF

Unnamed: 0,ImageID,ans,is_wear_mask,age_and_gender,age
0,cbc5c6e168e63498590db46022617123f1fe1268.jpg,14,2,0,1
1,0e72482bf56b3581c081f7da2a6180b8792c7089.jpg,4,0,0,1
2,b549040c49190cedc41327748aeb197c1670f14d.jpg,1,2,0,1
3,4f9cb2a045c6d5b9e50ad3459ea7b791eb6e18bc.jpg,4,2,0,2
4,248428d9a4a5b6229a7081c32851b90cb8d38d0c.jpg,12,2,0,0


In [9]:
def func(data):
    if data["age"] < 2:
        age = 0
    elif data["age"] < 4:
        age = 1
    else:
        age = 2
    return age
    
# man_df["age"] = man_df.apply(func, axis=1)
# woman_df["age"] = woman_df.apply(func, axis=1)

In [10]:
def func(data):
    if data["is_wear_mask"] == 0:
        first = 0
    elif data["is_wear_mask"] == 1:
        first = 1
    else:
        first = 2

    if data["age_and_gender"] == 0:
        second = 0
    else:
        second = 1

    return first*6 + second*3 + data["age"]

In [13]:
man_df.reset_index(inplace=True)
woman_df.reset_index(inplace=True)

result = pd.concat([man_df, woman_df]).set_index("index").sort_index()
result.head()

Unnamed: 0_level_0,ImageID,ans,is_wear_mask,age_and_gender,age
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,cbc5c6e168e63498590db46022617123f1fe1268.jpg,14,2,0,1
1,0e72482bf56b3581c081f7da2a6180b8792c7089.jpg,4,0,0,1
2,b549040c49190cedc41327748aeb197c1670f14d.jpg,1,2,0,1
3,4f9cb2a045c6d5b9e50ad3459ea7b791eb6e18bc.jpg,4,2,0,2
4,248428d9a4a5b6229a7081c32851b90cb8d38d0c.jpg,12,2,0,0


In [14]:
import time
now = time.localtime()
temp_time = "{0:02d}{1:02d}_{2:02d}{3:02d}".format(now.tm_mon, now.tm_mday, now.tm_hour, now.tm_min)
temp_time

'0408_0936'

In [15]:
df = pd.read_csv("/opt/ml/input/data/eval/submission.csv")
df["ans"] = result.apply(func, axis=1)
df.to_csv("/opt/ml/input/data/submission/" + temp_time + "_submission.csv")