In [None]:
import os
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from torchvision import transforms
from torchvision.transforms import Resize, ToTensor, Normalize

In [None]:
# Set this to the directory that holds the test (evaluation) dataset.
test_dir = '/opt/ml/input/data/eval'

## Unused (switched to ResNet)

In [None]:
class MyModel(nn.Module):
    """Small CNN classifier: one conv stage, global average pooling, two-layer head.

    Args:
        num_classes: Number of logits produced by the last linear layer.
    """

    def __init__(self, num_classes: int = 1000):
        super().__init__()

        # Convolutional stage: 3 input channels -> 64 feature maps.
        conv_stage = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
        ]
        self.features = nn.Sequential(*conv_stage)

        # Reduce every feature map to 1x1 so the head always receives 64 values.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        head = [
            nn.Dropout(),
            nn.Linear(64, 32),
            nn.ReLU(inplace=True),
            nn.Linear(32, num_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the conv stage, pool, flatten from dim 1, and return the head's logits."""
        pooled = self.avgpool(self.features(x))
        return self.classifier(torch.flatten(pooled, 1))

In [None]:
class VGG(nn.Module):
    """VGG-named classifier with a single conv stage and a three-layer FC head.

    Args:
        num_classes: Number of logits produced by the last linear layer.
        init_weights: When true, re-initialise all layers via ``_initialize_weights``.
    """

    def __init__(self, num_classes=1000, init_weights=True):
        super().__init__()

        # Convolutional stage: 3 input channels -> 512 feature maps.
        self.features = nn.Sequential(
            nn.Conv2d(3, 512, kernel_size=11, stride=4, padding=2),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
        )

        # Global average pooling down to 1x1 per channel.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        # Fully connected head: 512 -> 64 -> 32 -> num_classes, dropout before each linear.
        fc_layers = []
        in_width = 512
        for out_width in (64, 32):
            fc_layers += [nn.Dropout(), nn.Linear(in_width, out_width), nn.ReLU(True)]
            in_width = out_width
        fc_layers += [nn.Dropout(), nn.Linear(in_width, num_classes)]
        self.classifier = nn.Sequential(*fc_layers)

        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Apply conv stage, pooling, flattening from dim 1, then the FC head."""
        pooled = self.avgpool(self.features(x))
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

    def _initialize_weights(self):
        """Initialise conv (Kaiming normal), batch-norm (1/0) and linear (N(0, 0.01)) layers."""
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)
                continue
            if isinstance(layer, nn.BatchNorm2d):
                nn.init.ones_(layer.weight)
                nn.init.zeros_(layer.bias)
                continue
            if isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.zeros_(layer.bias)

In [None]:
class TestDataset(Dataset):
    """Dataset that opens an image per path and optionally applies a transform.

    Args:
        img_paths: Indexable collection of image file paths.
        transform: Callable applied to each opened image; skipped when falsy.
    """

    def __init__(self, img_paths, transform):
        self.img_paths, self.transform = img_paths, transform

    def __len__(self):
        """Number of image paths held by the dataset."""
        return len(self.img_paths)

    def __getitem__(self, index):
        """Open the image at ``index`` and return it, transformed when a transform is set."""
        image = Image.open(self.img_paths[index])
        return self.transform(image) if self.transform else image

In [None]:
import torch
import torch.utils.data as data
import pandas as pd
import numpy as np
from glob import glob

def file_load(opt, pattern="/opt/ml/input/data/train/images/*/*"):
    """Return the sorted list of file paths matching ``pattern``.

    Args:
        opt: Unused; kept so existing callers that pass a split name still work.
        pattern: Glob pattern to expand. Defaults to the training image layout
            that was previously hard-coded in this function.

    Returns:
        A list of matching paths in sorted order (empty when nothing matches).
    """
    # This file imports the function with `from glob import glob`, so the
    # original `glob.glob(...)` call raised AttributeError; call it directly.
    # Sorting makes the order deterministic, since glob's order is unspecified.
    return sorted(glob(pattern))

class CustomDataset(data.Dataset):
    """Dataset pairing ``np.load``-able sample files with label rows from a CSV.

    Args:
        opt_data: Split name ('train' or 'validation'). Currently unused; kept
            for interface compatibility.
        label_csv: CSV read with its first column as the index; the remaining
            values of row ``i`` become the label for sample ``i``.
        file_pattern: Glob pattern for the sample files; matches are sorted so
            the index -> file mapping is deterministic.

    NOTE(review): samples are read with ``np.load``, so the matched files must
    be NumPy array files, and sorted file order is assumed to line up with the
    CSV row order — confirm both against the actual data on disk.
    """

    def __init__(self, opt_data,
                 label_csv='/opt/ml/input/data/train/train_path.csv',
                 file_pattern='/opt/ml/input/data/train/images/*/*'):
        super().__init__()
        # The original never assigned file_list (the assignment was commented
        # out), so every __getitem__ call failed with AttributeError.
        self.file_list = sorted(glob(file_pattern))
        y = pd.read_csv(label_csv, index_col=0)
        self.y = y.values

    def __getitem__(self, index):
        """Return ``(sample, label)`` for ``index`` as float32 tensors."""
        x = np.load(self.file_list[index])
        # Return locals instead of storing them on self, so the dataset does
        # not keep the most recently loaded sample alive.
        x_data = torch.from_numpy(x).float()
        y_data = torch.from_numpy(self.y[index]).float()
        return x_data, y_data

    def __len__(self):
        """Number of label rows read from the CSV."""
        return len(self.y)
        
# Smoke check: construct the dataset with the 'train' option when this code
# runs as the main module; the instance is bound to the module-level name `a`.
if __name__ == "__main__":
    a = CustomDataset('train')

## resnet

In [None]:
from tensorflow.keras.applications import ResNet50

# Build an untrained ResNet50 (weights=None) with an 18-class top for (64, 48, 3) inputs.
# `pooling` takes None, 'avg' or 'max'; the original passed the Python builtin
# function `max` rather than the string.
# NOTE(review): per the Keras documentation `pooling` is only used when
# include_top=False, so it should not affect the model built here — confirm
# against the installed Keras version.
model = ResNet50(include_top=True, weights=None, input_shape=(64, 48, 3), pooling='max', classes=18)

2021-08-26 06:16:22.767056: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2021-08-26 06:16:22.768364: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcusolver.so.11'; dlerror: libcusolver.so.11: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2021-08-26 06:16:22.768436: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudnn.so.8'; dlerror: libcudnn.so.8: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2021-08-26 06:16:22.768447: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1835] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if 

In [None]:
def setup_gpus(memory_limit=1500):
    """Restrict TensorFlow to the first GPU and cap the memory it may allocate.

    Does nothing when TensorFlow reports no GPU. A ``RuntimeError`` raised
    while configuring is printed rather than propagated, as in the original.

    Args:
        memory_limit: Value passed as ``memory_limit`` to
            ``VirtualDeviceConfiguration`` (megabytes per the TensorFlow
            docs). Defaults to the previously hard-coded 1500.
    """
    # No visible cell binds the name `tf` (only ResNet50 is imported from
    # tensorflow), so the original raised NameError when called.
    import tensorflow as tf

    gpus = tf.config.list_physical_devices('GPU')
    if not gpus:
        return

    first_gpu = gpus[0]
    try:
        tf.config.experimental.set_visible_devices(first_gpu, 'GPU')
        tf.config.experimental.set_virtual_device_configuration(
            first_gpu,
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=memory_limit)],
        )
    except RuntimeError as e:
        print(e)

In [None]:
#device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
print('compile end')


compile end


In [None]:
from PIL import Image
from numpy import genfromtxt
import gzip
import _pickle
from glob import glob
import numpy as np
import pandas as pd
import imageio
import cv2


def dir_to_dataset(glob_files, loc_train_labels=""):
    """Load every image matching a glob pattern into one array, optionally with labels.

    Files are processed in sorted path order. Each image is read as RGB and
    resized with ``cv2.resize(img, (48, 64))``; the recorded notebook output
    shows the resulting per-image shape as (64, 48, 3).

    Args:
        glob_files: Glob pattern selecting the image files.
        loc_train_labels: Optional path to a label CSV. When non-empty, its
            values are read into a single column named "class".

    Returns:
        ``np.array(images)`` when ``loc_train_labels`` is empty, otherwise the
        tuple ``(np.array(images), np.array(labels))``. The image array is
        empty when the pattern matches nothing.
    """
    print("Gonna process:\n\t %s"%glob_files)
    dataset = []
    for file_count, file_name in enumerate(sorted(glob(glob_files))):
        img = imageio.imread(file_name, pilmode='RGB')
        img = cv2.resize(img, (48, 64))
        dataset.append(img)
        if file_count % 1000 == 0:
            print("\t %s files processed"%file_count)
    print('done')

    # The original printed `img.shape` unconditionally, which raised
    # UnboundLocalError when the pattern matched no files.
    if dataset:
        print(dataset[-1].shape)
    else:
        print("no files matched %s" % glob_files)

    images = np.array(dataset)
    if len(loc_train_labels) > 0:
        # NOTE(review): because `names=` is passed, pandas treats the file as
        # headerless, so a header line would come back as the first label.
        # The caller in this notebook skips index 0 of the labels — confirm
        # that is the reason before changing either side.
        df = pd.read_csv(loc_train_labels, names = ["class"])
        return images, np.array(df["class"])
    return images
    
# Read every training image plus the label column from the CSV.
Data1, y1 = dir_to_dataset("/opt/ml/input/data/train/images/*/*","/opt/ml/input/data/train/train_path.csv")

print('yup')

# Sizes of the consecutive train / validation / test partitions.
train_num = 17000
valid_num = 1000
test_num = 900

# Start offsets of the validation and test partitions within Data1.
_val_start = train_num
_test_start = train_num + valid_num

# Label slices start one position later than the image slices (y1[0] is skipped).
train_set_x, train_set_y = Data1[:train_num], y1[1:train_num + 1]
val_set_x, val_set_y = (
    Data1[_val_start:_val_start + valid_num],
    y1[_val_start + 1:_val_start + 1 + valid_num],
)
test_set_x, test_set_y = (
    Data1[_test_start:_test_start + test_num],
    y1[_test_start + 1:_test_start + 1 + test_num],
)

train_set = (train_set_x, train_set_y)
val_set = (val_set_x, val_set_y)
test_set = (test_set_x, test_set_y)

dataset = [train_set, val_set, test_set]

Gonna process:
	 /opt/ml/input/data/train/images/*/*
	 0 files processed
	 1000 files processed
	 2000 files processed
	 3000 files processed
	 4000 files processed
	 5000 files processed
	 6000 files processed
	 7000 files processed
	 8000 files processed
	 9000 files processed
	 10000 files processed
	 11000 files processed
	 12000 files processed
	 13000 files processed
	 14000 files processed
	 15000 files processed
	 16000 files processed
	 17000 files processed
	 18000 files processed
done
(64, 48, 3)
yup


In [None]:
import sys
# Print sys.getsizeof for the image array, then for the label array.
for _arr in (Data1, y1):
    print(sys.getsizeof(_arr))

174182544
151304


In [None]:
# Load the memory_profiler IPython extension so the %memit magic is available.
%load_ext memory_profiler
# %memit is run without a statement here; the recorded output reports peak memory.
%memit

peak memory: 1749.21 MiB, increment: -0.69 MiB


In [None]:
%%time 
import _pickle, gzip, urllib.request, json
import numpy as np
import matplotlib.pyplot as plt
from keras.utils import np_utils

# Unpack the three (images, labels) splits from `dataset`.
train_set, valid_set, test_set = dataset
train_images, train_labels = train_set
valid_images, valid_labels = valid_set
test_images, test_labels = test_set


def _as_unit_float32(images):
    """Reshape to (N, 64, 48, 3), cast to float32, and divide pixel values by 255."""
    scaled = images.reshape(images.shape[0], 64, 48, 3).astype('float32')
    scaled /= 255
    return scaled


train_images = _as_unit_float32(train_images)
valid_images = _as_unit_float32(valid_images)
test_images = _as_unit_float32(test_images)

# One-hot encode the labels of every split into n_classes columns.
n_classes = 18
print("Shape before one-hot encoding: ", train_labels.shape)
train_labels, valid_labels, test_labels = (
    np_utils.to_categorical(split_labels, n_classes)
    for split_labels in (train_labels, valid_labels, test_labels)
)
print("Shape after one-hot encoding: ", train_labels.shape)

'''
for i in range(0,10000,2000):
    img = train_set[0][i]
    label = train_set[1][i]
    img_reshape = img.reshape((64,48,3))
    imgplot = plt.imshow(img_reshape)
    print('This is a {}'.format(label))
    plt.show()
'''

Shape before one-hot encoding:  (17000,)
Shape after one-hot encoding:  (17000, 18)
CPU times: user 296 ms, sys: 208 ms, total: 504 ms
Wall time: 502 ms


"\nfor i in range(0,10000,2000):\n    img = train_set[0][i]\n    label = train_set[1][i]\n    img_reshape = img.reshape((64,48,3))\n    imgplot = plt.imshow(img_reshape)\n    print('This is a {}'.format(label))\n    plt.show()\n"

In [None]:
#es = callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
#save = tf.keras.callbacks.ModelCheckpoint('best_model.h5', monitor='val_loss', mode='min', save_best_only=True)
#callback = [es, save]

#hist = model.fit(train_images, train_labels, batch_size=64, epochs=10, validation_data=(valid_images, valid_labels), callbacks=callback)

# Train for 5 epochs with batch size 128, validating on the validation split.
model.fit(train_images, train_labels, batch_size=128, epochs=5, validation_data=(valid_images, valid_labels))
print('fit end')

# Evaluate on the test split; the model was compiled with metrics=['accuracy'],
# so evaluate yields the loss followed by the accuracy.
test_loss, test_acc = model.evaluate(test_images, test_labels, verbose=2)
# Fixed typo: the original called `ptinr`, which raised NameError after evaluation.
print('acc : ', test_acc)

2021-08-26 06:17:30.071673: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
fit end
29/29 - 2s - loss: 1.7929 - accuracy: 0.5922


NameError: name 'ptinr' is not defined