# 環境確認

In [1]:
import sys
import torch

print("Pythonのバージョン：",sys.version)
print("PyTorchのバージョン：", torch.__version__)
print("使用しているGPUの確認")
!nvidia-smi

Pythonのバージョン： 3.7.13 (default, Mar 16 2022, 17:37:17) 
[GCC 7.5.0]
PyTorchのバージョン： 1.10.0+cu111
使用しているGPUの確認
Tue Apr  5 11:59:06 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P0    25W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                            

# データのダウンロード

In [2]:
!pip install signate

from googleapiclient.discovery import build
import io, os
from googleapiclient.http import MediaIoBaseDownload
from google.colab import auth


auth.authenticate_user()

drive_service = build('drive', 'v3')
results = drive_service.files().list(
        q="name = 'signate.json'", fields="files(id)").execute()
signate_api_key = results.get('files', [])

filename = "/root/.signate/signate.json"
os.makedirs(os.path.dirname(filename), exist_ok=True)

request = drive_service.files().get_media(fileId=signate_api_key[0]['id'])
fh = io.FileIO(filename, 'wb')
downloader = MediaIoBaseDownload(fh, request)
done = False
while done is False:
    status, done = downloader.next_chunk()
    print("Download %d%%." % int(status.progress() * 100))
os.chmod(filename, 600)

Collecting signate
  Downloading signate-0.9.9-py3-none-any.whl (37 kB)
Collecting wget
  Downloading wget-3.2.zip (10 kB)
Collecting six>=1.16
  Downloading six-1.16.0-py2.py3-none-any.whl (11 kB)
Collecting urllib3>=1.26.7
  Downloading urllib3-1.26.9-py2.py3-none-any.whl (138 kB)
[K     |████████████████████████████████| 138 kB 15.5 MB/s 
Building wheels for collected packages: wget
  Building wheel for wget (setup.py) ... [?25l[?25hdone
  Created wheel for wget: filename=wget-3.2-py3-none-any.whl size=9675 sha256=988c186824d1f349a95b9c57e166d600bfe52ea21f7cac90c0ec2eb4dccbef40
  Stored in directory: /root/.cache/pip/wheels/a1/b6/7c/0e63e34eb06634181c63adacca38b79ff8f35c37e3c13e3c02
Successfully built wget
Installing collected packages: six, wget, urllib3, signate
  Attempting uninstall: six
    Found existing installation: six 1.15.0
    Uninstalling six-1.15.0:
      Successfully uninstalled six-1.15.0
  Attempting uninstall: urllib3
    Found existing installation: urllib3 1.2



Download 100%.


In [3]:
!signate list
!signate files --competition-id=133
!signate download --competition-id=133

  competitionId  title                                                                             closing     prize        submitters
---------------  --------------------------------------------------------------------------------  ----------  ---------  ------------
              1  【練習問題】銀行の顧客ターゲティング                                              -                              5566
             24  【練習問題】お弁当の需要予測                                                      -                              6905
             27  【練習問題】Jリーグの観客動員数予測                                               -                              1689
            100  【練習問題】手書き文字認識                                                        -           Knowledge           219
            102  【練習問題】タイタニックの生存予測                                                -           Knowledge          1415
            103  【練習問題】音楽ラベリング                                                        -           Knowledge            65
            

In [4]:
!unzip test_images.zip

[1;30;43mストリーミング出力は最後の 5000 行に切り捨てられました。[0m
  inflating: test_images/test_3699.jpg  
  inflating: test_images/test_37.jpg  
  inflating: test_images/test_370.jpg  
  inflating: test_images/test_3700.jpg  
  inflating: test_images/test_3701.jpg  
  inflating: test_images/test_3702.jpg  
  inflating: test_images/test_3703.jpg  
  inflating: test_images/test_3704.jpg  
  inflating: test_images/test_3705.jpg  
  inflating: test_images/test_3706.jpg  
  inflating: test_images/test_3707.jpg  
  inflating: test_images/test_3708.jpg  
  inflating: test_images/test_3709.jpg  
  inflating: test_images/test_371.jpg  
  inflating: test_images/test_3710.jpg  
  inflating: test_images/test_3711.jpg  
  inflating: test_images/test_3712.jpg  
  inflating: test_images/test_3713.jpg  
  inflating: test_images/test_3714.jpg  
  inflating: test_images/test_3715.jpg  
  inflating: test_images/test_3716.jpg  
  inflating: test_images/test_3717.jpg  
  inflating: test_images/test_3718.jpg  
  inflating: te

In [5]:
# ディレクトリ内のデータの確認
%ls

label_master.tsv  sample_submit.tsv  test_images.zip   train_master.tsv
[0m[01;34msample_data[0m/      [01;34mtest_images[0m/       train_images.zip


# 事前準備

In [43]:
# ライブラリのインポート
import os
import random
from glob import glob
from warnings import filterwarnings

import argparse

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

import cv2
from PIL import Image

import torch
from torchvision import datasets, transforms, models
from torch.utils.data import Dataset, DataLoader
from torch import nn, optim
from torchsummary import summary

from collections import defaultdict

filterwarnings('ignore')  # warningをオフにする

# ランダムシードの設定
torch.manual_seed(1234)
torch.cuda.manual_seed(1234)
np.random.seed(1234)
random.seed(1234)

In [44]:
# 初期設定
from google.colab import drive
drive.mount('/content/gdrive') # マウント先

ATTACH_PATH = '/content/gdrive/MyDrive/CIFAR10-Pytorch'  # 保存先のベースディレクトリ  
SAVE_MODEL_PATH = f'{ATTACH_PATH}/model/'  # モデルの保存先

# 提出データの保存先
SUBMIT_PATH = f'{ATTACH_PATH}/submit/'

# 提出データの名前
submit_name = 'sample_submission.tsv'

# 画像データのディレクトリ
img_dir = 'test_images/'

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [45]:
# デバイスの設定
device = 'cuda' if torch.cuda.is_available() else 'cpu'
kwargs = {'num_workers': 1, 'pin_memory': True} if torch.cuda.is_available() else {} # GPUが使える場合、DataLoaderでのミニバッチの取り出しを高速化
print(f'デバイス：{device}')

デバイス：cuda


In [46]:
# ハイパーパラメータの設定
parser = argparse.ArgumentParser()
parser.add_argument('--image_size', type=int, default=96)
parser.add_argument('--num_classes', type=int, default=10)
opt = parser.parse_args(args=[])
print(opt)

Namespace(image_size=96, num_classes=10)


# 学習済みパラメータの読み込み

In [47]:
# モデルのパス
model_path = f'{SAVE_MODEL_PATH}mobilenet_v3_large_finetuning.pth'

In [48]:
model = models.mobilenet_v3_large()
fc_in_features = model.classifier[0].out_features # 最終レイヤー関数の次元数
model.classifier[3] = nn.Linear(fc_in_features, opt.num_classes) # 最終レイヤー関数の付け替え
print(model)

MobileNetV3(
  (features): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        )
      )
    )
    (2): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False

In [49]:
model.load_state_dict(torch.load(model_path)) # 学習済みモデルの読み込み

<All keys matched successfully>

In [50]:
# モデルをGPUに送る
model.to(device)

MobileNetV3(
  (features): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): Hardswish()
    )
    (1): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=16, bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
          (2): ReLU(inplace=True)
        )
        (1): ConvNormActivation(
          (0): Conv2d(16, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(16, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
        )
      )
    )
    (2): InvertedResidual(
      (block): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1), bias=False

# Dataset, Dataloader

In [51]:
# データの読み込み
test = pd.read_csv('sample_submit.tsv', header=None, sep='\t')
print(test.head())
print('データ数: ', test.shape[0])

            0  1
0  test_0.jpg  3
1  test_1.jpg  8
2  test_2.jpg  3
3  test_3.jpg  3
4  test_4.jpg  2
データ数:  8000


In [52]:
# 画像データの名前リストの抽出
x_test = test[0].values
dummy = test[0].values
print(x_test, f'データ数：{len(x_test)}')

['test_0.jpg' 'test_1.jpg' 'test_2.jpg' ... 'test_7997.jpg'
 'test_7998.jpg' 'test_7999.jpg'] データ数：8000


In [53]:
# transformの設定
transform = {
    'train': transforms.Compose([
        transforms.Resize(opt.image_size),
        transforms.RandomHorizontalFlip(p=0.2),
        transforms.RandomRotation(degrees=20),
        transforms.RandomAffine(degrees=[-10, 10], translate=(0.1, 0.1), scale=(0.5, 1.5)),
        transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]),
    'val': transforms.Compose([
        transforms.Resize(opt.image_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]),
}

# Dataset

In [54]:
# Datasetの設定
class CifarDataset(Dataset):
    def __init__(self, image_name_list, label_list, img_dir, transform=None, phase=None):
        self.image_name_list = image_name_list # 画像ファイル名
        self.label_list = label_list # ラベル
        self.img_dir = img_dir # 画像データのディレクトリ
        self.phase = phase # 変数phaseで学習(train)もしくは検証(val)の設定を行う
        self.transform = transform

    def __len__(self):
        return len(self.image_name_list) # 1エポックあたりに読み込むデータ数として、入力データの数を指定

    def __getitem__(self, index):
        
        # index番目の画像を読み込み、前処理を行う
        image_path = os.path.join(self.img_dir, self.image_name_list[index]) # train_master.iloc[index, 0]はファイル名を抽出
        img = Image.open(image_path)
        img = self.transform[self.phase](img)
        
        # index番目のラベルを取得する
        label = self.label_list[index]
        
        return img, label

In [55]:
# Datasetのインスタンス作成
test_dataset = CifarDataset(x_test, dummy, img_dir, transform=transform, phase='val')

# DataLoader

In [56]:
test_dataloader = DataLoader(test_dataset, batch_size=1, shuffle=False)

# テストデータの予測

In [57]:
# 予測データフレームの作成
preds = []

# dataloaderから、ミニバッチ単位でデータを読み込む
for images, _ in test_dataloader:
    
    # 入力データをdeviceへ
    images = images.to(device)
    
    # 学習済みモデルを推論モードに設定
    model.eval()
    
    # モデルによる変換
    outputs = model(images)
    pred = torch.argmax(outputs, dim=1)
    pred = pred.to('cpu').numpy()

    # 予測値をリストに追加
    preds.extend(pred)

# 提出

In [58]:
# 提出用データの読み込み
sub = pd.read_csv('sample_submit.tsv', header=None, sep='\t')
print(sub.head())
print('データ数: ', sub.shape[0])

            0  1
0  test_0.jpg  3
1  test_1.jpg  8
2  test_2.jpg  3
3  test_3.jpg  3
4  test_4.jpg  2
データ数:  8000


In [59]:
# 目的変数カラムの置き換え
sub[1] = preds
print(sub.head())

            0  1
0  test_0.jpg  6
1  test_1.jpg  1
2  test_2.jpg  5
3  test_3.jpg  9
4  test_4.jpg  3


In [60]:
# ファイルのエクスポート
sub.to_csv(SUBMIT_PATH+submit_name, sep='\t', header=None, index=None)