# 環境確認

In [1]:
import sys
import torch

print("Pythonのバージョン：",sys.version)
print("PyTorchのバージョン：", torch.__version__)
print("使用しているGPUの確認")
!nvidia-smi

Pythonのバージョン： 3.7.13 (default, Mar 16 2022, 17:37:17) 
[GCC 7.5.0]
PyTorchのバージョン： 1.10.0+cu111
使用しているGPUの確認
Fri Apr  1 12:25:56 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   32C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                            

# データのダウンロード

In [2]:
!pip install kaggle



In [3]:
from google.colab import files

uploaded = files.upload()

for fn in uploaded.keys():
  print('User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(uploaded[fn])))
  
# Then move kaggle.json into the folder where the API expects to find it.
!mkdir -p ~/.kaggle/ && mv kaggle.json ~/.kaggle/ && chmod 600 ~/.kaggle/kaggle.json

Saving kaggle.json to kaggle.json
User uploaded file "kaggle.json" with length 68 bytes


In [4]:
# Download the "digit-recognizer" competition archive with the Kaggle CLI.
!kaggle competitions download -c digit-recognizer

digit-recognizer.zip: Skipping, found more recently modified local copy (use --force to force download)


In [5]:
# List the contents of the current working directory.
!ls

digit-recognizer.zip  sample_data	     test.csv
gdrive		      sample_submission.csv  train.csv


In [6]:
!unzip digit-recognizer.zip

Archive:  digit-recognizer.zip
replace sample_submission.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
  inflating: sample_submission.csv   
  inflating: test.csv                
  inflating: train.csv               


# 事前準備

In [7]:
# ライブラリのインポート
import os
import random
from glob import glob
from warnings import filterwarnings

import argparse

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

import cv2
from PIL import Image

import torch
from torchvision import datasets, transforms, models
from torch.utils.data import Dataset, DataLoader
from torch import nn, optim

from collections import defaultdict

# Silence all warnings for the rest of the notebook
filterwarnings('ignore')

# Seed every random number generator used here with the same value
SEED = 1234
for seed_fn in (torch.manual_seed, torch.cuda.manual_seed, np.random.seed, random.seed):
    seed_fn(SEED)

In [21]:
# Initial setup: mount Google Drive into the Colab filesystem
from google.colab import drive
drive.mount('/content/gdrive') # mount point

# Base directory (under the Drive mount point) for everything this notebook reads or saves
ATTACH_PATH = '/content/gdrive/MyDrive/mnist_pytorch'

# Submission file: its name and the directory it is written to
submit_name = 'sample_submission.csv'
SUBMIT_PATH = ATTACH_PATH + '/submit/'

# Directory holding saved model files
SAVE_MODEL_PATH = ATTACH_PATH + '/model/'

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [9]:
# Device selection: use the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
    # Extra DataLoader keyword arguments intended to speed up mini-batch loading on GPU
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    device = 'cpu'
    kwargs = {}
print(f'デバイス：{device}')

デバイス：cuda


# 学習済みパラメータの読み込み

In [10]:
# Full path of the saved model file inside the model directory
model_filename = 'mobilenet_v3_large_loss_0.0252.pth'
model_path = SAVE_MODEL_PATH + model_filename

In [11]:
# Load the trained model object from disk.
# map_location remaps the stored tensors onto the device selected above, so a
# model saved on a GPU can still be loaded when this notebook falls back to CPU.
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code;
# only load files from a trusted source.
# NOTE(review): PyTorch >= 2.6 defaults to weights_only=True, which rejects fully
# pickled models; this notebook ran on 1.10, where that argument does not exist.
model = torch.load(model_path, map_location=device)

# Dataset, Dataloader

In [12]:
# Read the test set, then print a preview of the first rows and the row count
test = pd.read_csv('test.csv')
test_preview = test.head()
print(test_preview)
print('データ数: ', len(test))

   pixel0  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  \
0       0       0       0       0       0       0       0       0       0   
1       0       0       0       0       0       0       0       0       0   
2       0       0       0       0       0       0       0       0       0   
3       0       0       0       0       0       0       0       0       0   
4       0       0       0       0       0       0       0       0       0   

   pixel9  ...  pixel774  pixel775  pixel776  pixel777  pixel778  pixel779  \
0       0  ...         0         0         0         0         0         0   
1       0  ...         0         0         0         0         0         0   
2       0  ...         0         0         0         0         0         0   
3       0  ...         0         0         0         0         0         0   
4       0  ...         0         0         0         0         0         0   

   pixel780  pixel781  pixel782  pixel783  
0         0         0   

In [13]:
# Preprocessing pipelines, keyed by phase name
transform = dict(
    # Training: random affine augmentation, then tensor conversion and normalization
    train=transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomAffine(degrees=20, translate=(0.1, 0.1), scale=(0.9, 1.1)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]),
    # Validation / inference: the same conversion and normalization, no augmentation
    val=transforms.Compose([
        transforms.ToPILImage(),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]),
)

In [14]:
# Dataset
class MNISTDataset(Dataset):
    """Serve 28x28 uint8 images stored as flattened rows of a DataFrame.

    Args:
        df: DataFrame whose every row is reshaped to (28, 28); each row must
            therefore hold exactly 784 values.
        transform: Preprocessing to apply to each image. Either ``None`` (the
            raw uint8 array is returned), a single callable, or a mapping from
            phase name to callable.
        phase: Key used to pick the callable when ``transform`` is a mapping.
    """

    def __init__(self, df, transform=None, phase=None):
        self.df = df
        self.transform = transform
        self.phase = phase

    def __len__(self):
        """Return the number of rows (images) in the frame."""
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at position ``index``.

        ``label`` is the value of the row's first column. This class uses every
        column as pixel data, so that value is simply the first pixel and the
        caller is expected to ignore it.

        Raises:
            KeyError: if ``transform`` is a mapping without an entry for ``phase``.
        """
        # Load the index-th row and turn it into a 28x28 image
        row = self.df.iloc[index]
        image = row.to_numpy().reshape((28, 28)).astype(np.uint8)

        # Apply preprocessing; a missing transform is allowed (matches the default)
        if self.transform is not None:
            if callable(self.transform):
                image = self.transform(image)
            else:
                image = self.transform[self.phase](image)

        # Explicit positional access: an integer key on a string-labelled Series
        # relies on a positional fallback that newer pandas versions deprecate.
        label = row.iloc[0]

        return image, label

In [15]:
# Build the dataset over the test frame, using the 'val' preprocessing pipeline
test_dataset = MNISTDataset(
    df=test,
    transform=transform,
    phase='val',
)

In [16]:
# Dataloader
# Feed real mini-batches instead of one image per forward pass, and pass the
# device-dependent loader options (num_workers / pin_memory) prepared in `kwargs`.
# shuffle=False keeps predictions in the same order as the rows of the test set.
test_dataloader = DataLoader(test_dataset, batch_size=256, shuffle=False, **kwargs)

# テストデータの予測

In [17]:
# Predicted class index for every test image, in dataloader order
preds = []

# Put the trained model into inference mode once, before the loop
model.eval()

# No gradients are needed for prediction; skipping autograd saves memory and time
with torch.no_grad():
    # Read the data from the dataloader one mini-batch at a time
    for images, _ in test_dataloader:

        # Move the input batch to the selected device
        images = images.to(device)

        # Forward pass, then take the highest-scoring class per sample
        outputs = model(images)
        pred = torch.argmax(outputs, dim=1)
        pred = pred.to('cpu').numpy()

        # Append this batch's predictions to the list
        preds.extend(pred)

# 提出

In [18]:
# Read the submission template, then print a preview of the first rows and the row count
sub = pd.read_csv('sample_submission.csv')
sub_preview = sub.head()
print(sub_preview)
print('データ数: ', len(sub))

   ImageId  Label
0        1      0
1        2      0
2        3      0
3        4      0
4        5      0
データ数:  28000


In [23]:
# Replace the target column with the predictions
# (pandas raises if the number of predictions does not match the number of rows)
sub['Label'] = preds

# Export the file; create the output directory first so the write does not
# fail when the submit folder does not exist yet
os.makedirs(SUBMIT_PATH, exist_ok=True)
sub.to_csv(os.path.join(SUBMIT_PATH, submit_name), sep=',', index=False)