## Document Classification with InceptionV3

### 0 Import modules 

In [9]:
import os
import numpy as np
import torchvision
from torchvision.transforms import ToTensor
from torch.utils.data.sampler import SubsetRandomSampler
import pathlib
import yaml
from yaml.loader import SafeLoader
from helpers import get_file_paths_and_labels, initialize_model, CustomImageDataset, train_inception, test_accuracy, main
from torchvision import datasets, models, transforms

### 0.1 (Optional) Mount google drive

In [10]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

In [11]:
### 1 Parse config parameters 

In [12]:
with open('config.yaml') as f:
    config = yaml.load(f, Loader=SafeLoader)
    print(config)

{'IMAGE_DIR': 'C:\\Users\\Johannes\\Documents\\Tobacco3482-jpg\\', 'NUM_CLASSES': 10, 'NUM_EPOCHS': 15, 'VAL_SPLIT_REL': 0.1, 'TEST_SPLIT_REL': 0.1, 'NUM_SAMPLES': 15, 'INPUT_SIZE': 299, 'HYPERPARAMETER_GRID': {'lr': 'tune.loguniform(1e-4, 1e-1),', 'batch_size': 'tune.choice([4, 8, 16]),', 'mom': 'tune.choice([0.1, 0.2, 0.3, 0.4, 0.5, 0.6]),', 'weight_decay': 'tune.loguniform(0.001, 0.1),', 'num_epochs': 15}}


In [13]:
image_dir = config["IMAGE_DIR"]
num_classes = config["NUM_CLASSES"]
num_epochs = config["NUM_EPOCHS"]
num_samples = config["NUM_SAMPLES"]
input_size = config["INPUT_SIZE"]
hyperparameter_grid = config["HYPERPARAMETER_GRID"]


### 2 Create Custom Dataset

In [14]:
data_dir = pathlib.Path(image_dir)
print(data_dir)

print(os.listdir(image_dir))

C:\Users\Johannes\Documents\Tobacco3482-jpg
['ADVE', 'Email', 'Form', 'Letter', 'Memo', 'News', 'Note', 'Report', 'Resume', 'Scientific']


In [15]:
labels_df, label_to_index = get_file_paths_and_labels(data_dir)

In [16]:
transforms_train = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

transforms_test = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

In [18]:
train_dataset = CustomImageDataset(transforms_train, labels_df, image_dir)
test_dataset = CustomImageDataset(transforms_test, labels_df, image_dir)
val_split_rel = 0.1
test_split_rel = 0.1

In [19]:
dataset_size = labels_df.shape[0]
indices = list(range(dataset_size))
val_split = int(np.floor(val_split_rel * dataset_size))
test_split = val_split * 2
train_indices = indices[test_split:]
val_indices = indices[:val_split]
test_indices = indices[val_split:test_split]

# Creating PT data samplers and loaders:
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)
test_sampler = SubsetRandomSampler(test_indices)

### 3 Initialize Model 

In [21]:
net, input_size = initialize_model(num_classes=num_classes)

TypeError: cannot unpack non-iterable Inception3 object

### 4 Run training and Tuning 

In [None]:
main(hyperparameter_grid, num_samples=num_samples, max_num_epochs=num_epochs)