This file does one of two things: training a CNN per country, or training a CNN on a subset of countries to predict another. Ideally, we would do a 5-fold cross-validation and train the CNN per fold. But for 2 countries, 5 folds, and 2 metrics this leads to 20 CNN training runs. Furthermore, there are different types of cross-validation (randomized or spatial with regard to clusters), which doubles that to 40 CNN training runs. And this doesn't even count training the CNN again for cross-country generalization tests. The approach taken by the paper and in this file reduces the runs to (2 single-country runs using a 70/30 train/valid split + 2 runs holding one country out) * 2 metrics = 8 CNN runs. This is far more manageable and less prone to error.
<br> <br>
Written by Jatin Mathur
<br>
5/2020

In [1]:
import pandas as pd
import numpy as np
import os
from tqdm.notebook import tqdm
BASE_DIR = '..'
import sys
sys.path.append(BASE_DIR)
from utils import merge_on_lat_lon
from config import TRAINING_CONFIG, RANDOM_SEED

In [2]:
# raw per-country downloads live under data/countries/<country>/
COUNTRIES_DIR = os.path.join(BASE_DIR, 'data', 'countries')
# derived csv files (download list, per-experiment image tables) live here
PROCESSED_DIR = os.path.join(BASE_DIR, 'data', 'processed')
# can try using the google downloader, in which case change this to be your google api token
# NOTE: despite the _DIR suffix this is the path of a text file, not a directory
ACCESS_TOKEN_DIR = os.path.join(BASE_DIR, 'planet_api_key.txt')

# which experiment to run; the allowed values are asserted in the next cell
TYPE = TRAINING_CONFIG['TYPE']
COUNTRY = TRAINING_CONFIG['COUNTRY']
METRIC = TRAINING_CONFIG['METRIC']

# every (TYPE, COUNTRY, METRIC) combination gets its own image folder and model folder
CNN_TRAIN_IMAGE_DIR = os.path.join(BASE_DIR, 'data', 'cnn_images', TYPE, COUNTRY, METRIC)
CNN_SAVE_DIR = os.path.join(BASE_DIR, 'models', TYPE, COUNTRY, METRIC)

# groups to cut distribution into (also the number of output classes of the CNN)
NUMBER_OF_BINS = 4 

# reduce if memory errors on CUDA
BATCH_SIZE = 8

# Number of epochs to train for
# from epoch index 5 onwards, the model will update the entire network (not just the newly initialized layer)
TOTAL_EPOCHS = 30
# if script notices existing models at earlier epochs, it will load that and set this variable
CURRENT_EPOCH = 0

In [3]:
# fail fast on an unsupported configuration before any folder or file is created
assert TYPE in ('single_country', 'country_held_out')
assert COUNTRY in ('malawi_2016', 'ethiopia_2015')
assert METRIC in ('house_has_cellphone', 'est_monthly_phone_cost_pc')

In [4]:
# create the output folders up front; exist_ok=True makes re-running this cell harmless
os.makedirs(CNN_TRAIN_IMAGE_DIR, exist_ok=True)
os.makedirs(CNN_SAVE_DIR, exist_ok=True)
# folder that receives the per-metric csv written by the preprocessing functions
os.makedirs(os.path.join(PROCESSED_DIR, TYPE, COUNTRY), exist_ok=True)

# Preprocess

In [5]:
# one row per image we attempted to download
DF_DOWNLOAD = pd.read_csv(os.path.join(PROCESSED_DIR, 'image_download_locs.csv'))
# file names that are actually present on disk, for both countries
downloaded = os.listdir(os.path.join(COUNTRIES_DIR, 'malawi_2016', 'cnn_images')) + \
            os.listdir(os.path.join(COUNTRIES_DIR, 'ethiopia_2015', 'cnn_images'))

print("expected:", len(DF_DOWNLOAD), "actually downloaded:", len(downloaded))

# it's not that bad if some don't download, we just drop them from consideration
# isin() keeps exactly the rows whose image exists on disk; unlike
# set_index('image_name').drop(downloaded) it does not raise a KeyError when the
# image folders contain a file that is not listed in the csv (e.g. .DS_Store)
DF_DOWNLOAD = DF_DOWNLOAD[DF_DOWNLOAD['image_name'].isin(downloaded)].reset_index(drop=True)

expected: 26200 actually downloaded: 26177


In [8]:
def assign_bin(cutoffs):
    '''
    Returns a function that assigns a value x to a bin based on 
    the cutoffs given to the "parent" function

    cutoffs: ascending sequence of bin edges; bin i covers the half-open
        interval [cutoffs[i], cutoffs[i + 1])

    The returned function accepts a scalar (returns a python int) or an array
    of values (returns an integer array of the same shape). It raises a
    ValueError if any value, including NaN, lies outside the cutoffs.
    '''
    edges = np.asarray(cutoffs, dtype=float)

    def binning_function(x):
        values = np.asarray(x, dtype=float)
        # side='right' makes the lower edge inclusive and the upper edge exclusive,
        # i.e. the returned index i satisfies edges[i] <= value < edges[i + 1]
        idx = np.searchsorted(edges, values, side='right') - 1
        # NaN sorts past the last edge, so it is reported as outside as well
        outside = (idx < 0) | (idx >= len(edges) - 1)
        if np.any(outside):
            raise ValueError(f'Given value {x} is outside the cutoffs')
        if values.ndim == 0:
            return int(idx)
        return idx
    return binning_function

def create_bin_eth_house_has_cellphone(df, metric):
    '''
    This is a hacky fix for the odd case that doesn't work:
    when the country is ethiopia and metric is house_has_cellphone
    
    pd.qcut will not work because 28% of the data is all 1's, meaning no range that
    includes '1' can have only 25% of the data.
    I choose hand-determined cutoffs that work for this country and this metric
    
    In general pd.qcut will fail whenever a single value dominates the distribution. This may lead to
    more preprocessing steps for this method to generalize to other countries in a consistent way

    Returns (bins, hand_cutoffs): an int64 array with one bin index per row of df,
    and the array of bin edges that was used
    '''
    hand_cutoffs = np.array([0, 0.3, 0.6, 0.9, 1.01])
    binning_function = assign_bin(hand_cutoffs)
    # bin every value on its own: np.apply_along_axis on a 1-D array hands the
    # WHOLE column to the function in a single call, which collapsed the result
    # to one bin for every row
    bins = np.array([binning_function(value) for value in df[metric].values], dtype=np.int64)
    return bins, hand_cutoffs

def create_bin(df, metric):
    '''
    df: dataframe with column metric
    
    Uses a quantile cut to bin the metric of interest into NUMBER_OF_BINS equally-represented
    categories (hand-picked cutoffs are used for the ethiopia / house_has_cellphone special case)
    Also identifies the images that are near the lower and upper cutoffs of their bin
    
    Adds columns 'bin', 'near_upper', and 'near_lower' to df in place; returns None
    '''
    # nothing below draws random numbers; the seed call is kept so that the global
    # RNG state after this function is the same as it has always been
    np.random.seed(RANDOM_SEED)
    frac_lower = 0.1 # lower 10% of a bin's range will count as being "near"
    frac_upper = 0.1 # upper 10% of a bin's range will count as being "near"
    if TYPE == 'single_country' and COUNTRY == 'ethiopia_2015' and metric == 'house_has_cellphone':
        # special case, function explains why
        df['bin'], bin_cutoffs = create_bin_eth_house_has_cellphone(df, metric)
    else: 
        bins, bin_cutoffs = pd.qcut(df[metric], NUMBER_OF_BINS, retbins=True)
        df['bin'] = bins.cat.codes
    df['bin'] = df['bin'].astype(np.int64)
    df['near_lower'] = False
    df['near_upper'] = False
    num_bins = len(bin_cutoffs) - 1

    # every bin except the lowest one has a neighbour below it
    for i in range(1, num_bins):
        # we take the minimum of the current bin and the bin 
        # we want to join to as the effective span
        # this prevents a bin with very large span from dominating
        span = min(bin_cutoffs[i + 1] - bin_cutoffs[i], bin_cutoffs[i] - bin_cutoffs[i - 1])
        lower_c = bin_cutoffs[i] + frac_lower * span
        # single .loc call on df itself: the chained df[col].loc[mask] = ... form
        # writes to a temporary object and is not guaranteed to update df
        df.loc[(df['bin'] == i) & (df[metric] < lower_c), 'near_lower'] = True

    # every bin except the highest one has a neighbour above it
    for i in range(0, num_bins - 1):
        span = min(bin_cutoffs[i + 1] - bin_cutoffs[i], bin_cutoffs[i + 2] - bin_cutoffs[i + 1])
        upper_c = bin_cutoffs[i + 1] - frac_upper * span
        df.loc[(df['bin'] == i) & (df[metric] > upper_c), 'near_upper'] = True
    

def preprocess_single_country(frac=0.7):
    '''
    uses DF_DOWNLOAD restricted to COUNTRY and splits that country's clusters
    into train and validation sets
    frac represents the fraction of clusters to use for training
    
    saves the images (symlinked) to CNN_TRAIN_IMAGE_DIR/train and CNN_TRAIN_IMAGE_DIR/valid
    saves the dataframe to data/processed/TYPE/COUNTRY/METRIC.csv

    If that csv already exists it is loaded and returned without redoing any work.
    Raises FileExistsError if the train/valid folders exist but the csv does not
    (e.g. after an interrupted run); remove the folders before retrying.
    '''
    savedir = os.path.join(PROCESSED_DIR, TYPE, COUNTRY)
    os.makedirs(savedir, exist_ok=True)
    savepath = os.path.join(savedir, f'{METRIC}.csv')
    if os.path.exists(savepath):
        print("already processed this country")
        df_images = pd.read_csv(savepath)
        return df_images
    np.random.seed(RANDOM_SEED)
    df_images = DF_DOWNLOAD[DF_DOWNLOAD['country'] == COUNTRY].copy()
    # split by cluster, not by image, so all images of a cluster land on the same side
    unique_clusters = df_images[['cluster_lat', 'cluster_lon']].drop_duplicates()
    shuffled_clusters = unique_clusters.sample(frac=1)
    num_train = int(frac * len(shuffled_clusters))
    # assign() builds a new frame, so the flag is not written into a slice
    train_clusters = shuffled_clusters[:num_train].assign(is_train=True)
    df_images = merge_on_lat_lon(df_images, train_clusters, how='left')
    # if not marked as true, will be NA (aka a validation cluster)
    # reassign instead of a chained inplace fillna, and force a real bool column
    # so that ~df['is_train'] is a logical negation
    df_images['is_train'] = df_images['is_train'].fillna(False).astype(bool)
    create_bin(df_images, METRIC)
    
    # exist_ok=False on purpose: refuse to mix new links into a stale folder
    os.makedirs(os.path.join(CNN_TRAIN_IMAGE_DIR, 'train'), exist_ok=False)
    os.makedirs(os.path.join(CNN_TRAIN_IMAGE_DIR, 'valid'), exist_ok=False)

    symlink_images(df_images)
    
    # save to disk
    df_images.to_csv(savepath, index=False)
    return df_images

def preprocess_country_held_out():
    '''
    uses DF_DOWNLOAD and holds given country out: every image of COUNTRY becomes
    validation data, every other image becomes training data

    saves the images (symlinked) to CNN_TRAIN_IMAGE_DIR/train and CNN_TRAIN_IMAGE_DIR/valid
    saves the dataframe to data/processed/TYPE/COUNTRY/METRIC.csv

    If that csv already exists it is loaded and returned without redoing any work
    (same behaviour as preprocess_single_country). Without this check a second run
    always stopped at the exist_ok=False folder creation below.
    '''
    savedir = os.path.join(PROCESSED_DIR, TYPE, COUNTRY)
    os.makedirs(savedir, exist_ok=True)
    savepath = os.path.join(savedir, f'{METRIC}.csv')
    if os.path.exists(savepath):
        print("already processed this country held out")
        df_images = pd.read_csv(savepath)
        return df_images
    df_images = DF_DOWNLOAD.copy()
    # rows of the held-out country are validation data, everything else is training data
    df_images['is_train'] = df_images['country'] != COUNTRY
    create_bin(df_images, METRIC)
    
    # exist_ok=False on purpose: refuse to mix new links into a stale folder
    os.makedirs(os.path.join(CNN_TRAIN_IMAGE_DIR, 'train'), exist_ok=False)
    os.makedirs(os.path.join(CNN_TRAIN_IMAGE_DIR, 'valid'), exist_ok=False)
    
    symlink_images(df_images)
    
    # save to disk
    df_images.to_csv(savepath, index=False)
    return df_images
    
def symlink_images(df_images):
    '''
    df_images: dataframe with 'image_name', 'country', 'is_train' columns
    
    This function will symlink (a small file that merely points to another file)
    the images into "train" and "valid" folders in CNN_TRAIN_IMAGE_DIR
    Symlinking prevents us from having to copy the images, which saves disk space and time. From a user's
    perspective, opening the symlinked file opens the actual file elsewhere. This means
    our CNN training can operate on a directory of symlinked images without any problem/knowledge of
    symlinks because this function is supported natively by the filesystem. 
    In this case, the original file is in the original download directory at COUNTRIES_DIR/<country>/cnn_images.
    THAT DIRECTORY CANNOT BE MOVED OR MODIFIED OR SCRIPTS WILL BREAK

    Raises ValueError if a link cannot be created (for example because it already exists).
    '''
    splits = (
        ('train', df_images[df_images['is_train']]),
        ('valid', df_images[~df_images['is_train']]),
    )

    # uses symlinking to save disk space
    for split_name, split_df in splits:
        print(f'symlinking {split_name} images')
        for im_name, country in tqdm(zip(split_df['image_name'], split_df['country']), total=len(split_df)):
            # absolute source so the link stays valid whatever the working directory is
            src = os.path.abspath(os.path.join(COUNTRIES_DIR, country, 'cnn_images', im_name))
            dest = os.path.join(CNN_TRAIN_IMAGE_DIR, split_name, im_name)
            try:
                # os.symlink avoids spawning one shell per image and is not confused
                # by spaces or shell metacharacters in the paths
                os.symlink(src, dest)
            except OSError as err:
                print("error creating symlink")
                # same exception type as before, now with the failing paths and cause
                raise ValueError(f'could not symlink {src} -> {dest}') from err
    return

In [9]:
# build (or load from the cached csv) the image table for the configured experiment
df_images = None
df_images = (
    preprocess_single_country(frac=0.7)
    if TYPE == 'single_country'
    else preprocess_country_held_out()
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


symlinking train images


HBox(children=(FloatProgress(value=0.0, max=15600.0), HTML(value='')))


symlinking valid images


HBox(children=(FloatProgress(value=0.0, max=10577.0), HTML(value='')))




In [10]:
# quick look at the first rows to sanity-check the is_train / bin / near_lower / near_upper columns
df_images.head()

Unnamed: 0,image_name,image_lat,image_lon,cluster_lat,cluster_lon,house_has_cellphone,est_monthly_phone_cost_pc,country,nightlights,is_train,bin,near_lower,near_upper
0,-17.140065764205975_35.17229723579403_-17.0951...,-17.140066,35.172297,-17.09515,35.217213,0.5,0.819316,malawi_2016,0.025206,True,2,False,False
1,-17.11012192140199_35.17229723579403_-17.09515...,-17.110122,35.172297,-17.09515,35.217213,0.5,0.819316,malawi_2016,0.025206,True,2,False,False
2,-17.08017807859801_35.17229723579403_-17.09515...,-17.080178,35.172297,-17.09515,35.217213,0.5,0.819316,malawi_2016,0.025206,True,2,False,False
3,-17.050234235794026_35.17229723579403_-17.0951...,-17.050234,35.172297,-17.09515,35.217213,0.5,0.819316,malawi_2016,0.025206,True,2,False,False
4,-17.140065764205975_35.20224107859801_-17.0951...,-17.140066,35.202241,-17.09515,35.217213,0.5,0.819316,malawi_2016,0.025206,True,2,False,False


# Train CNN

In [11]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
from PIL import Image
import time
import os
import copy

In [12]:
# use the first CUDA device when one is available, otherwise fall back to the CPU
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
DEVICE

device(type='cuda', index=0)

In [13]:
def initialize_model():
    '''
    Builds the network to train, resuming from the newest checkpoint if there is one.

    Looks in CNN_SAVE_DIR for files named trained_model_<METRIC>_epoch_<N>.pt (the
    name used for the periodic checkpoints written during training); the older
    trained_model_<N>.pt naming is accepted as well. Any other file, such as the
    final trained_model_<METRIC>.pt, is ignored.

    - checkpoint found: loads the one with the largest N and sets the global
      CURRENT_EPOCH to N + 1
    - otherwise: creates an ImageNet-pretrained vgg11_bn, freezes all of its
      parameters and replaces the last classifier layer with a new (trainable)
      linear layer that has NUMBER_OF_BINS outputs

    Returns (model, input_size); the model is on DEVICE
    '''
    import re  # local import: only needed to parse checkpoint file names

    global CURRENT_EPOCH
    input_size = 224 # hardcoded for VGG, our network

    checkpoint_re = re.compile(r'^trained_model_(?:' + re.escape(METRIC) + r'_epoch_)?(\d+)\.pt$')
    latest_epoch, latest_file = -1, None
    for f in os.listdir(CNN_SAVE_DIR):
        match = checkpoint_re.match(f)
        if match is None:
            continue
        epoch = int(match.group(1))
        if epoch > latest_epoch:
            latest_epoch, latest_file = epoch, f

    if latest_file is not None:
        CURRENT_EPOCH = latest_epoch + 1
        path = os.path.join(CNN_SAVE_DIR, latest_file)
        # NOTE: this unpickles a whole model object, so only load checkpoints you
        # created yourself. NOTE(review): recent PyTorch releases default
        # torch.load to weights_only=True, which rejects such files; pass
        # weights_only=False there if loading fails.
        model = torch.load(path, map_location=DEVICE)
        print(f'using existing model at epoch {latest_epoch}')
    else:
        torch.manual_seed(RANDOM_SEED)
        model = models.vgg11_bn(pretrained=True)
        # turn off training for all existing parameters (for now)
        for param in model.parameters():
            param.requires_grad = False
        # the replacement layer is created after the freeze, so it stays trainable
        num_ftrs = model.classifier[6].in_features
        model.classifier[6] = nn.Linear(num_ftrs, NUMBER_OF_BINS)
        model = model.to(DEVICE)
    return model, input_size

# build a fresh model or resume from the newest checkpoint (this may update CURRENT_EPOCH)
model, input_size = initialize_model()
# all parameters are handed to Adam, including the ones that are frozen at this point
optimizer = optim.Adam(model.parameters(), lr=3e-6)

In [14]:
# we will query this to figure out the correct label
# (indexed by image file name; the dataset reads 'bin' from it and the loss reads
# 'near_upper' / 'near_lower')
DF_LOOKUP = df_images.set_index('image_name')

In [15]:
# per-phase preprocessing: random crop + horizontal flip as augmentation for training,
# a deterministic resize + center crop for validation
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # per-channel mean / std; presumably the ImageNet statistics that torchvision's
        # pretrained models expect -- confirm against the torchvision model docs
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'valid': transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        # same normalization as the training branch
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}
print("Initializing Datasets and Dataloaders...")

class ForwardPassDataset(torch.utils.data.Dataset):
    '''
    Dataset over all files in image_dir.

    image_dir: folder containing the (symlinked) images
    transformer: callable applied to the PIL image; its result is returned as X

    Each item is (X, y, image_name), where y is the 'bin' value looked up in the
    global DF_LOOKUP under the image's file name.
    '''
    def __init__(self, image_dir, transformer):
        self.image_dir = image_dir
        # sorted so that index -> image does not depend on the order in which
        # the filesystem happens to list the folder
        self.image_list = sorted(os.listdir(self.image_dir))
        self.transformer = transformer

    def __len__(self):
        return len(self.image_list)

    def __getitem__(self, index):
        image_name = self.image_list[index]

        # Load image
        X = self.filename_to_im_tensor(os.path.join(self.image_dir, image_name))
        y = DF_LOOKUP.loc[image_name]['bin']
        
        return X, y, image_name
    
    def filename_to_im_tensor(self, file):
        '''
        Loads the image at path file as 8-bit RGB and runs it through the transformer.
        '''
        # Decode with PIL directly. The previous (imread * 256).astype(np.uint8)
        # route wrapped fully saturated pixels around to 0 (1.0 * 256 = 256 does not
        # fit in a uint8), turning the brightest pixels black.
        # convert('RGB') also drops an alpha channel if there is one.
        with Image.open(file) as raw:
            im = raw.convert('RGB')
        return self.transformer(im)

# Create training and validation datasets
# (one ForwardPassDataset per sub-folder of CNN_TRAIN_IMAGE_DIR, each with its own transform)
image_datasets = {x: ForwardPassDataset(os.path.join(CNN_TRAIN_IMAGE_DIR, x), 
                                          data_transforms[x]) for x in ['train', 'valid']}
# Create training and validation dataloaders
# note that shuffle=True is applied to the validation loader as well
dataloaders_dict = {x: torch.utils.data.DataLoader(image_datasets[x], 
                                                   batch_size=BATCH_SIZE, 
                                                   shuffle=True, 
                                                   num_workers=4) for x in ['train', 'valid']}

Initializing Datasets and Dataloaders...


In [15]:
class CustomCriterion:
    '''
    Cross entropy loss that is softened for images lying close to a bin border:
    such an image is scored partly against its own bin and partly against the
    neighbouring bin it is close to. Whether an image is near a border is read
    from the 'near_upper' / 'near_lower' columns of the global DF_LOOKUP.
    '''
    def __init__(self, alpha=0.75):
        # alpha is the share of the loss that goes to the correct class
        # when the image is near_lower or near_upper
        self.criterion = nn.CrossEntropyLoss()
        self.alpha = alpha
    
    def __call__(self, outputs, labels, image_names):
        total = None
        for i, name in enumerate(image_names):
            row = DF_LOOKUP.loc[name]
            logits = outputs[i].reshape(1, -1)
            target = labels[i].reshape(1)

            # near_upper takes precedence when both flags are set
            if row['near_upper']:
                neighbour = target + 1   # shift towards the bin above
            elif row['near_lower']:
                neighbour = target - 1   # shift towards the bin below
            else:
                neighbour = None

            sample_loss = self.criterion(logits, target)
            if neighbour is not None:
                sample_loss = self.alpha * sample_loss + \
                        (1 - self.alpha) * self.criterion(logits, neighbour)

            total = sample_loss if total is None else total + sample_loss
        return total / len(image_names) # averaged

In [16]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs):
    '''
    Runs the train/validation loop and returns the model with the best validation accuracy.

    model: network to train (modified in place)
    dataloaders: dict with 'train' and 'valid' loaders yielding (inputs, labels, image_names)
    criterion: callable invoked as criterion(outputs, labels, image_names)
    optimizer: optimizer stepped once per training batch
    num_epochs: epochs run from the global CURRENT_EPOCH up to num_epochs - 1

    Side effects: increments the global CURRENT_EPOCH after every epoch and writes the
    whole model to CNN_SAVE_DIR after epochs 4, 9, 14, ...

    Returns (model, val_acc_history): the model with the best-validation weights loaded
    and the list of per-epoch validation accuracies (0-dim tensors)
    '''
    global CURRENT_EPOCH, DEVICE
    since = time.time()
    val_acc_history = []
    # weights to fall back on if no epoch improves on best_acc
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    
    # starts at CURRENT_EPOCH rather than 0 so that a resumed run continues where it stopped
    for epoch in range(CURRENT_EPOCH, num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        if epoch == 5:
            # fine tune whole model now
            # (only fires in the epoch with index 5; a run resumed later never executes it)
            for param in model.parameters():
                param.requires_grad = True

        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels, image_names in tqdm(dataloaders[phase]):
                inputs = inputs.to(DEVICE)
                labels = labels.to(DEVICE)

                # zero the parameter gradients
                optimizer.zero_grad()
                # track gradients in train phase only
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels, image_names)
                    # predicted class = index of the largest output
                    _, preds = torch.max(outputs, 1)
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                # the criterion returns a batch mean, so weight it by the batch size
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            # deep copy the model if it is better
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'valid':
                val_acc_history.append(epoch_acc)

        if epoch % 5 == 4:
            # save intermediate results in case script breaks
            # (saves the current weights, which are not necessarily the best ones so far)
            savepath = os.path.join(CNN_SAVE_DIR, f'trained_model_{METRIC}_epoch_{epoch}.pt')
            torch.save(model, savepath)
        
        # end one epoch
        CURRENT_EPOCH += 1
        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    
    # load best model
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [17]:
# border-aware cross entropy with its default alpha of 0.75
criterion = CustomCriterion()
# model comes back with the best-validation weights loaded; hist holds the per-epoch validation accuracy
model, hist = train_model(model, dataloaders_dict, criterion, optimizer, TOTAL_EPOCHS)

Epoch 0/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.3596 Acc: 0.3399


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.4781 Acc: 0.1613

Epoch 1/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.3366 Acc: 0.3868


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.5074 Acc: 0.1591

Epoch 2/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.3201 Acc: 0.3981


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.5030 Acc: 0.1538

Epoch 3/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.3163 Acc: 0.4018


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.4958 Acc: 0.1668

Epoch 4/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.3090 Acc: 0.4020


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.4937 Acc: 0.1779

Epoch 5/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.2795 Acc: 0.4204


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.4338 Acc: 0.2641

Epoch 6/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.2490 Acc: 0.4331


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.4575 Acc: 0.2653

Epoch 7/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.2316 Acc: 0.4426


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.4471 Acc: 0.2807

Epoch 8/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.2178 Acc: 0.4511


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.4557 Acc: 0.2865

Epoch 9/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.2048 Acc: 0.4582


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.4920 Acc: 0.2772

Epoch 10/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.1951 Acc: 0.4664


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.4663 Acc: 0.2825

Epoch 11/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.1861 Acc: 0.4683


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.5015 Acc: 0.2805

Epoch 12/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.1789 Acc: 0.4779


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.4889 Acc: 0.2890

Epoch 13/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.1696 Acc: 0.4787


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.5152 Acc: 0.2849

Epoch 14/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.1644 Acc: 0.4790


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.5367 Acc: 0.2680

Epoch 15/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.1627 Acc: 0.4803


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.5587 Acc: 0.2428

Epoch 16/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.1539 Acc: 0.4923


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.5827 Acc: 0.2575

Epoch 17/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.1411 Acc: 0.4995


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.5009 Acc: 0.2679

Epoch 18/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.1344 Acc: 0.5041


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.5763 Acc: 0.2592

Epoch 19/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.1242 Acc: 0.5081


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.6380 Acc: 0.2594

Epoch 20/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.1221 Acc: 0.5117


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.5875 Acc: 0.2548

Epoch 21/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.1162 Acc: 0.5078


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.5595 Acc: 0.2594

Epoch 22/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.1033 Acc: 0.5174


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.5607 Acc: 0.2625

Epoch 23/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.0938 Acc: 0.5208


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.7594 Acc: 0.2281

Epoch 24/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.0903 Acc: 0.5335


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.6065 Acc: 0.2544

Epoch 25/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.0880 Acc: 0.5286


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.5849 Acc: 0.2674

Epoch 26/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.0771 Acc: 0.5293


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.6453 Acc: 0.2535

Epoch 27/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.0690 Acc: 0.5364


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.7756 Acc: 0.2235

Epoch 28/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.0631 Acc: 0.5430


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.6906 Acc: 0.2461

Epoch 29/29
----------


HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


train Loss: 1.0575 Acc: 0.5441


HBox(children=(FloatProgress(value=0.0, max=1950.0), HTML(value='')))


valid Loss: 1.6950 Acc: 0.2607

Training complete in 219m 58s
Best val Acc: 0.288974


In [18]:
# final model goes next to the periodic checkpoints; an existing file is never overwritten
savepath = os.path.join(CNN_SAVE_DIR, f'trained_model_{METRIC}.pt')
if not os.path.isfile(savepath):
    print(f'Saving model to {savepath}')
    torch.save(model, savepath)
else:
    print('A model is already saved at this location')

Saving model to ../models/country_held_out/malawi_2016/house_has_cellphone/trained_model_house_has_cellphone.pt


In [17]:
# use this to see the validation accuracy
# (a single pass over the validation loader with gradient tracking switched off)
model.eval()
running_corrects = 0
with torch.no_grad():
    for inputs, labels, _ in tqdm(dataloaders_dict['valid']):
        inputs = inputs.to(DEVICE)
        labels = labels.to(DEVICE)
        outputs = model(inputs)
        # predicted class = index of the largest output
        _, preds = torch.max(outputs, 1)

        # statistics
        running_corrects += torch.sum(preds == labels.data)

epoch_acc = running_corrects.double() / len(dataloaders_dict['valid'].dataset)

print('Acc: {:.4f}'.format(epoch_acc))

HBox(children=(FloatProgress(value=0.0, max=1323.0), HTML(value='')))


Acc: 0.2889
