# Land-Use Image Classification
---
Created by Carlos Moreno, Elizabeth Khan, Frances Leung, Jeffrey Laughman

### Download dataset split into Train, Test, and Validation datasets

In [1]:
# !pip install split-folders
#! pip install wget

In [2]:
import splitfolders
import wget
import zipfile
import requests
import os

In [3]:
path = os.getcwd()

# Make folder if does not exist
if os.path.exists('sinkhole')==False:
    os.makedirs('sinkhole')
    os.chdir('sinkhole')
else:
    os.chdir('sinkhole')

Download file and save to sinkhole folder location from https://madm.dfki.de/files/sentinel/EuroSAT.zip


#### Helper function to unzip and split the data

In [4]:
def split_data(new_folder='sinkhole'):
    # create deep learning folder if does not exist
#     if os.path.exists(new_folder)==False:
#         os.makedirs(new_folder)
#         os.chdir(new_folder)
#     else:
#         os.chdir(new_folder)
    # download data
    #requests.get('https://madm.dfki.de/files/sentinel/EuroSAT.zip')
    # unzip file
    with zipfile.ZipFile('EuroSAT.zip') as z:
        z.extractall()  
    # This will split folders into train, validation and test inside the output folder
    splitfolders.ratio('2750','data')
    
    print('EuroSAT RGB files were successfully split, see data folder for train, test, and validation data')
    
    
    
    

In [5]:
# split_data()

In [6]:
# Import Libraries
import os
import random
import shutil
import time
import warnings
import gc 

import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
import torch.optim

import torch.utils.data
import torchvision
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torchvision.datasets import ImageFolder
import torchvision.models as models

  warn(f"Failed to load image Python extension: {e}")


In [7]:
# Clear out cuda cache
torch.cuda.empty_cache()
# garbage collection
gc.collect()

0

In [8]:
# This cell is tagged `parameters` for papermill
LR = 0.1
MOMENTUM = 0.9
WEIGHT_DECAY = 5e-4
EPOCHS = 120

### Set the architecture and other variables

In [9]:
##########################
# Using the pre-trained resnet50 architecture with pretrained weights for transfer learning
ARCH = torchvision.models.resnet50(pretrained=True)

# Freeze all layers except for the final layer
for param in ARCH.parameters():
    param.requires_grad = False
    
classes = ['AnnualCrop', 'Forest', 'HerbaceousVegetation', 'Highway', 'Industrial', 'Pasture','PermanentCrop','Residential','River', 'SeaLake']

# Getting the final layer to match the number of classes
num_ftrs = ARCH.fc.in_features
ARCH.fc = nn.Linear(num_ftrs, len(classes))

##########################

##########################
SEED=1
random.seed(SEED)
torch.manual_seed(SEED)
cudnn.deterministic = True
#########################
# Define epochs and printing frequency
START_EPOCH = 0
#EPOCHS = 10
PRINT_FREQ = 2500

#Dynamically set batch size and workers
avail_gpus = min(1, torch.cuda.device_count())
batch_size = 100 if avail_gpus else 64
#batch_size = 50
TRAIN_BATCH= batch_size
VAL_BATCH = batch_size
WORKERS= int(os.cpu_count()-2/2)

# Files locations of training and validation data
root_dir = "data"
TRAINDIR= os.path.join(root_dir,'train')
VALDIR= os.path.join(root_dir,'val')

print(TRAINDIR,'\n', VALDIR)

# check if cuda is available in this cell
# if it is not available, you should not go forward!
device = "cuda:0" if torch.cuda.is_available() else 'cpu'

# enable algorithm optimization
cudnn.benchmark = True

data\train 
 data\val


### Train model

In [10]:
def train(train_loader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter('Time', ':6.3f')
    data_time = AverageMeter('Data', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [batch_time, data_time, losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))

    ######################
    # switch model to train mode here
    model.train()
    ################

    end = time.time()
    for i, (images, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        #####################
        # send the images to cuda device
        # send the target to cuda device
        images = images.to(device)
        target = target.to(device)

        # compute output
        output = model(images)

        # compute loss 
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), images.size(0))
        top1.update(acc1[0], images.size(0))
        top5.update(acc5[0], images.size(0))

        # compute gradient and do SGD step
        
        #### zero out gradients in the optimier
        optimizer.zero_grad() # Zero gradients each step 
        ## backprop!
        loss.backward()
        # update the weights!
        optimizer.step()
        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % PRINT_FREQ == 0:
            progress.display(i)

### Validate Function

In [11]:
def validate(val_loader, model, criterion):
    batch_time = AverageMeter('Time', ':6.3f')
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [batch_time, losses, top1, top5],
        prefix='Test: ')

    # switch to evaluate mode
    # model ???
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (images, target) in enumerate(val_loader):
            
            
            ### send the images and target to cuda
            images = images.to(device)
            target = target.to(device)

            # compute output
            # output = model ??? images?
            output = model(images)
            # compute loss
            # loss  = criterion ?? output ?? target
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), images.size(0))
            top1.update(acc1[0], images.size(0))
            top5.update(acc5[0], images.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % PRINT_FREQ == 0:
                progress.display(i)

        # TODO: this should also be done with the ProgressMeter
        print(' * Acc@1 {top1.avg:.3f} Acc@5 {top5.avg:.3f}'
              .format(top1=top1, top5=top5))

    return top1.avg

### Save the checkpoint

In [12]:
def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    # save the model state!
    #checkpoint = 'checkpoint'
    filepath = filename
    torch.save(state, filepath)
    if is_best:
        shutil.copyfile(filepath, 'model_best.pth.tar')

In [13]:
class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self, name, fmt=':f'):
        self.name = name
        self.fmt = fmt
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        fmtstr = '{name} {val' + self.fmt + '} ({avg' + self.fmt + '})'
        return fmtstr.format(**self.__dict__)

In [14]:
class ProgressMeter(object):
    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        entries = [self.prefix + self.batch_fmtstr.format(batch)]
        entries += [str(meter) for meter in self.meters]
        print('\t'.join(entries))

    def _get_batch_fmtstr(self, num_batches):
        num_digits = len(str(num_batches // 1))
        fmt = '{:' + str(num_digits) + 'd}'
        return '[' + fmt + '/' + fmt.format(num_batches) + ']'

In [15]:
# if we are adjusting the LR manually use this
def adjust_learning_rate(optimizer, epoch):
    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
    lr = LR * (0.1 ** (epoch // 30))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr

In [16]:
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)

        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))

        res = []
        for k in topk:
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res

In [17]:
# Use imagenet related mean and standard deviation for normalization
imagenet_mean_RGB = [0.47889522, 0.47227842, 0.43047404]
imagenet_std_RGB = [0.229, 0.224, 0.225]
# cinic_mean_RGB = [0.47889522, 0.47227842, 0.43047404]
# cinic_std_RGB = [0.24205776, 0.23828046, 0.25874835]
# cifar_mean_RGB = [0.4914, 0.4822, 0.4465]
# cifar_std_RGB = [0.2023, 0.1994, 0.2010]

In [18]:
normalize = transforms.Normalize(mean=imagenet_mean_RGB, std=imagenet_std_RGB)

In [19]:
IMG_SIZE = 224
IMG_SIZE2 = int(round(1.15 * IMG_SIZE,0))
#IMG_SIZE = 224 #ALEXNET

### Initialize the model using the architecture you selected above

In [20]:
# model = ... 

model = ARCH

### Send the model to the cuda device

In [21]:
# send the model to the cuda device.. 
model.cuda()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

### Instantiate the loss to cross entropy

In [22]:
# use the cross-entropy loss
criterion = nn.CrossEntropyLoss()

### Instantiate the optimizer to SGD

In [23]:
# use SGD .. use the momentum and weight decay vars
optimizer = torch.optim.SGD(model.parameters(), lr=LR, momentum=MOMENTUM)

#### Create the learning rate scheduler

In [24]:
# Learning rate scheduler
metric = 0
#scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer=optimizer, T_max=4)
#scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, factor=0.1, patience=3, verbose = True)
scheduler1 = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
scheduler2 = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[30,80], gamma=0.1)

In [25]:
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(IMG_SIZE),
#     transforms.Grayscale(3),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(imagenet_mean_RGB, imagenet_std_RGB),
])

### Create the train dataset object

In [26]:
# Training dataset from local file
train_dataset = ImageFolder(root=TRAINDIR, transform=transform_train)

In [27]:
transform_val = transforms.Compose([
    transforms.Resize(IMG_SIZE2),
    transforms.CenterCrop(IMG_SIZE),
#     transforms.Grayscale(3),
    transforms.ToTensor(),
    transforms.Normalize(mean=imagenet_mean_RGB, std=imagenet_std_RGB),
])

### Create the val dataset object

In [28]:
#Validation dataset from local file
val_dataset = ImageFolder(root=VALDIR,transform=transform_val)

### Create the train dataloader

In [29]:
# fill this in
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                         shuffle=True, num_workers=WORKERS)

### Create the validation dataloader

In [30]:
# fill this in..
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size,
                                         shuffle=True, num_workers=WORKERS) 

In [31]:
best_acc1 = 0

In [32]:
for epoch in range(START_EPOCH, EPOCHS):
#    adjust_learning_rate(optimizer, epoch)

    # train for one epoch
    train(train_loader, model, criterion, optimizer, epoch)

    # evaluate on validation set
    acc1 = validate(val_loader, model, criterion)

    # remember best acc@1 and save checkpoint
    is_best = acc1 > best_acc1
    best_acc1 = max(acc1, best_acc1)


    save_checkpoint({
        'epoch': epoch + 1,
        'arch': ARCH,
        'state_dict': model.state_dict(),
        'best_acc1': best_acc1,
        'optimizer' : optimizer.state_dict(),
    }, is_best)
    scheduler1.step()
    scheduler2.step()
    print('lr: ' + str(scheduler2.get_last_lr()))
    # break if top 1 accuracy greater than 60 or else will complete total number of epochs
    if acc1>= 92:
        break
        
# print out best performing model top 1 accuracy        
print('\n\n ****The best performing model has a Top 1 Accuracy of {}***'.format(best_acc1))

Epoch: [0][  0/216]	Time 26.333 (26.333)	Data 20.421 (20.421)	Loss 2.2691e+00 (2.2691e+00)	Acc@1  19.00 ( 19.00)	Acc@5  60.00 ( 60.00)
Test: [ 0/27]	Time 24.747 (24.747)	Loss 1.2270e+00 (1.2270e+00)	Acc@1  81.00 ( 81.00)	Acc@5  99.00 ( 99.00)
 * Acc@1 85.370 Acc@5 99.815
lr: [0.09000000000000001]
Epoch: [1][  0/216]	Time 21.289 (21.289)	Data 20.546 (20.546)	Loss 1.3607e+00 (1.3607e+00)	Acc@1  81.00 ( 81.00)	Acc@5 100.00 (100.00)
Test: [ 0/27]	Time 32.205 (32.205)	Loss 7.4886e-01 (7.4886e-01)	Acc@1  91.00 ( 91.00)	Acc@5 100.00 (100.00)
 * Acc@1 87.185 Acc@5 99.815
lr: [0.08100000000000002]
Epoch: [2][  0/216]	Time 23.374 (23.374)	Data 22.525 (22.525)	Loss 1.3086e+00 (1.3086e+00)	Acc@1  78.00 ( 78.00)	Acc@5  99.00 ( 99.00)
Test: [ 0/27]	Time 27.919 (27.919)	Loss 5.0864e-01 (5.0864e-01)	Acc@1  91.00 ( 91.00)	Acc@5  99.00 ( 99.00)
 * Acc@1 91.000 Acc@5 99.926
lr: [0.07290000000000002]
Epoch: [3][  0/216]	Time 24.050 (24.050)	Data 23.158 (23.158)	Loss 1.0695e+00 (1.0695e+00)	Acc@1  80.00 ( 