In [1]:
import numpy as np
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
from PIL import Image
from google.cloud import storage

In [2]:
%run Accuracy_Module.py
%run DataLoading.py
%run load_and_organize_dataset.py

In [None]:
!apt-get install p7zip-full
!p7zip -d UTKFace.tar.gz
!tar -xvf UTKFace.tar.gz

In [None]:
in_path = 'UTKFace/'
out_path = 'Data/'

count = organize_files(in_path, out_path, 1, 95)

In [3]:
train_data, valid_data, test_data = load_dataset(64)

In [None]:
for data in train_data:
    data, label =  data
    break

In [4]:
vgg16 = models.vgg16(pretrained = False)
vgg16 = nn.Sequential(vgg16.features, vgg16.avgpool)

pre_trained = torch.load("vgg_face_dag.pth")
new = list(pre_trained.items())
vgg16_state = vgg16.state_dict()
count = 0
for key, value in vgg16_state.items():
    layer_name, weights = new[count]      
    vgg16_state[key] = weights
    count += 1

vgg16.load_state_dict(vgg16_state)

In [5]:
vgg16.cuda()
vgg16.eval()

Sequential(
  (0): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d

In [6]:
class VGG_regression(nn.Module):
    def __init__(self):
        super(VGG_regression, self).__init__()
        self.name = "vgg_regression"
        self.fc1 = nn.Linear(25088, 4096)
        self.fc2 = nn.Linear(4096, 4096)
        self.fc3 = nn.Linear(4096, 1024)
        self.fc4 = nn.Linear(1024, 1)
        self.dropout = nn.Dropout(0.2)
    
    def forward(self, x):
        x = x.cuda()
        x = vgg16(x)
        x = x.view(x.shape[0],-1)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = F.relu(self.fc3(x))
        x = self.dropout(x)
        x = self.fc4(x)
        x = x.squeeze(1)
        
        return x

In [7]:
def evaluate(data_eval, net, criterion, batch_size =32):
    total_epoch = 0
    total_loss = 0
    for inputs, labels in data_eval:
        outputs = net(inputs.cuda())
        loss = criterion(outputs.cuda(), labels.float().cuda())
        total_loss += loss.item()
        total_epoch += len(labels)
    return float(total_loss)/(total_epoch)
  

In [8]:
def train_net(train_data, valid_data, net, pretrained = False, checkpoint = None, batch_size=32, learning_rate=5e-05, num_epochs=30):
    
    
    ########################################################################
    # Fixed PyTorch random seed for reproducible result
    from torch.autograd import Variable
    torch.manual_seed(1000)
    ########################################################################
    # Obtain the PyTorch data loader objects to load batches of the datasets
    #train_loader, val_loader, test_loader = load_dataset(batch_size)
    ########################################################################
    # Define the Loss function and optimizer
    # The loss function will be 
    # Optimizer will be SGD with Momentum.
    
    criterion = nn.MSELoss().cuda()
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate, weight_decay=1e-5)
    
    pretrained_epoch = 0
    train_losses, val_losses, train_acc, val_acc = [], [], [], []
    if pretrained == True:
        net.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        pretrained_epoch = checkpoint['epoch']
        train_losses = checkpoint['train_loss']
        val_losses = checkpoint['valid_loss']
        net.train()
        print("resuming training after epoch: ", pretrained_epoch)
    
    # optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)
    ########################################################################
    # Train the network
    # Loop over the data iterator and sample a new batch of training data
    # Get the output from the network, and optimize our loss function.
    start_time = time.time()
    for epoch in range(num_epochs):  # loop over the dataset multiple times
        total_epoch = 0
        total_train_loss = 0
        #random.shuffle(train_data)
        for inputs, labels in train_data:
            # Zero the parameter gradients
            optimizer.zero_grad()
            # Forward pass, backward pass, and optimize
            outputs = net(Variable(inputs.cuda()))
            loss = criterion(outputs.cuda(), labels.float().cuda())
            loss.backward()
            optimizer.step()
            total_train_loss += loss.item()
            total_epoch += len(labels) #adding batch size
            
        # save the current training information
        train_losses.append(float(total_train_loss)/total_epoch)            # compute *average* loss
        val_losses.append(evaluate(valid_data, net, criterion, batch_size = batch_size))
        #train_acc.append(get_accuracy(net, train_data, batch_size = batch_size)) # compute training accuracy 
        #val_acc.append(get_accuracy(net, valid_data, batch_size = batch_size))  # compute validation accuracy
            
        #print("Epoch: {}, Training Loss: {:.3f}, Validation Loss: {:.3f}, Training Accuracy: {:.3f}, Validation Accuracy: {:.3f}".format(epoch+pretrained_epoch+1, train_losses[-1], val_losses[-1], train_acc[-1],val_acc[-1]))
        print("Epoch: {}, Training Loss: {:.3f}, Validation Loss: {:.3f}".format(epoch+pretrained_epoch+1, train_losses[-1], val_losses[-1]))
        # Save the current model (checkpoint) to a file
        #torch.save({ 'epoch': epoch, 'model_state_dict': net.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 'train_loss': train_losses, 'train_accuracy': train_acc, 'valid_loss': val_losses, 'valid_accuracy': val_acc}, 
        #           "{}_features_bs{}_lr{}".format(pretrained, batch_size, learning_rate))
        
        torch.save({ 'epoch': epoch+pretrained_epoch+1, 'model_state_dict': net.state_dict(), 'optimizer_state_dict': optimizer.state_dict(), 'train_loss': train_losses, 'valid_loss': val_losses}, 
                   "VGGfeatures_bs{}_lr{}".format(pretrained, batch_size, learning_rate))
        
        #saving it in the google cloud storage
        client = storage.Client()
        bucket = client.get_bucket("aps360team12")
        blob_name = "VGGfeatures_bs{}_lr{}_epoch{}".format(pretrained, batch_size, learning_rate, epoch+pretrained_epoch)
        blob = bucket.blob(blob_name)

        source_file_name = "VGGfeatures_bs{}_lr{}".format(pretrained, batch_size, learning_rate)
        blob.upload_from_filename(source_file_name)
        print("File uploaded to {}.".format(bucket.name))
        
    
    
        
    # plotting
    plt.title("Loss curves w/ lr={}, batch size = {}".format(learning_rate, batch_size))
    plt.axis([0, num_epochs, 0, max(train_losses[0], val_losses[0])])
    plt.plot(train_losses, label="Train loss")
    plt.plot(val_losses, label="Validation loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.show()

    plt.title("Acc curves w/ lr={}, batch size = {}".format(learning_rate, batch_size))
    plt.axis([0, num_epochs, 0, min(train_acc[0], val_acc[0])])
    plt.plot(train_acc, label="Train")
    plt.plot(val_acc, label="Validation")
    plt.xlabel("Epoch")
    plt.ylabel("Training Accuracy")
    plt.legend(loc='best')
    plt.show()

    print("Final Training Accuracy: {}".format(train_acc[-1]))
    print("Final Validation Accuracy: {}".format(val_acc[-1]))

    print('Finished Training')
    end_time = time.time()
    elapsed_time = end_time - start_time
    print("Total time elapsed: {:.2f} seconds".format(elapsed_time))

In [None]:
def get_accuracy(model, data, batch_size = 32):
    c=0
    mean = 0.0
    for imgs, labels in data:
        mean += labels.sum()
        c+=batch_size
    mean = (mean/c)
    
    #print(mean)
    
    correct = 0
    total = 0
    count = 0 
    ss_reg = 0
    ss_total = 0
    
    for imgs, labels in data:
        labels = labels.float()
        output = model(imgs) # We don't need to run F.softmax
        # print(output)
        # pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
        # correct += pred.eq(labels.view_as(pred)).sum().item()
        output = torch.round(output)
        output = output.float()
        output = output.cpu().detach().numpy()
        output = torch.tensor(output)
        #correct += np.isclose(output.detach().numpy(), labels, 0.05).sum()
        total += imgs.shape[0]
        count+=1
        ss_reg += ((labels-output)**2).sum()
        ss_total += ((labels-mean)**2).sum()
    return 1-ss_reg/ss_total

In [9]:
vgg_regression = VGG_regression()
vgg_regression.cuda()
vgg_regression2 = VGG_regression()
vgg_regression2.cuda()

VGG_regression(
  (fc1): Linear(in_features=25088, out_features=4096, bias=True)
  (fc2): Linear(in_features=4096, out_features=4096, bias=True)
  (fc3): Linear(in_features=4096, out_features=1024, bias=True)
  (fc4): Linear(in_features=1024, out_features=1, bias=True)
  (dropout): Dropout(p=0.2)
)

In [None]:
train_net(train_data, valid_data, vgg_regression, batch_size = 128, num_epochs = 20)

In [10]:
client = storage.Client()
bucket_name = "aps360team12"
bucket = client.get_bucket(bucket_name)
blob_name = "VGG_features_bs64_lr5e-05_epoch6"
blob = bucket.get_blob(blob_name)

In [11]:
blob.download_to_filename('pretrained_epoch6')

In [12]:
trained_data = torch.load('pretrained_epoch6')
trained_data['epoch'] = trained_data['epoch'] + 1 #you dont need to do this anymore

In [None]:
tr_loss = trained_data['train_loss']
val_loss = trained_data['valid_loss']
vgg_regression.load_state_dict(trained_data['model_state_dict'])
vgg_regression.eval()

In [None]:
test_loss = evaluate(test_data, vgg_regression, nn.MSELoss().cuda(), batch_size = 64)

In [None]:
print(test_loss)

In [None]:
k = 0
for img, label in test_data:
    #img = img / 2 + 0.5
    #plt.subplot(3, 5, k+1)
    #plt.axis('off')
    pred = vgg_regression(img)
    for count, i in enumerate(img, 0):
        print(count)
        i = np.transpose(i, [1,2,0])
        plt.imshow(i)
        plt.show()
        print("Actual Age: ", label[count], " Prediction: ", pred[count])
        k += 1
    #print(label, vgg_regression(img))
        if k > 10:
            break
    
    break

In [None]:
trained_data

In [13]:
train_net(train_data, valid_data, vgg_regression2, pretrained = True, checkpoint = trained_data, batch_size = 64, num_epochs = 20)

resuming training after epoch:  6
Epoch: 7, Training Loss: 0.650, Validation Loss: 1.003
File uploaded to aps360team12.
Epoch: 8, Training Loss: 0.597, Validation Loss: 1.021
File uploaded to aps360team12.
Epoch: 9, Training Loss: 0.518, Validation Loss: 1.033
File uploaded to aps360team12.
Epoch: 10, Training Loss: 0.439, Validation Loss: 1.012
File uploaded to aps360team12.
Epoch: 11, Training Loss: 0.382, Validation Loss: 1.037
File uploaded to aps360team12.
Epoch: 12, Training Loss: 0.325, Validation Loss: 1.041
File uploaded to aps360team12.
Epoch: 13, Training Loss: 0.262, Validation Loss: 1.161
File uploaded to aps360team12.
Epoch: 14, Training Loss: 0.225, Validation Loss: 1.086
File uploaded to aps360team12.
Epoch: 15, Training Loss: 0.190, Validation Loss: 1.094
File uploaded to aps360team12.
Epoch: 16, Training Loss: 0.175, Validation Loss: 1.056
File uploaded to aps360team12.


KeyboardInterrupt: 

In [None]:
client = storage.Client()
bucket = client.get_bucket("aps360team12")
blob_name = "VGG_features_bs32_lr5e-05"
blob = bucket.blob(blob_name)

source_file_name = "{}_features_bs{}_lr{}".format("VGG", 32, 5e-05)
blob.upload_from_filename(source_file_name)

print("File uploaded to {}.".format(bucket.name))

In [None]:
evaluate(test_data, vgg_regression, nn.MSELoss().cuda(), batch_size = 64)