In [None]:
"""
Copyright 2021 Aonghus McGovern

This file is part of Music_Image_Matcher_Notebooks_And_Writeup.

Music_Image_Matcher_Notebooks_And_Writeup is free software: you can redistribute it and/or modify it under the terms of the 
GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) 
any later version.

Music_Image_Matcher_Notebooks_And_Writeup is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; 
without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License 
for more details.

You should have received a copy of the GNU General Public License along with Music_Image_Matcher_Notebooks_And_Writeup.  
If not, see <https://www.gnu.org/licenses/>.
"""

In [2]:
import fileinput
import math
import matplotlib.pyplot as plt
import numpy as np
import operator
import os
import pandas as pd
import pickle
from PIL import Image
from scipy import stats
import torchvision.transforms as transforms
from sklearn.decomposition import PCA
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.metrics.pairwise import pairwise_distances
import time
import torch
import torchvision.models as models
import torch.nn as nn

In [2]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [3]:
# Read the album data from Oramas et al. into a DataFrame.
oramas_data = pd.read_csv(filepath_or_buffer='oramas_data_with_names.csv')

In [24]:
# Accumulators for the fraction of variance that PCA explains for each content and style matrix.
# They are filled while the images are processed and analysed afterwards to check that every
# image has at least 95% of its variance explained.
content_ratios = []
style_ratios = []

# The below code is reworked from the 
# style transfer tutorial available here: https://pytorch.org/tutorials/advanced/neural_style_tutorial.html
class Normalization(nn.Module):
    """Channel-wise image normalisation: ``(img - mean) / std``.

    Args:
        mean: Per-channel means, given as a sequence of numbers or a tensor
            with one entry per channel.
        std: Per-channel standard deviations, in the same form as ``mean``.
    """

    def __init__(self, mean, std):
        super(Normalization, self).__init__()
        # torch.as_tensor accepts both sequences and tensors; .clone().detach()
        # then yields an independent copy without the UserWarning that
        # torch.tensor(existing_tensor) raises.
        # .view the mean and std to make them [C x 1 x 1] so that they can
        # directly work with image Tensor of shape [B x C x H x W].
        # B is batch size. C is number of channels. H is height and W is width.
        # Registering them as buffers means module.to(device) moves them along
        # with the rest of the module instead of silently leaving them behind.
        self.register_buffer('mean', torch.as_tensor(mean).clone().detach().view(-1, 1, 1))
        self.register_buffer('std', torch.as_tensor(std).clone().detach().view(-1, 1, 1))

    def forward(self, img):
        """Return ``img`` normalised per channel (broadcast over batch, height and width)."""
        return (img - self.mean) / self.std

def get_content_matrix(target, name):
    """Reduce a content feature map with PCA, record its explained variance and save it.

    Args:
        target: Activation tensor of shape [B x C x H x W]. For the layer used in
            this notebook it is 1 x 128 x 112 x 112.
        name: File name of the pickle written to ``./content_matrices_50_pca/``.

    Returns:
        The PCA-reduced array with one row per feature map and 50 columns.

    Side effects:
        Appends the summed explained-variance ratio to the module-level
        ``content_ratios`` list and writes the reduced array to disk.
    """
    target = target.detach()
    # PCA expects a 2-d array, so flatten every feature map into one row:
    # 1 x 128 x 112 x 112 becomes 128 x 12544 (12544 is 112*112).
    # The shape is read from the tensor rather than hard-coded so that other
    # layers or image sizes work as well.
    reshaped = target.reshape(-1, target.size(-2) * target.size(-1))
    # scikit-learn works on CPU NumPy arrays, so copy off the GPU if necessary.
    reshaped = reshaped.cpu().numpy()

    # Fit once and transform in the same call. Fitting a second time would
    # repeat the work and, with a randomised SVD solver, could leave the
    # recorded ratio describing a different fit than the saved matrix.
    pca = PCA(n_components=50)
    reduced = pca.fit_transform(reshaped)
    # Append the explained variance to the content_ratios list
    content_ratios.append(sum(pca.explained_variance_ratio_))

    # Save the reduced matrix; the context manager closes the file handle.
    os.makedirs('./content_matrices_50_pca', exist_ok=True)
    with open('./content_matrices_50_pca/%s' % name, 'wb') as out_file:
        pickle.dump(reduced, out_file)
    return reduced
    
def get_style_matrix(target_feature, name):
    """Build the Gram (style) matrix of a feature map, reduce it with PCA and save it.

    Args:
        target_feature: Activation tensor of shape [B x C x H x W], as expected
            by ``gram_matrix``.
        name: File name of the pickle written to ``./style_matrices_15_pca/``.

    Returns:
        The PCA-reduced array with one row per Gram-matrix row and 15 columns.

    Side effects:
        Appends the summed explained-variance ratio to the module-level
        ``style_ratios`` list and writes the reduced array to disk.
    """
    # scikit-learn works on CPU NumPy arrays, so copy off the GPU if necessary.
    target = gram_matrix(target_feature).detach().cpu().numpy()

    # Fit once and transform in the same call instead of fitting twice, so the
    # recorded ratio always belongs to the matrix that is saved.
    pca = PCA(n_components=15)
    reduced = pca.fit_transform(target)
    # Append the explained variance to the style_ratios list
    style_ratios.append(sum(pca.explained_variance_ratio_))

    # Save the reduced matrix; the context manager closes the file handle.
    os.makedirs('./style_matrices_15_pca', exist_ok=True)
    with open('./style_matrices_15_pca/%s' % name, 'wb') as out_file:
        pickle.dump(reduced, out_file)
    return reduced

def get_image(image_path, reshape_size = (224,224)):
    """Load an image as a normalised tensor ready for content/style extraction.

    Args:
        image_path: Path of the image file to open.
        reshape_size: Size passed to ``transforms.Resize``; defaults to (224, 224).

    Returns:
        The resized, tensor-converted and normalised image, or ``None`` when
        the image is not in RGB mode or the transform fails.
    """
    transform = transforms.Compose([
        transforms.Resize(reshape_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])

    # Opening through a context manager releases the file handle once the
    # pixel data has been read; the loop over all albums would otherwise leak
    # one open file per image.
    with Image.open(image_path) as image:
        # Most of the images open in RGB mode. If an image doesn't open in RGB mode applying the
        # functions to get content and style matrices gives errors, so we return None for these images
        if image.mode != 'RGB':
            return None
        try:
            return transform(image)
        except Exception:
            # Best effort: an image that cannot be transformed is skipped by the caller.
            # Only Exception is caught so KeyboardInterrupt/SystemExit still propagate.
            return None
    
def gram_matrix(input):
    """Return the normalised Gram matrix of a 4-d feature tensor.

    ``input`` has shape [batch x maps x height x width]. Each feature map is
    flattened into one row, the rows are multiplied with their transpose and
    the product is divided by the total number of elements in ``input``.
    """
    batch, maps, height, width = input.size()

    # One row per feature map, one column per spatial position.
    flat = input.view(batch * maps, height * width)
    gram = torch.mm(flat, flat.t())

    # 'Normalize' the Gram matrix by the number of elements in the input.
    total_elements = batch * maps * height * width
    return gram.div(total_elements)

def imshow(inp, title=None):
    """Imshow for Tensor: undo the normalisation and plot a [C x H x W] image tensor."""
    channel_mean = np.array([0.485, 0.456, 0.406])
    channel_std = np.array([0.229, 0.224, 0.225])

    # Channels-first -> channels-last, which is the layout matplotlib expects.
    pixels = inp.numpy().transpose((1, 2, 0))
    # Reverse the normalisation and clamp to the valid [0, 1] range.
    pixels = np.clip(channel_std * pixels + channel_mean, 0, 1)

    plt.imshow(pixels)
    if title is not None:
        plt.title(title)
    # Short pause so that plots are updated.
    plt.pause(0.001)

def get_style_model_and_losses(style_img, content_img, image_name,
                               cnn = models.vgg19(pretrained=True).features.to(device).eval(),
                               normalization_mean = torch.tensor([0.485, 0.456, 0.406]).to(device),
                               normalization_std = torch.tensor([0.229, 0.224, 0.225]).to(device),
                               content_layers=['conv_4'],
                               style_layers=['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']):
    """Create and save the content and style matrices of one image.

    This is a modified version of the function in the PyTorch neural style
    transfer tutorial. Instead of inserting loss modules it walks through
    ``cnn`` layer by layer and, at every layer named in ``content_layers`` /
    ``style_layers``, passes the activations to ``get_content_matrix`` /
    ``get_style_matrix``, which reduce them with PCA and pickle them.

    Args:
        style_img: Image batch [B x C x H x W] the style matrices are computed from.
        content_img: Image batch [B x C x H x W] the content matrices are computed from.
        image_name: Prefix of the saved file names; the layer name is appended
            as ``<image_name>_<layer>``.
        cnn: Sequential feature extractor whose children are Conv2d, ReLU,
            MaxPool2d or BatchNorm2d layers. The default network is built once,
            when this function is defined.
        normalization_mean: Per-channel mean for the normalisation layer.
        normalization_std: Per-channel standard deviation for the normalisation layer.
        content_layers: Names of the layers to capture content matrices at (not modified).
        style_layers: Names of the layers to capture style matrices at (not modified).

    Returns:
        Tuple ``(style_matrices, content_matrices)`` of lists holding the
        reduced matrices in layer order.

    Raises:
        RuntimeError: If ``cnn`` contains a layer of an unrecognised type.
    """
    # normalization module
    # NOTE(review): get_image already applies transforms.Normalize with these
    # same statistics, so images loaded through it are normalised twice. Left
    # unchanged so existing saved matrices stay comparable - confirm intent.
    normalization = Normalization(normalization_mean, normalization_std).to(device)

    # Collected content/style matrices, in layer order
    content_matrices = list()
    style_matrices = list()

    # Layers whose activations still have to be captured. Once it is empty no
    # further forward computation is needed.
    pending = set(content_layers) | set(style_layers)
    # Run the inputs on the device the normalisation statistics live on.
    compute_device = normalization.mean.device

    # Activations are carried forward one layer at a time instead of re-running
    # the whole network from the input at every captured layer. When both
    # inputs are the same tensor a single stream serves content and style.
    # No gradients are needed because the activations are only read.
    with torch.no_grad():
        content_act = normalization(content_img.to(compute_device)) if content_layers else None
        if not style_layers:
            style_act = None
        elif content_act is not None and content_img is style_img:
            style_act = content_act
        else:
            style_act = normalization(style_img.to(compute_device))

        i = 0  # increment every time we see a conv
        for layer in cnn.children():
            if isinstance(layer, nn.Conv2d):
                i += 1
                name = 'conv_{}'.format(i)
            elif isinstance(layer, nn.ReLU):
                name = 'relu_{}'.format(i)
                # An in-place ReLU would overwrite the activations of the
                # preceding layer, so use the out-of-place version.
                layer = nn.ReLU(inplace=False)
            elif isinstance(layer, nn.MaxPool2d):
                name = 'pool_{}'.format(i)
            elif isinstance(layer, nn.BatchNorm2d):
                name = 'bn_{}'.format(i)
            else:
                raise RuntimeError('Unrecognized layer: {}'.format(layer.__class__.__name__))

            if not pending:
                # Everything requested has been captured; keep iterating only
                # so that every layer type is still validated.
                continue

            shared_stream = style_act is not None and style_act is content_act
            if content_act is not None:
                content_act = layer(content_act)
            if shared_stream:
                style_act = content_act
            elif style_act is not None:
                style_act = layer(style_act)

            # str.format keeps non-string names (e.g. integer album indices) working.
            file_name = '{}_{}'.format(image_name, name)

            if name in content_layers:
                # Hand over on the CPU because the matrices are reduced with scikit-learn.
                content_matrices.append(get_content_matrix(content_act.detach().cpu(), file_name))

            if name in style_layers:
                style_matrices.append(get_style_matrix(style_act.detach().cpu(), file_name))

            pending.discard(name)

    return style_matrices, content_matrices

In [None]:
# Set the folder that contains the album images
album_images = 'Images/'
count = 0

# Keep the first row of every album. Its index label identifies the image file
# to load. This replaces a boolean-mask scan of the whole frame per album.
first_rows = oramas_data.drop_duplicates(subset='album_index')

for img, album_index in zip(first_rows.index, first_rows['album_index']):
    # str.format also works when the index label is not a string, and
    # os.path.join avoids the double slash that 'Images/' + '/' produced.
    image = get_image(os.path.join(album_images, '{}.jpg'.format(img)))
    # Identity check: '== None' on a tensor goes through tensor comparison.
    if image is None:
        print('Error getting image, skipping')
        continue
    # Add the batch dimension expected by the network.
    image = torch.unsqueeze(image, 0)
    # Call the function to create content and style matrices for the image. Pass the name to be used to save the
    # matrices. This name is the album index (as a string, so it can be used in file names).
    get_style_model_and_losses(image, image, str(album_index))
    count += 1
    if count % 100 == 0:
        print(count)
    

In [None]:
# Save the explained-variance ratios. The context managers make sure the files
# are flushed and closed once the dump has finished.
with open("content_ratios_50_element_pca", "wb") as content_file:
    pickle.dump(content_ratios, content_file)
with open("style_ratios_15_element_pca", "wb") as style_file:
    pickle.dump(style_ratios, style_file)

In [None]:
# Summarise the distribution of the content explained-variance ratios (number of observations,
# min/max, mean, variance, skewness and kurtosis). We want to determine that each of our images
# has at least 95% of variance explained, so the minimum is the value to check.
stats.describe(content_ratios)

In [None]:
# Summarise the distribution of the style explained-variance ratios in the same way;
# again the minimum shows whether every image reaches the 95% target.
stats.describe(style_ratios)

In [None]:
# Histogram (50 bins) of the content explained-variance ratios
ax = plt.gca()
ax.hist(content_ratios, bins=50)
ax.set(title='Frequency Histogram', ylabel='Frequency')

In [None]:
# Histogram (50 bins) of the style explained-variance ratios
ax = plt.gca()
ax.hist(style_ratios, bins=50)
ax.set(title='Frequency Histogram', ylabel='Frequency')