In [1]:
import os
import numpy as np
import pickle
from image_data_module import TrainTestData
from feature_extraction_module import FeatureExtractor
from salience_prediction_module import SaliencePrediction
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf

# Limit CUDA-backed libraries in this process to the GPUs numbered 1, 2 and 3.
os.environ.update({"CUDA_VISIBLE_DEVICES": "1,2,3"})

In [2]:
# Instantiate the project helpers (data access, feature extraction, model
# training) that the cells of this notebook call. Constructed left to right,
# i.e. in the same order as before.
data_module, extraction_module, prediction_module = (
    TrainTestData(),
    FeatureExtractor(),
    SaliencePrediction(),
)

def get_relevant_elements(all_values, relevant_values):
    """Return the position of each requested value inside ``all_values``.

    Args:
        all_values: Array-like of values to search (e.g. image ids). Plain
            Python sequences are accepted and converted to an array so the
            ``==`` comparison is always element-wise.
        relevant_values: Iterable of values to look up.

    Returns:
        Integer ``np.ndarray`` with, for every entry of ``relevant_values``
        (in order), the first index at which it occurs in ``all_values``.
        The array has an integer dtype even when ``relevant_values`` is
        empty, so it can always be used directly as an index array.

    Raises:
        IndexError: If a requested value does not occur in ``all_values``.
    """
    all_values = np.asarray(all_values)
    elements = []
    for value in relevant_values:
        matches = np.where(all_values == value)[0]
        if matches.size == 0:
            # Same exception type as the bare [0][0] lookup would raise, but
            # with a message that names the missing value.
            raise IndexError("value {!r} not found in all_values".format(value))
        elements.append(matches[0])
    # An explicit index dtype keeps the empty result usable for indexing
    # (np.asarray([]) would default to float64, which numpy rejects as index).
    return np.asarray(elements, dtype=np.intp)

# Reference scores kept for comparison with the model results below.
# NOTE(review): presumably the MAPE / accuracy of a random predictor, one value
# per CV split (five entries, five splits) - confirm with the author. Neither
# array is referenced by the cells visible in this notebook.
random_pred_mape = np.array([23.56, 29.23, 22.95, 26.77, 23.93])
random_pred_accuracy = np.array([0.48, 0.528, 0.512, 0.548, 0.469])

In [3]:
"""
style_file = open(r"learning_output/vgg_style.pickle", "rb")
style_dict_results = pickle.load(style_file)
style_gram_matrices = style_dict_results["gram_matrices"]
style_image_ids = style_dict_results["image_ids"]

content_file = open(r"learning_output/vgg_content.pickle", "rb")
content_dict_results = pickle.load(content_file)
content_gram_matrices = content_dict_results["gram_matrices"]
content_image_ids = content_dict_results["image_ids"]

data_dict = {}
for cv_id in range(0, 5):
    print("Preparing data for cv split", str(cv_id))
    (X_train_ids, X_test_ids) = data_module.get_train_test_image_ids(str(cv_id))
    
    ### STYLE MATRICES ###
    style_X_train = style_gram_matrices[get_relevant_elements(style_image_ids, X_train_ids)]
    style_X_test = style_gram_matrices[get_relevant_elements(style_image_ids, X_test_ids)]
    
    style_X_train, style_X_test = extraction_module.PCA(style_X_train, style_X_test, components=128, save_fig=False, save_model=False)
    scaler = MinMaxScaler(feature_range=(0,1))
    style_X_train = scaler.fit_transform(style_X_train)
    style_X_test = scaler.transform(style_X_test)
    
    ### CONTENT MATRICES ###
    content_X_train = content_gram_matrices[get_relevant_elements(content_image_ids, X_train_ids)]
    content_X_test = content_gram_matrices[get_relevant_elements(content_image_ids, X_test_ids)]
    
    content_X_train, content_X_test = extraction_module.PCA(content_X_train, content_X_test, components=128, save_fig=False, save_model=False)
    scaler = MinMaxScaler(feature_range=(0,1))
    content_X_train = scaler.fit_transform(content_X_train)
    content_X_test = scaler.transform(content_X_test)
    
    ### COMPLEXITY ###
    (X_train, Y_train), (X_test, Y_test) = data_module.get_train_test_salience(cv_name=str(cv_id), gray=True)
    complexity_X_train = extraction_module.complexity(X_train, mode='grid')
    complexity_X_test = extraction_module.complexity(X_test, mode='grid')
    scaler = MinMaxScaler(feature_range=(0,1))
    complexity_X_train = scaler.fit_transform(complexity_X_train)
    complexity_X_test = scaler.transform(complexity_X_test)
    
    ### MERGE DATA ###
    X_train = np.hstack((style_X_train, content_X_train, complexity_X_train))
    X_test = np.hstack((style_X_test, content_X_test, complexity_X_test))
    print(X_train.shape, X_test.shape)
    
    ### WRITE DATA ###
    data_dict[str(cv_id)] = [X_train, X_test]
    
### SAVE DATA ###
file_path = 'learning_output/representation.pickle'
with open(file_path, "wb") as output_file:
    pickle.dump(data_dict, output_file)
"""

Preparing data for cv split 0
[INFO] Starting PCA
[INFO] Finished PCA
[INFO] Starting PCA
[INFO] Finished PCA
[INFO] Starting Complexity Computation
[INFO] Starting Sobel Detection
[INFO] Finished Sobel Detection
[INFO] Finished Complexity Computation
[INFO] Starting Complexity Computation
[INFO] Starting Sobel Detection
[INFO] Finished Sobel Detection
[INFO] Finished Complexity Computation
(1014, 274) (252, 274)
Preparing data for cv split 1
[INFO] Starting PCA
[INFO] Finished PCA
[INFO] Starting PCA
[INFO] Finished PCA
[INFO] Starting Complexity Computation
[INFO] Starting Sobel Detection
[INFO] Finished Sobel Detection
[INFO] Finished Complexity Computation
[INFO] Starting Complexity Computation
[INFO] Starting Sobel Detection
[INFO] Finished Sobel Detection
[INFO] Finished Complexity Computation
(1014, 274) (252, 274)
Preparing data for cv split 2
[INFO] Starting PCA
[INFO] Finished PCA
[INFO] Starting PCA
[INFO] Finished PCA
[INFO] Starting Complexity Computation
[INFO] Starting S

In [4]:
### REGRESSION ###
# Trains one DNN regressor per CV split on the cached feature matrices and
# appends the per-image test predictions to predictions.txt.

# The context manager guarantees the pickle file handle is closed after loading.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open(r"learning_output/representation.pickle", "rb") as data_file:
    data_dict = pickle.load(data_file)

for cv_id in range(0, 5):
    print("Evaluating cv split", str(cv_id))

    X_train = data_dict[str(cv_id)][0]
    X_test = data_dict[str(cv_id)][1]

    Y_train, Y_test = data_module.get_salience_only(str(cv_id))
    # Rescale the targets by 1/5 (presumably a 0-5 salience scale mapped to
    # 0-1 - confirm against the data module).
    Y_train, Y_test = Y_train / 5.0, Y_test / 5.0

    save_name = "dnn_regression_" + str(cv_id)
    model = prediction_module.initialize_dnn_regression(input_shape=(X_train.shape[1],), learning_rate=0.0004, summary=False)
    prediction_module.train_dnn_regression(model, X_train, Y_train, X_test, Y_test, epochs=1000, batch_size=32, verbose=0,
                                           evaluate=True, plot=True, save_name=save_name, save=True, delete=False)

    train_ids, test_ids = data_module.get_train_test_image_ids(str(cv_id))

    predictions = model.predict(X_test)

    # Append mode keeps the rows of all splits in one file.
    # NOTE(review): re-running this cell appends the same rows again.
    with open('predictions.txt', 'a') as csv_file:
        for idx, value in enumerate(test_ids):
            target = Y_test[idx]
            predicted = predictions[idx][0]
            absolute_error = abs(target - predicted)
            percentage_error = abs((target - predicted) / target) * 100
            # Row layout: image id, target, prediction, absolute error, percentage error.
            fields = (value, target, predicted, absolute_error, percentage_error)
            csv_file.write(", ".join(str(field) for field in fields) + "\n")

Evaluating cv split 0
train loss, train mse, train mae, train mape: [0.06627044645992257, 0.008616955, 0.06627045, 13.934746]
test loss, test mse, test mae, test mape: [0.07296174126011985, 0.008830873, 0.07296175, 14.209819]
Evaluating cv split 1
train loss, train mse, train mae, train mape: [0.06237753831893499, 0.007781198, 0.062377542, 12.55329]
test loss, test mse, test mae, test mape: [0.09355804643460683, 0.016284583, 0.09355805, 21.386692]
Evaluating cv split 2
train loss, train mse, train mae, train mape: [0.05671955314437313, 0.0071218456, 0.05671955, 12.090855]
test loss, test mse, test mae, test mape: [0.09408641168995509, 0.014398588, 0.09408642, 17.550488]
Evaluating cv split 3
train loss, train mse, train mae, train mape: [0.07203111681477323, 0.009508703, 0.07203111, 14.485124]
test loss, test mse, test mae, test mape: [0.09125427724350066, 0.014628561, 0.09125428, 20.081385]
Evaluating cv split 4
train loss, train mse, train mae, train mape: [0.060616052339947414, 0.00

<Figure size 432x288 with 0 Axes>

In [4]:
### CLASSIFICATION ###
# Trains one DNN classifier per CV split on the cached feature matrices, using
# class weights and a step-wise learning-rate schedule.


def scheduler(epoch, lr):
    """Learning-rate schedule handed to ``LearningRateScheduler``.

    Keeps the incoming rate ``lr`` for epochs below 100, then returns 1.5e-5
    for epochs below 250, 1e-5 for epochs below 400 and 5e-6 afterwards.
    """
    if epoch < 100:
        return lr
    elif epoch < 250:
        return 0.000015
    elif epoch < 400:
        return 0.00001
    else:
        return 0.000005


# The context manager guarantees the pickle file handle is closed after loading.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open(r"learning_output/representation.pickle", "rb") as data_file:
    data_dict = pickle.load(data_file)

for cv_id in range(0, 5):
    print("Evaluating cv split", str(cv_id))

    X_train = data_dict[str(cv_id)][0]
    X_test = data_dict[str(cv_id)][1]

    Y_train, Y_test = data_module.get_binary_only(str(cv_id))
    class_weights = prediction_module.compute_class_weights(Y_train)

    # Author's settings note: adam, 0.00002, 32, 600

    save_name = "dnn_classification_" + str(cv_id)
    model = prediction_module.initialize_dnn_classification(input_shape=(X_train.shape[1],), learning_rate=0.00002, summary=False)

    # A fresh callback object per model; the schedule function itself is
    # loop-invariant and therefore defined once above.
    callback = tf.keras.callbacks.LearningRateScheduler(scheduler)

    prediction_module.train_dnn_classification(model, X_train, Y_train, X_test, Y_test, epochs=600, batch_size=32,
                                               verbose=0, class_weights=class_weights,
                                               evaluate=True, plot=True, save_name=save_name, callback=[callback])

Evaluating cv split 0
  ...
    to  
  ['...']
  ...
    to  
  ['...']
train loss, train acc: [0.4925375216341113, 0.81459564]
test loss, test acc: [0.664867173111628, 0.5952381]
Evaluating cv split 1
  ...
    to  
  ['...']
  ...
    to  
  ['...']
train loss, train acc: [0.5000206291440441, 0.80177516]
test loss, test acc: [0.6345005063783555, 0.6388889]
Evaluating cv split 2
  ...
    to  
  ['...']
  ...
    to  
  ['...']
train loss, train acc: [0.49506891349833865, 0.8136095]
test loss, test acc: [0.6655937045339554, 0.60714287]
Evaluating cv split 3
  ...
    to  
  ['...']
  ...
    to  
  ['...']
train loss, train acc: [0.5017011164443262, 0.8076923]
test loss, test acc: [0.6660820917477683, 0.6031746]
Evaluating cv split 4
  ...
    to  
  ['...']
  ...
    to  
  ['...']
train loss, train acc: [0.4778894583384196, 0.8174603]
test loss, test acc: [0.6807050982186961, 0.5968992]


<Figure size 432x288 with 0 Axes>

In [7]:
### CLASSIFICATION TEST ###
# Runs prediction_module.feature_trend on every CV split and reports the mean
# of the returned "accuracy" values.

acc_all = []

# The context manager guarantees the pickle file handle is closed after loading.
# NOTE(review): pickle.load can execute arbitrary code - only load trusted files.
with open(r"learning_output/representation.pickle", "rb") as data_file:
    data_dict = pickle.load(data_file)

for cv_id in range(0, 5):
    print("Evaluating cv split", str(cv_id))

    X_train = data_dict[str(cv_id)][0]
    X_test = data_dict[str(cv_id)][1]

    Y_train, Y_test = data_module.get_binary_only(str(cv_id))

    # feature_trend returns a dict; only its "accuracy" entry is aggregated.
    result = prediction_module.feature_trend(X_train, Y_train, X_test, Y_test)
    print(result)
    acc_all.append(result["accuracy"])

print("AVG ACCURACY:", sum(acc_all)/len(acc_all))

Evaluating cv split 0
[INFO] Starting Random Forest Fitting and Prediction
[INFO] Finished Random Forest Fitting and Prediction
{'accuracy': 0.6071428571428571}
Evaluating cv split 1
[INFO] Starting Random Forest Fitting and Prediction
[INFO] Finished Random Forest Fitting and Prediction
{'accuracy': 0.6428571428571429}
Evaluating cv split 2
[INFO] Starting Random Forest Fitting and Prediction
[INFO] Finished Random Forest Fitting and Prediction
{'accuracy': 0.6388888888888888}
Evaluating cv split 3
[INFO] Starting Random Forest Fitting and Prediction
[INFO] Finished Random Forest Fitting and Prediction
{'accuracy': 0.6468253968253969}
Evaluating cv split 4
[INFO] Starting Random Forest Fitting and Prediction
[INFO] Finished Random Forest Fitting and Prediction
{'accuracy': 0.6007751937984496}
AVG ACCURACY: 0.627297895902547
