In [None]:
# Useful imports and setup
import sys
import os
# Make the helper modules in ../scripts/ importable from this notebook.
# The membership test and the append use the same path, so re-running
# this cell never adds a duplicate entry, and the scripts folder is still
# added when the parent directory itself is already on sys.path.
module_path = os.path.abspath(os.path.join('..'))
scripts_path = os.path.join(module_path, 'scripts')
if scripts_path not in sys.path:
    sys.path.append(scripts_path)
    
import warnings
# Silence every Python warning from this point on. This also hides
# deprecation notices raised by the libraries imported below.
warnings.filterwarnings("ignore")

import tensorflow as tf
import numpy as np
import pandas as pd
import time

from preprocessing import preprocess
from gating import *

# IPython autoreload in mode 2: modules are reloaded before each cell runs,
# so edits to the helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Root data folder; metadata.csv is read from here and each course's
# folder path is built from it further down.
data_path = '../data/'

# Feature-set identifiers handed to preprocess() for every course.
feature_types = [
    'lalle_conati',
    'boroujeni_et_al',
    'chen_cui',
    'marras_et_al',
]

# Metadata table, read once and passed to every preprocess() call.
metadata = pd.read_csv(f'{data_path}metadata.csv')

In [None]:
# Course identifiers to train on, one per line. The training cell builds
# each course's data, model and results folder paths from these strings.
courses = [
    'analysenumerique_001',
    'analysenumerique_002',
    'analysenumerique_003',
    'cpp_fr_001',
    'dsp_001',
    'dsp_002',
    'dsp_004',
    'dsp_005',
    'dsp_006',
    'geomatique_003',
    'hwts_001',
    'hwts_002',
    'initprogcpp_001',
    'microcontroleurs_003',
    'microcontroleurs_003_003',
    'microcontroleurs_004',
    'microcontroleurs_005',
    'microcontroleurs_006',
    'progfun_002',
    'progfun_003',
    'structures_001',
    'structures_002',
    'structures_003',
    'venture_001',
    'villesafricaines_001',
    'villesafricaines_002',
    'villesafricaines_003',
]

### Feature gating with annealing loss

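The cell below trains one gated model per course for every combination of sparsity target and normalization method. Fine-tuning on sparsity criteria is done by listing the sparsity targets to sweep in `sparsity_target`. You can also choose between two normalization methods in `norm_methods`: min-max (`'min-max'`) and unit norm (`'unit'`).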

In [None]:
# Sweep configuration. One model is trained per
# (course, percentile, sparsity target, normalization) combination.
percentile_list = [0.4]
norm_methods = ['unit', 'min-max']
sparsity_target = [0.45, 0.5, 0.55]

for course in courses:

    # Per-course output folders: model weights and training scores.
    MODEL_PATH = '../models/saved/' + course + '/'
    results_path = '../results/' + course + '/inter-c3/'

    # exist_ok=True creates missing folders and leaves existing ones alone,
    # replacing the separate exists() check that could race with creation.
    os.makedirs(MODEL_PATH, exist_ok=True)
    os.makedirs(results_path, exist_ok=True)

    # Course folder handed to preprocess().
    path = data_path + course + '/'

    for percentile in percentile_list:
        for sparsity in sparsity_target:
            for norm in norm_methods:
                x_train, x_test, x_val, y_train, y_test, y_val, feature_names = preprocess(
                    course, path, percentile, feature_types, metadata,
                    normalization=norm)

                # Concat features & labels for later analysis.
                # After the sweep only the last run's arrays remain in X and Y.
                X = np.concatenate([x_train, x_val, x_test], axis=0)
                Y = np.concatenate([y_train, y_val, y_test], axis=0)

                # Set up parameters and model to train.
                # meta, the optimizer and the model are rebuilt for every run.
                # NOTE(review): confirm custom_train does not mutate meta
                # before hoisting it out of the loop.
                meta = {'gumbel_temp': 1, 'gumbel_noise': 1e-8}
                params = {
                    'epochs': 20,
                    'batch_size': 64,
                    'optimizer': tf.keras.optimizers.Adam(),
                    'sparsity_target': sparsity
                }
                model = MaskingModel(n_groups=x_train.shape[-1])

                filename = MODEL_PATH + 'annealing_fg_'+norm+'_norm_'+str(sparsity)+'_perc_'+str(percentile)

                # Train model. The message names the normalization too, so the
                # runs that share a sparsity target can be told apart.
                print("Training model for course {0}, percentile {1} of data, sparsity target {2} and {3} normalization\n".format(course, percentile, sparsity, norm))
                start_time = time.time()
                scores = custom_train(model, params, meta, x_train, y_train,
                                      x_val, y_val, verbose=True)
                print("Time spent on training: {0}".format(time.time()- start_time))

                # Save model
                model.save_weights(filename)

                # NOTE(review): unlike the weights filename, this name does not
                # include the percentile, so adding entries to percentile_list
                # would overwrite the scores saved for earlier percentiles.
                np.save(results_path+'scores_'+norm+'_norm_'+str(sparsity)+'.npy', scores)