In [1]:
import os
import numpy as np
import pandas as pd
import pickle 

from glmnet.scorer import make_scorer
from glmnet import LogitNet

from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.decomposition import PCA

from sklearn.metrics import f1_score,brier_score_loss, log_loss
from itertools import combinations

from scripts.utils_ import load_data, get_pca_transformed_data
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# ---------------------------------------------------------------------------
# Load the two feature sets (density maps and morphometrics) for the selected
# cell types and bring each into a deterministic order by sorting on 'file'.
# ---------------------------------------------------------------------------
part = 'full'

# Candidate cell-type selections; `restriction` picks the one used below.
restriction_v1s1 = ["MC", "SOM L4 S1"]
restriction_v1 = ["MC", "BC", "SC", "DBC", "BPC", "HEC", "NFC"]
restriction = restriction_v1

# --- density maps ----------------------------------------------------------
path_dm = "./data/density_maps/aligned_globally/{}/xz/".format(part)
desc_dm, data_dm = load_data(path_dm, restriction=restriction, fl='density_map')
# Reset to a positional index, then order the description rows by file name.
# The resulting (permuted) index is reused to reorder the data rows so that
# descriptions and data stay in sync.
desc_dm = desc_dm.reset_index().sort_values('file')
dm_idx = desc_dm.index
data_dm = data_dm[dm_idx]

# --- morphometrics ---------------------------------------------------------
path_morph = "./data/morphometrics/{}/".format(part)
desc_morph, data_morph = load_data(path_morph, restriction=restriction, fl='morphometrics')
# Same treatment as for the density maps.
# NOTE(review): later cells stack data_morph and data_dm row-wise and take the
# labels from desc_dm only; this presumably relies on both folders describing
# the same cells so that sorting by 'file' aligns them - confirm.
desc_morph = desc_morph.reset_index().sort_values('file')
morph_idx = desc_morph.index
data_morph = data_morph[morph_idx]

In [3]:
# Use features combined: morphometric columns first, density-map columns after.
n_morph_cols = data_morph.shape[1]
n_dm_cols = data_dm.shape[1]
data = np.concatenate((data_morph, data_dm), axis=1)

# Layout vector passed to get_pca_transformed_data: a leading 0, a 1 for every
# morphometric column, and finally the number of density-map columns.
# NOTE(review): presumably these are consecutive block lengths (each
# morphometric on its own, the density map as one block) - confirm against
# scripts.utils_.
fm_lengths = np.array([0] + n_morph_cols * [1] + [n_dm_cols])

In [4]:

# ---------------------------------------------------------------------------
# Pairwise classification of cell types on the combined feature matrix.
#
# For every pair of types a LogitNet (alpha=0.5) is evaluated with repeated
# stratified k-fold cross-validation; per-split metrics are collected in
# `runs`. Afterwards the model is refit on all samples of the pair and kept in
# `models`. Metrics are written to CSV and the models are pickled.
# ---------------------------------------------------------------------------

# Build the output location first and make sure its directory exists, so that
# a missing folder is reported before the cross-validation starts instead of
# after all the work has been done.
save_path = "./results/classification/two_features_"+part+"_"
save_path += '_res_' + ''.join([r + '_' for r in restriction])
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# set up cross validation
kf = RepeatedStratifiedKFold(n_splits=5, n_repeats=10, random_state=43)

# set up PCA
pca = PCA(n_components=10, copy=True, whiten=False)

# set up pairs for pairwise classification
types, type_count = np.unique(desc_dm['type'], return_counts=True)
# Materialised as a list so the pairs can still be inspected after the loop.
pairs = list(combinations(types, 2))  # get classification pairs

runs = []    # one dict of metrics per successfully evaluated split
models = []  # one model per pair, refit on all samples of that pair
for pair in pairs:
    # set up model; its scorer is replaced by a negated log-loss on probabilities
    m = LogitNet(alpha=0.5, n_splits=5, random_state=42)
    m.scoring = make_scorer(log_loss, greater_is_better=False, needs_proba=True)

    # restrict samples and labels to the two types of this pair
    idx = (desc_dm['type'] == pair[0]) | (desc_dm['type'] == pair[1])

    X = data[idx,:]
    y = desc_dm['type'][idx].values

    # run_id counts successful splits only (it is not advanced when a fit fails)
    k = 1
    for train_ix, test_ix in kf.split(X, y):

        y_train = y[train_ix]
        y_test = y[test_ix]

        # transform the train and test rows of this split with the shared PCA helper
        X_train, X_test = get_pca_transformed_data(X[train_ix, :], X[test_ix, :], pca, fm_lengths)
        r_ = dict(group_a=pair[0], group_b=pair[1])
        r_['run_id'] = k

        try:
            m.fit(X_train, y_train)

            y_hat = np.array(m.predict(X_test)).reshape(-1)

            r_['avg_training_score'] = m.score(X_train, y_train)
            r_['avg_test_score'] = np.mean(y_hat == y_test)
            r_['f1_score'] = f1_score(y_test, y_hat, labels=pair, pos_label=pair[0])

            # since function can only deal with numeric types
            # NOTE(review): both arguments are hard 0/1 indicators here, so this
            # value equals the misclassification rate of the split. A Brier
            # score is normally computed from predicted probabilities -
            # confirm whether that was intended.
            r_['brier_score'] = brier_score_loss(y_test == pair[0], y_hat == pair[0])

            runs.append(r_)
            k += 1
        except (ValueError, RuntimeError) as e:
            # a failed split is reported and skipped; no row is recorded for it
            print(e)
            continue

    # final model for this pair: transform all of its samples (X is passed as
    # both arguments, only the first result is kept) and refit on them
    X_ , _ = get_pca_transformed_data(X, X, pca, fm_lengths)
    m.fit(X_, y)
    models.append(m)

result = pd.DataFrame(runs)
# save models
z = pickle.dumps(models)

result.to_csv(save_path + "alpha_05.csv")
# Context manager guarantees the file is flushed and closed even on error.
with open(save_path + "alpha_05_models", 'wb') as model_file:
    model_file.write(z)

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision'

481723