In [None]:
# Notebook setup: enable the autoreload extension (mode 2) and have matplotlib
# render figures inline in the cell output.
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
import sys

import numpy as np
import pandas as pd

# Put the parent directory (relative to the working directory) first on the
# import path before importing from `match` — presumably so the in-repo copy
# of the package is the one that gets imported; confirm.
sys.path.insert(0, '..')
from match.make_match_panel import make_match_panel
from match.make_summary_match_panel import make_summary_match_panel

In [None]:
# Dimensions of the synthetic data: n_row features by n_column samples.
n_row = 100
n_column = 100

# Seed NumPy's global generator so the random target and feature values drawn
# in the following cells are identical on every Restart & Run All.
random_seed = 0
np.random.seed(random_seed)

In [None]:
# Continuous target: one uniform random float per sample, labelled
# 'Sample 0' .. 'Sample {n_column - 1}'. The labels are built as a list rather
# than a one-shot generator.
target = pd.Series(
    np.random.random_sample(n_column),
    name='Target Name',
    index=['Sample {}'.format(i) for i in range(n_column)])

# Two-class string target on the same samples: the first n_column // 2 samples
# are 'T0' and all remaining samples are 'T1'. Sizing the second class as the
# remainder keeps the value count equal to the index length when n_column is
# odd (two halves of n_column // 2 would come up one short and raise).
target_str = pd.Series(
    ('T0', ) * (n_column // 2) + ('T1', ) * (n_column - n_column // 2),
    index=target.index)

In [None]:
# Row and column labels shared by the three synthetic feature matrices.
feature_names = ['Feature {}'.format(i) for i in range(n_row)]
sample_names = ['Sample {}'.format(i) for i in range(n_column)]

# Continuous features: uniform random floats.
features_continuous = pd.DataFrame(
    np.random.random_sample((n_row, n_column)),
    index=feature_names,
    columns=sample_names)

# Categorical features: integer codes 0..5. np.random.randint has an
# exclusive upper bound, so 6 covers the same inclusive range that the
# deprecated np.random.random_integers(0, 5) drew from.
features_categorical = pd.DataFrame(
    np.random.randint(0, 6, size=(n_row, n_column)),
    index=feature_names,
    columns=sample_names)

# Binary features: 0 or 1 (upper bound 2 is exclusive).
features_binary = pd.DataFrame(
    np.random.randint(0, 2, size=(n_row, n_column)),
    index=feature_names,
    columns=sample_names)

In [None]:
# Run make_match_panel on the continuous target against the continuous
# features, using a single job.
match_scores_for_continuous = make_match_panel(
    target, features_continuous,
    n_top_feature=10,
    max_ytick_size=3, plot_column_names=True,
    n_sampling=3, n_permutation=3,
    n_job=1)

In [None]:
# Same continuous features, this time against the two-class string target,
# which is declared as binary.
# NOTE(review): this rebinds match_scores_for_continuous, so the result of the
# previous make_match_panel call is no longer reachable under that name.
match_scores_for_continuous = make_match_panel(
    target_str, features_continuous,
    target_type='binary',
    n_top_feature=3, plot_column_names=True,
    n_sampling=3, n_permutation=3,
    n_job=1)

In [None]:
# Continuous target against the integer-coded features, declared as
# categorical.
match_scores_for_categorical = make_match_panel(
    target, features_categorical,
    features_type='categorical',
    n_top_feature=3, plot_column_names=True,
    n_sampling=3, n_permutation=3,
    n_job=1)

In [None]:
# Continuous target against the 0/1 features, declared as binary. This call
# asks for two jobs instead of one.
match_scores_for_binary = make_match_panel(
    target, features_binary,
    features_type='binary',
    n_top_feature=3, plot_column_names=True,
    n_sampling=3, n_permutation=3,
    n_job=2)

In [None]:
# Work on a copy so features_continuous itself keeps all of its values.
features_with_missing = features_continuous.copy()

# Blank out every cell at the intersection of a random draw of row labels and
# a random draw of column labels. The row draw is sized from n_row (it was
# previously sized from n_column, which only matched because the two are
# equal), and np.nan is used as the explicit missing marker for float data.
features_with_missing.loc[
    np.random.choice(features_with_missing.index, size=n_row // 3),
    np.random.choice(features_with_missing.columns, size=n_column // 3)] = np.nan

In [None]:
# Continuous target against the continuous features that contain missing
# values. The result gets its own name: it was previously stored in
# match_scores_for_binary, which mislabelled it and overwrote the result of
# the binary-features run.
# n_top_feature is a fraction here rather than a count — presumably a
# quantile-style cutoff; confirm against make_match_panel.
match_scores_for_missing = make_match_panel(
    target,
    features_with_missing,
    n_job=2,
    n_top_feature=0.95,
    n_sampling=3,
    n_permutation=3,
    features_type='continuous',
    plot_column_names=True)

In [None]:
# Two-class string target (declared binary) against the continuous features
# that contain missing values. The result gets its own name: it was previously
# stored in match_scores_for_binary, which mislabelled it and overwrote
# whatever an earlier cell had saved under that name.
match_scores_for_missing_binary_target = make_match_panel(
    target_str,
    features_with_missing,
    n_job=2,
    n_top_feature=0.95,
    n_sampling=3,
    n_permutation=3,
    features_type='continuous',
    target_type='binary',
    plot_column_names=True)

In [None]:
# Number of features taken from each matrix for the summary panel.
n = 3

# Row labels and display aliases for those first n features. Each group below
# receives its own copy of the lists, so no list object is shared between
# groups.
feature_labels = ['Feature {}'.format(i) for i in range(n)]
feature_aliases = ['Alias {}'.format(i) for i in range(n)]

# One entry per feature group, keyed by the group's label. The final group
# leaves out the last three sample columns, so it does not cover every sample
# in the target. Its key previously read 'Cotinuous ...' (typo).
multiple_features = {
    'Continuous features': {
        'df': features_continuous.iloc[:n, :],
        'indices': list(feature_labels),
        'index_aliases': list(feature_aliases),
        'emphasis': 'high',
        'data_type': 'continuous'
    },
    'Categorical features': {
        'df': features_categorical.iloc[:n, :],
        'indices': list(feature_labels),
        'index_aliases': list(feature_aliases),
        'emphasis': 'high',
        'data_type': 'categorical'
    },
    'Binary features': {
        'df': features_binary.iloc[:n, :],
        'indices': list(feature_labels),
        'index_aliases': list(feature_aliases),
        'emphasis': 'high',
        'data_type': 'binary'
    },
    'Continuous features (scores ascending)': {
        'df': features_continuous.iloc[:n, :-3],
        'indices': list(feature_labels),
        'index_aliases': list(feature_aliases),
        'emphasis': 'low',
        'data_type': 'continuous'
    }
}

In [None]:
# Summary panel over all feature groups with the remaining options left at
# their defaults.
make_summary_match_panel(
    target,
    multiple_features,
    plot_column_names=True,
    title='Default')

In [None]:
# Summary panel again, this time with
# plot_only_columns_shared_by_target_and_all_features switched on.
make_summary_match_panel(
    target, multiple_features,
    title='Only Columns Shared by Target and All Features',
    plot_column_names=True,
    plot_only_columns_shared_by_target_and_all_features=True)

In [None]:
# Load a target and a feature table from tab-separated files in the working
# directory, then run make_match_panel on them with default settings.
#
# The target file is read without a header row and with its first column as
# the index; the single remaining column is then squeezed into a Series. The
# reader's `squeeze=True` option did the same thing but was deprecated in
# pandas 1.4 and removed in 2.0, so the squeeze is applied after the read.
#
# NOTE: this rebinds `target`, replacing the synthetic Series created earlier
# in the notebook.
target = pd.read_table(
    'target.tsv', index_col=0, header=None).squeeze('columns')
features = pd.read_table('features.tsv', index_col=0)

make_match_panel(target, features)