# Description

This notebook standardizes (z-scores) the S-MultiXcan results projected into the MultiPLIER latent space.

# Modules loading

In [None]:
# Load IPython's autoreload extension and switch it to mode 2, which (per the
# IPython documentation) re-imports modules before each cell is executed.
%load_ext autoreload
%autoreload 2

In [None]:
from pathlib import Path
from IPython.display import display

import numpy as np
import pandas as pd
from sklearn.preprocessing import scale

import conf

# Settings

## Input data

In [None]:
# Pickle file with the projection to standardize; it is looked up inside the
# projections directory configured in `conf`.
INPUT_FILEPATH = (
    Path(conf.RESULTS['PROJECTIONS_DIR'])
    / 'projection-smultixcan-efo_partial-mashr-zscores.pkl'
).resolve()
display(INPUT_FILEPATH)

# File name without its extension; reused later to name the output file.
input_filepath_stem = INPUT_FILEPATH.stem
display(input_filepath_stem)

## Output folder

In [None]:
# Folder that receives this notebook's output; it is created, together with
# any missing parent folders, when it does not exist yet.
RESULTS_DIR = (
    Path(conf.RESULTS["DATA_TRANSFORMATIONS_DIR"]) / 'z_score_std'
).resolve()
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

display(RESULTS_DIR)

# Load input file

In [None]:
# Read the pickled input and transpose it, so that the stored columns become
# the rows of `data`.
data = pd.read_pickle(INPUT_FILEPATH).transpose()

In [None]:
# Show the (rows, columns) dimensions of the transposed input.
display(data.shape)

In [None]:
# Preview the first rows of the transposed input.
display(data.head())

# z-score standardization

In [None]:
# Summary statistics of the first 10 columns only, shown as a reference of
# the value distribution before scaling.
data_stats = data.iloc[:, :10].describe()
display(data_stats)

In [None]:
# Standardize the values with scikit-learn's `scale`, then wrap the result in
# a DataFrame that carries copies of the original row and column labels.
scaled_data = pd.DataFrame(
    scale(data), index=data.index.copy(), columns=data.columns.copy()
)

In [None]:
# Show the (rows, columns) dimensions of the standardized data.
display(scaled_data.shape)

In [None]:
# Preview the first rows of the standardized data.
display(scaled_data.head())

In [None]:
# Summary statistics of the first 10 standardized columns; the assertions in
# the testing section read the 'mean' and 'std' rows of this table.
scaled_data_stats = scaled_data.iloc[:,:10].describe()
display(scaled_data_stats)

## Testing

In [None]:
# Every summarized column must have a mean of (approximately) zero.
assert np.allclose(scaled_data_stats.loc['mean'].to_numpy(), 0.0)

In [None]:
# Every summarized column must have a standard deviation of one, within an
# absolute tolerance of 1e-03.
assert np.allclose(scaled_data_stats.loc['std'].to_numpy(), 1.0, atol=1e-03)

# Save

In [None]:
# Destination pickle: the input file's stem prefixed with 'z_score_std-',
# placed inside the results folder.
output_file = (
    RESULTS_DIR / f'z_score_std-{input_filepath_stem}.pkl'
).resolve()

display(output_file)

In [None]:
# Write the standardized DataFrame to the output pickle file.
scaled_data.to_pickle(output_file)