In [1]:
#### MRI GENERATOR

In [42]:
#### STANDART MODULES INITIALISING

### Importing standard modules and date-special modules:
import numpy as np
import pandas as pd
import math
from datetime import date

In [43]:
#### MRI CONSTANTS AND PARAMETERS

### Date format used across the notebook:
date_format = '%Y-%m-%d'
### MRI date boundaries (first / last date of the data range and the start date used by the calculations):
date_first = date(year = 1990, month = 1, day = 1)
date_last = date(year = 2018, month = 12, day = 31)
date_start = date(year = 1993, month = 12, day = 31)
### Source xlsx file and the name of the model sheet inside it:
path_mri_data_xlsx = 'Data_Files/Source_Files/mri_data.xlsx'
mri_model_name = 'Model 01'
### HDF5 file (and object key) caching the source data collected from the xlsx file:
path_mri_data_hdf = 'Data_Files/Source_Files/mri_data.h5'
key_mri_data_hdf = 'source_data'

### Rolling window limits for z-score calculation (asset level and MRI level):
asset_window_min = 252
asset_window_max = asset_window_min * 100
mri_window_max = 10 * 260
### Lower and upper bounds for z-score winsorization:
arr_winsor_boundary = [-4, 4]
### Moving average window for the released MRI:
mri_moving_average_window_max = 5
### HDF5 file with the group level z-score matrices:
path_mri_standart_hdf = 'Data_Files/Source_Files/mri_group_z_matrix.h5'
### HDF5 file with asset level results and its object keys:
path_mri_assets_hdf = 'Data_Files/Source_Files/mri_released_assets.h5'
object_selected_data_hdf = 'selected_data'
object_standartized_data_hdf = 'standartized_data'
### HDF5 file with group level results and its object keys:
path_mri_groups_hdf = 'Data_Files/Source_Files/mri_released_groups.h5'
object_diag_grouped_hdf = 'diag_grouped_data'
object_perc_grouped_hdf = 'percentile_grouped_data'
### HDF5 file with MRI level results and its object keys:
path_mri_index_hdf = 'Data_Files/Source_Files/mri_released_index.h5'
object_diag_mri_hdf = 'diag_MRI_data'
object_released_mri_hdf = 'released_MRI_data'

In [4]:
### MRI DATA AGGREGATING FUNCTION
def get_mri_data(source_file_path, source_model_sheet, hdf_file_path, hdf_object_key, date_index, update_hdf = True):
    """
    Load the MRI model description and the asset time series aligned to date_index.

    Parameters:
        source_file_path: path of the source xlsx workbook.
        source_model_sheet: name of the sheet holding the model table (header on the second row,
            first seven columns are read; columns 'Asset Group', 'Asset Code' and 'Asset Tab Name' are used).
        hdf_file_path: path of the HDF5 file used as a cache of the collected source data.
        hdf_object_key: key of the cached object inside the HDF5 file.
        date_index: index the collected data is reindexed to (with forward filling).
        update_hdf: if True, asset sheets are re-read from the workbook and the HDF5 cache is overwritten;
            if False, the source data is read from the HDF5 cache.

    Returns:
        list [df_model_asset, df_model_mri, df_selected_data]:
            df_model_asset - model rows whose 'Asset Group' is not 'MRI' (index reset);
            df_model_mri - model rows whose 'Asset Group' is 'MRI' (index reset);
            df_selected_data - float64 DataFrame, one column per asset code, indexed by date_index ('Date').
    """
    ### Importing standard modules:
    import pandas as pd

    ### Reading Model information from Source model sheet:
    df_model_raw = pd.read_excel(source_file_path, sheet_name = source_model_sheet, header = 1, usecols = [0, 1, 2, 3, 4, 5, 6])
    ### Group border rows deleting (rows where group name is repeated as asset code):
    df_model_raw = df_model_raw[df_model_raw['Asset Group'] != df_model_raw['Asset Code']]
    ### Dividing list on asset part and MRI weights part (new frames instead of in-place edits of slices):
    df_model_asset = df_model_raw[df_model_raw['Asset Group'] != 'MRI'].reset_index(drop = True)
    df_model_mri = df_model_raw[df_model_raw['Asset Group'] == 'MRI'].reset_index(drop = True)
    ### Extracting source data from initial excel file or from saved hdf:
    if (update_hdf):
        ### Aggregating data from the source xlsx file to pd.DataFrame:
        arr_tab_data = []
        for iter_index, iter_row in df_model_asset.iterrows():
            ### read_excel(squeeze = True) is no longer available in recent pandas, so the single data column is squeezed explicitly:
            ser_iter_tab = pd.read_excel(source_file_path, sheet_name = iter_row['Asset Tab Name'], header = 0, index_col = 0)
            ser_iter_tab = ser_iter_tab.squeeze('columns')
            ser_iter_tab.name = iter_row['Asset Code']
            arr_tab_data.append(ser_iter_tab)
        df_source_data = pd.concat(arr_tab_data, axis = 1, join = 'outer')
        df_source_data = df_source_data.astype(float)
        ### Overwriting the cache file (key is passed by keyword):
        df_source_data.to_hdf(hdf_file_path, key = hdf_object_key, mode = 'w', format = 'fixed', append = False)
    else:
        df_source_data = pd.read_hdf(hdf_file_path, key = hdf_object_key)
    ### Forward filling missing values and filtering by date_index:
    df_source_data = df_source_data.ffill()
    df_selected_data = df_source_data.reindex(date_index, method = 'ffill')
    df_selected_data.index.name = 'Date'
    df_selected_data = df_selected_data.astype('float64')

    return [df_model_asset, df_model_mri, df_selected_data]

In [5]:
### GETTING MRI DATA FOR FUTURE CALCULATIONS
### Business day calendar for the whole MRI date range:
index_mri_date = pd.date_range(start = date_first, end = date_last, freq = 'B')
### update_hdf = True rebuilds the HDF5 cache from the xlsx file; pass update_hdf = False to reuse the cached data instead:
(df_model_asset, df_model_mri, df_selected_data) = get_mri_data(source_file_path = path_mri_data_xlsx,
                                                                source_model_sheet = mri_model_name,
                                                                hdf_file_path = path_mri_data_hdf,
                                                                hdf_object_key = key_mri_data_hdf,
                                                                date_index = index_mri_date,
                                                                update_hdf = True)

In [39]:
def get_rolling_z_score(ser_source, min_wnd, max_wnd, winsor_bottom, winsor_top):
    """
    Calculate plain and winsorized rolling z-scores of a series and the full z-score matrix.

    For every end date that lies at least (min_wnd - 1) business days after the first valid
    observation, the window from (end date - max_wnd business days) to the end date is
    z-scored, then repeatedly clipped to [winsor_bottom, winsor_top] and re-standardized
    until no value lies outside the bounds by 0.01 or more. The result is stored in the
    z-matrix column labelled by the end date.

    Parameters:
        ser_source: pd.Series with a date index (business-day offsets are applied to its labels).
        min_wnd: minimal number of observations required for a z-score.
        max_wnd: maximal rolling window length.
        winsor_bottom: lower winsorization bound.
        winsor_top: upper winsorization bound.

    Returns:
        list [ser_rolling_z_score, ser_rolling_z_winsor, df_z_matrix]:
            ser_rolling_z_score - plain rolling z-score (no winsorization);
            ser_rolling_z_winsor - diagonal of the z-matrix, with the values up to the first
                calculated date backfilled from the first calculated column;
            df_z_matrix - float32 z-score matrix (rows: window dates, columns: end dates).
        If ser_source has no valid values (or is too short to produce any column),
        the winsorized outputs are returned filled with NaN instead of raising.
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd

    ### Calculating plain rolling z-score (the rolling object is built once and reused for mean and std):
    obj_rolling = ser_source.rolling(window = max_wnd, min_periods = min_wnd, win_type = None)
    ser_rolling_z_score = (ser_source - obj_rolling.mean()) / obj_rolling.std()
    ### Initializing resulting variables (np.nan is used, the np.NaN alias does not exist in NumPy 2):
    df_z_matrix = pd.DataFrame(np.nan, index = ser_source.index, columns = ser_source.index)
    df_z_matrix = df_z_matrix.astype('float64')
    ### Nothing to calculate for a series without valid values:
    ind_valid_index = ser_source.first_valid_index()
    if (ind_valid_index is None):
        ser_rolling_z_winsor = pd.Series(np.nan, index = ser_source.index, dtype = 'float64')
        return [ser_rolling_z_score, ser_rolling_z_winsor, df_z_matrix.astype('float32')]
    ### Calculating z-score matrix for end dates having at least min_wnd elements behind them:
    asset_start_index = ind_valid_index + pd.offsets.BusinessDay(min_wnd - 1)
    for iter_end_index in ser_source.index[ser_source.index >= asset_start_index]:
        ### Isolating rolling window for particular data vector element:
        iter_start_index = iter_end_index - pd.offsets.BusinessDay(max_wnd)
        ser_iter_source = ser_source.loc[iter_start_index : iter_end_index]
        ser_iter_z_score = (ser_iter_source - ser_iter_source.mean()) / ser_iter_source.std()
        ### Winsorization process (clip, re-standardize, repeat while values remain outside the bounds):
        while True:
            ser_iter_z_score = ser_iter_z_score.clip(lower = winsor_bottom, upper = winsor_top)
            ser_iter_z_score = (ser_iter_z_score - ser_iter_z_score.mean()) / ser_iter_z_score.std()
            bool_outside = (ser_iter_z_score <= (winsor_bottom - 0.01)) | (ser_iter_z_score >= (winsor_top + 0.01))
            if not bool_outside.any():
                break
        ### Filling z matrix column part after the winsorizing:
        df_z_matrix.loc[iter_start_index : iter_end_index, iter_end_index] = ser_iter_z_score.values
    ### Getting winsorized z-scores as the matrix diagonal (list() gives a writable copy):
    ser_rolling_z_winsor = pd.Series(list(np.diag(df_z_matrix)), index = ser_source.index)
    ### Backfilling with the first calculated column of z matrix (skipped when no column was calculated):
    if (asset_start_index in df_z_matrix.columns):
        ser_rolling_z_winsor.loc[ : asset_start_index] = df_z_matrix.loc[ : asset_start_index, asset_start_index]
    df_z_matrix = df_z_matrix.astype('float32')

    return [ser_rolling_z_score, ser_rolling_z_winsor, df_z_matrix]

In [40]:
### TESTING:
### Single asset check of the rolling z-score routine:
asset_code = 'iv_rvx'
ser_asset_raw = df_selected_data[asset_code]
(ser_rolling_z_score_base, ser_rolling_z_winsor_base, df_base_z_matrix) = get_rolling_z_score(np.log(ser_asset_raw),
                                                                                              asset_window_min, asset_window_max,
                                                                                              *arr_winsor_boundary)
### Raw value, its logarithm and the winsorized z-score for one fixed row:
print('raw data:', ser_asset_raw.iloc[3905])
print('raw data ln:', np.log(ser_asset_raw.iloc[3905]))
print('z-score 3905:', ser_rolling_z_winsor_base.iloc[3905])
### Summary statistics of the winsorized z-score vector:
for iter_stat_name in ['min', 'max', 'mean', 'std']:
    print('z-score ' + iter_stat_name + ':', getattr(ser_rolling_z_winsor_base, iter_stat_name)())
print('z-score size:', ser_rolling_z_winsor_base.size)
print('z-score count:', ser_rolling_z_winsor_base.count())
### Mean of the z-matrix diagonal (no backfilling):
print('z-matrix straight diag mean:', np.nanmean(np.diag(df_base_z_matrix)))

2004-12-20 00:00:00
1922-05-29 00:00:00
2018-12-31 00:00:00
raw data: 17.78
raw data ln: 2.8780742300857587
z-score 3905: -1.9173996588494062
z-score min: -2.708733440781229
z-score max: 4.007532064798355
z-score mean: -0.1067801183124964
z-score std: 1.1881988564929336
z-score size: 7566
z-score count: 3912
z-matrix straight diag mean: -0.11462475348192766


In [7]:
def get_standartized_mri_data(df_model_asset, df_selected_data, date_start, asset_window_min, asset_window_max, arr_winsor_boundary, hdf_file_path):
    """
    Build standardized z-scores for every asset and z-score matrices for every asset group.

    Within each 'Asset Group' the assets are ordered by the date of their first observation;
    the earliest one is the base asset. Every asset is z-scored with get_rolling_z_score on
    the logarithm of its data. An asset whose number of filled winsorized z-scores up to
    date_start is below 2/3 of the base asset's number is rescaled column by column with the
    std and mean of the corresponding part of the base asset z-matrix. The asset matrices of
    a group are averaged by date, z-scored column-wise and appended to the HDF5 file.

    Parameters:
        df_model_asset: model table with 'Asset Group' and 'Asset Code' columns (not modified;
            the function works on a private copy).
        df_selected_data: DataFrame with one column per asset code, indexed by date.
        date_start: date used as the upper bound when counting filled z-scores.
        asset_window_min: minimal rolling window, passed to get_rolling_z_score.
        asset_window_max: maximal rolling window, passed to get_rolling_z_score.
        arr_winsor_boundary: [lower bound, upper bound] for winsorization.
        hdf_file_path: HDF5 file the group matrices are written to (mode 'a', key = group name).

    Returns:
        list [ser_asset_standartized, ser_group_mean_z_diag]:
            ser_asset_standartized - z-matrix diagonals of all assets, indexed by ('Asset', 'Date');
            ser_group_mean_z_diag - z-matrix diagonals of all groups, indexed by ('Group', 'Date').
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd

    ### Same Timestamp conversion of date_start as in aggregate_mri_data:
    ts_start = pd.Timestamp(date_start)
    ### Base assets determination (resorting by earliest value); working on a copy to keep the caller's table intact
    ### and filling the column directly instead of seeding it from a notebook level variable:
    df_model_asset = df_model_asset.copy()
    df_model_asset['Asset Date'] = [df_selected_data[asset_code].dropna().index.min() for asset_code in df_model_asset['Asset Code']]
    df_model_asset = df_model_asset.sort_values(['Asset Group', 'Asset Date']).reset_index(drop = True)

    ### Initialising loop visibility variables:
    dict_group_diag_container = {} ### Group z-matrices diagonals container
    dict_asset_vector_container = {} ### Asset z-matrices diagonals container
    ### Standardizing loop on group level:
    for asset_group_name, df_asset_group in df_model_asset.groupby('Asset Group'):
        ### Initialising group visibility variables:
        print('get_standartized_mri_data: group', asset_group_name, 'standartizing started')
        df_base_z_matrix = None ### Z-matrix of the base (earliest) asset of the group
        int_base_filled = 0 ### Number of filled z-scores of the base asset up to date_start
        dict_asset_matrix_container = {} ### Asset matrices collection for group mean matrix calculation
        ### Standardizing cycle on asset level within the group (Series.iteritems() is not available in pandas 2):
        for asset_code in df_asset_group['Asset Code']:
            ### Performing z scoring for asset:
            [ser_asset_z_score_simple, ser_asset_z_score_winsor, df_asset_z_matrix] = get_rolling_z_score(np.log(df_selected_data[asset_code]),
                                                                                                          asset_window_min, asset_window_max,
                                                                                                          arr_winsor_boundary[0], arr_winsor_boundary[1])
            ### Calculating asset filled quantity before date_start:
            int_asset_filled = ser_asset_z_score_winsor[ : ts_start].dropna().count()
            if (df_base_z_matrix is None):
                ### The first asset of the group is the base one, its matrix is used as is:
                df_base_z_matrix = df_asset_z_matrix
                int_base_filled = int_asset_filled
            elif (int_asset_filled < int_base_filled * 2 / 3):
                ### Renormalizing asset z matrix with base z matrix data when the asset lacks initial values:
                index_asset_start = ser_asset_z_score_simple.first_valid_index()
                ### Asset without any plain z-score has nothing to renormalize:
                if (index_asset_start is not None):
                    index_base_start = index_asset_start - pd.offsets.BusinessDay(asset_window_min - 1)
                    for index_asset_end in ser_asset_z_score_simple.index[ser_asset_z_score_simple.index >= index_asset_start]:
                        ser_base_z_part = df_base_z_matrix.loc[index_base_start : index_asset_end, index_asset_end]
                        df_asset_z_matrix.loc[:, index_asset_end] = df_asset_z_matrix.loc[:, index_asset_end] * ser_base_z_part.std() + ser_base_z_part.mean()
            ### Defining standardized values of asset as diagonal of (possibly modified) z matrix (without backfilling):
            dict_asset_vector_container[asset_code] = pd.Series(list(np.diag(df_asset_z_matrix)), index = df_asset_z_matrix.index)
            ### Adding asset matrix to the group dataset:
            dict_asset_matrix_container[asset_code] = df_asset_z_matrix
            print('get_standartized_mri_data: asset', asset_code, 'in group', asset_group_name, 'standartized successfully')
        ### Calculating z matrix for group as z-scored mean of asset matrices:
        df_group_mean = pd.concat(dict_asset_matrix_container, axis = 0, names = ['Asset Code', 'Date'], copy = False)
        df_group_mean = df_group_mean.groupby('Date').mean()
        df_group_mean_z = (df_group_mean - df_group_mean.mean()) / df_group_mean.std()
        ### Adding diagonal of group mean z-score matrix to MRI dataset:
        dict_group_diag_container[asset_group_name] = pd.Series(list(np.diag(df_group_mean_z)), index = df_group_mean_z.index)
        print('get_standartized_mri_data: z-score matrix for group' , asset_group_name, 'mean matrix builded successfully')
        ### Saving group matrix to hdf file for further manipulations (dates go to column 0, columns are renumbered):
        df_group_mean_z = df_group_mean_z.astype('float32')
        df_group_mean_z.reset_index(inplace = True)
        df_group_mean_z.columns = np.arange(len(df_group_mean_z.columns))
        df_group_mean_z.to_hdf(hdf_file_path, key = asset_group_name, mode = 'a', format = 'fixed')
        print('get_standartized_mri_data: z-score matrix for group' , asset_group_name, 'saved to HDF5 file', hdf_file_path, '(object key:', asset_group_name, ')')
    ### Collection of standardized z-scores for all assets:
    ser_asset_standartized = pd.concat(dict_asset_vector_container, axis = 0, names = ['Asset', 'Date'], copy = False)
    print('get_standartized_mri_data: asset standartized z-score collection builded successfully')
    ### Collection of diagonals of group's z matrices for all groups:
    ser_group_mean_z_diag = pd.concat(dict_group_diag_container, axis = 0, names = ['Group', 'Date'], copy = False)
    print('get_standartized_mri_data: data vector collection of diagonales of mean z score matrix for all groups builded successfully')
    return [ser_asset_standartized, ser_group_mean_z_diag]

In [8]:
##########################################################################################################################################################################

In [9]:
#### STANDARDIZING SOURCE DATA FOR MRI CALCULATION

### The call below:
### - builds the collection of standardized winsorized z-scores for all assets;
### - builds the collection of group z matrix diagonals for all groups;
### - saves the group z matrices to path_mri_standart_hdf (done inside the function).
[ser_standartized_assets, ser_diag_mean_z_groups] = get_standartized_mri_data(df_model_asset, df_selected_data, date_start, 
                                                                              asset_window_min, asset_window_max, arr_winsor_boundary, 
                                                                              path_mri_standart_hdf)
### Saving results for assets to HDF5 to avoid repeating the heavy calculations while the source model and datasets are unchanged.
### NOTE(review): close_all() is called on a private PyTables registry, presumably to release HDF5 handles
### left open earlier so the files can be opened in write mode - confirm it is still required.
import tables
tables.file._open_files.close_all()
ser_standartized_assets.to_hdf(path_mri_assets_hdf, key = object_standartized_data_hdf, mode = 'w', format = 'fixed')
### Saving results for groups to HDF5 for the same reason:
ser_diag_mean_z_groups.to_hdf(path_mri_groups_hdf, key = object_diag_grouped_hdf, mode = 'w', format = 'fixed')

get_standartized_mri_data: group EQ standartizing started
get_standartized_mri_data: asset iv_us in group EQ standartized successfully
get_standartized_mri_data: asset iv_eu in group EQ standartized successfully
get_standartized_mri_data: asset iv_uk in group EQ standartized successfully
get_standartized_mri_data: asset iv_jp in group EQ standartized successfully
get_standartized_mri_data: asset iv_rvx in group EQ standartized successfully
get_standartized_mri_data: asset iv_eem in group EQ standartized successfully
get_standartized_mri_data: z-score matrix for group EQ mean matrix builded successfully
get_standartized_mri_data: z-score matrix for group EQ saved to HDF5 file Data_Files/Source_Files/mri_group_z_matrix.h5 (object key: EQ )
get_standartized_mri_data: group FI standartizing started
get_standartized_mri_data: asset oas_hy in group FI standartized successfully
get_standartized_mri_data: asset oas_em in group FI standartized successfully
get_standartized_mri_data: z-score mat

In [9]:
### TESTING:
### Summary of the standardized z-score vector of a single asset:
asset_code = 'iv_rvx'
ser_asset_check = ser_standartized_assets[asset_code]
print('z-score 3905:', ser_asset_check.iloc[3905])
for iter_stat_name in ['min', 'max', 'mean', 'std']:
    print('z-score ' + iter_stat_name + ':', getattr(ser_asset_check, iter_stat_name)())
print('z-score size:', ser_asset_check.size)
print('z-score count:', ser_asset_check.count())

z-score 3905: -1.3802614212036133
z-score min: -2.4099533557891846
z-score max: 4.1285858154296875
z-score mean: -0.4078973956184543
z-score std: 1.1452926626074527
z-score size: 7566
z-score count: 3661


In [13]:
### TESTING:
### Summary of the z-matrix diagonal of a single group:
group_code = 'FI'
ser_group_check = ser_diag_mean_z_groups[group_code]
print('z-score 3905:', ser_group_check.iloc[3905])
for iter_stat_name in ['min', 'max', 'mean', 'std']:
    print('z-score ' + iter_stat_name + ':', getattr(ser_group_check, iter_stat_name)())
print('z-score size:', ser_group_check.size)
print('z-score count:', ser_group_check.count())

z-score 3905: -1.596615195274353
z-score min: -3.740589141845703
z-score max: 4.004619121551514
z-score mean: 0.1399801089881174
z-score std: 1.2946608771337218
z-score size: 7566
z-score count: 6250


In [20]:
### TESTING:
### Loading the saved z matrix of a single group (dates are stored in column 0) and checking one of its columns:
group_code = 'FI'
df_group_z_matrix = pd.read_hdf(path_mri_standart_hdf, key = group_code).set_index(0, drop = True)
df_group_z_matrix.iloc[:, 6999].abs().mean()

0.7883340120315552

In [10]:
def aggregate_mri_data(df_model_mri, hdf_z_matrix_path, date_start, mri_window_max, ma_max_wnd, arr_winsor_boundary):
    """
    Aggregate group z-score matrices into the MRI z-score vector and its moving average.

    The group matrices listed in df_model_mri are loaded from the HDF5 file (dates in column 0),
    averaged by date, and for every matrix date not earlier than date_start the window of
    mri_window_max business days ending on that date is z-scored and winsorized (clip to the
    bounds and re-standardize until no value lies outside the bounds by 0.01 or more).

    Parameters:
        df_model_mri: model table whose 'Asset Code' column holds the group keys of the HDF5 file.
        hdf_z_matrix_path: path of the HDF5 file with group z-score matrices.
        date_start: first date the MRI z-scores are calculated for (converted with pd.Timestamp).
        mri_window_max: rolling window length in business days.
        ma_max_wnd: number of rows averaged by the moving average.
        arr_winsor_boundary: [lower bound, upper bound] for winsorization.

    Returns:
        list [ser_mri_z_diag, ser_mri_released]:
            ser_mri_z_diag - diagonal of the MRI z-score matrix, named 'MRI-Z';
            ser_mri_released - diagonal of the moving averaged matrix, named 'MRI-Z-Winsor-MA5',
                with values up to the first calculated date backfilled from the first calculated column.

    Raises:
        ValueError: if df_model_mri contains no groups.
    """
    ### Importing standard modules:
    import numpy as np
    import pandas as pd

    ### Initialising containers for mean matrix calculation:
    dict_group_matrix_container = {}
    winsor_bottom = arr_winsor_boundary[0]
    winsor_top = arr_winsor_boundary[1]
    ts_start = pd.Timestamp(date_start)
    index_matrix_dates = None ### Date labels of the matrix columns (taken from the last loaded group matrix)
    ### Group aggregating cycle:
    for group_index, ser_group_info in df_model_mri.iterrows():
        group_code = ser_group_info['Asset Code']
        ### Loading group z score matrix from HDF5 file and restoring its date index from column 0:
        df_group_z_matrix = pd.read_hdf(hdf_z_matrix_path, key = group_code)
        df_group_z_matrix = df_group_z_matrix.set_index(0, drop = True)
        index_matrix_dates = df_group_z_matrix.index
        ### Adding matrix to container:
        dict_group_matrix_container[group_index] = df_group_z_matrix
        print('aggregate_mri_data: group', group_code, 'z matrix data extracted successfully')
    if (len(dict_group_matrix_container) == 0):
        raise ValueError('aggregate_mri_data: df_model_mri contains no groups to aggregate')
    ### Calculating mean matrix for MRI from group matrices:
    df_group_mean = pd.concat(dict_group_matrix_container, axis = 0, names = ['Group', 'Date'], copy = False)
    print('aggregate_mri_data: MRI mean matrix concatenated successfully')
    df_group_mean = df_group_mean.groupby(['Date']).mean()
    df_group_mean.columns = index_matrix_dates
    print('aggregate_mri_data: MRI mean matrix averaged successfully')
    ### Calculating z matrix for MRI with winsorization (np.nan is used, the np.NaN alias does not exist in NumPy 2):
    df_mri_z_score = pd.DataFrame(np.nan, index = df_group_mean.index, columns = df_group_mean.columns)
    df_mri_z_score = df_mri_z_score.astype('float32')
    index_calc_dates = df_group_mean.columns[df_group_mean.columns >= ts_start]
    for iter_date in index_calc_dates:
        iter_window_start = iter_date - pd.offsets.BusinessDay(mri_window_max)
        ser_iter_mri = df_group_mean.loc[iter_window_start : iter_date, iter_date]
        ser_iter_z_score = (ser_iter_mri - ser_iter_mri.mean()) / ser_iter_mri.std()
        ### Winsorization process (clip, re-standardize, repeat while values remain outside the bounds):
        while True:
            ser_iter_z_score = ser_iter_z_score.clip(lower = winsor_bottom, upper = winsor_top)
            ser_iter_z_score = (ser_iter_z_score - ser_iter_z_score.mean()) / ser_iter_z_score.std()
            bool_outside = (ser_iter_z_score <= (winsor_bottom - 0.01)) | (ser_iter_z_score >= (winsor_top + 0.01))
            if not bool_outside.any():
                break
        ### Explicit cast keeps the matrix in float32 instead of relying on implicit dtype handling at assignment:
        df_mri_z_score.loc[iter_window_start : iter_date, iter_date] = ser_iter_z_score.values.astype('float32')
    ser_mri_z_diag = pd.Series(list(np.diag(df_mri_z_score)), index = df_mri_z_score.index)
    ser_mri_z_diag.name = 'MRI-Z'
    print('aggregate_mri_data: MRI z matrix builded successfully')
    ### Calculating moving average of the MRI z matrix along the rows of each column:
    df_mri_z_ma = df_mri_z_score.copy()
    for iter_shift in range(1, ma_max_wnd):
        df_mri_z_ma = df_mri_z_ma + df_mri_z_score.shift(iter_shift)
    df_mri_z_ma = df_mri_z_ma / ma_max_wnd
    ser_mri_released = pd.Series(list(np.diag(df_mri_z_ma)), index = df_mri_z_ma.index)
    ### Backfilling with the first calculated column (equals the date_start column when date_start is a matrix date):
    if (len(index_calc_dates) > 0):
        date_backfill = index_calc_dates[0]
        ser_mri_released.loc[ : date_backfill] = df_mri_z_ma.loc[ : date_backfill, date_backfill]
    ser_mri_released.name = 'MRI-Z-Winsor-MA5'
    print('aggregate_mri_data: MRI moving average resulting vector builded successfully')
    return [ser_mri_z_diag, ser_mri_released]

In [11]:
### BUILDING MRI INDEX
### Aggregating the saved group z matrices into the MRI z-score vector and its moving averaged version:
[ser_mri_z_diag, ser_mri_released] = aggregate_mri_data(df_model_mri, path_mri_standart_hdf, date_start, 
                                                        mri_window_max, mri_moving_average_window_max, arr_winsor_boundary)
### Saving MRI results to HDF5 to avoid repeating the heavy calculations while the source model and datasets are unchanged.
### NOTE(review): close_all() is called on a private PyTables registry, presumably to release HDF5 handles
### left open earlier - confirm it is still required.
import tables
tables.file._open_files.close_all()
### The first write recreates the file (mode 'w'), the second one adds another object to it (mode 'a'):
ser_mri_z_diag.to_hdf(path_mri_index_hdf, key = object_diag_mri_hdf, mode = 'w', format = 'fixed')
ser_mri_released.to_hdf(path_mri_index_hdf, key = object_released_mri_hdf, mode = 'a', format = 'fixed')

aggregate_mri_data: group EQ z matrix data extracted successfully
aggregate_mri_data: group FI z matrix data extracted successfully
aggregate_mri_data: group FX z matrix data extracted successfully
aggregate_mri_data: MRI mean matrix concatenated successfully
aggregate_mri_data: MRI mean matrix averaged successfully
aggregate_mri_data: MRI z matrix builded successfully
aggregate_mri_data: MRI moving average resulting vector builded successfully


In [12]:
### TESTING:
### Summary of the MRI z-score diagonal vector:
print('MRI 6999:', ser_mri_z_diag.iloc[6999 : 7000])
for iter_stat_name in ['min', 'max', 'mean', 'std']:
    print('MRI ' + iter_stat_name + ':', getattr(ser_mri_z_diag, iter_stat_name)())
print('MRI size:', ser_mri_z_diag.size)
print('MRI count:', ser_mri_z_diag.count())

MRI 6999: Date
2016-10-28   -0.469501
Name: MRI-Z, dtype: float64
MRI min: -2.9318556785583496
MRI max: 4.007950782775879
MRI mean: -0.027571064076795933
MRI std: 1.208622109998802
MRI size: 7566
MRI count: 6522


In [13]:
### TESTING:
### Summary of the released (moving averaged) MRI vector:
print('MRI 6999:', ser_mri_released.iloc[6999 : 7000])
for iter_stat_name in ['min', 'max', 'mean', 'std']:
    print('MRI ' + iter_stat_name + ':', getattr(ser_mri_released, iter_stat_name)())
print('MRI size:', ser_mri_released.size)
print('MRI count:', ser_mri_released.count())

MRI 6999: Date
2016-10-28   -0.537115
Name: MRI-Z-Winsor-MA5, dtype: float64
MRI min: -2.862577438354492
MRI max: 4.000450134277344
MRI mean: -0.02374306619829718
MRI std: 1.173413280964329
MRI size: 7566
MRI count: 7561


In [44]:
### TESTING:
### Reloading the saved asset, group and MRI level results from their HDF5 files (same order of reads):
(ser_standartized_assets, ser_diag_mean_z_groups, ser_mri_z_diag, ser_mri_released) = [
    pd.read_hdf(iter_path, key = iter_key)
    for (iter_path, iter_key) in [(path_mri_assets_hdf, object_standartized_data_hdf),
                                  (path_mri_groups_hdf, object_diag_grouped_hdf),
                                  (path_mri_index_hdf, object_diag_mri_hdf),
                                  (path_mri_index_hdf, object_released_mri_hdf)]
]

In [45]:
### Displaying the released MRI vector reloaded from HDF5 (the notebook shows only the head and the tail of a long Series):
ser_mri_released

Date
1990-01-01         NaN
1990-01-02         NaN
1990-01-03         NaN
1990-01-04         NaN
1990-01-05         NaN
1990-01-08    0.470369
1990-01-09    0.659891
1990-01-10    0.817269
1990-01-11    0.848957
1990-01-12    1.001224
1990-01-15    1.197926
1990-01-16    1.261960
1990-01-17    1.317314
1990-01-18    1.462639
1990-01-19    1.394541
1990-01-22    1.404716
1990-01-23    1.421270
1990-01-24    1.458489
1990-01-25    1.497196
1990-01-26    1.613590
1990-01-29    1.606256
1990-01-30    1.679289
1990-01-31    1.678403
1990-02-01    1.655841
1990-02-02    1.597747
1990-02-05    1.541853
1990-02-06    1.467910
1990-02-07    1.435599
1990-02-08    1.401693
1990-02-09    1.382021
                ...   
2018-11-20   -0.362878
2018-11-21   -0.367763
2018-11-22   -0.388863
2018-11-23   -0.385297
2018-11-26   -0.394442
2018-11-27   -0.427599
2018-11-28   -0.438429
2018-11-29   -0.449689
2018-11-30   -0.465701
2018-12-03   -0.502604
2018-12-04   -0.501943
2018-12-05   -0.491896
2018-1