Covariances aren't probabilities. Let's try if we can generate usefull features if we treat them as probs anyway.
A covariance matrix is build from the fnc data. The covariances are scaled to a range between 0 and 1 and to sum up to 1 per Id. Then the convertet covariances are treated as transition probabilities and the steady state are being calculated and saved as features per Id.

In [1]:
import pandas as pd
import numpy as np
import re
import tqdm
from pathlib import Path

In [2]:
kaggle_input_path = Path('/kaggle/input/trends-assessment-prediction') # '/kaggle/input/trends-assessment-prediction'


In [3]:
icn = pd.read_csv(kaggle_input_path/'ICN_numbers.csv').values.flatten()

f_data = pd.read_csv(kaggle_input_path/'fnc.csv')
f_data = f_data.set_index('Id')
f_data = f_data.T.reset_index()

f_data.head()

Id,index,10001,10002,10003,10004,10005,10006,10007,10008,10009,...,21745,21746,21747,21748,21749,21750,21751,21752,21753,21754
0,SCN(53)_vs_SCN(69),0.36858,0.151696,0.343415,0.132793,0.291921,0.32326,0.023588,0.153818,0.242638,...,0.153193,0.039694,0.241935,-0.295271,0.124321,0.22797,0.455052,0.118257,0.051042,0.544363
1,SCN(98)_vs_SCN(69),0.166876,-0.024819,0.109974,0.258255,0.251254,0.117238,0.251784,0.218085,0.247415,...,0.287633,0.07778,0.208504,0.10885,0.027488,-0.222489,0.483856,0.452123,0.088581,0.27011
2,SCN(99)_vs_SCN(69),0.438148,0.217504,0.741641,0.490769,0.41647,0.64069,0.571558,0.488672,0.560405,...,0.452419,0.415766,0.602328,0.325825,0.378603,0.250417,0.589565,0.608328,0.551354,0.502865
3,SCN(45)_vs_SCN(69),0.341007,0.418072,0.578558,0.342717,0.511719,0.320641,0.338475,0.432211,0.290671,...,0.615736,0.437183,0.632835,0.737253,0.487026,0.442642,0.633691,0.422485,0.305542,0.651486
4,ADN(21)_vs_SCN(69),-0.186251,-0.227234,-0.676446,0.091112,-0.362626,-0.319674,-0.104604,-0.259662,0.029189,...,-0.271541,-0.117561,-0.133641,-0.322191,0.030361,-0.221094,0.161995,-0.106427,-0.034378,-0.252982


In [4]:
# regex from https://www.kaggle.com/kpriyanshu256/trends-graph?scriptVersionId=36333263

f_data['x'] = f_data['index'].apply(lambda x: int(re.findall(r'(?<=\().*?(?=\))', x)[0]))
f_data['y'] = f_data['index'].apply(lambda x: int(re.findall(r'(?<=\().*?(?=\))', x)[1]))
f_data.drop('index', inplace=True,axis=1)



In [5]:
def scale(trans_mx):
    trans_mx = (trans_mx/ trans_mx.min(axis=1))/(trans_mx.max(axis=1)-trans_mx.min(axis=1))
    trans_mx = trans_mx / trans_mx.sum(axis=1)
    return trans_mx

# https://stackoverflow.com/questions/52137856/steady-state-probabilities-markov-chain-python-implementation
def steady_state_prop(p):
    dim = p.shape[0]
    q = (p-np.eye(dim))
    ones = np.ones(dim)
    q = np.c_[q,ones]
    QTQ = np.dot(q, q.T)
    bQT = np.ones(dim)

    return np.linalg.solve(QTQ,bQT)

In [6]:
fnc_steady_state = None

ids = list(set(f_data.columns.values)-set(['x','y']) )

for i in tqdm.tqdm(ids, total=len(ids)):
    
    cov_mx = np.zeros((53,53))

    for j, r in f_data[[i, 'x', 'y']].iterrows():
        x=np.argwhere(icn==int(r['x']))[0][0]-1
        y=np.argwhere(icn==int(r['y']))[0][0]-1
        cov_mx[x,y] = r[i] 
        cov_mx[y,x] = r[i] 

    cov_mx_scaled = scale(cov_mx)
    
    ssp = steady_state_prop(cov_mx_scaled).reshape(1,53)
    
    if not isinstance(fnc_steady_state, pd.DataFrame):
        fnc_steady_state = pd.DataFrame(ssp, columns=icn)
    else:
        fnc_steady_state = fnc_steady_state.append(pd.DataFrame(ssp, columns=icn))
        

fnc_steady_state['Id'] = ids


100%|██████████| 11754/11754 [1:04:24<00:00,  3.04it/s]


In [7]:
display(fnc_steady_state.head())

Unnamed: 0,69,53,98,99,45,21,56,3,9,2,...,23,71,17,51,94,13,18,4,7,Id
0,0.018733,0.038183,0.049523,0.051159,0.011422,0.014668,0.043373,0.022666,0.027756,0.023678,...,0.026023,0.017766,0.01034,0.007409,0.020888,0.013986,0.032193,0.018258,0.019458,10001
0,0.033191,0.033822,0.047354,0.041519,0.017942,0.012788,0.015212,0.032356,0.018896,0.025656,...,0.014436,0.013984,0.013388,0.007252,0.032221,0.013654,0.024563,0.015877,0.014377,10002
0,0.02507,0.023506,0.019158,0.021388,0.013152,0.013018,0.011851,0.015497,0.016558,0.011753,...,0.028508,0.015774,0.018204,0.010793,0.012167,0.011737,0.025788,0.016225,0.011009,10003
0,0.033921,0.0192,0.025567,0.044491,0.014436,0.011268,0.023784,0.017973,0.033601,0.015146,...,0.024735,0.010726,0.019876,0.007161,0.037952,0.017911,0.062057,0.019033,0.02859,10004
0,0.028042,0.020178,0.015897,0.029166,0.020618,0.011863,0.017028,0.026289,0.030872,0.028816,...,0.019033,0.023083,0.016613,0.00766,0.030285,0.011464,0.045893,0.013735,0.021172,10005


In [8]:
fnc_steady_state.to_csv('fnc_steady_state.csv', index = False)