In [1]:
from time import time
import psycopg2
from collections import Counter
import pandas as pd
import numpy as np
from sklearn.decomposition import NMF

np.set_printoptions(suppress=True,precision=10)

In [2]:
import sys 
import os
sys.path.append(os.path.abspath("/home/scidb/HeartRatePatterns/Python"))
from LogisticRegresion import ajustLogisticRegression

In [3]:
def selectMatrix(dbname="mimic") :
    conn = psycopg2.connect("dbname="+dbname)
    cur = conn.cursor()
    select_stament = ("SELECT m.subject_id,m.word,m.counting,s.isalive "
                      " FROM matrix m LEFT JOIN subjectwords s ON m.subject_id=s.subject_id "
     #                 " WHERE m.word in (select word from wordspearson where p1>0.01 order by p1 limit 400) "
    )
    cur.execute(select_stament)
    select = []
    for row in cur :
        select.append((row))
    cur.close()
    conn.close()
    return select

In [4]:
def convertMatrix() :
    labels = ['subject_id', 'Word', 'Counting','isAlive']
    df = pd.DataFrame.from_records(selectMatrix(), columns=labels)
    print(len(df))
    table = pd.pivot_table(df,index=["subject_id","isAlive"],columns=["Word"],values=["Counting"],
                       aggfunc={"Counting":[np.sum]},fill_value=0)
    table.columns = [value[2] for value in table.columns.values]
    return table

In [5]:
t0=time()
table = convertMatrix()
print("converMatrix done in %0.3fs." % (time() - t0))
print(table.shape)

134706
converMatrix done in 2.514s.
(658, 5610)


In [6]:
survived = table.index.labels[1].tolist()
patients = table.values

In [7]:
print(table.shape)
print(len(survived))
print(patients.shape)

(658, 5610)
658
(658, 5610)


### NMF

In [8]:
def generateNMF(n_components):
    nmf = NMF(n_components=n_components, random_state=1,alpha=.1, l1_ratio=.5)
    patients_nmf = nmf.fit_transform(patients)
    components = nmf.components_
#    print("heatBeats X factor",components.shape)
#    print("patient X factor",patients_nmf.shape)
    return patients_nmf,components
#patients_nmf,components = generateNMF(2)

# Pearson

In [9]:
from scipy.stats.stats import pearsonr

In [10]:
def checkPearson(patients_nmf,survived):
    sumofPearson,maxPearson,minPearson = 0,0,0
    n_components = patients_nmf.shape[1]
    for i in range(n_components):
        pearson = pearsonr(patients_nmf[:,i],survived)
        sumofPearson += pearson[0]
        if pearson[0]>maxPearson:
            maxPearson = pearson[0]
        if pearson[0]<minPearson:
            minPearson = pearson[0]
    return (n_components,sumofPearson,(sumofPearson/n_components),maxPearson,minPearson)

In [43]:
def getPearson(patients_nmf,survived):
    n_components = patients_nmf.shape[1]
    pearson = []
    for i in range(n_components):
        pearson.append({"Topic":i,"pearson":pearsonr(patients_nmf[:,i],survived)[0]})
    return sorted(pearson, key = lambda value: value['pearson'],reverse=True)

In [44]:
for i in range(2,20):
    patients_nmf,components = generateNMF(i)
#    result = checkPearson(patients_nmf,survived)
    result = getPearson(patients_nmf,survived)
    print(i,result[:5])

2 [{'Topic': 1, 'pearson': 0.010923444702911286}, {'Topic': 0, 'pearson': -0.098164051702177041}]
3 [{'Topic': 2, 'pearson': 0.051576486119688306}, {'Topic': 1, 'pearson': 0.010921487602107989}, {'Topic': 0, 'pearson': -0.09911156579020626}]
4 [{'Topic': 2, 'pearson': 0.051612592125778033}, {'Topic': 1, 'pearson': 0.019721856687391869}, {'Topic': 3, 'pearson': -0.051550853667254017}, {'Topic': 0, 'pearson': -0.093877478126509789}]
5 [{'Topic': 2, 'pearson': 0.057568926806160658}, {'Topic': 1, 'pearson': 0.019724285322645078}, {'Topic': 4, 'pearson': 0.018563729161985232}, {'Topic': 3, 'pearson': -0.051545932478026253}, {'Topic': 0, 'pearson': -0.093918733284187858}]
6 [{'Topic': 2, 'pearson': 0.057568926957940379}, {'Topic': 5, 'pearson': 0.019983647613144445}, {'Topic': 1, 'pearson': 0.019724132802576653}, {'Topic': 4, 'pearson': 0.018563729257072177}, {'Topic': 3, 'pearson': -0.051545931697064444}]
7 [{'Topic': 2, 'pearson': 0.057610606313159757}, {'Topic': 1, 'pearson': 0.0199919107

In [45]:
for i in range(20,40):
    patients_nmf,components = generateNMF(i)
    result = getPearson(patients_nmf,survived)
    print(i,result[:5])

20 [{'Topic': 19, 'pearson': 0.048852538079433995}, {'Topic': 2, 'pearson': 0.0426906344684998}, {'Topic': 8, 'pearson': 0.040507101622158481}, {'Topic': 10, 'pearson': 0.027048084153059242}, {'Topic': 12, 'pearson': 0.024288739242923837}]
21 [{'Topic': 19, 'pearson': 0.049580706629770786}, {'Topic': 2, 'pearson': 0.047643549938765732}, {'Topic': 12, 'pearson': 0.046827937776929035}, {'Topic': 8, 'pearson': 0.041545667355194327}, {'Topic': 10, 'pearson': 0.027046932555672948}]
22 [{'Topic': 19, 'pearson': 0.049579750722445581}, {'Topic': 2, 'pearson': 0.047640979983438268}, {'Topic': 12, 'pearson': 0.046820072794736427}, {'Topic': 8, 'pearson': 0.041547121064839751}, {'Topic': 10, 'pearson': 0.02450741660076201}]
23 [{'Topic': 19, 'pearson': 0.049726794364730306}, {'Topic': 2, 'pearson': 0.048364862303391716}, {'Topic': 12, 'pearson': 0.04557566976884371}, {'Topic': 8, 'pearson': 0.041102985246113118}, {'Topic': 10, 'pearson': 0.024507408877847296}]
24 [{'Topic': 2, 'pearson': 0.049507

In [46]:
for i in range(40,60):
    patients_nmf,components = generateNMF(i)
    result = getPearson(patients_nmf,survived)
    print(i,result[:5])

40 [{'Topic': 20, 'pearson': 0.066770288446365123}, {'Topic': 19, 'pearson': 0.064011566403275183}, {'Topic': 25, 'pearson': 0.060822433995100865}, {'Topic': 37, 'pearson': 0.047760672822045928}, {'Topic': 2, 'pearson': 0.04380284994807792}]
41 [{'Topic': 20, 'pearson': 0.066844719215676779}, {'Topic': 19, 'pearson': 0.064037085735746388}, {'Topic': 25, 'pearson': 0.060660196318265344}, {'Topic': 40, 'pearson': 0.053154863109741331}, {'Topic': 37, 'pearson': 0.048238079660167334}]
42 [{'Topic': 20, 'pearson': 0.11616399648987753}, {'Topic': 19, 'pearson': 0.062788438905963348}, {'Topic': 25, 'pearson': 0.06106109486579217}, {'Topic': 40, 'pearson': 0.053276392781589021}, {'Topic': 37, 'pearson': 0.048457490713499883}]
43 [{'Topic': 20, 'pearson': 0.11580901700282005}, {'Topic': 25, 'pearson': 0.059269318835252119}, {'Topic': 19, 'pearson': 0.056041948063595512}, {'Topic': 42, 'pearson': 0.054385911497923711}, {'Topic': 40, 'pearson': 0.053023346094911751}]
44 [{'Topic': 20, 'pearson': 

In [47]:
for i in range(61,80):
    patients_nmf,components = generateNMF(i)
    result = getPearson(patients_nmf,survived)
    print(i,result[:5])

61 [{'Topic': 54, 'pearson': 0.081352102482347727}, {'Topic': 60, 'pearson': 0.074001591640284942}, {'Topic': 50, 'pearson': 0.058440760576900036}, {'Topic': 38, 'pearson': 0.056207095973460006}, {'Topic': 19, 'pearson': 0.050127596968311337}]
62 [{'Topic': 54, 'pearson': 0.089053620160214009}, {'Topic': 20, 'pearson': 0.071929733959451708}, {'Topic': 61, 'pearson': 0.062812649830206013}, {'Topic': 50, 'pearson': 0.057759934381003915}, {'Topic': 19, 'pearson': 0.050360054174235794}]
63 [{'Topic': 20, 'pearson': 0.078723829471599019}, {'Topic': 60, 'pearson': 0.070339746203365083}, {'Topic': 61, 'pearson': 0.062756989141877856}, {'Topic': 50, 'pearson': 0.05821631912125852}, {'Topic': 59, 'pearson': 0.052655254862300438}]
64 [{'Topic': 63, 'pearson': 0.078576750440668863}, {'Topic': 60, 'pearson': 0.066193139145581795}, {'Topic': 61, 'pearson': 0.060860172476589862}, {'Topic': 50, 'pearson': 0.057455438936396971}, {'Topic': 19, 'pearson': 0.04960904832823753}]
65 [{'Topic': 20, 'pearson

In [48]:
patients_nmf,components = generateNMF(42)
result = getPearson(patients_nmf,survived)

In [49]:
result[:5]

[{'Topic': 20, 'pearson': 0.11616399648987753},
 {'Topic': 19, 'pearson': 0.062788438905963348},
 {'Topic': 25, 'pearson': 0.06106109486579217},
 {'Topic': 40, 'pearson': 0.053276392781589021},
 {'Topic': 37, 'pearson': 0.048457490713499883}]