In [210]:
import boto3
import numpy as np
import pandas as pd
from sagemaker import get_execution_role
import tensorflow as tf

from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing

role = get_execution_role()
bucket='hack4med'

przyjecie_file = 'CRACoV-PRZYJECIE.csv'
biochemia_file = 'CRACoV-BIOCHEMIA_3.csv'
echo_file = 'CRACoV-ECHO.csv'
nefro_file = '_CRACoV-NEFRO.csv'
mapowanie_tomografii_file = 'CRACoV-MAPOWANIE-TOMOGRAFII.csv'
radio_file = 'CRACoV-RADIO.csv'
etykiety_file = 'CRACoV-ETYKIETY.csv'

def load_csv(file, skiprows=0):
    data_location = 's3://{}/{}'.format(bucket, file)
    return pd.read_csv(data_location, skiprows=skiprows)

# tutaj mamy wszystkie wczytane dane
przyjecia = load_csv(przyjecie_file)
biochemia = load_csv(biochemia_file)
echo = load_csv(echo_file)
# nefro ma zwalony pierwszy wiersz
nefro = load_csv(nefro_file, 1)
mapowanie_tomografii = load_csv(mapowanie_tomografii_file)
radio = load_csv(radio_file)
etykiety = load_csv(etykiety_file)

def clean_data(df):
    df = df.applymap(lambda x: x.strip().lower() if isinstance(x, str) else x)
    
    df = df.replace('nie', 0)
    df = df.replace('nie wiadomo', 1)
    df = df.replace('tak', 2)
    
    df = df.replace('covid', 1)
    df = df.replace('inny (współistniejący covid)', 1)
    
    df = df.fillna(0)
    return df

def lpAndidLabToInt(df):
    return df.astype({"LP.": int, "ID_LAB": int})

def createId(df):
    df["ID"] = df["LP."] + 100000*df["ID_LAB"]
    df = df.astype({"ID": int})
    df.drop('LP.', inplace=True, axis=1)
    df.drop('ID_LAB', inplace=True, axis=1)
    return df

def load_and_parse(file):
    df = load_csv(file)
    df = clean_data(df)
    df = lpAndidLabToInt(df)
    df = createId(df)
    return df





def load_przyjecia():
    df = load_and_parse(przyjecie_file)
    df = df.astype({"WIEK": int, "PRZENIESIENIE": int, "HOSP_PRZYCZ": int, "NT": int, "DM": int, "ASTMA": int, "POCHP": int, "HF": int, "AF": int, "UDAR": int, "CHD": int, "MI": int, "ZAP_PLUC": int, "PCHN": int, "DEKSAMETEZON": int, "HDCZ": int, "BB": int, "STATYNA": int, "ASA": int, "NOAC": int, "MRA": int, "ACE": int, "SARTANY": int, "CA_BLOKER": int})
    df = df.filter(items=['ID', 'PLEC', 'WIEK', 'WZROST', 'PRZENIESIENIE', 'HOSP_PRZYCZ', 'MASA_CIALA', 'BMI', 'RRS', 'RRD', 'PO2_ATM', 'ODDECH', 'AS', 'NT', 'DM', 'ASTMA', 'POCHP', 'HF', 'AF', 'UDAR', 'CHD', 'MI', 'ZAP_PLUC', 'PCHN', 'DEKSAMETEZON', 'HDCZ', 'BB', 'STATYNA', 'ASA', 'NOAC', 'MRA', 'ACE', 'SARTANY', 'CA_BLOKER'])
    return df

def load_biochemia():
    df = load_and_parse(biochemia_file)
    df = df.filter(items=['ID', 'KOD', 'WYNIK'])
    df = df.drop_duplicates(subset=['ID', 'KOD'], keep='last')
    df = df.pivot(index="ID", columns="KOD", values="WYNIK")
    df = df.applymap(lambda x: x.replace('<', '') if isinstance(x, str) else x)
    df = df.applymap(lambda x: x.replace('>', '') if isinstance(x, str) else x)
    df = df.applymap(lambda x: x.replace(',', '.') if isinstance(x, str) else x)
    df = df.applymap(lambda x: x.replace(' mg/l', '') if isinstance(x, str) else x)
    df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)
    return df


def load_echo():
    df = load_and_parse(echo_file)
    return df

# def load_nefro():
#     return load_csv(nefro_file, 1)

def load_mapowanie_tomografii():
    df = load_and_parse(mapowanie_tomografii_file)
    return df


def load_radio():
    df = load_and_parse(radio_file)
    return df


def load_etykiety():
    df = load_and_parse(etykiety_file)
    return df



def all_data():
    joined = pd.merge(load_przyjecia(), load_biochemia(), on=["ID"], how='outer')
    joined = pd.merge(joined, load_echo(), on=["ID"], how='outer')
    joined = pd.merge(joined, load_mapowanie_tomografii(), on=["ID"], how='outer')
    joined = pd.merge(joined, load_radio(), on=["ID"], how='outer')
    joined = pd.merge(joined, load_etykiety(), on=["ID"], how='outer')
    joined = joined.fillna(0)
    return joined


In [208]:
labels_in_data = ['PLEC', 'WIEK', 'WZROST', 'PRZENIESIENIE', 'HOSP_PRZYCZ', 'MASA_CIALA', 'BMI', 'RRS', 'RRD', 'PO2_ATM', 'ODDECH', 'AS', 'NT', 'DM', 'ASTMA', 'POCHP', 'HF', 'AF', 'UDAR', 'CHD', 'MI', 'ZAP_PLUC', 'PCHN', 'DEKSAMETEZON', 'HDCZ', 'BB', 'STATYNA', 'ASA', 'NOAC', 'MRA', 'ACE', 'SARTANY', 'CA_BLOKER', 'ZGON_LUB_OIT', 'n58.11.11342_pct', 'i81.11.1112_crp', 'g49.122.1113_dd', 'm05_il-6', 'o59_tnhs', 'n11.126.20.1cito_mlecz', 'm37.11.191_krea', 'c55.103.02_wbc', 'c55.103.02_plt']
data = all_data().filter(labels_in_data)

for header in labels_in_data:
    data = data.astype({header: np.float32})

for x in ['ZAP_PLUC', 'WIEK', 'WZROST', 'NT', 'WZROST', 'MASA_CIALA', 'BMI', 'RRS', 'RRD', 'PO2_ATM', 'ODDECH', 'AS', 'n58.11.11342_pct', 'i81.11.1112_crp', 'g49.122.1113_dd', 'm05_il-6', 'o59_tnhs', 'n11.126.20.1cito_mlecz', 'm37.11.191_krea', 'c55.103.02_wbc', 'c55.103.02_plt']:
    data[x] = data[x]/data[x].mean()
    
print(data.mean())
print("xxxxx")

    
def mahalanobis(x=None, data=None, cov=None):

    x_mu = x - np.mean(data)
    if not cov:
        cov = np.cov(data.values.T)
    inv_covmat = np.linalg.inv(cov)
    left = np.dot(x_mu, inv_covmat)
    mahal = np.dot(left, x_mu.T)
    return mahal.diagonal()

#create new column in dataframe that contains Mahalanobis distance for each row
data['mahalanobis'] = mahalanobis(x=data, data=data)


from scipy.stats import chi2

#calculate p-value for each mahalanobis distance 
data['p'] = 1 - chi2.cdf(data['mahalanobis'], len(labels_in_data)-1)

data.mean()


PLEC                      0.384000
WIEK                      1.000000
WZROST                    1.000000
PRZENIESIENIE             0.120000
HOSP_PRZYCZ               0.942000
MASA_CIALA                1.000000
BMI                       1.000000
RRS                       1.000000
RRD                       1.000000
PO2_ATM                   1.000000
ODDECH                    1.000000
AS                        1.000000
NT                        0.999998
DM                        0.464000
ASTMA                     0.154000
POCHP                     0.088000
HF                        0.146000
AF                        0.210000
UDAR                      0.076000
CHD                       0.322000
MI                        0.170000
ZAP_PLUC                  0.999997
PCHN                      0.114000
DEKSAMETEZON              0.004000
HDCZ                      0.184000
BB                        0.752000
STATYNA                   0.520000
ASA                       0.336000
NOAC                

PLEC                       0.384000
WIEK                       1.000000
WZROST                     1.000000
PRZENIESIENIE              0.120000
HOSP_PRZYCZ                0.942000
MASA_CIALA                 1.000000
BMI                        1.000000
RRS                        1.000000
RRD                        1.000000
PO2_ATM                    1.000000
ODDECH                     1.000000
AS                         1.000000
NT                         1.000000
DM                         0.464000
ASTMA                      0.154000
POCHP                      0.088000
HF                         0.146000
AF                         0.210000
UDAR                       0.076000
CHD                        0.322000
MI                         0.170000
ZAP_PLUC                   1.000000
PCHN                       0.114000
DEKSAMETEZON               0.004000
HDCZ                       0.184000
BB                         0.752000
STATYNA                    0.520000
ASA                        0

In [186]:
from scipy.stats import chi2
import numpy as np
from sklearn.model_selection import train_test_split

labels_in_data = ['PLEC', 'WIEK', 'WZROST', 'PRZENIESIENIE', 'HOSP_PRZYCZ', 'MASA_CIALA', 'BMI', 'RRS', 'RRD', 'PO2_ATM', 'ODDECH', 'AS', 'NT', 'DM', 'ASTMA', 'POCHP', 'HF', 'AF', 'UDAR', 'CHD', 'MI', 'ZAP_PLUC', 'PCHN', 'DEKSAMETEZON', 'HDCZ', 'BB', 'STATYNA', 'ASA', 'NOAC', 'MRA', 'ACE', 'SARTANY', 'CA_BLOKER', 'ZGON_LUB_OIT', 'n58.11.11342_pct', 'i81.11.1112_crp', 'g49.122.1113_dd', 'm05_il-6', 'o59_tnhs', 'n11.126.20.1cito_mlecz', 'm37.11.191_krea', 'c55.103.02_wbc', 'c55.103.02_plt']
data = all_data().filter(labels_in_data)
# data = data[data['PLEC'] == 0]
# data = data[data['WIEK'] >= 60]

for header in labels_in_data:
    data = data.astype({header: np.float32})


random_state=200

dead = data[data['ZGON_LUB_OIT'] == 1]
alive = data[data['ZGON_LUB_OIT'] == 0]

train_dead=dead.sample(frac=0.9, random_state=random_state)
test_dead=dead.drop(train_dead.index)

train_alive=alive.sample(frac=0.9, random_state=random_state)
test_alive=alive.drop(train_alive.index)

train = pd.concat([train_dead, train_alive])
test = pd.concat([test_dead, test_alive])

train = train.sample(frac=1).reset_index(drop=True)
test = test.sample(frac=1).reset_index(drop=True)

def mahalanobis(x=None, data=None, cov=None):

    x_mu = x - np.mean(data)
    if not cov:
        cov = np.cov(data.values.T)
    inv_covmat = np.linalg.inv(cov)
    left = np.dot(x_mu, inv_covmat)
    mahal = np.dot(left, x_mu.T)
    return mahal.diagonal()


#  'DEKSAMETEZON' , 'HOSP_PRZYCZ'
# all_cols = ['WIEK', 'PRZENIESIENIE', 'MASA_CIALA', 'RRS', 'RRD', 'ODDECH', 'NT', 'DM', 'ASTMA', 'HF', 'AF', 'UDAR', 'CHD', 'MI', 'ZAP_PLUC', 'PCHN', 'HDCZ', 'BB', 'STATYNA', 'ASA', 'NOAC', 'MRA', 'ACE', 'SARTANY', 'CA_BLOKER']
all_cols = ['WIEK', 'WZROST', 'PRZENIESIENIE', 'MASA_CIALA', 'BMI', 'RRS', 'RRD', 'PO2_ATM', 'ODDECH', 'AS', 'NT', 'DM', 'ASTMA', 'POCHP', 'HF', 'AF', 'UDAR', 'CHD', 'MI', 'ZAP_PLUC', 'PCHN', 'HDCZ', 'BB', 'STATYNA', 'ASA', 'NOAC', 'MRA', 'ACE', 'SARTANY', 'CA_BLOKER', 'n58.11.11342_pct', 'i81.11.1112_crp', 'g49.122.1113_dd', 'm05_il-6', 'o59_tnhs', 'n11.126.20.1cito_mlecz', 'm37.11.191_krea', 'c55.103.02_wbc', 'c55.103.02_plt']


test['mahalanobis_bad'] = mahalanobis(x=test[all_cols], data=train[train['ZGON_LUB_OIT'] == 1][all_cols])
test['mahalanobis_good'] = mahalanobis(x=test[all_cols], data=train[train['ZGON_LUB_OIT'] == 0][all_cols])

for multiplier in np.arange(0.1, 100.0, 0.1):
    if(multiplier.is_integer()):
        print('{}%'.format(multiplier))
        
    test['mahalanobis_bad'] = np.minimum(test['mahalanobis_bad'], mahalanobis(x=test[all_cols]*multiplier, data=train[train['ZGON_LUB_OIT'] == 1][all_cols]))
    test['mahalanobis_good'] = np.minimum(test['mahalanobis_good'], mahalanobis(x=test[all_cols]*multiplier, data=train[train['ZGON_LUB_OIT'] == 0][all_cols]))

# test['mahalanobis_bad'] = mahalanobis(x=test[all_cols], data=train[train['ZGON_LUB_OIT'] == 1][all_cols])
# test['mahalanobis_good'] = mahalanobis(x=test[all_cols], data=train[train['ZGON_LUB_OIT'] == 0][all_cols])

test['mahalanobis_ZGON_LUB_OIT'] = test['mahalanobis_bad'] < test['mahalanobis_good']
test[['ZGON_LUB_OIT', 'mahalanobis_ZGON_LUB_OIT', 'mahalanobis_good','mahalanobis_bad']]

np.count_nonzero(test['mahalanobis_ZGON_LUB_OIT'] == True)
np.count_nonzero(test['ZGON_LUB_OIT'] == True)
godCount = np.count_nonzero(test['ZGON_LUB_OIT'] == test['mahalanobis_ZGON_LUB_OIT'])
allCount = len(test)

tpr = np.count_nonzero((test['ZGON_LUB_OIT'] == 0) & (test['ZGON_LUB_OIT'] == test['mahalanobis_ZGON_LUB_OIT'])) / np.count_nonzero((test['ZGON_LUB_OIT'] == 0))
tnr = np.count_nonzero((test['ZGON_LUB_OIT'] == 1) & (test['ZGON_LUB_OIT'] == test['mahalanobis_ZGON_LUB_OIT'])) / np.count_nonzero((test['ZGON_LUB_OIT'] == 1))

print('{} / {} = {}%'.format(godCount, allCount, int(godCount/allCount*100)))
print('tpr = {}'.format(tpr))
print('tnr = {}'.format(tnr))



1.0%
2.0%
4.0%
5.0%
6.0%
7.0%
8.0%
9.0%
10.0%
11.0%
12.0%
13.0%
14.0%
15.0%
16.0%
32.0%
33.0%
34.0%
35.0%
36.0%
37.0%
38.0%
64.0%
65.0%
66.0%
67.0%
68.0%
69.0%
70.0%
71.0%
72.0%
73.0%
74.0%
75.0%
76.0%
77.0%
78.0%
79.0%
80.0%
81.0%
82.0%
83.0%
84.0%
85.0%
86.0%
87.0%
88.0%
89.0%
90.0%
91.0%
92.0%
93.0%
94.0%
95.0%
96.0%
97.0%
98.0%
99.0%
45 / 50 = 90%
tpr = 1.0
tnr = 0.0


In [None]:
from scipy.stats import chi2
import numpy as np
from sklearn.model_selection import train_test_split

data = all_data().filter(['PLEC', 'WIEK', 'WZROST', 'PRZENIESIENIE', 'HOSP_PRZYCZ', 'MASA_CIALA', 'BMI', 'RRS', 'RRD', 'PO2_ATM', 'ODDECH', 'AS', 'NT', 'DM', 'ASTMA', 'POCHP', 'HF', 'AF', 'UDAR', 'CHD', 'MI', 'ZAP_PLUC', 'PCHN', 'DEKSAMETEZON', 'HDCZ', 'BB', 'STATYNA', 'ASA', 'NOAC', 'MRA', 'ACE', 'SARTANY', 'CA_BLOKER', 'ZGON_LUB_OIT'])

np.set_printoptions(precision=2, suppress=True, linewidth= 10000000)


def niewiem(data):
    all_cols = ['WIEK', 'PRZENIESIENIE', 'MASA_CIALA', 'RRS', 'RRD', 'ODDECH', 'NT', 'DM', 'ASTMA', 'HF', 'AF', 'UDAR', 'CHD', 'MI', 'ZAP_PLUC', 'PCHN', 'HDCZ', 'BB', 'STATYNA', 'ASA', 'NOAC', 'MRA', 'ACE', 'SARTANY', 'CA_BLOKER']
    for col in all_cols:
        cov = np.cov(data[all_cols.dop(col)].values.T)
        print('{} -> {}'.format(col, np.linalg.det(cov)))

niewiem(data[data['WIEK'] <= 60])

for data in [data[data['WIEK'] <= 60]]:
# for data in [data[data['WIEK'] > 60], data[data['WIEK'] <= 60]]:

    train=data.sample(frac=0.8, random_state=200) #random state is a seed value
    test=data.drop(train.index)

    def mahalanobis(x=None, data=None, cov=None):

        x_mu = x - np.mean(data)
        if not cov:
            cov = np.cov(data.values.T)
        print(cov.shape)
        print(cov)
        inv_covmat = np.linalg.inv(cov)
        left = np.dot(x_mu, inv_covmat)
        mahal = np.dot(left, x_mu.T)
        return mahal.diagonal()


    #  'DEKSAMETEZON' , 'HOSP_PRZYCZ'
    # all_cols = ['WIEK', 'PRZENIESIENIE', 'MASA_CIALA', 'RRS', 'RRD', 'ODDECH', 'NT', 'DM', 'ASTMA', 'HF', 'AF', 'UDAR', 'CHD', 'MI', 'ZAP_PLUC', 'PCHN', 'HDCZ', 'BB', 'STATYNA', 'ASA', 'NOAC', 'MRA', 'ACE', 'SARTANY', 'CA_BLOKER']
#     all_cols = ['PLEC', 'WIEK', 'WZROST', 'PRZENIESIENIE', 'MASA_CIALA', 'BMI', 'RRS', 'RRD', 'PO2_ATM', 'ODDECH', 'AS', 'NT', 'DM', 'ASTMA', 'POCHP', 'HF', 'AF', 'UDAR', 'CHD', 'MI', 'ZAP_PLUC', 'PCHN', 'HDCZ', 'BB', 'STATYNA', 'ASA', 'NOAC', 'MRA', 'ACE', 'SARTANY', 'CA_BLOKER']
    all_cols = ['PLEC', 'WIEK', 'WZROST', 'PRZENIESIENIE', 'MASA_CIALA', 'BMI', 'RRS', 'RRD', 'PO2_ATM', 'ODDECH', 'AS', 'NT', 'DM', 'UDAR', 'PCHN', 'HDCZ', 'BB', 'STATYNA', 'ACE', 'SARTANY', 'CA_BLOKER']
    print(train[train['ZGON_LUB_OIT'] == 1][all_cols])
    test['mahalanobis_bad'] = mahalanobis(x=test[all_cols], data=train[train['ZGON_LUB_OIT'] == 1][all_cols])
    test['mahalanobis_good'] = mahalanobis(x=test[all_cols], data=train[train['ZGON_LUB_OIT'] == 0][all_cols])

    # test['mahalanobis_bad'] = mahalanobis(x=test[all_cols], data=train[train['ZGON_LUB_OIT'] == 1][all_cols])
    # test['mahalanobis_good'] = mahalanobis(x=test[all_cols], data=train[train['ZGON_LUB_OIT'] == 0][all_cols])

    test['mahalanobis_ZGON_LUB_OIT'] = test['mahalanobis_bad'] < test['mahalanobis_good']
    test[['ZGON_LUB_OIT', 'mahalanobis_ZGON_LUB_OIT', 'mahalanobis_good','mahalanobis_bad']]

    np.count_nonzero(test['mahalanobis_ZGON_LUB_OIT'] == True)
    np.count_nonzero(test['ZGON_LUB_OIT'] == True)
    godCount = np.count_nonzero(test['ZGON_LUB_OIT'] == test['mahalanobis_ZGON_LUB_OIT'])
    allCount = len(test)

    print('{} / {} = {}%'.format(godCount, allCount, int(godCount/allCount*100)))

# test



In [None]:
data['PLEC'] = data['PLEC']/2.0
data['WIEK'] = data['WIEK']/178.0
data['WZROST'] = data['WZROST']/408.0
data['PRZENIESIENIE'] = data['PRZENIESIENIE']/4.0
data['HOSP_PRZYCZ'] = data['HOSP_PRZYCZ']/2.0
data['MASA_CIALA'] = data['MASA_CIALA']/360.0
data['BMI'] = data['BMI']/632.8
data['RRS'] = data['RRS']/400.0
data['RRD'] = data['RRD']/250.0
data['PO2_ATM'] = data['PO2_ATM']/200.0
data['ODDECH'] = data['ODDECH']/168.0
data['AS'] = data['AS']/300.0
data['NT'] = data['NT']/4.0
data['DM'] = data['DM']/4.0
data['ASTMA'] = data['ASTMA']/4.0
data['POCHP'] = data['POCHP']/4.0
data['HF'] = data['HF']/4.0
data['AF'] = data['AF']/4.0
data['UDAR'] = data['UDAR']/4.0
data['CHD'] = data['CHD']/4.0
data['MI'] = data['MI']/4.0
data['ZAP_PLUC'] = data['ZAP_PLUC']/4.0
data['PCHN'] = data['PCHN']/4.0
data['DEKSAMETEZON'] = data['DEKSAMETEZON']/4.0
data['HDCZ'] = data['HDCZ']/4.0
data['BB'] = data['BB']/4.0
data['STATYNA'] = data['STATYNA']/4.0
data['ASA'] = data['ASA']/4.0
data['NOAC'] = data['NOAC']/4.0
data['MRA'] = data['MRA']/4.0
data['ACE'] = data['ACE']/4.0
data['SARTANY'] = data['SARTANY']/4.0
data['CA_BLOKER'] = data['CA_BLOKER']/4.0
data['ZGON_LUB_OIT'] = data['ZGON_LUB_OIT']/2.0

In [213]:
from sklearn.manifold import MDS
import matplotlib.pyplot as plt


# labels_in_data = ['ZGON_LUB_OIT', 'PLEC', 'WIEK', 'WZROST', 'PRZENIESIENIE', 'HOSP_PRZYCZ', 'MASA_CIALA', 'BMI', 'RRS', 'RRD', 'PO2_ATM', 'ODDECH', 'AS', 'NT', 'DM', 'ASTMA', 'POCHP', 'HF', 'AF', 'UDAR', 'CHD', 'MI', 'ZAP_PLUC', 'PCHN', 'DEKSAMETEZON', 'HDCZ', 'BB', 'STATYNA', 'ASA', 'NOAC', 'MRA', 'ACE', 'SARTANY', 'CA_BLOKER', 'n58.11.11342_pct', 'i81.11.1112_crp', 'g49.122.1113_dd', 'm05_il-6', 'o59_tnhs', 'n11.126.20.1cito_mlecz', 'm37.11.191_krea', 'c55.103.02_wbc', 'c55.103.02_plt']
labels_in_data = ['PLEC', 'WIEK', 'WZROST', 'PRZENIESIENIE', 'HOSP_PRZYCZ', 'MASA_CIALA', 'BMI', 'RRS', 'RRD', 'PO2_ATM', 'ODDECH', 'AS', 'NT', 'DM', 'ASTMA', 'POCHP', 'HF', 'AF', 'UDAR', 'CHD', 'MI', 'ZAP_PLUC', 'PCHN', 'DEKSAMETEZON', 'HDCZ', 'BB', 'STATYNA', 'ASA', 'NOAC', 'MRA', 'ACE', 'SARTANY', 'CA_BLOKER', 'n58.11.11342_pct', 'i81.11.1112_crp', 'g49.122.1113_dd', 'm05_il-6', 'o59_tnhs', 'n11.126.20.1cito_mlecz', 'm37.11.191_krea', 'c55.103.02_wbc', 'c55.103.02_plt']
data = all_data().filter(labels_in_data)

for header in labels_in_data:
    data = data.astype({header: np.float32})

# for x in ['ZAP_PLUC', 'WIEK', 'WZROST', 'NT', 'WZROST', 'MASA_CIALA', 'BMI', 'RRS', 'RRD', 'PO2_ATM', 'ODDECH', 'AS', 'n58.11.11342_pct', 'i81.11.1112_crp', 'g49.122.1113_dd', 'm05_il-6', 'o59_tnhs', 'n11.126.20.1cito_mlecz', 'm37.11.191_krea', 'c55.103.02_wbc', 'c55.103.02_plt']:
#     data[x] = data[x]/data[x].mean()
    
print(data.shape)

embedding = MDS(n_components=2)
data = embedding.fit_transform(data)

print(data.shape)


plt.plot(data)
plt.show()

ModuleNotFoundError: No module named 'matplotlib'