In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time

In [3]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:80% !important; }</style>"))

### investigate data volume

In [3]:
%%time
with open('data/ukb46359.csv') as file:
    n_rows = len(file.readlines())

print ('Exact number of rows: {}'.format(n_rows))

Exact number of rows: 502482
Wall time: 1min 39s


In [5]:
df1 = pd.read_csv('data/ukb46359.csv',nrows=10000)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [6]:
df1.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Columns: 4009 entries, eid to 131423-0.0
dtypes: float64(3148), int64(6), object(855)
memory usage: 514.7 MB


In [7]:
df1.head()

Unnamed: 0,eid,31-0.0,44-0.0,44-1.0,44-2.0,44-3.0,48-0.0,48-1.0,48-2.0,48-3.0,...,131414-0.0,131415-0.0,131416-0.0,131417-0.0,131418-0.0,131419-0.0,131420-0.0,131421-0.0,131422-0.0,131423-0.0
0,1000010,0,4675.0,,,,74.0,,,,...,,,,,,,,,,
1,1000028,1,6952.0,,,,120.0,,,,...,,,,,,,,,,
2,1000034,0,6484.0,,,,66.0,,,,...,,,,,,,,,,
3,1000045,1,5001.0,,,,110.0,,,,...,,,,,,,,,,
4,1000052,1,4210.0,,,,94.0,,,,...,,,,,,,,,,


### define outcome columns

In [29]:
outcome_names = ['myocardial_infarction', 'cardiomyopathies', 'ischemic_heart_disease', 'heart_failure', 'peripheral_vascular_disease', 'cardiac_arrest', 'cerebral_infarction', 'arrhythmia']

# myocardial infarction(0:2)
myocardial_infarction = ['131298-0.0','131300-0.0']

# cardiomyopathies(2:4)
cardiomyopathies = ['131338-0.0','131340-0.0']

# ischemic heart disease(4:8)
ischemic_heart_disease = ['131296-0.0','131302-0.0','131304-0.0','131306-0.0']

# heart failure(8)
heart_failure = ['131354-0.0']


# peripheral vascular disease(13:15)
peripheral_vascular_disease = ['131380-0.0','131387-0.0']

# cardiac arrest (15)
cardiac_arrest = ['131346-0.0']

# cerebral infarction(16:18)
cerebral_infarction = ['131366-0.0','131368-0.0']

# arrhythmia(18:21)
arrhythmia = ['131348-0.0','131350-0.0','131352-0.0']

### define predictor variables

In [30]:
predictor_names = ['protected_attributes','physical_measures', 'sociodemographics','lifestyle_environment','environmental_factors','early_life_factors','mental_health', 'blood_assays']

# protected_attributes
# 1 - sex (encoding 9)
# 1458 - age at recruitment 
# 1447, 1448, 1449 - ethnic background (encoding 1001)
protected_attributes = ['31', '21003', '21000']

# physical_measures
# # hypertension(9:13)
# hypertension = ['131286-0.0','131288-0.0','131290-0.0','131292-0.0']
physical_measures = ['48','49','4079','4080','4194','21001','21002','21021','23099','23100','23101','23102','23105','23106', '131286','131288','131290','131292']

# sociodemographics
sociodemographics = ['31','189','757','767','796','806','816','826','6138','6142','21000','34','845']

# lifestyle_environment
lifestyle_environment = ['1160','1200','1239','1249','1289','1299','1309','1319','1329','1339','1349','1359','1369','1379','1389','1408','1418','1438','1448','1458','1468','1478','1488','1498','1508',
                        '20117','1548','1428','1220','1528','1538','2654','100240','100390', '100580','104670','20403','20414']

# environmental_factors
environmental_factors = ['1050','1060']

# early_life_factors
early_life_factors = ['1687','1697','20022','20491']

# mental_health
mental_health = ['2040','2050','2090','2100']

# blood_assays
blood_assays = ['30600','30610','30620','30630','30640','30650','30680','30690','30700','30710','30730','30740','30760','30780','30790','30870','30810','30820','30880','30670','30720','30770','30890',
                '30800','30850','30830','30750','30020','30080','30000','30150','30140']

### predictor and outcome lists

In [31]:
# list of all outcome variables
outcomes = myocardial_infarction+cardiomyopathies+ischemic_heart_disease+heart_failure+peripheral_vascular_disease+cardiac_arrest+cerebral_infarction+arrhythmia

# sequence of lists of outcome variables by category
outcome_seq = [myocardial_infarction, cardiomyopathies, ischemic_heart_disease, heart_failure, peripheral_vascular_disease, cardiac_arrest, cerebral_infarction, arrhythmia]

# list of all predictor variables including/excluding blood assays
predictors = protected_attributes+physical_measures+sociodemographics+lifestyle_environment+environmental_factors+early_life_factors+mental_health+blood_assays

# sequence of lists of predictor variables by category including/excluding assays
predictors_seq = protected_attributes,physical_measures,sociodemographics,lifestyle_environment,environmental_factors,early_life_factors,mental_health,blood_assays


In [32]:
len(predictors)

114

### get all columns

In [11]:
# predictor columns of all instances (107 features, 322 columns total)
predictor_cols = []
for predictor in predictors:
    for col in df1.columns:
        if col.startswith(predictor):
            predictor_cols.append(col)

In [12]:
# predictor columns of only first instances (107 features, X columns total)
predictor_firsts=[]
for predictor in predictors:
    predictor_firsts.append(predictor + '-0.0')

In [13]:
predictor_outcome_cols = predictor_cols+outcomes

In [14]:
print(len(predictor_outcome_cols))

334


In [15]:
predictor_firsts

['31-0.0',
 '21003-0.0',
 '21000-0.0',
 '48-0.0',
 '49-0.0',
 '4079-0.0',
 '4080-0.0',
 '4194-0.0',
 '21001-0.0',
 '21002-0.0',
 '21021-0.0',
 '23099-0.0',
 '23100-0.0',
 '23101-0.0',
 '23102-0.0',
 '23105-0.0',
 '23106-0.0',
 '131286-0.0',
 '131288-0.0',
 '131290-0.0',
 '131292-0.0',
 '31-0.0',
 '189-0.0',
 '757-0.0',
 '767-0.0',
 '796-0.0',
 '806-0.0',
 '816-0.0',
 '826-0.0',
 '6138-0.0',
 '6142-0.0',
 '21000-0.0',
 '34-0.0',
 '845-0.0',
 '1160-0.0',
 '1200-0.0',
 '1239-0.0',
 '1249-0.0',
 '1289-0.0',
 '1299-0.0',
 '1309-0.0',
 '1319-0.0',
 '1329-0.0',
 '1339-0.0',
 '1349-0.0',
 '1359-0.0',
 '1369-0.0',
 '1379-0.0',
 '1389-0.0',
 '1408-0.0',
 '1418-0.0',
 '1438-0.0',
 '1448-0.0',
 '1458-0.0',
 '1468-0.0',
 '1478-0.0',
 '1488-0.0',
 '1498-0.0',
 '1508-0.0',
 '20117-0.0',
 '1548-0.0',
 '1428-0.0',
 '1220-0.0',
 '1528-0.0',
 '1538-0.0',
 '2654-0.0',
 '100240-0.0',
 '100390-0.0',
 '100580-0.0',
 '104670-0.0',
 '20403-0.0',
 '20414-0.0',
 '1050-0.0',
 '1060-0.0',
 '1687-0.0',
 '1697-0.0',

In [16]:
", ".join(df1.columns)

'eid, 31-0.0, 44-0.0, 44-1.0, 44-2.0, 44-3.0, 48-0.0, 48-1.0, 48-2.0, 48-3.0, 49-0.0, 49-1.0, 49-2.0, 49-3.0, 53-0.0, 53-1.0, 53-2.0, 53-3.0, 54-0.0, 54-1.0, 54-2.0, 54-3.0, 55-0.0, 55-1.0, 55-2.0, 55-3.0, 74-0.0, 74-1.0, 74-2.0, 74-3.0, 87-0.0, 87-0.1, 87-0.2, 87-0.3, 87-0.4, 87-0.5, 87-0.6, 87-0.7, 87-0.8, 87-0.9, 87-0.10, 87-0.11, 87-0.12, 87-0.13, 87-0.14, 87-0.15, 87-0.16, 87-0.17, 87-0.18, 87-0.19, 87-0.20, 87-0.21, 87-0.22, 87-0.23, 87-0.24, 87-0.25, 87-0.26, 87-0.27, 87-0.28, 87-0.29, 87-0.30, 87-0.31, 87-0.32, 87-0.33, 87-1.0, 87-1.1, 87-1.2, 87-1.3, 87-1.4, 87-1.5, 87-1.6, 87-1.7, 87-1.8, 87-1.9, 87-1.10, 87-1.11, 87-1.12, 87-1.13, 87-1.14, 87-1.15, 87-1.16, 87-1.17, 87-1.18, 87-1.19, 87-1.20, 87-1.21, 87-1.22, 87-1.23, 87-1.24, 87-1.25, 87-1.26, 87-1.27, 87-1.28, 87-1.29, 87-1.30, 87-1.31, 87-1.32, 87-1.33, 87-2.0, 87-2.1, 87-2.2, 87-2.3, 87-2.4, 87-2.5, 87-2.6, 87-2.7, 87-2.8, 87-2.9, 87-2.10, 87-2.11, 87-2.12, 87-2.13, 87-2.14, 87-2.15, 87-2.16, 87-2.17, 87-2.18, 87-2.19, 

### check features exist in df1 

In [17]:
full_features_joined = list(set(df1.columns) & set(predictor_cols))
first_features_joined = list(set(df1.columns) & set(predictor_firsts))
print('Number of columns present for all feature instances: ' + str((len(full_features_joined))) + ' / '+ str(len(predictor_cols)))
print('Number of columns present for first feature instances: ' + str((len(first_features_joined))) + ' / '+ str(len(predictor_firsts)))

Number of columns present for all feature instances: 297 / 317
Number of columns present for first feature instances: 67 / 114


In [18]:
lost_predictors = []
for predictor in predictor_firsts:
    if predictor in df1.columns:
        continue
    else:
        lost_predictors.append(predictor)

In [19]:
print(lost_predictors)

['4194-0.0', '21002-0.0', '21021-0.0', '23102-0.0', '23105-0.0', '23106-0.0', '189-0.0', '757-0.0', '767-0.0', '796-0.0', '806-0.0', '816-0.0', '826-0.0', '34-0.0', '1498-0.0', '100240-0.0', '100390-0.0', '100580-0.0', '104670-0.0', '1050-0.0', '1060-0.0', '1687-0.0', '1697-0.0', '20022-0.0', '2040-0.0', '30600-0.0', '30610-0.0', '30620-0.0', '30650-0.0', '30680-0.0', '30700-0.0', '30730-0.0', '30810-0.0', '30820-0.0', '30880-0.0', '30670-0.0', '30720-0.0', '30890-0.0', '30800-0.0', '30830-0.0', '30020-0.0', '30080-0.0', '30000-0.0', '30150-0.0', '30140-0.0']


### Filter df1 for needed columns

In [20]:
df1 = df1.loc[:,first_features_joined+outcomes]
df1.shape

(10000, 84)

In [21]:
pd.set_option('display.max_columns', None)
df1.head()

Unnamed: 0,1319-0.0,1408-0.0,1329-0.0,1448-0.0,1538-0.0,6142-0.0,2050-0.0,1508-0.0,1339-0.0,30710-0.0,1349-0.0,30750-0.0,1468-0.0,20117-0.0,30740-0.0,1160-0.0,2090-0.0,31-0.0,131290-0.0,1488-0.0,131286-0.0,30850-0.0,4080-0.0,1369-0.0,21000-0.0,1200-0.0,1289-0.0,30790-0.0,845-0.0,20491-0.0,48-0.0,30630-0.0,1299-0.0,131288-0.0,1220-0.0,1548-0.0,1528-0.0,23099-0.0,49-0.0,30690-0.0,1389-0.0,2654-0.0,1249-0.0,1309-0.0,1379-0.0,1239-0.0,21003-0.0,20403-0.0,30780-0.0,1438-0.0,131292-0.0,30870-0.0,1359-0.0,30770-0.0,21001-0.0,1458-0.0,23100-0.0,6138-0.0,1418-0.0,1478-0.0,4079-0.0,20414-0.0,30760-0.0,23101-0.0,2100-0.0,1428-0.0,30640-0.0,131298-0.0,131300-0.0,131338-0.0,131340-0.0,131296-0.0,131302-0.0,131304-0.0,131306-0.0,131354-0.0,131380-0.0,131387-0.0,131346-0.0,131366-0.0,131368-0.0,131348-0.0,131350-0.0,131352-0.0
0,0.0,1.0,2.0,3.0,2.0,1.0,2.0,3.0,2.0,0.34,1.0,,,2.0,5.622,7.0,1.0,0,,6.0,,0.508,110.0,1.0,1001.0,3.0,6.0,,,4.0,74.0,1.593,10.0,,0.0,2.0,2.0,35.6,102.0,6.477,1.0,,1.0,2.0,1.0,0.0,54,2.0,3.888,10.0,,0.977,2.0,26.339,24.579,-10.0,25.0,1.0,3.0,1.0,77.0,4.0,1.706,45.2,1.0,0.0,1.211,,,,,,,,,,,,,,,,,2008-07-01
1,0.0,3.0,2.0,-1.0,0.0,1.0,-1.0,2.0,2.0,3.94,4.0,40.9,5.0,2.0,5.052,9.0,0.0,1,,2.0,2010-08-16,13.088,166.0,2.0,1001.0,2.0,2.0,15.4,16.0,,120.0,1.39,2.0,,0.0,2.0,-10.0,36.5,113.0,5.512,1.0,,1.0,1.0,2.0,0.0,65,,3.52,12.0,,2.358,3.0,10.701,35.0861,7.0,42.9,-7.0,2.0,1.0,91.0,,1.173,74.6,0.0,1.0,1.019,1997-08-01,,,,,,,2010-08-16,,,,,,,,,
2,0.0,3.0,3.0,2.0,1.0,2.0,1.0,2.0,2.0,0.55,1.0,40.0,1.0,0.0,5.31,5.0,0.0,0,,0.0,,0.515,132.0,1.0,1001.0,3.0,2.0,32.1,16.0,,66.0,2.005,4.0,,0.0,1.0,1.0,29.5,88.0,7.079,1.0,,3.0,4.0,2.0,0.0,69,,4.227,8.0,,0.655,2.0,10.693,19.3835,7.0,15.2,3.0,2.0,1.0,67.0,,2.49,36.3,0.0,1.0,1.097,,,,,,,,,,,,,,,,,
3,3.0,3.0,3.0,3.0,0.0,2.0,1.0,,2.0,0.45,2.0,37.3,4.0,2.0,4.449,7.0,0.0,1,,5.0,,4.675,178.0,2.0,1001.0,1.0,3.0,,18.0,,110.0,1.474,2.0,,0.0,1.0,2.0,28.5,117.0,5.028,0.0,7.0,,1.0,2.0,1.0,66,,3.041,10.0,,3.108,2.0,25.317,35.1281,7.0,31.7,3.0,2.0,1.0,84.0,,1.169,79.6,0.0,3.0,0.923,,,,,,,,,,,,,,,,,
4,0.0,3.0,2.0,1.0,0.0,5.0,2.0,2.0,2.0,0.75,2.0,32.2,1.0,2.0,4.616,6.0,0.0,1,,-10.0,,20.162,178.0,1.0,1001.0,3.0,1.0,71.11,,,94.0,2.149,1.0,,0.0,2.0,2.0,24.8,100.0,7.958,1.0,7.0,2.0,1.0,1.0,0.0,48,,4.983,8.0,,1.173,1.0,26.523,25.8866,1.0,20.1,1.0,2.0,1.0,88.0,,2.053,61.0,0.0,3.0,1.443,,,,,,,,,,,,,,,,,


### transform outcome columns to show a binary result per pathology

In [22]:
df1['131338-0.0'].value_counts()

2018-02-06    2
2015-10-01    1
2001-09-25    1
2003-03-16    1
2013-10-29    1
             ..
2013-04-01    1
2007-11-06    1
2020-05-20    1
2012-05-02    1
2010-05-15    1
Name: 131338-0.0, Length: 69, dtype: int64

In [23]:
cardiomyopathy_result = df1['131338-0.0']
y = np.where(cardiomyopathy_result.notnull(),1,0)
count_arr = np.bincount(y)

In [24]:
print(df1['131338-0.0'].isna().sum())
print(len(df1['131338-0.0'].index))

9930
10000


In [25]:
for outcome in outcomes:
    df1[outcome] = np.where(df1[outcome].notnull(),1,0)

In [26]:
df1.head()

Unnamed: 0,1319-0.0,1408-0.0,1329-0.0,1448-0.0,1538-0.0,6142-0.0,2050-0.0,1508-0.0,1339-0.0,30710-0.0,1349-0.0,30750-0.0,1468-0.0,20117-0.0,30740-0.0,1160-0.0,2090-0.0,31-0.0,131290-0.0,1488-0.0,131286-0.0,30850-0.0,4080-0.0,1369-0.0,21000-0.0,1200-0.0,1289-0.0,30790-0.0,845-0.0,20491-0.0,48-0.0,30630-0.0,1299-0.0,131288-0.0,1220-0.0,1548-0.0,1528-0.0,23099-0.0,49-0.0,30690-0.0,1389-0.0,2654-0.0,1249-0.0,1309-0.0,1379-0.0,1239-0.0,21003-0.0,20403-0.0,30780-0.0,1438-0.0,131292-0.0,30870-0.0,1359-0.0,30770-0.0,21001-0.0,1458-0.0,23100-0.0,6138-0.0,1418-0.0,1478-0.0,4079-0.0,20414-0.0,30760-0.0,23101-0.0,2100-0.0,1428-0.0,30640-0.0,131298-0.0,131300-0.0,131338-0.0,131340-0.0,131296-0.0,131302-0.0,131304-0.0,131306-0.0,131354-0.0,131380-0.0,131387-0.0,131346-0.0,131366-0.0,131368-0.0,131348-0.0,131350-0.0,131352-0.0
0,0.0,1.0,2.0,3.0,2.0,1.0,2.0,3.0,2.0,0.34,1.0,,,2.0,5.622,7.0,1.0,0,,6.0,,0.508,110.0,1.0,1001.0,3.0,6.0,,,4.0,74.0,1.593,10.0,,0.0,2.0,2.0,35.6,102.0,6.477,1.0,,1.0,2.0,1.0,0.0,54,2.0,3.888,10.0,,0.977,2.0,26.339,24.579,-10.0,25.0,1.0,3.0,1.0,77.0,4.0,1.706,45.2,1.0,0.0,1.211,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
1,0.0,3.0,2.0,-1.0,0.0,1.0,-1.0,2.0,2.0,3.94,4.0,40.9,5.0,2.0,5.052,9.0,0.0,1,,2.0,2010-08-16,13.088,166.0,2.0,1001.0,2.0,2.0,15.4,16.0,,120.0,1.39,2.0,,0.0,2.0,-10.0,36.5,113.0,5.512,1.0,,1.0,1.0,2.0,0.0,65,,3.52,12.0,,2.358,3.0,10.701,35.0861,7.0,42.9,-7.0,2.0,1.0,91.0,,1.173,74.6,0.0,1.0,1.019,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
2,0.0,3.0,3.0,2.0,1.0,2.0,1.0,2.0,2.0,0.55,1.0,40.0,1.0,0.0,5.31,5.0,0.0,0,,0.0,,0.515,132.0,1.0,1001.0,3.0,2.0,32.1,16.0,,66.0,2.005,4.0,,0.0,1.0,1.0,29.5,88.0,7.079,1.0,,3.0,4.0,2.0,0.0,69,,4.227,8.0,,0.655,2.0,10.693,19.3835,7.0,15.2,3.0,2.0,1.0,67.0,,2.49,36.3,0.0,1.0,1.097,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,3.0,3.0,3.0,3.0,0.0,2.0,1.0,,2.0,0.45,2.0,37.3,4.0,2.0,4.449,7.0,0.0,1,,5.0,,4.675,178.0,2.0,1001.0,1.0,3.0,,18.0,,110.0,1.474,2.0,,0.0,1.0,2.0,28.5,117.0,5.028,0.0,7.0,,1.0,2.0,1.0,66,,3.041,10.0,,3.108,2.0,25.317,35.1281,7.0,31.7,3.0,2.0,1.0,84.0,,1.169,79.6,0.0,3.0,0.923,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0.0,3.0,2.0,1.0,0.0,5.0,2.0,2.0,2.0,0.75,2.0,32.2,1.0,2.0,4.616,6.0,0.0,1,,-10.0,,20.162,178.0,1.0,1001.0,3.0,1.0,71.11,,,94.0,2.149,1.0,,0.0,2.0,2.0,24.8,100.0,7.958,1.0,7.0,2.0,1.0,1.0,0.0,48,,4.983,8.0,,1.173,1.0,26.523,25.8866,1.0,20.1,1.0,2.0,1.0,88.0,,2.053,61.0,0.0,3.0,1.443,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [27]:
df1.shape

(10000, 84)

### convert to one column per outcome

In [28]:
for name,outcome in zip(outcome_names,outcome_seq):
    df1['outcome_'+str(name)] = 0
    for subcategory in outcome:
        selected = df1[df1[subcategory]==1].index
        df1['outcome_'+str(name)].iloc[selected] = 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, val

### drop original outcome columns

In [29]:
df1.drop(outcomes, axis=1, inplace=True)
df1.head()

Unnamed: 0,1319-0.0,1408-0.0,1329-0.0,1448-0.0,1538-0.0,6142-0.0,2050-0.0,1508-0.0,1339-0.0,30710-0.0,1349-0.0,30750-0.0,1468-0.0,20117-0.0,30740-0.0,1160-0.0,2090-0.0,31-0.0,131290-0.0,1488-0.0,131286-0.0,30850-0.0,4080-0.0,1369-0.0,21000-0.0,1200-0.0,1289-0.0,30790-0.0,845-0.0,20491-0.0,48-0.0,30630-0.0,1299-0.0,131288-0.0,1220-0.0,1548-0.0,1528-0.0,23099-0.0,49-0.0,30690-0.0,1389-0.0,2654-0.0,1249-0.0,1309-0.0,1379-0.0,1239-0.0,21003-0.0,20403-0.0,30780-0.0,1438-0.0,131292-0.0,30870-0.0,1359-0.0,30770-0.0,21001-0.0,1458-0.0,23100-0.0,6138-0.0,1418-0.0,1478-0.0,4079-0.0,20414-0.0,30760-0.0,23101-0.0,2100-0.0,1428-0.0,30640-0.0,outcome_myocardial_infarction,outcome_cardiomyopathies,outcome_ischemic_heart_disease,outcome_heart_failure,outcome_peripheral_vascular_disease,outcome_cardiac_arrest,outcome_cerebral_infarction,outcome_arrhythmia
0,0.0,1.0,2.0,3.0,2.0,1.0,2.0,3.0,2.0,0.34,1.0,,,2.0,5.622,7.0,1.0,0,,6.0,,0.508,110.0,1.0,1001.0,3.0,6.0,,,4.0,74.0,1.593,10.0,,0.0,2.0,2.0,35.6,102.0,6.477,1.0,,1.0,2.0,1.0,0.0,54,2.0,3.888,10.0,,0.977,2.0,26.339,24.579,-10.0,25.0,1.0,3.0,1.0,77.0,4.0,1.706,45.2,1.0,0.0,1.211,0,0,0,0,0,0,0,1
1,0.0,3.0,2.0,-1.0,0.0,1.0,-1.0,2.0,2.0,3.94,4.0,40.9,5.0,2.0,5.052,9.0,0.0,1,,2.0,2010-08-16,13.088,166.0,2.0,1001.0,2.0,2.0,15.4,16.0,,120.0,1.39,2.0,,0.0,2.0,-10.0,36.5,113.0,5.512,1.0,,1.0,1.0,2.0,0.0,65,,3.52,12.0,,2.358,3.0,10.701,35.0861,7.0,42.9,-7.0,2.0,1.0,91.0,,1.173,74.6,0.0,1.0,1.019,1,0,1,0,0,0,0,0
2,0.0,3.0,3.0,2.0,1.0,2.0,1.0,2.0,2.0,0.55,1.0,40.0,1.0,0.0,5.31,5.0,0.0,0,,0.0,,0.515,132.0,1.0,1001.0,3.0,2.0,32.1,16.0,,66.0,2.005,4.0,,0.0,1.0,1.0,29.5,88.0,7.079,1.0,,3.0,4.0,2.0,0.0,69,,4.227,8.0,,0.655,2.0,10.693,19.3835,7.0,15.2,3.0,2.0,1.0,67.0,,2.49,36.3,0.0,1.0,1.097,0,0,0,0,0,0,0,0
3,3.0,3.0,3.0,3.0,0.0,2.0,1.0,,2.0,0.45,2.0,37.3,4.0,2.0,4.449,7.0,0.0,1,,5.0,,4.675,178.0,2.0,1001.0,1.0,3.0,,18.0,,110.0,1.474,2.0,,0.0,1.0,2.0,28.5,117.0,5.028,0.0,7.0,,1.0,2.0,1.0,66,,3.041,10.0,,3.108,2.0,25.317,35.1281,7.0,31.7,3.0,2.0,1.0,84.0,,1.169,79.6,0.0,3.0,0.923,0,0,0,0,0,0,0,0
4,0.0,3.0,2.0,1.0,0.0,5.0,2.0,2.0,2.0,0.75,2.0,32.2,1.0,2.0,4.616,6.0,0.0,1,,-10.0,,20.162,178.0,1.0,1001.0,3.0,1.0,71.11,,,94.0,2.149,1.0,,0.0,2.0,2.0,24.8,100.0,7.958,1.0,7.0,2.0,1.0,1.0,0.0,48,,4.983,8.0,,1.173,1.0,26.523,25.8866,1.0,20.1,1.0,2.0,1.0,88.0,,2.053,61.0,0.0,3.0,1.443,0,0,0,0,0,0,0,0


***

### Perform preprocessing on entire dataset

In [37]:
chunks = []
for chunk in pd.read_csv('data/ukb46359.csv',chunksize=10000):

    chunk = chunk.reset_index(drop=True)
    
    # select predictor and outcome columns from entire dataframe
    chunk = chunk.loc[:,first_features_joined+outcomes]
    
    # transform outcome columns to show a binary result per pathology
    for outcome in outcomes:
        chunk[outcome] = np.where(chunk[outcome].notnull(),1,0)
    
    # convert to one column per outcome
    for name,outcome in zip(outcome_names,outcome_seq):
        chunk['outcome_'+str(name)] = 0
        for subcategory in outcome:
            selected = chunk[chunk[subcategory]==1].index
            chunk['outcome_'+str(name)].iloc[selected] = 1
    
    # drop original outcome columns
    chunk.drop(outcomes, axis=1, inplace=True)
    
    # append processed chunk to dataframe
    chunks.append(chunk)

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#return

In [38]:
chunks

[      1319-0.0  1408-0.0  1329-0.0  1448-0.0  1538-0.0  6142-0.0  2050-0.0  \
 0          0.0       1.0       2.0       3.0       2.0       1.0       2.0   
 1          0.0       3.0       2.0      -1.0       0.0       1.0      -1.0   
 2          0.0       3.0       3.0       2.0       1.0       2.0       1.0   
 3          3.0       3.0       3.0       3.0       0.0       2.0       1.0   
 4          0.0       3.0       2.0       1.0       0.0       5.0       2.0   
 ...        ...       ...       ...       ...       ...       ...       ...   
 9995       0.0       5.0       1.0       4.0       2.0       2.0       2.0   
 9996       0.0       3.0       1.0       2.0       0.0       1.0       1.0   
 9997       2.0       4.0       1.0       3.0       2.0       2.0       1.0   
 9998       0.0       2.0       2.0       3.0       0.0       1.0       1.0   
 9999       0.0       1.0       1.0       3.0       2.0       1.0       1.0   
 
       1508-0.0  1339-0.0  30710-0.0  1349-0.0  30

In [39]:
df = pd.concat(chunks)

In [40]:
df

Unnamed: 0,1319-0.0,1408-0.0,1329-0.0,1448-0.0,1538-0.0,6142-0.0,2050-0.0,1508-0.0,1339-0.0,30710-0.0,1349-0.0,30750-0.0,1468-0.0,20117-0.0,30740-0.0,1160-0.0,2090-0.0,31-0.0,131290-0.0,1488-0.0,131286-0.0,30850-0.0,4080-0.0,1369-0.0,21000-0.0,1200-0.0,1289-0.0,30790-0.0,845-0.0,20491-0.0,48-0.0,30630-0.0,1299-0.0,131288-0.0,1220-0.0,1548-0.0,1528-0.0,23099-0.0,49-0.0,30690-0.0,1389-0.0,2654-0.0,1249-0.0,1309-0.0,1379-0.0,1239-0.0,21003-0.0,20403-0.0,30780-0.0,1438-0.0,131292-0.0,30870-0.0,1359-0.0,30770-0.0,21001-0.0,1458-0.0,23100-0.0,6138-0.0,1418-0.0,1478-0.0,4079-0.0,20414-0.0,30760-0.0,23101-0.0,2100-0.0,1428-0.0,30640-0.0,outcome_myocardial_infarction,outcome_cardiomyopathies,outcome_ischemic_heart_disease,outcome_heart_failure,outcome_peripheral_vascular_disease,outcome_cardiac_arrest,outcome_cerebral_infarction,outcome_arrhythmia
0,0.0,1.0,2.0,3.0,2.0,1.0,2.0,3.0,2.0,0.34,1.0,,,2.0,5.622,7.0,1.0,0.0,,6.0,,0.508,110.0,1.0,1001.0,3.0,6.0,,,4.0,74.0,1.593,10.0,,0.0,2.0,2.0,35.6,102.0,6.477,1.0,,1.0,2.0,1.0,0.0,54.0,2.0,3.888,10.0,,0.977,2.0,26.339,24.5790,-10.0,25.0,1.0,3.0,1.0,77.0,4.0,1.706,45.2,1.0,0.0,1.211,0,0,0,0,0,0,0,1
1,0.0,3.0,2.0,-1.0,0.0,1.0,-1.0,2.0,2.0,3.94,4.0,40.9,5.0,2.0,5.052,9.0,0.0,1.0,,2.0,2010-08-16,13.088,166.0,2.0,1001.0,2.0,2.0,15.40,16.0,,120.0,1.390,2.0,,0.0,2.0,-10.0,36.5,113.0,5.512,1.0,,1.0,1.0,2.0,0.0,65.0,,3.520,12.0,,2.358,3.0,10.701,35.0861,7.0,42.9,-7.0,2.0,1.0,91.0,,1.173,74.6,0.0,1.0,1.019,1,0,1,0,0,0,0,0
2,0.0,3.0,3.0,2.0,1.0,2.0,1.0,2.0,2.0,0.55,1.0,40.0,1.0,0.0,5.310,5.0,0.0,0.0,,0.0,,0.515,132.0,1.0,1001.0,3.0,2.0,32.10,16.0,,66.0,2.005,4.0,,0.0,1.0,1.0,29.5,88.0,7.079,1.0,,3.0,4.0,2.0,0.0,69.0,,4.227,8.0,,0.655,2.0,10.693,19.3835,7.0,15.2,3.0,2.0,1.0,67.0,,2.490,36.3,0.0,1.0,1.097,0,0,0,0,0,0,0,0
3,3.0,3.0,3.0,3.0,0.0,2.0,1.0,,2.0,0.45,2.0,37.3,4.0,2.0,4.449,7.0,0.0,1.0,,5.0,,4.675,178.0,2.0,1001.0,1.0,3.0,,18.0,,110.0,1.474,2.0,,0.0,1.0,2.0,28.5,117.0,5.028,0.0,7.0,,1.0,2.0,1.0,66.0,,3.041,10.0,,3.108,2.0,25.317,35.1281,7.0,31.7,3.0,2.0,1.0,84.0,,1.169,79.6,0.0,3.0,0.923,0,0,0,0,0,0,0,0
4,0.0,3.0,2.0,1.0,0.0,5.0,2.0,2.0,2.0,0.75,2.0,32.2,1.0,2.0,4.616,6.0,0.0,1.0,,-10.0,,20.162,178.0,1.0,1001.0,3.0,1.0,71.11,,,94.0,2.149,1.0,,0.0,2.0,2.0,24.8,100.0,7.958,1.0,7.0,2.0,1.0,1.0,0.0,48.0,,4.983,8.0,,1.173,1.0,26.523,25.8866,1.0,20.1,1.0,2.0,1.0,88.0,,2.053,61.0,0.0,3.0,1.443,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2476,0.0,3.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.45,3.0,28.0,1.0,2.0,4.509,7.0,0.0,1.0,,0.0,,17.137,147.0,1.0,1001.0,2.0,2.0,5.30,16.0,4.0,110.0,1.345,0.0,,0.0,2.0,0.0,27.0,113.0,5.754,1.0,4.0,1.0,2.0,1.0,2.0,63.0,3.0,3.843,4.0,,3.820,2.0,20.777,30.6094,7.0,29.8,-7.0,1.0,3.0,91.0,3.0,1.026,80.7,0.0,3.0,1.137,0,0,0,0,0,0,0,0
2477,0.0,1.0,1.0,3.0,2.0,2.0,1.0,1.0,1.0,2.75,1.0,33.2,1.0,1.0,5.673,6.0,1.0,0.0,,0.0,2011-02-23,0.725,148.0,2.0,1001.0,3.0,4.0,5.09,16.0,,102.0,1.365,3.0,,0.0,3.0,3.0,46.3,110.0,4.664,2.0,,1.0,6.0,2.0,0.0,64.0,,2.863,4.0,,2.134,3.0,23.647,31.4652,7.0,38.7,3.0,3.0,2.0,73.0,,1.159,44.9,0.0,1.0,0.898,0,0,1,0,0,0,0,0
2478,2.0,1.0,1.0,2.0,2.0,-7.0,4.0,2.0,2.0,0.85,3.0,31.5,3.0,2.0,4.851,7.0,1.0,1.0,,20.0,2020-07-13,12.040,133.0,2.0,1001.0,2.0,2.0,,-2.0,,99.0,1.241,2.0,,1.0,3.0,1.0,25.7,98.0,5.184,1.0,,1.0,2.0,2.0,0.0,43.0,,3.358,20.0,,2.547,3.0,22.507,28.6196,10.0,23.6,-7.0,1.0,1.0,81.0,,1.043,68.1,1.0,1.0,0.985,1,0,1,0,0,1,0,1
2479,3.0,3.0,1.0,3.0,0.0,1.0,1.0,,2.0,3.57,2.0,45.7,1.0,2.0,5.506,6.0,0.0,0.0,,4.0,,,138.0,1.0,1001.0,3.0,4.0,27.48,,4.0,86.0,1.669,2.0,,0.0,2.0,2.0,41.3,111.0,6.827,1.0,,4.0,1.0,1.0,0.0,56.0,1.0,4.200,12.0,,2.090,3.0,30.498,27.3702,7.0,32.7,1.0,2.0,1.0,83.0,1.0,1.589,46.4,0.0,0.0,1.063,0,0,0,0,0,0,0,0


### export processed csv

In [41]:
df.to_csv('preprocessed-binary_CVD.csv')

### alter preprocessed data for adjusted problem statement

In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
from sklearn import preprocessing
from sklearn.ensemble import RandomForestRegressor
from collections import Counter
from sklearn.datasets import make_classification
from imblearn.over_sampling import ADASYN 
import sys
import sklearn.neighbors._base
sys.modules['sklearn.neighbors.base'] = sklearn.neighbors._base

import global_variables as gv

In [6]:
df = pd.read_csv(gv.pre_data_link)
df.drop('Unnamed: 0', axis=1, inplace=True)

df.rename(columns = {'outcome_hypertension':'hypertension'}, inplace=True)

In [7]:
df

Unnamed: 0,30850-0.0,30780-0.0,30690-0.0,1488-0.0,30790-0.0,1418-0.0,1329-0.0,4079-0.0,1220-0.0,23101-0.0,...,1448-0.0,outcome_myocardial_infarction,outcome_cardiomyopathies,outcome_ischemic_heart_disease,outcome_heart_failure,hypertension,outcome_peripheral_vascular_disease,outcome_cardiac_arrest,outcome_cerebral_infarction,outcome_arrhythmia
0,0.508,3.888,6.477,6.0,,3.0,2.0,77.0,0.0,45.2,...,3.0,0,0,0,0,0,0,0,0,1
1,13.088,3.520,5.512,2.0,15.40,2.0,2.0,91.0,0.0,74.6,...,-1.0,1,0,1,0,1,0,0,0,0
2,4.675,3.041,5.028,5.0,,2.0,3.0,84.0,0.0,79.6,...,3.0,0,0,0,0,0,0,0,0,0
3,,,,0.0,,2.0,1.0,99.0,0.0,71.7,...,3.0,0,0,1,0,1,0,1,1,1
4,1.788,2.887,5.565,0.0,,2.0,2.0,71.0,0.0,40.2,...,3.0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101724,,3.875,6.190,0.0,169.20,2.0,0.0,82.0,1.0,46.9,...,3.0,0,0,0,0,1,0,0,1,1
101725,9.036,2.467,4.035,0.0,,1.0,1.0,90.0,0.0,66.3,...,2.0,0,0,0,1,1,0,0,0,0
101726,0.485,3.802,6.507,4.0,,2.0,3.0,69.0,0.0,41.6,...,3.0,0,0,1,0,0,0,0,0,0
101727,0.725,2.863,4.664,0.0,5.09,3.0,1.0,73.0,0.0,44.9,...,3.0,0,0,1,0,1,0,0,0,0


In [4]:
df2 = df[~((df['outcome_myocardial_infarction'] == 0) & (df['outcome_cardiomyopathies'] == 0) & (df['outcome_ischemic_heart_disease'] == 0) & (df['outcome_heart_failure'] == 0) & (df['outcome_peripheral_vascular_disease'] == 0) & (df['outcome_cardiac_arrest'] == 0) & (df['outcome_cerebral_infarction'] == 0) & (df['outcome_arrhythmia'] == 0))]

In [5]:
df2

Unnamed: 0,30850-0.0,30780-0.0,30690-0.0,1488-0.0,30790-0.0,1418-0.0,1329-0.0,4079-0.0,1220-0.0,23101-0.0,...,outcome_myocardial_infarction,outcome_cardiomyopathies,outcome_ischemic_heart_disease,outcome_heart_failure,hypertension,outcome_peripheral_vascular_disease,outcome_cardiac_arrest,outcome_cerebral_infarction,outcome_arrhythmia,age
0,0.508,3.888,6.477,6.0,,3.0,2.0,77.0,0.0,45.2,...,0,0,0,0,0,0,0,0,1,54.0
1,13.088,3.520,5.512,2.0,15.40,2.0,2.0,91.0,0.0,74.6,...,1,0,1,0,1,0,0,0,0,65.0
3,,,,0.0,,2.0,1.0,99.0,0.0,71.7,...,0,0,1,0,1,0,1,1,1,55.0
4,1.788,2.887,5.565,0.0,,2.0,2.0,71.0,0.0,40.2,...,0,0,0,0,0,0,0,0,1,49.0
5,0.756,2.670,4.680,7.0,4.77,3.0,2.0,73.0,1.0,46.5,...,1,0,0,0,1,0,0,0,0,61.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101724,,3.875,6.190,0.0,169.20,2.0,0.0,82.0,1.0,46.9,...,0,0,0,0,1,0,0,1,1,68.0
101725,9.036,2.467,4.035,0.0,,1.0,1.0,90.0,0.0,66.3,...,0,0,0,1,1,0,0,0,0,58.0
101726,0.485,3.802,6.507,4.0,,2.0,3.0,69.0,0.0,41.6,...,0,0,1,0,0,0,0,0,0,57.0
101727,0.725,2.863,4.664,0.0,5.09,3.0,1.0,73.0,0.0,44.9,...,0,0,1,0,1,0,0,0,0,64.0


In [10]:
cols= gv.continuous_cols+gv.numerical_cols+gv.categorical_cols
col_list2 = cols+list(df)[61:]
df=df.loc[:,col_list2]
df2=df2.loc[:,col_list2]

In [11]:
df2

Unnamed: 0,30850-0.0,30780-0.0,30690-0.0,30790-0.0,23101-0.0,23099-0.0,48-0.0,23100-0.0,30710-0.0,30760-0.0,...,1448-0.0,outcome_myocardial_infarction,outcome_cardiomyopathies,outcome_ischemic_heart_disease,outcome_heart_failure,hypertension,outcome_peripheral_vascular_disease,outcome_cardiac_arrest,outcome_cerebral_infarction,outcome_arrhythmia
0,0.508,3.888,6.477,,45.2,35.6,74.0,25.0,0.34,1.706,...,3.0,0,0,0,0,0,0,0,0,1
1,13.088,3.520,5.512,15.40,74.6,36.5,120.0,42.9,3.94,1.173,...,-1.0,1,0,1,0,1,0,0,0,0
3,,,,,71.7,29.7,112.0,30.3,,,...,3.0,0,0,1,0,1,0,1,1,1
4,1.788,2.887,5.565,,40.2,29.8,67.0,17.0,0.87,2.115,...,3.0,0,0,0,0,0,0,0,0,1
5,0.756,2.670,4.680,4.77,46.5,30.1,85.0,20.0,0.18,1.493,...,3.0,1,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
101724,,3.875,6.190,169.20,46.9,35.8,94.0,26.2,3.83,1.008,...,3.0,0,0,0,0,1,0,0,1,1
101725,9.036,2.467,4.035,,66.3,36.9,114.0,38.7,2.24,1.087,...,2.0,0,0,0,1,1,0,0,0,0
101726,0.485,3.802,6.507,,41.6,37.1,82.0,24.5,0.52,1.857,...,3.0,0,0,1,0,0,0,0,0,0
101727,0.725,2.863,4.664,5.09,44.9,46.3,102.0,38.7,2.75,1.159,...,3.0,0,0,1,0,1,0,0,0,0


In [21]:
df2.to_csv('data/tabnet_preprocessed.csv')

### Second Alternative: keep all records and create binary CVD column

In [4]:
import global_variables as gv

In [17]:
df = pd.read_csv('data/preprocessed-binary_CVD.csv')
df.drop('Unnamed: 0', axis=1, inplace=True)
df

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,1319-0.0,1408-0.0,1329-0.0,1448-0.0,1538-0.0,6142-0.0,2050-0.0,1508-0.0,1339-0.0,30710-0.0,...,30640-0.0,outcome_myocardial_infarction,outcome_cardiomyopathies,outcome_ischemic_heart_disease,outcome_heart_failure,outcome_peripheral_vascular_disease,outcome_cardiac_arrest,outcome_cerebral_infarction,outcome_arrhythmia,CVD
0,0.0,1.0,2.0,3.0,2.0,1.0,2.0,3.0,2.0,0.34,...,1.211,0,0,0,0,0,0,0,1,1
1,0.0,3.0,2.0,-1.0,0.0,1.0,-1.0,2.0,2.0,3.94,...,1.019,1,0,1,0,0,0,0,0,0
2,0.0,3.0,3.0,2.0,1.0,2.0,1.0,2.0,2.0,0.55,...,1.097,0,0,0,0,0,0,0,0,0
3,3.0,3.0,3.0,3.0,0.0,2.0,1.0,,2.0,0.45,...,0.923,0,0,0,0,0,0,0,0,0
4,0.0,3.0,2.0,1.0,0.0,5.0,2.0,2.0,2.0,0.75,...,1.443,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
502476,0.0,3.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.45,...,1.137,0,0,0,0,0,0,0,0,0
502477,0.0,1.0,1.0,3.0,2.0,2.0,1.0,1.0,1.0,2.75,...,0.898,0,0,1,0,0,0,0,0,0
502478,2.0,1.0,1.0,2.0,2.0,-7.0,4.0,2.0,2.0,0.85,...,0.985,1,0,1,0,0,1,0,1,1
502479,3.0,3.0,1.0,3.0,0.0,1.0,1.0,,2.0,3.57,...,1.063,0,0,0,0,0,0,0,0,0


In [18]:
df['CVD'] = 0

In [19]:
for outcome in gv.outcomes:
    df['CVD'] = np.where(df[outcome]==1,1,0)

In [23]:
pd.set_option('display.max_columns', None)
df

Unnamed: 0,1319-0.0,1408-0.0,1329-0.0,1448-0.0,1538-0.0,6142-0.0,2050-0.0,1508-0.0,1339-0.0,30710-0.0,1349-0.0,30750-0.0,1468-0.0,20117-0.0,30740-0.0,1160-0.0,2090-0.0,31-0.0,1488-0.0,30850-0.0,4080-0.0,1369-0.0,21000-0.0,1200-0.0,1289-0.0,30790-0.0,845-0.0,20491-0.0,48-0.0,30630-0.0,1299-0.0,1220-0.0,1548-0.0,1528-0.0,23099-0.0,49-0.0,30690-0.0,1389-0.0,2654-0.0,1249-0.0,1309-0.0,1379-0.0,1239-0.0,21003-0.0,20403-0.0,30780-0.0,1438-0.0,30870-0.0,1359-0.0,30770-0.0,21001-0.0,1458-0.0,23100-0.0,6138-0.0,1418-0.0,1478-0.0,4079-0.0,20414-0.0,30760-0.0,23101-0.0,2100-0.0,1428-0.0,30640-0.0,outcome_myocardial_infarction,outcome_cardiomyopathies,outcome_ischemic_heart_disease,outcome_heart_failure,outcome_peripheral_vascular_disease,outcome_cardiac_arrest,outcome_cerebral_infarction,outcome_arrhythmia,CVD,hypertension
0,0.0,1.0,2.0,3.0,2.0,1.0,2.0,3.0,2.0,0.34,1.0,,,2.0,5.622,7.0,1.0,0.0,6.0,0.508,110.0,1.0,1001.0,3.0,6.0,,,4.0,74.0,1.593,10.0,0.0,2.0,2.0,35.6,102.0,6.477,1.0,,1.0,2.0,1.0,0.0,54.0,2.0,3.888,10.0,0.977,2.0,26.339,24.5790,-10.0,25.0,1.0,3.0,1.0,77.0,4.0,1.706,45.2,1.0,0.0,1.211,0,0,0,0,0,0,0,1,1,0
1,0.0,3.0,2.0,-1.0,0.0,1.0,-1.0,2.0,2.0,3.94,4.0,40.9,5.0,2.0,5.052,9.0,0.0,1.0,2.0,13.088,166.0,2.0,1001.0,2.0,2.0,15.40,16.0,,120.0,1.390,2.0,0.0,2.0,-10.0,36.5,113.0,5.512,1.0,,1.0,1.0,2.0,0.0,65.0,,3.520,12.0,2.358,3.0,10.701,35.0861,7.0,42.9,-7.0,2.0,1.0,91.0,,1.173,74.6,0.0,1.0,1.019,1,0,1,0,0,0,0,0,0,1
2,0.0,3.0,3.0,2.0,1.0,2.0,1.0,2.0,2.0,0.55,1.0,40.0,1.0,0.0,5.310,5.0,0.0,0.0,0.0,0.515,132.0,1.0,1001.0,3.0,2.0,32.10,16.0,,66.0,2.005,4.0,0.0,1.0,1.0,29.5,88.0,7.079,1.0,,3.0,4.0,2.0,0.0,69.0,,4.227,8.0,0.655,2.0,10.693,19.3835,7.0,15.2,3.0,2.0,1.0,67.0,,2.490,36.3,0.0,1.0,1.097,0,0,0,0,0,0,0,0,0,0
3,3.0,3.0,3.0,3.0,0.0,2.0,1.0,,2.0,0.45,2.0,37.3,4.0,2.0,4.449,7.0,0.0,1.0,5.0,4.675,178.0,2.0,1001.0,1.0,3.0,,18.0,,110.0,1.474,2.0,0.0,1.0,2.0,28.5,117.0,5.028,0.0,7.0,,1.0,2.0,1.0,66.0,,3.041,10.0,3.108,2.0,25.317,35.1281,7.0,31.7,3.0,2.0,1.0,84.0,,1.169,79.6,0.0,3.0,0.923,0,0,0,0,0,0,0,0,0,0
4,0.0,3.0,2.0,1.0,0.0,5.0,2.0,2.0,2.0,0.75,2.0,32.2,1.0,2.0,4.616,6.0,0.0,1.0,-10.0,20.162,178.0,1.0,1001.0,3.0,1.0,71.11,,,94.0,2.149,1.0,0.0,2.0,2.0,24.8,100.0,7.958,1.0,7.0,2.0,1.0,1.0,0.0,48.0,,4.983,8.0,1.173,1.0,26.523,25.8866,1.0,20.1,1.0,2.0,1.0,88.0,,2.053,61.0,0.0,3.0,1.443,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
502476,0.0,3.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.45,3.0,28.0,1.0,2.0,4.509,7.0,0.0,1.0,0.0,17.137,147.0,1.0,1001.0,2.0,2.0,5.30,16.0,4.0,110.0,1.345,0.0,0.0,2.0,0.0,27.0,113.0,5.754,1.0,4.0,1.0,2.0,1.0,2.0,63.0,3.0,3.843,4.0,3.820,2.0,20.777,30.6094,7.0,29.8,-7.0,1.0,3.0,91.0,3.0,1.026,80.7,0.0,3.0,1.137,0,0,0,0,0,0,0,0,0,0
502477,0.0,1.0,1.0,3.0,2.0,2.0,1.0,1.0,1.0,2.75,1.0,33.2,1.0,1.0,5.673,6.0,1.0,0.0,0.0,0.725,148.0,2.0,1001.0,3.0,4.0,5.09,16.0,,102.0,1.365,3.0,0.0,3.0,3.0,46.3,110.0,4.664,2.0,,1.0,6.0,2.0,0.0,64.0,,2.863,4.0,2.134,3.0,23.647,31.4652,7.0,38.7,3.0,3.0,2.0,73.0,,1.159,44.9,0.0,1.0,0.898,0,0,1,0,0,0,0,0,0,1
502478,2.0,1.0,1.0,2.0,2.0,-7.0,4.0,2.0,2.0,0.85,3.0,31.5,3.0,2.0,4.851,7.0,1.0,1.0,20.0,12.040,133.0,2.0,1001.0,2.0,2.0,,-2.0,,99.0,1.241,2.0,1.0,3.0,1.0,25.7,98.0,5.184,1.0,,1.0,2.0,2.0,0.0,43.0,,3.358,20.0,2.547,3.0,22.507,28.6196,10.0,23.6,-7.0,1.0,1.0,81.0,,1.043,68.1,1.0,1.0,0.985,1,0,1,0,0,1,0,1,1,1
502479,3.0,3.0,1.0,3.0,0.0,1.0,1.0,,2.0,3.57,2.0,45.7,1.0,2.0,5.506,6.0,0.0,0.0,4.0,,138.0,1.0,1001.0,3.0,4.0,27.48,,4.0,86.0,1.669,2.0,0.0,2.0,2.0,41.3,111.0,6.827,1.0,,4.0,1.0,1.0,0.0,56.0,1.0,4.200,12.0,2.090,3.0,30.498,27.3702,7.0,32.7,1.0,2.0,1.0,83.0,1.0,1.589,46.4,0.0,0.0,1.063,0,0,0,0,0,0,0,0,0,0


In [21]:
# fix hypertension columns
hypertension = ['131286-0.0','131288-0.0','131290-0.0','131292-0.0']
df['hypertension'] = 0
for feature in hypertension:
    df[feature] = np.where(df[feature].notnull(),1,0)
    selected = df[df[feature]==1].index
    df['hypertension'].iloc[selected] = 1
    
df.drop(hypertension, axis=1, inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, val

In [26]:
# reorder inputs by datatype
cols= gv.continuous_cols+gv.numerical_cols+gv.categorical_cols
# col_list2 = cols+list(df)[61:]
# df=df.loc[:,col_list2]

In [35]:
cols2 = df.iloc[:,:-10].columns.to_list()

In [39]:
df.to_csv('data/preprocessed-binary_CVD.csv')

In [37]:
lost_predictors = []
for predictor in cols2:
    if predictor in cols:
        continue
    else:
        lost_predictors.append(predictor)

In [38]:
lost_predictors

['20491-0.0', '20403-0.0', '20414-0.0']