In [1]:
import pandas as pd

In [2]:
# Read the modelling dataset from the project-relative data folder.
data = pd.read_csv(filepath_or_buffer='./data/data_for_model.csv')

In [3]:
# Recode the 'Y'-flag columns as booleans: True where the cell equals 'Y',
# False for every other value (a missing value also compares as False).
for flag_column in ('pyrexia', 'meconium'):
    data[flag_column] = data[flag_column].eq('Y')

In [4]:
# Analgesics used

def check_analgesia(x, code):
    """Report whether `code` appears in the row's 'analab' or 'analdel' field.

    Parameters
    ----------
    x : mapping-like row (e.g. a DataFrame row) with 'analab' and 'analdel'
        entries. Both are iterated, so they are presumably strings of
        one-letter codes -- TODO confirm against the data dictionary.
    code : the single code to look for.

    Returns
    -------
    bool
        True if `code` equals any element gathered from either field.
    """
    analab_codes = list(x['analab'])
    # Each item of 'analdel' is itself expanded element by element, so the
    # combined list holds individual characters when the field is a string.
    analdel_codes = [char for entry in x['analdel'] for char in entry]
    return code in analab_codes + analdel_codes

# One-letter analgesia codes to turn into indicator columns
# (the meaning of each letter is not shown in this notebook).
analgesic_types = ['I', 'P', 'E', 'G', 'S', 'C', 'T', 'L', 'B', 'O']

# Add one boolean column per code, named 'analgesia_<code>'.
for analgesic_type in analgesic_types:
    label = f'analgesia_{analgesic_type}'
    # `args` must be a tuple. A bare string is unpacked character by
    # character, which only happens to work while every code is one letter.
    data[label] = data.apply(check_analgesia, axis=1, args=(analgesic_type,))

# The raw code columns are replaced by the indicator columns above.
data = data.drop(columns=['analab', 'analdel'])

In [5]:
# Induction used

def check_induction(x, code):
    """Report whether `code` appears in the row's 'methind' field.

    Parameters
    ----------
    x : mapping-like row (e.g. a DataFrame row) with a 'methind' entry,
        presumably a string of one-letter induction codes -- TODO confirm.
    code : the single code to look for.

    Returns
    -------
    bool
        True if `code` is one of the elements of 'methind'; False if it is
        absent or if 'methind' is not iterable (e.g. a missing value read
        in as a float NaN).

    Raises
    ------
    KeyError
        If the row has no 'methind' entry. This used to be swallowed by a
        bare `except`, which would silently produce an all-False column.
    """
    methods = x['methind']
    try:
        return code in list(methods)
    except TypeError:
        # Non-iterable value (such as NaN): treat as "no induction recorded".
        return False

# One-letter induction codes to turn into indicator columns
# (the meaning of each letter is not shown in this notebook).
induction_types = ['A', 'O', 'P']

# Add one boolean column per code, named 'induction_<code>'.
for induction_type in induction_types:
    label = f'induction_{induction_type}'
    # `args` must be a tuple. A bare string is unpacked character by
    # character, which only happens to work while every code is one letter.
    data[label] = data.apply(check_induction, axis=1, args=(induction_type,))

# The raw code column is replaced by the indicator columns above.
data = data.drop(columns=['methind'])

In [6]:
# CTG: build one indicator column per distinct 'ctg' value (prefixed 'ctg_'),
# then append them in place of the original column.
ctg = pd.get_dummies(data['ctg'], prefix='ctg')
data = pd.concat([data.drop(columns=['ctg']), ctg], axis=1)

In [7]:
# Delivery: build one indicator column per distinct 'methdel' value
# (prefixed 'delivery_'), then append them in place of the original column.
delivery = pd.get_dummies(data['methdel'], prefix='delivery')
data = pd.concat([data.drop(columns=['methdel']), delivery], axis=1)

In [8]:
# Hospital: build one indicator column per distinct 'hospname' value
# (prefixed 'hospital_'), then append them in place of the original column.
hospital = pd.get_dummies(data['hospname'], prefix='hospital')
data = pd.concat([data.drop(columns=['hospname']), hospital], axis=1)

In [9]:
# Labour onset: build one indicator column per distinct 'onsetla' value
# (prefixed 'onset_'), then append them in place of the original column.
onset = pd.get_dummies(data['onsetla'], prefix='onset')
data = pd.concat([data.drop(columns=['onsetla']), onset], axis=1)

In [10]:
# Show the first five rows transposed, so each feature is a row.
data.head(5).transpose()

Unnamed: 0,0,1,2,3,4
parity,4.0,0.0,0.0,0.0,1.0
prevcaes,0.0,0.0,0.0,0.0,0.0
noscans,1.0,3.0,1.0,2.0,1.0
pyrexia,False,False,False,False,False
meconium,False,True,False,False,False
apgar5,9.0,7.0,10.0,10.0,10.0
bmi,22,20,21,23,21
age,25,34,34,30,29
gest,41,41,39,39,39
analgesia_I,True,True,False,True,True


Split into APGAR ≤5 and >5

In [11]:
apgar5_score = data['apgar5']

# Low group: 5-minute APGAR of 5 or below.
apgar_less_equal_5 = data.loc[apgar5_score <= 5]

# High group: strictly above 5 (a score of exactly 5 belongs to the low
# group, despite the "five_plus" name). A missing score compares False on
# both tests, so such rows fall in neither group.
mask = apgar5_score > 5
apgar_five_plus = data.loc[mask]

Compare classes

In [12]:
# Column means of each APGAR group, side by side (one row per feature).
# For boolean indicator columns the mean is the fraction of True rows.
results = pd.DataFrame({
    'apgar less equal 5': apgar_less_equal_5.mean(),
    'apgar 5+': apgar_five_plus.mean(),
})
results

Unnamed: 0,apgar less equal 5,apgar 5+
parity,0.786185,0.859139
prevcaes,0.133489,0.081825
noscans,2.351184,2.089381
pyrexia,0.041133,0.013764
meconium,0.264261,0.162315
apgar5,3.826542,9.52857
bmi,24.369422,23.76782
age,28.435002,28.688372
gest,36.969732,39.593588
analgesia_I,0.46721,0.617663


In [13]:
# Save the processed feature table; the row index is not written out.
data.to_csv(path_or_buf='./data/processed_data_for_model.csv', index=False)