In [1]:
import pandas as pd

In [2]:
# Read the model input table from CSV into a DataFrame
data = pd.read_csv(filepath_or_buffer='./data/data_for_model.csv')

In [3]:
# Convert binary values to boolean: True only where the entry is exactly 'Y'
# (any other value, including a missing one, becomes False)
data['pyrexia'] = data['pyrexia'].eq('Y')
data['meconium'] = data['meconium'].eq('Y')

In [4]:
# Limit BMI to 15-50 and age to 15-45 (both bounds inclusive). Rows with a
# missing 'bmi' or 'age' fail the range test and are removed as well.
mask = data['bmi'].between(15, 50) & data['age'].between(15, 45)

# Take an explicit copy: the cells below add columns to `data`, and assigning
# into a boolean-indexed slice can trigger pandas' SettingWithCopyWarning.
data = data[mask].copy()

In [5]:
# Analgesics used

def check_analgesia(x, code):
    """Return True if `code` is listed in the row's 'analab' or 'analdel' entry.

    Parameters
    ----------
    x : mapping-like row (for example the Series that ``DataFrame.apply``
        passes with ``axis=1``) providing the keys 'analab' and 'analdel'.
    code : value to look for among the individual elements of both entries
        (the individual characters, when an entry is a string).

    Returns
    -------
    bool
        True when `code` is found in either entry, otherwise False.

    Notes
    -----
    An entry that cannot be iterated (such as NaN for a missing value) is
    treated as empty instead of raising TypeError, so rows with missing
    analgesia information simply yield False.
    """
    def _elements(value):
        # list() raises TypeError for non-iterable values such as float NaN
        try:
            return list(value)
        except TypeError:
            return []

    analgesics = _elements(x['analab'])
    # For a string entry this appends its characters one at a time
    for analgesic in _elements(x['analdel']):
        analgesics.extend(analgesic)
    return code in analgesics

analgesic_types = ['I', 'P', 'E', 'G', 'S', 'C', 'T', 'L', 'B', 'O']

# Add one boolean column per analgesic code, named 'analgesia_<code>'
for analgesic_type in analgesic_types:
    label = f'analgesia_{analgesic_type}'
    # `args` must be a tuple of extra positional arguments. A bare string is
    # unpacked character by character, which only happens to work while every
    # code is a single character.
    data[label] = data.apply(check_analgesia, axis=1, args=(analgesic_type,))

# The source columns are dropped once they have been expanded
data = data.drop(columns=['analab', 'analdel'])

In [6]:
# Induction used

def check_induction(x, code):
    """Return True if `code` is listed in the row's 'methind' entry.

    Parameters
    ----------
    x : mapping-like row (for example the Series that ``DataFrame.apply``
        passes with ``axis=1``) providing the key 'methind'.
    code : value to look for among the individual elements of the entry
        (the individual characters, when the entry is a string).

    Returns
    -------
    bool
        True when `code` is found; False when it is not, or when the entry
        cannot be iterated (such as NaN or None for a missing value).

    Notes
    -----
    Only the TypeError raised for a non-iterable entry is handled. Other
    errors, such as a KeyError for a missing 'methind' key, propagate instead
    of being silently reported as "no induction".
    """
    try:
        return code in list(x['methind'])
    except TypeError:
        # Non-iterable entry (missing value): no induction method recorded
        return False

induction_types = ['A', 'O', 'P']

# Add one boolean column per induction code, named 'induction_<code>'
for induction_type in induction_types:
    label = f'induction_{induction_type}'
    # `args` must be a tuple of extra positional arguments. A bare string is
    # unpacked character by character, which only happens to work while every
    # code is a single character.
    data[label] = data.apply(check_induction, axis=1, args=(induction_type,))

# The source column is dropped once it has been expanded
data = data.drop(columns=['methind'])

In [7]:
# CTG: one indicator column per 'ctg' category (prefixed 'ctg_'), appended to
# the frame in place of the original column
ctg = pd.get_dummies(data['ctg'], prefix='ctg')
data = pd.concat([data, ctg], axis=1).drop(columns=['ctg'])

In [8]:
# Delivery: one indicator column per 'methdel' category (prefixed
# 'delivery_'), appended to the frame in place of the original column
delivery = pd.get_dummies(data['methdel'], prefix='delivery')
data = pd.concat([data, delivery], axis=1).drop(columns=['methdel'])

In [9]:
# Hospital: one indicator column per 'hospname' category (prefixed
# 'hospital_'), appended to the frame in place of the original column
hospital = pd.get_dummies(data['hospname'], prefix='hospital')
data = pd.concat([data, hospital], axis=1).drop(columns=['hospname'])

In [10]:
# Labour onset: one indicator column per 'onsetla' category (prefixed
# 'onset_'), appended to the frame in place of the original column
onset = pd.get_dummies(data['onsetla'], prefix='onset')
data = pd.concat([data, onset], axis=1).drop(columns=['onsetla'])

In [11]:
# Change types
# Cast these columns to int (astype(int) raises if a value cannot be
# converted, e.g. a missing value)
for col in ['parity', 'prevcaes', 'noscans', 'apgar5']:
    data[col] = data[col].astype(int)

# Store every column whose name starts with one of the one-hot prefixes as
# bool. str.startswith accepts a tuple, so a single pass over the columns
# replaces the four separate loops that compared hand-counted slices.
one_hot_prefixes = ('ctg', 'delivery', 'hospital', 'onset')
for col in list(data):
    if col.startswith(one_hot_prefixes):
        data[col] = data[col].astype(bool)

In [12]:
# Preview the first five rows, transposed so that each column is shown as a row
data.head(n=5).transpose()

Unnamed: 0,0,1,2,3,4
parity,1,2,0,2,1
prevcaes,0,0,0,0,0
noscans,1,2,5,1,2
pyrexia,False,False,False,False,False
meconium,False,False,True,False,False
apgar5,10,9,10,9,10
bmi,18,20,27,33,25
age,27,25,34,29,37
gest,38,40,36,26,41
ethnic_white,False,False,False,False,False


In [13]:
# Write the processed table to CSV; the DataFrame index is not written
data.to_csv(path_or_buf='./data/processed_data_for_model_one_hot.csv', index=False)