# Electrolytes in COVID-19 Infection 

### David K Ryan
### Academic Foundation Doctor, St George's Hospital, London 


_____________

This notebook describes the code used to harmonise the data for the Electrolyte Covid Project at St George's Hospital, London.

Harmonisation is needed because patients present to hospital at different stages of illness. This code therefore reorganises the data so that each electrolyte measurement is indexed by the number of days since symptom onset rather than by day of admission.

No patient data is available online. This notebook describes the code that was used to manipulate the data and generate the graphs for a study conducted by a team at St George's Hospital. The study is awaiting publication.

____________

In [None]:
#import libraries 
import pandas as pd 
import matplotlib.pyplot as plt
%matplotlib inline  
import seaborn as sns
import numpy as np

In [None]:
#display settings
# Raise pandas' row, column and width display limits to 500 so that large
# frames are not truncated when they are shown in the notebook.
for display_option in ('display.max_rows', 'display.max_columns', 'display.width'):
    pd.set_option(display_option, 500)

In [None]:
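#csv file location: set the path to the study CSV, then uncomment the assignment below
#csv_file = 'please-insert-here'

#create dataframe: uncomment once csv_file is set (every later cell expects `df` to exist)
#df = pd.read_csv(csv_file)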

In [None]:
#convert to datetime format
# Each date column is mapped to the keyword arguments passed to pd.to_datetime.
# - the three timeline dates are parsed with an explicit day/month/year format
# - an empty dict means pandas infers the format itself
# - ferritin_date uses errors='coerce', so unparseable entries become NaT
# NOTE(review): the inferred-format columns are presumably day-first as well;
# confirm against the raw CSV, because inference is not guaranteed to read
# ambiguous dates (e.g. 03/04/2020) as day/month.
date_parse_options = {
    'admission_date': {'format': "%d/%m/%Y"},
    'symptom_onset_date': {'format': "%d/%m/%Y"},
    'swab_positive_date': {'format': "%d/%m/%Y"},
    'outcome_date': {},
    'peak_oxygen_requirement_date': {},
    'wbc_peak_date': {},
    'neuts_peak_date': {},
    'lymph_low_date': {},
    'plt_date': {},
    'd_dimer_date': {},
    'ferritin_date': {'errors': 'coerce'},
    'crp_date': {},
    'ldh_date': {},
    'trop_date': {},
    'bili_date': {},
    'alt_date': {},
    'alp_date': {},
    'ggt_date': {},
}

for date_column, parse_kwargs in date_parse_options.items():
    df[date_column] = pd.to_datetime(df[date_column], **parse_kwargs)

In [None]:
#Functions to generate new outcomes 

def death(row): 
    """Flag whether the patient died during admission.

    Parameters
    ----------
    row : mapping or pandas.Series
        A single patient record; only the 'outcome' field is read.

    Returns
    -------
    int
        1 when row['outcome'] equals the string 'Death', otherwise 0.
    """
    return 1 if row['outcome'] == 'Death' else 0
    
# Evaluate death() once per patient row and store the 1/0 flag as a new column.
df['death'] = df.apply(death, axis='columns')


def severe(row): 
    """Flag a patient record as a severe infection.

    A record counts as severe when any of the following holds (checked in
    this order, stopping at the first match):
    - row['itu_admission'] equals 1
    - row['death'] equals 1
    - row['peak_ox_requirement'] equals 15.0

    Parameters
    ----------
    row : mapping or pandas.Series
        A single patient record providing the three fields above.

    Returns
    -------
    int
        1 for a severe record, otherwise 0.
    """
    is_severe = (
        row['itu_admission'] == 1
        or row['death'] == 1
        or row['peak_ox_requirement'] == 15.0
    )
    return 1 if is_severe else 0
    
# Evaluate severe() once per patient row and store the 1/0 flag as a new column.
df['severe'] = df.apply(severe, axis='columns')

#Calculate admission day of illness (time elapsed between symptom onset and admission)
df['admission_day_of_illness'] = df['admission_date'].sub(df['symptom_onset_date'])

In [None]:
#Function to extract electrolyte 

def electrolyte_df(df, cols, electrolyte): 
    """Select one electrolyte's numbered measurement columns from the patient table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'annonymous_code', 'death' and the columns
        '<electrolyte>_1' ... '<electrolyte>_<cols - 1>'.
    cols : int
        Exclusive upper bound of the measurement number, i.e. cols=30
        selects measurements 1 to 29.
    electrolyte : str
        Column-name prefix of the electrolyte, e.g. 'na'.

    Returns
    -------
    pandas.DataFrame
        The 'annonymous_code' and 'death' columns followed by the selected
        measurement columns, in numeric order.
    """
    measurement_columns = [f'{electrolyte}_{number}' for number in range(1, cols)]
    return df[['annonymous_code', 'death'] + measurement_columns]


In [None]:
#Create a sodium dataframe holding columns na_1 ... na_29 (the upper bound 30 is exclusive)
na = electrolyte_df(df, cols=30, electrolyte='na')

In [None]:
#Harmonise the dataframe according to date of illness (rather than date of admission)
# Measurement number j taken during the admission is re-keyed to illness day
# (admission day of illness + j), counted from symptom onset.

N_SODIUM_MEASUREMENTS = 29  # the sodium extract holds columns na_1 ... na_29
MAX_ILLNESS_DAY = 52        # every patient is pre-filled with na_1 ... na_52 = NaN
n_patients = len(na)        # one dictionary entry per row of the sodium extract

#First create an empty dictionary for each patient (patient as key, value as sodium value for each day since symptom onset)
na_dict = {
    f'patient_{patient}': {f'na_{day}': np.nan for day in range(1, MAX_ILLNESS_DAY + 1)}
    for patient in range(n_patients)
}

#fill in this dictionary
# The onset offset is read from df: the sodium extract only carries the
# identifier, death flag and na_* columns, so it has no such column.
onset_offsets = df['admission_day_of_illness']

for patient in range(n_patients):
    offset = onset_offsets.iloc[patient]
    if pd.isna(offset):
        # Without both an admission and a symptom-onset date the measurements
        # cannot be placed on the illness timeline; leave the row as NaN.
        continue

    patient_values = na.iloc[patient]
    for j in range(1, N_SODIUM_MEASUREMENTS + 1):
        value = patient_values[f'na_{j}']
        # pd.isna is required here: an identity test against np.nan does not
        # reliably recognise missing values read back out of a DataFrame.
        if pd.isna(value):
            continue
        # Days outside 1..MAX_ILLNESS_DAY create an extra key rather than being dropped.
        na_dict[f'patient_{patient}'][f'na_{offset.days + j}'] = value
    

In [None]:
#Build a dataframe from the nested dictionary and flip it: one row per patient, one column per illness day
correct_na = pd.DataFrame(na_dict).T

In [None]:
#Add some severity markers to harmonised electrolyte dataframe 
# .values hands over a plain array, so the markers are matched to correct_na by
# row position (not by index label); both frames must list patients in the same order.
for marker in ('severe', 'itu_admission', 'dm', 'htn'):
    correct_na[marker] = df[marker].values

In [None]:
#display dataframe
# Harmonised sodium table: one row per patient (patient_<i>), sodium columns
# keyed by illness day (na_<day>), followed by the severe / itu_admission /
# dm / htn marker columns.
correct_na

In [None]:
#Graph according to severity of infection 

#Define severity and other markers 
severe_m = correct_na['severe'] == 1
non_severe_m = correct_na['severe'] == 0 
itu_y = correct_na['itu_admission'] == 1
no_itu_m = correct_na['itu_admission'] == 0 

# Take explicit copies of each subset: a row is appended to every one of them
# below, and writing into a boolean-indexed selection without .copy() triggers
# pandas' SettingWithCopyWarning.
# NOTE(review): from here on the name `severe` refers to this dataframe and no
# longer to the severe() function defined earlier in the notebook.
severe = correct_na[severe_m].copy()
non_severe = correct_na[non_severe_m].copy()
itu = correct_na[itu_y].copy()
no_itu = correct_na[no_itu_m].copy()

#in this instance we will just segregate according to severity of infection 
# Append a 'median' row (column-wise median of the group's patients) to each
# subset; medians are used rather than means.
for group in (severe, non_severe, itu, no_itu):
    group.loc['median'] = group.median()

In [None]:
#Graph generation
# Columns that are not sodium measurements and must not be plotted.
# errors='ignore' is needed because not every listed marker (e.g. 'white') is
# present in the harmonised frame; without it drop() raises KeyError.
non_sodium_columns = ['severe', 'itu_admission', 'dm', 'htn', 'white']

fig, ax = plt.subplots()

# Median sodium per illness day for each severity group.
severe.drop(columns=non_sodium_columns, errors='ignore').loc['median'].plot(kind='line', label='Severe', ax=ax)
non_severe.drop(columns=non_sodium_columns, errors='ignore').loc['median'].plot(kind='line', label='Mild', ax=ax)

# Reference limits are drawn before the legend is built so that their labels appear in it.
ax.axhline(y=136, color='r', linestyle='--', label='lower limit of normal')
ax.axhline(y=142, color='r', linestyle='--', label='upper limit of normal')
ax.legend()

ax.set_xlabel('Days from symptom onset')
ax.set_ylabel('Sodium (mmol/L)')
ax.set_title('Median Sodium since symptom onset')
fig.savefig('sodium.png')

In [None]:
#Show graph with individual patient trends in electrolyte 
fig, ax = plt.subplots(figsize=(20, 10))

# Plot only the sodium columns, selected by name rather than by assuming the
# marker columns are exactly the last four.
sodium_columns = [column for column in correct_na.columns if str(column).startswith('na_')]

# correct_na holds patient rows only; drop a 'median' row defensively in case
# one was added by hand, so that it is not treated as a patient.
patient_rows = correct_na.drop(index='median', errors='ignore')

# NOTE(review): the loop starts at patient_2, so patient_0 and patient_1 are
# not drawn - confirm that this exclusion is intentional.
for i in range(2, len(patient_rows)): 
    patient = patient_rows.loc[f'patient_{i}']

    if patient['severe'] == 1.0:
        # severe infection: red, semi-transparent, slightly thicker
        patient[sodium_columns].plot(ax=ax, legend=False, color='r', alpha=0.5, linewidth=2)
    else:
        # non-severe infection: green
        patient[sodium_columns].plot(ax=ax, legend=False, color='g')

# Overall median per illness day, computed here because correct_na has no
# 'median' row of its own; drawn once, after the patients, so it stays on top.
patient_rows[sodium_columns].median().plot(ax=ax, legend=False, color='b', linewidth=3)

ax.set_xlabel("Day")
ax.set_ylabel("Sodium")
ax.set_title('Sodium in patients with covid infection')

# Apply the axis limits before saving so the saved image matches the displayed
# one, and write to a separate file so the median graph in sodium.png is kept.
ax.set_ylim(120, 200)
fig.savefig('sodium_individual.png')