In [None]:
#let's get start!

In [2]:
import pandas as pd
import numpy as np

class format_convert(object):
    def __init__(self, file):
        self.file = file
        self.read_file()
        
    def read_file(self):
        self.df = pd.read_csv(self.file,index_col=0)
        
    def ymd(self):
        from datetime import datetime
        EGDTC = self.df[["Day of ECG","Month of ECG","Year of ECG"]].apply(lambda x: '-'.join(x.map(str)), axis=1).apply(lambda x: datetime.strptime(x, "%d-%M-%Y"))
        EGDTC = EGDTC.apply(lambda x: datetime.strftime(x, "%d-%b-%Y").upper())
        return EGDTC
    
    def column_convert(self,column_name):
        column_name = column_name
        result = pd.DataFrame(columns=["STUDYID","DOMAIN","USUBJID","EGEVAL","EGDY","EGDTC","VISITNUM", "VISIT","VISITDY", "EGPOS","EGMETHOD","EGTEST","EGTESTCD" ,"EGORRES","EGORRESU","EGSTRRESC","EGSTRESN","EGSTRESU","EGCAT","EGSTAT"])
        
        #general value copy
        result[["STUDYID","EGEVAL","EGDY"]]= self.df[["Clinical Trial Number","Evaluation Method","Rel day of ECG to Start of Trt"]]
        result["VISITNUM"] =result["VISIT"] = result["VISITDY"] = self.df["Visit"]
        
        #special copy
        result["EGORRES"] = result["EGSTRRESC"] = result["EGSTRESN"] = self.df[column_name]
        result["EGSTAT"] = np.where(self.df[column_name]==".","Not done",self.df[column_name])
        result["USUBJID"] = self.df[["Subject Number","Clinical Trial Number"]].apply(lambda x:'||'.join(x.map(str)),axis=1)
        result['EGDTC'] = self.ymd()

        #hardcode 
        hardcode_dict = {
            "Ventricular Rate":{"EGTEST":"VRMEAN","EGTESTCD":"Summary (Mean) Ventricular Rate"},
            "PR Interval":{"EGTEST":"PRMEAN","EGTESTCD":"Summary (Mean) PR Duration"},
            "QRS Interval":{"EGTEST":"QRSDUR","EGTESTCD":"Summary (Mean) QRS Duration"},
            "QT Interval":{"EGTEST":"QTMEAN","EGTESTCD":"Summary (Mean) QT Duration"},
            "Sinus Rhythm":{"EGTEST":"RHYRATE","EGTESTCD":"Rhythm and rate"},
            "QTc Interval Calc Bazett":{"EGTEST":"QTCBMEAN","EGTESTCD":"QTcB - Bazetts Correction Formula (Mean)"},
            "QTc Interval Calc Fridericia":{"EGTEST":"QTCFMEAN","EGTESTCD":"QTcF-Fridericias Correction Formul(Mean)"}
        }#modify dictionary when need to change
        result["EGTEST"]= hardcode_dict[column_name]["EGTEST"]
        result["EGTESTCD"]= hardcode_dict[column_name]["EGTESTCD"]
        result["DOMAIN"] = "EG"
        result["EGPOS"] = "SUPINE"
        result["EGMETHOD"] = "12 LEAD STANDARD"
        result["EGORRESU"] = result["EGORRES"]
        result["EGSTRESU"] = result["EGTEST"]
        result["EGCAT"]= "Measurement"
        return result
        
    def generate_result(self):
        #modify when need to change
        column_list= ["Ventricular Rate","PR Interval","QRS Interval","QT Interval","Sinus Rhythm","QTc Interval Calc Bazett","QTc Interval Calc Fridericia"]
        
        frames = []
        for column_name in self.df.columns:
            if column_name in column_list:
                column_name_df = self.column_convert(column_name)
                frames.append(column_name_df)
            
        result_df = pd.concat(frames)
        result_df = result_df.reset_index(drop=True)
        return result_df
    
        
        

In [5]:
#the reason of not combining the file then run the method one time is for convenience of adding other file like "ecg4" in the future
def create_egstandard(directory):
    f = []
    for i in range(1,4):
        name = "ecg{0}".format(i)
        d = directory+"/{0}.csv".format(name)
        res = format_convert(file=d).generate_result()
        f.append(res)

    eg_standard = pd.concat(f).reset_index(drop=True)
    eg_standard["EGSEQ"] = eg_standard.groupby(["USUBJID"]).cumcount() + 1
    eg_standard.to_csv(directory+"/EG Standard.csv")
    return eg_standard

In [6]:
eg_standard = create_egstandard('C:/Users/gzsuy/Desktop/To Candidates[13236]/raw') #result can also be found in local drive

In [7]:
eg_standard

Unnamed: 0,STUDYID,DOMAIN,USUBJID,EGEVAL,EGDY,EGDTC,VISITNUM,VISIT,VISITDY,EGPOS,...,EGTEST,EGTESTCD,EGORRES,EGORRESU,EGSTRRESC,EGSTRESN,EGSTRESU,EGCAT,EGSTAT,EGSEQ
0,30121,EG,571331322||30121,INVESTIGATOR,-6,26-JAN-2011,0.000,0.000,0.000,SUPINE,...,VRMEAN,Summary (Mean) Ventricular Rate,63,63,63,63,VRMEAN,Measurement,63,1
1,30121,EG,571331323||30121,INVESTIGATOR,-23,02-JAN-2011,0.000,0.000,0.000,SUPINE,...,VRMEAN,Summary (Mean) Ventricular Rate,61,61,61,61,VRMEAN,Measurement,61,1
2,30121,EG,571331324||30121,INVESTIGATOR,1,01-JAN-2011,0.000,0.000,0.000,SUPINE,...,VRMEAN,Summary (Mean) Ventricular Rate,57,57,57,57,VRMEAN,Measurement,57,1
3,30121,EG,411331322||30121,INVESTIGATOR,-14,09-JAN-2010,0.000,0.000,0.000,SUPINE,...,VRMEAN,Summary (Mean) Ventricular Rate,86,86,86,86,VRMEAN,Measurement,86,1
4,30121,EG,411331323||30121,INVESTIGATOR,-21,23-JAN-2010,0.000,0.000,0.000,SUPINE,...,VRMEAN,Summary (Mean) Ventricular Rate,96,96,96,96,VRMEAN,Measurement,96,1
5,30121,EG,411331325||30121,INVESTIGATOR,-21,20-JAN-2011,0.000,0.000,0.000,SUPINE,...,VRMEAN,Summary (Mean) Ventricular Rate,62,62,62,62,VRMEAN,Measurement,62,1
6,30121,EG,411331326||30121,INVESTIGATOR,-3,09-JAN-2011,0.000,0.000,0.000,SUPINE,...,VRMEAN,Summary (Mean) Ventricular Rate,67,67,67,67,VRMEAN,Measurement,67,1
7,30121,EG,541341322||30121,INVESTIGATOR,.,19-JAN-2011,0.000,0.000,0.000,SUPINE,...,VRMEAN,Summary (Mean) Ventricular Rate,62,62,62,62,VRMEAN,Measurement,62,1
8,30121,EG,541341323||30121,INVESTIGATOR,-11,20-JAN-2011,0.000,0.000,0.000,SUPINE,...,VRMEAN,Summary (Mean) Ventricular Rate,77,77,77,77,VRMEAN,Measurement,77,1
9,30121,EG,541341324||30121,INVESTIGATOR,-3,28-JAN-2011,0.000,0.000,0.000,SUPINE,...,VRMEAN,Summary (Mean) Ventricular Rate,83,83,83,83,VRMEAN,Measurement,83,1
