### Import Packages

In [6]:
# Import packages

import glob
import csv
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import psycopg2


### Append each .txt file into a DataFrame
Each txt file is a row

In [2]:
# Iterate through each file name

main = pd.DataFrame()

for filename in glob.iglob('./training_set_a/*.txt'):
    
    # Open each file as data
    with open(filename) as inputfile:
        
        data = list(csv.reader(inputfile))                      # list of list
        data = pd.DataFrame(data[1:],columns=data[0])           # Convert list of list to DataFrame
        data.Value = data.Value.astype(float)                   # Change Value to float
        
        
        
        # Pivot_table to convert from long to wide dataset

        # Creation of new features - aggregate across the time series to find mean, min, max values
        # mean is chosen rather than median because we want to take into the account of 'outlier values'

        wide_data = pd.pivot_table(data,values=['Value'],columns='Parameter',aggfunc=[np.mean,np.min,np.max])
        wide_data.columns = wide_data.columns.droplevel(level=0)
        
        
        
        # rename new columns & lower capitalise
        new_columns = []

        for ind, col in enumerate(wide_data.columns):
    
            if ind < wide_data.columns.shape[0]/3:
                col = 'mean_'+col            
                new_columns.append(col)

            elif ind >= wide_data.columns.shape[0]/3 and ind < 2*wide_data.columns.shape[0]/3:
                col = 'min_'+col
                new_columns.append(col)

            else:
                col = 'max_'+col
                new_columns.append(col)
        
        wide_data.columns = new_columns
        wide_data.columns = wide_data.columns.str.lower()
        
        
        # rename descriptor row
        wide_data.rename(columns={'mean_age':'age','mean_gender':'gender','mean_height':'height',
                                    'mean_icutype':'icutype','mean_recordid':'recordid'},inplace=True)
            
        # drop min/max descriptor rows
        wide_data.drop(['min_age','max_age','min_gender','max_gender','min_height','max_height',
                          'min_icutype','max_icutype','min_recordid','max_recordid'],axis=1,inplace=True)
        
        # set recordid as index
        wide_data.set_index(['recordid'],inplace = True)
        
        main = main.append(wide_data)

In [16]:
# Open set a outcomes file as dataframe
with open('training_outcomes_a.txt') as outcomesfile:
        
        label = list(csv.reader(outcomesfile))                      # list of list
        label = pd.DataFrame(label[1:],columns=label[0])            # Convert list of list to DataFrame
        
        label = label.astype(float)                                 # Change all values to float
        label.columns = label.columns.str.lower()                   # Change all column to lowercase
        
        
        label.set_index(['recordid'],inplace = True)                # set recordid as index

In [14]:
# merge main data and label data
mortality = main.merge(label,how='outer',left_index=True,right_index=True)

In [15]:
mortality.head(5)

Unnamed: 0_level_0,age,gender,height,icutype,max_albumin,max_alp,max_alt,max_ast,max_bilirubin,max_bun,...,min_troponini,min_troponint,min_urine,min_wbc,min_weight,saps-i,sofa,length_of_stay,survival,in-hospital_death
recordid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
132539.0,54.0,0.0,-1.0,4.0,,,,,,13.0,...,,,0.0,9.4,-1.0,6.0,1.0,5.0,-1.0,0.0
132540.0,76.0,1.0,175.3,2.0,,,,,,21.0,...,,,0.0,7.4,76.0,16.0,8.0,8.0,-1.0,0.0
132541.0,44.0,0.0,-1.0,3.0,2.7,127.0,91.0,235.0,3.0,8.0,...,,,18.0,3.7,56.7,21.0,11.0,19.0,-1.0,0.0
132543.0,68.0,1.0,180.3,3.0,4.4,105.0,12.0,15.0,0.2,23.0,...,,,100.0,7.9,84.6,7.0,1.0,9.0,575.0,0.0
132545.0,88.0,0.0,-1.0,3.0,3.3,,,,,45.0,...,,,16.0,3.8,-1.0,17.0,2.0,4.0,918.0,0.0


RecordID (a unique integer for each ICU stay)

Age (years)<br>
Gender (0: female, or 1: male)<br>
Height (cm)<br>
ICUType (1: Coronary Care Unit, 2: Cardiac Surgery Recovery Unit, 3: Medical ICU, or 4: Surgical ICU)<br>
Weight (kg)

Variables Description 

ALB Albumin (g/dL) <br>
ALP Alkaline phosphatase (IU/L) <br>
ALT Alanine transaminase (IU/L) <br>
AST Aspartate transaminase (IU/L) <br>
BIL Bilirubin (mg/dL) <br>
BUN Blood urea nitrogen (mg/dL) <br>
CHO Cholesterol (mg/dL) <br>
CREA Serum creatinine (mg/dL) <br>
DBP Invasive diastolic arterial blood pressure (mmHg) <br>
FIO Fractional inspired O2 (0-1) <br>
GCS Glasgow Coma Score (3-15) <br>
GLU Serum glucose (mg/dL) <br>
HCO Serum bicarbonate (mmol/L)  <br> 
HCT Hematocrit (%) <br>
HR Heart rate (bpm) <br>
K Serum potassium (mEq/L) <br>
LAC Lactate (mmol/L) <br>
MG Serum magnesium (mmol/L) <br>
MAP Invasive mean arterial blood pressure (mmHg) <br>
MEVE Mechanical ventilation respiration <br>
NA Serum sodium (mEq/L) <br>
NBP Non-invasive diastolic arterial blood pressure (mmHg) <br>
NAP Non-invasive mean arterial blood pressure (mmHg) <br>
NSP Non-invasive systolic arterial blood pressure (mmHg) <br>
PCO partial pressure of arterial <br>
CO2 (mmHg) <br>
PO2 Partial pressure of arterial <br>
O2 (mmHg) <br>
PH Arterial pH (0-14) <br>
PLA cells/nL RRA Respiration rate (bpm) <br>
SO2 O2 saturation in hemoglobin (%) <br>
SBP Invasive systolic arterial blood pressure (mmHg) <br>
TEM Temperature (°C) <br>
TRI Troponin-I (μg/L) <br>
TRT Troponin-T (μg/L) <br>
URI Urine output (mL) <br>
WBC White blood cell count (cells/nL) <br>
WEI kg <br>

In [3]:
# Open file

with open('./training_set_a/132539.txt') as inputfile:
    
    results = list(csv.reader(inputfile))           # Open file in list of list
    results = pd.DataFrame(results[1:],columns=results[0])     # Convert list of list to DataFrame
    results.Value = results.Value.astype(float)     # Change Value to float
    
results.head(8)

Unnamed: 0,Time,Parameter,Value
0,00:00,RecordID,132539.0
1,00:00,Age,54.0
2,00:00,Gender,0.0
3,00:00,Height,-1.0
4,00:00,ICUType,4.0
5,00:00,Weight,-1.0
6,00:07,GCS,15.0
7,00:07,HR,73.0


In [4]:
# Pivot_table to convert from long to wide dataset

# Creation of new features - aggregate across the time series to find mean, min, max values
# mean is chosen rather than median because we want to take into the account of 'outlier values'

wide_result = pd.pivot_table(results,values=['Value'],columns='Parameter',aggfunc=[np.mean,np.min,np.max])

wide_result.columns = wide_result.columns.droplevel(level=0)

In [5]:
new_columns = []

for ind, col in enumerate(wide_result.columns):
    
    if ind < wide_result.columns.shape[0]/3:
        col = 'mean_'+col            
        new_columns.append(col)

    elif ind >= wide_result.columns.shape[0]/3 and ind < 2*wide_result.columns.shape[0]/3:
        col = 'min_'+col
        new_columns.append(col)

    else:
        col = 'max_'+col
        new_columns.append(col)
        
print new_columns

['mean_Age', 'mean_BUN', 'mean_Creatinine', 'mean_GCS', 'mean_Gender', 'mean_Glucose', 'mean_HCO3', 'mean_HCT', 'mean_HR', 'mean_Height', 'mean_ICUType', 'mean_K', 'mean_Mg', 'mean_NIDiasABP', 'mean_NIMAP', 'mean_NISysABP', 'mean_Na', 'mean_Platelets', 'mean_RecordID', 'mean_RespRate', 'mean_Temp', 'mean_Urine', 'mean_WBC', 'mean_Weight', 'min_Age', 'min_BUN', 'min_Creatinine', 'min_GCS', 'min_Gender', 'min_Glucose', 'min_HCO3', 'min_HCT', 'min_HR', 'min_Height', 'min_ICUType', 'min_K', 'min_Mg', 'min_NIDiasABP', 'min_NIMAP', 'min_NISysABP', 'min_Na', 'min_Platelets', 'min_RecordID', 'min_RespRate', 'min_Temp', 'min_Urine', 'min_WBC', 'min_Weight', 'max_Age', 'max_BUN', 'max_Creatinine', 'max_GCS', 'max_Gender', 'max_Glucose', 'max_HCO3', 'max_HCT', 'max_HR', 'max_Height', 'max_ICUType', 'max_K', 'max_Mg', 'max_NIDiasABP', 'max_NIMAP', 'max_NISysABP', 'max_Na', 'max_Platelets', 'max_RecordID', 'max_RespRate', 'max_Temp', 'max_Urine', 'max_WBC', 'max_Weight']


In [6]:
# rename the columns and lower capitalise

#new_columns = [u'Age', u'mean_BUN', u'mean_Creatinine', u'mean_GCS', u'Gender', u'mean_Glucose', u'mean_HCO3',
 #      u'mean_HCT', u'mean_HR', u'Height', u'ICUType', u'mean_K', u'mean_Mg', u'mean_NIDiasABP',
  #     u'mean_NIMAP', u'mean_NISysABP', u'mean_Na', u'mean_Platelets', u'RecordID', u'mean_RespRate',
   #    u'mean_Temp', u'mean_Urine', u'mean_WBC', u'mean_Weight', u'min_Age', u'min_BUN', u'min_Creatinine',
    #   u'min_GCS', u'min_Gender', u'min_Glucose', u'min_HCO3', u'min_HCT', u'min_HR', u'min_Height',
     #  u'min_ICUType', u'min_K', u'min_Mg', u'min_NIDiasABP', u'min_NIMAP', u'min_NISysABP', u'min_Na',
      # u'min_Platelets', u'min_RecordID', u'min_RespRate', u'min_Temp', u'min_Urine', u'min_WBC',
       #u'min_Weight', u'max_Age', u'max_BUN', u'max_Creatinine', u'max_GCS', u'max_Gender', u'max_Glucose',
       #u'max_HCO3', u'max_HCT', u'max_HR', u'max_Height', u'max_ICUType', u'max_K', u'max_Mg',
       #u'max_NIDiasABP', u'max_NIMAP', u'max_NISysABP', u'max_Na', u'max_Platelets', u'max_RecordID',
       #u'max_RespRate', u'max_Temp', u'max_Urine', u'max_WBC', u'max_Weight']
wide_result.columns = new_columns
wide_result.columns = wide_result.columns.str.lower()

In [7]:
wide_result.head()

Unnamed: 0,mean_age,mean_bun,mean_creatinine,mean_gcs,mean_gender,mean_glucose,mean_hco3,mean_hct,mean_hr,mean_height,...,max_nimap,max_nisysabp,max_na,max_platelets,max_recordid,max_resprate,max_temp,max_urine,max_wbc,max_weight
Value,54.0,10.5,0.75,14.923077,0.0,160.0,27.0,32.5,70.810811,-1.0,...,92.33,157.0,137.0,221.0,132539.0,24.0,38.2,900.0,11.2,-1.0


In [8]:
# rename descriptor row
wide_result.rename(columns={'mean_age':'age','mean_gender':'gender','mean_height':'height',
                   'mean_icutype':'icutype','mean_recordid':'recordid'},inplace=True)

In [9]:
wide_result.columns

Index([u'age', u'mean_bun', u'mean_creatinine', u'mean_gcs', u'gender',
       u'mean_glucose', u'mean_hco3', u'mean_hct', u'mean_hr', u'height',
       u'icutype', u'mean_k', u'mean_mg', u'mean_nidiasabp', u'mean_nimap',
       u'mean_nisysabp', u'mean_na', u'mean_platelets', u'recordid',
       u'mean_resprate', u'mean_temp', u'mean_urine', u'mean_wbc',
       u'mean_weight', u'min_age', u'min_bun', u'min_creatinine', u'min_gcs',
       u'min_gender', u'min_glucose', u'min_hco3', u'min_hct', u'min_hr',
       u'min_height', u'min_icutype', u'min_k', u'min_mg', u'min_nidiasabp',
       u'min_nimap', u'min_nisysabp', u'min_na', u'min_platelets',
       u'min_recordid', u'min_resprate', u'min_temp', u'min_urine', u'min_wbc',
       u'min_weight', u'max_age', u'max_bun', u'max_creatinine', u'max_gcs',
       u'max_gender', u'max_glucose', u'max_hco3', u'max_hct', u'max_hr',
       u'max_height', u'max_icutype', u'max_k', u'max_mg', u'max_nidiasabp',
       u'max_nimap', u'max_nisysabp', 

In [10]:
# drop descriptor rows

wide_result.drop(['min_age','max_age','min_gender','max_gender','min_height','max_height'
                  ,'min_icutype','max_icutype','min_recordid','max_recordid'],axis=1,inplace=True)

In [11]:
wide_result.set_index(['recordid'],inplace = True)

In [26]:
wide_result

Unnamed: 0_level_0,age,mean_bun,mean_creatinine,mean_gcs,gender,mean_glucose,mean_hco3,mean_hct,mean_hr,height,...,max_nidiasabp,max_nimap,max_nisysabp,max_na,max_platelets,max_resprate,max_temp,max_urine,max_wbc,max_weight
recordid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
132539.0,54.0,10.5,0.75,14.923077,0.0,160.0,27.0,32.5,70.810811,-1.0,...,67.0,92.33,157.0,137.0,221.0,24.0,38.2,900.0,11.2,-1.0


In [27]:
main = pd.DataFrame()

In [30]:
main = main.append(wide_result)

In [31]:
main

Unnamed: 0_level_0,age,mean_bun,mean_creatinine,mean_gcs,gender,mean_glucose,mean_hco3,mean_hct,mean_hr,height,...,max_nidiasabp,max_nimap,max_nisysabp,max_na,max_platelets,max_resprate,max_temp,max_urine,max_wbc,max_weight
recordid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
132539.0,54.0,10.5,0.75,14.923077,0.0,160.0,27.0,32.5,70.810811,-1.0,...,67.0,92.33,157.0,137.0,221.0,24.0,38.2,900.0,11.2,-1.0


In [37]:
# Open each file as result
with open('./training_set_a/132599.txt') as inputfile:
        
    data = list(csv.reader(inputfile))                      # list of list
    data = pd.DataFrame(data[1:],columns=data[0])     # Convert list of list to DataFrame
    data.Value = data.Value.astype(float)                   # Change Value to float
        
        
        
        # Pivot_table to convert from long to wide dataset

        # Creation of new features - aggregate across the time series to find mean, min, max values
        # mean is chosen rather than median because we want to take into the account of 'outlier values'

    wide_data = pd.pivot_table(data,values=['Value'],columns='Parameter',aggfunc=[np.mean,np.min,np.max])
    wide_data.columns = wide_data.columns.droplevel(level=0)
        
        
        
        # rename new columns & lower capitalise
    new_columns = []

    for ind, col in enumerate(wide_data.columns):
    
        if ind < wide_data.columns.shape[0]/3:   
            col = 'mean_'+col            
            new_columns.append(col)

        elif ind >= wide_data.columns.shape[0]/3 and ind < 2*wide_data.columns.shape[0]/3:
            col = 'min_'+col
            new_columns.append(col)

        else:
            col = 'max_'+col
            new_columns.append(col)
        
    wide_data.columns = new_columns
    wide_data.columns = wide_data.columns.str.lower()
        
        
        # rename descriptor row
    wide_data.rename(columns={'mean_age':'age','mean_gender':'gender','mean_height':'height',
                                    'mean_icutype':'icutype','mean_recordid':'recordid'},inplace=True)
            
        # drop min/max descriptor rows
    wide_data.drop(['min_age','max_age','min_gender','max_gender','min_height','max_height',
                          'min_icutype','max_icutype','min_recordid','max_recordid'],axis=1,inplace=True)
        
        # set recordid as index
    wide_data.set_index(['recordid'],inplace = True)

In [38]:
main = main.append(wide_data)

In [39]:
main

Unnamed: 0_level_0,age,gender,height,icutype,max_albumin,max_alp,max_alt,max_ast,max_bilirubin,max_bun,...,min_ph,min_platelets,min_resprate,min_sao2,min_sysabp,min_temp,min_troponint,min_urine,min_wbc,min_weight
recordid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
132539.0,54.0,0.0,-1.0,4.0,,,,,,13.0,...,,185.0,12.0,,,35.1,,0.0,9.4,-1.0
132599.0,53.0,0.0,177.8,4.0,2.0,124.0,14.0,20.0,2.0,33.0,...,7.45,196.0,,98.0,86.0,36.2,0.02,45.0,9.4,73.5


In [41]:
for col in main.columns:
    print col

age
gender
height
icutype
max_albumin
max_alp
max_alt
max_ast
max_bilirubin
max_bun
max_creatinine
max_diasabp
max_fio2
max_gcs
max_glucose
max_hco3
max_hct
max_hr
max_k
max_lactate
max_map
max_mechvent
max_mg
max_na
max_nidiasabp
max_nimap
max_nisysabp
max_paco2
max_pao2
max_ph
max_platelets
max_resprate
max_sao2
max_sysabp
max_temp
max_troponint
max_urine
max_wbc
max_weight
mean_albumin
mean_alp
mean_alt
mean_ast
mean_bilirubin
mean_bun
mean_creatinine
mean_diasabp
mean_fio2
mean_gcs
mean_glucose
mean_hco3
mean_hct
mean_hr
mean_k
mean_lactate
mean_map
mean_mechvent
mean_mg
mean_na
mean_nidiasabp
mean_nimap
mean_nisysabp
mean_paco2
mean_pao2
mean_ph
mean_platelets
mean_resprate
mean_sao2
mean_sysabp
mean_temp
mean_troponint
mean_urine
mean_wbc
mean_weight
min_albumin
min_alp
min_alt
min_ast
min_bilirubin
min_bun
min_creatinine
min_diasabp
min_fio2
min_gcs
min_glucose
min_hco3
min_hct
min_hr
min_k
min_lactate
min_map
min_mechvent
min_mg
min_na
min_nidiasabp
min_nimap
min_nisysabp
min_p

In [14]:
#wide_result.reset_index(inplace=True)
#wide_result.drop('index',axis=1,inplace=True)

In [15]:
# Pivot_table to convert from long to wide dataset

#wide_result = pd.pivot_table(results,values=['Value'],columns='Parameter',index=['Time'])

#wide_result.columns = wide_result.columns.droplevel(level=0)
#wide_result.reset_index(inplace=True)

In [16]:
# Trying to convert time to an 'aggreable' data type

#def str_time2(time):
 #   hours, minutes = map(int, time.split(':'))
  #  time = (hours,minutes)
    
   # return time

#def str_time(time):
 #   hours, minutes = map(int, time.split(':'))
  #  time = time.format(int(hours),int(minutes))
   # return time

#for time in wide_result.index:
 #   hours, minutes = map(int, time.split(':'))
  #  time = (hours,minutes)
   # print time
    
#wide_result.Time = wide_result.Time.apply(str_time)

In [17]:
#class patient_details(object):
 #   """Run description of the patient when admitted on the 48th hour"""
    
  #  def __init__(self,df = wide_result):
         
   #     self.record_id = df[df.Time == '00:00']['RecordID'][0]
    #    self.age = df[df.Time == '00:00']['Age'][0]
     #   self.gender = df[df.Time == '00:00']['Gender'][0]
      #  self.height = df[df.Time == '00:00']['Height'][0]
       # self.ICUtype = df[df.Time == '00:00']['ICUType'][0]

        
        
  #  def fill(self,df = wide_result,details='RecordID'):
   #     """Filling of the NaN values with patient's details can be automated 
    #    by specifying the descriptor(column) in **kwargs """
     #   """Default set as RecordID"""
        
      #  wide_result[details].fillna(value=df[df.Time == '00:00'][details][0],inplace=True)
    

In [18]:
# Initiate the class patient_details
#patient = patient_details()

# Fill NaN values in respective descriptor columns
#patient.fill()
#patient.fill(details='Age')
#patient.fill(details='Gender')
#patient.fill(details='Height')
#patient.fill(details='ICUType')

In [19]:
# change all column names to lower key
#wide_result.columns = wide_result.columns.str.lower()

In [20]:
# Connect to database

conn = psycopg2.connect(host="localhost",dbname="mortality")
cur = conn.cursor()

## EDA

### 1. Check if the data is unbalanced

In [21]:
# Open outcomes file

with open('./training_outcomes_a.txt') as outcomefile:
    
    # Open file in list of list
    
    outcome = list(csv.reader(outcomefile))
    
outcome = pd.DataFrame(outcome[1:],columns=outcome[0])    # Convert list of list to DataFrame
outcome = outcome.astype(float,'ignore')    # Change values to float

In [22]:
# Count the number of positives in dataset
# Positives = 1 = Death, Negative = 0 = Survived

def imbalance_check(column,labels):
    """labels can be a list or a tuple."""
    
    for x in labels:
        label = float(column[column == x].count())
        total = float(column.count())
        
        percentage = float((label/total)*100)
        
        print 'percentage of',x,'in dataset:',percentage,'%'


In [23]:
imbalance_check(outcome['In-hospital_death'],[0,1])    # Conclude that this is an imbalanced dataset

percentage of 0 in dataset: 86.15 %
percentage of 1 in dataset: 13.85 %


### 2. Create outcomes table in database

In [24]:
outcome.head(5)

Unnamed: 0,RecordID,SAPS-I,SOFA,Length_of_stay,Survival,In-hospital_death
0,132539.0,6.0,1.0,5.0,-1.0,0.0
1,132540.0,16.0,8.0,8.0,-1.0,0.0
2,132541.0,21.0,11.0,19.0,-1.0,0.0
3,132543.0,7.0,1.0,9.0,575.0,0.0
4,132545.0,17.0,2.0,4.0,918.0,0.0


In [25]:
pd.to_sql()

AttributeError: 'module' object has no attribute 'to_sql'