## Data Preprocessing: Importing, Cleaning, and Feature Transformation

In [1]:
# Import the libraries needed

import pandas as pd
from ydata_profiling import ProfileReport
import numpy as np
import seaborn as sns
from sklearn.preprocessing import OneHotEncoder


  @nb.jit


In [2]:
# Read in the dataset
 
data=pd.read_csv("diabetic_data.csv")

In [3]:
# Replace "?" with NaN

# raw = raw.replace('?', np.nan)
data.replace('?', np.nan, inplace = True)

In [4]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101766 entries, 0 to 101765
Data columns (total 50 columns):
 #   Column                    Non-Null Count   Dtype 
---  ------                    --------------   ----- 
 0   encounter_id              101766 non-null  int64 
 1   patient_nbr               101766 non-null  int64 
 2   race                      99493 non-null   object
 3   gender                    101766 non-null  object
 4   age                       101766 non-null  object
 5   weight                    3197 non-null    object
 6   admission_type_id         101766 non-null  int64 
 7   discharge_disposition_id  101766 non-null  int64 
 8   admission_source_id       101766 non-null  int64 
 9   time_in_hospital          101766 non-null  int64 
 10  payer_code                61510 non-null   object
 11  medical_specialty         51817 non-null   object
 12  num_lab_procedures        101766 non-null  int64 
 13  num_procedures            101766 non-null  int64 
 14  num_

In [5]:
# Remove unwanted attributes/columns

# Remove encounter_id, patient_nbr, weight, payer_code, medical specialty
data = data.drop(['encounter_id', 'patient_nbr', 'weight', 'payer_code', 'medical_specialty'], axis=1)

# Remove RACE from data - MAKING A SEPARATE COMMAND SO I CAN CHOOSE TO LEAVE 
# IT IN FOR FEATURE SELECTION ANALYSIS
data = data.drop(['race'], axis = 1)

# Remove drugs with 0 users
data = data.drop(['examide', 'citoglipton'], axis = 1)

# View results
#data

In [6]:
# Remove records of patients who died or were transferred to hospice

# REMOVE RECORDS OF PATIENTS WHO DIED OR WERE TRANSFERRED TO HOSPICE

# Remove records of patients (based on discharge_disposition_id) who died 
# (4 categories) or transferred to hospice (2 categories)

Removal_codes_discharge = [11, 13, 14, 19, 20, 21]

data = data[~data['discharge_disposition_id'].isin(Removal_codes_discharge)]

data.shape

(99343, 42)

In [7]:
# Change the non-drug attributes to categorical

#data['race'] = data['race'].astype('category')
data['gender'] = data['gender'].astype('category')
data['age'] = data['age'].astype('category')
data['admission_type_id'] = data['admission_type_id'].astype('category')
data['discharge_disposition_id'] = data['discharge_disposition_id'].astype('category')
data['admission_source_id'] = data['admission_source_id'].astype('category')

#data['medical_specialty'] = data['medical_specialty'].astype('category')
data['diag_1'] = data['diag_1'].astype('category')
data['diag_2'] = data['diag_2'].astype('category')
data['diag_3'] = data['diag_3'].astype('category')

#data['max_glu_serum'] = data['max_glu_serum'].astype('category')
#data['A1Cresult'] = data['A1Cresult'].astype('category')
data['change'] = data['change'].astype('category')
data['diabetesMed'] = data['diabetesMed'].astype('category')
data['readmitted'] = data['readmitted'].astype('category')

In [8]:
# MUST DO BEFORE CONVERTING DRUGS TO DRUG CLASSES
# Change drug attributes to categorical

data['metformin'] = data['metformin'].astype('category')
data['repaglinide'] = data['repaglinide'].astype('category')
data['nateglinide'] = data['nateglinide'].astype('category')
data['chlorpropamide'] = data['chlorpropamide'].astype('category')
data['glimepiride'] = data['glimepiride'].astype('category')
data['acetohexamide'] = data['acetohexamide'].astype('category')
data['glipizide'] = data['glipizide'].astype('category')
data['glyburide'] = data['glyburide'].astype('category')
data['tolbutamide'] = data['tolbutamide'].astype('category')
data['pioglitazone'] = data['pioglitazone'].astype('category')
data['rosiglitazone'] = data['rosiglitazone'].astype('category')
data['acarbose'] = data['acarbose'].astype('category')
data['miglitol'] = data['miglitol'].astype('category')
data['troglitazone'] = data['troglitazone'].astype('category')
data['tolazamide'] = data['tolazamide'].astype('category')
data['insulin'] = data['insulin'].astype('category')
data['glyburide-metformin'] = data['glyburide-metformin'].astype('category')
data['glipizide-metformin'] = data['glipizide-metformin'].astype('category')
data['glimepiride-pioglitazone'] = data['glimepiride-pioglitazone'].astype('category')
data['metformin-rosiglitazone'] = data['metformin-rosiglitazone'].astype('category')
data['metformin-pioglitazone'] = data['metformin-pioglitazone'].astype('category')

## Converting Drugs into Drug Classes

In [9]:
# *** CONVERTING DRUGS INTO DRUG CLASSES ***
#  MEGLITINIDES

# using a function to check values
# Create a function that will transfer values from drugs into one drug class
def classify_drug(row):
    if (row['repaglinide'] == 'Down' or row['repaglinide'] == 'Up' 
        or row['nateglinide'] == 'Down' or row['nateglinide'] == 'Up'):
        return 'Adjusted'
    elif row['repaglinide'] == 'Steady' or row['nateglinide'] == 'Steady':
        return 'Steady'
    else:
        return 'No'

# creating a new column for the drug class and creating values with the function    

data['Meglitinides'] = data.apply(classify_drug, axis = 1)





In [10]:
# *** CONVERTING DRUGS INTO DRUG CLASSES ***
#  THIAZOLIDINEDIONES

# using a function to check values
# Create a function that will transfer values from drugs into one drug class
def classify_drug(row):
    if (row['pioglitazone'] == 'Down' or row['pioglitazone'] == 'Up' or 
        row['rosiglitazone'] == 'Down' or row['rosiglitazone'] == 'Up' or 
        row['troglitazone'] == 'Down' or row['troglitazone'] == 'Up'):
        return 'Adjusted'
    elif (row['pioglitazone'] == 'Steady' or row['rosiglitazone'] == 'Steady' 
          or row['troglitazone'] == 'Steady'):
        return 'Steady'
    else:
        return 'No'

# creating a new column for the drug class and creating values with the function    

data["Thiazolidinediones"] = data.apply(classify_drug, axis = 1)





In [11]:
# *** CONVERTING DRUGS INTO DRUG CLASSES ***
#  SULFONYLUREAS

# using a function to check values
# Create a function that will transfer values from drugs into one drug class
def classify_drug(row):
    if (row['chlorpropamide'] == 'Down' or row['chlorpropamide'] == 'Up' or 
        row['glimepiride'] == 'Down' or row['glimepiride'] == 'Up' or
        row['acetohexamide'] == 'Down' or row['acetohexamide'] == 'Up' or
        row['glipizide'] == 'Down' or row['glipizide'] == 'Up' or
        row['glyburide'] == 'Down' or row['glyburide'] == 'Up'or
        row['tolbutamide'] == 'Down' or row['tolbutamide'] == 'Up' or
        row['tolazamide'] == 'Down' or row['tolazamide'] == 'Up'):
        return 'Adjusted'
    elif (row['chlorpropamide'] == 'Steady' or row['glimepiride'] == 'Steady'
          or row['acetohexamide'] == 'Steady'or row['glipizide'] == 'Steady'
          or row['glyburide'] == 'Steady'or row['tolbutamide'] == 'Steady'
          or row['tolazamide'] == 'Steady'):
        return 'Steady'
    else:
        return 'No'

# creating a new column for the drug class and creating values with the function    

data['Sulfonylureas'] = data.apply(classify_drug, axis = 1)





In [12]:
# *** CONVERTING DRUGS INTO DRUG CLASSES ***
#  ALPHA-GLUCOSIDASE INHIBITORS

# using a function to check values
# Create a function that will transfer values from drugs into one drug class

def classify_drug(row):
    if row['acarbose'] == 'Down' or row['acarbose'] == 'Up' or row['miglitol'] == 'Down' or row['nateglinide'] == 'Up':
        return 'Adjusted'
    elif row['acarbose'] == 'Steady' or row['nateglinide'] == 'Steady':
        return 'Steady'
    else:
        return 'No'

# creating a new column for the drug class and creating values with the function    

data['AG_Inhibitors'] = data.apply(classify_drug, axis = 1)





In [13]:
# *** CONVERTING DRUGS INTO DRUG CLASSES ***
#  METFORMIN

# using a function to check values
# Create a function that will transfer values from drugs into one drug class
def classify_drug(row):
    if row['metformin'] == 'Down' or row['metformin'] == 'Up':
        return 'Adjusted'
    elif row['metformin'] == 'Steady':
        return 'Steady'
    else:
        return 'No'

# creating a new column for the drug class and creating values with the function    

data['Metformin'] = data.apply(classify_drug, axis = 1)





In [14]:
# *** CONVERTING DRUGS INTO DRUG CLASSES ***
#  INSULIN

# using a function to check values
# Create a function that will transfer values from drugs into one drug class
def classify_drug(row):
    if row['insulin'] == 'Down' or row['insulin'] == 'Up':
        return 'Adjusted'
    elif row['insulin'] == 'Steady':
        return 'Steady'
    else:
        return 'No'

# creating a new column for the drug class and creating values with the function    

data['Insulin'] = data.apply(classify_drug, axis = 1)





In [15]:
# **** CONVERTING COMBO-DRUGS INTO DRUG CLASSES ****
# GLYBURIDE-METFORMIN

# Create a function that will transfer values from combo drugs into two drug classes

def classify_drug(row):
    if row['glyburide-metformin'] == 'Down' or row['glyburide-metformin'] == 'Up':
        return 'Adjusted'
    elif row['glyburide-metformin'] == 'Steady':
        return 'Steady'
    else:
        return 'No'

# creating a new column for the drug class and creating values with the function    

data['Sulfonylureas'] = data.apply(classify_drug, axis = 1)
data['Metformin'] = data.apply(classify_drug, axis = 1)



In [16]:
# **** CONVERTING COMBO-DRUGS INTO DRUG CLASSES ****
# GLIPIZIDE-METFORMIN

# Create a function that will transfer values from combo drugs into two drug classes

def classify_drug(row):
    if row['glipizide-metformin'] == 'Down' or row['glipizide-metformin'] == 'Up':
        return 'Adjusted'
    elif row['glipizide-metformin'] == 'Steady':
        return 'Steady'
    else:
        return 'No'

# creating a new column for the drug class and creating values with the function    

data['Sulfonylureas'] = data.apply(classify_drug, axis = 1)
data['Metformin'] = data.apply(classify_drug, axis = 1)



In [17]:
# **** CONVERTING COMBO-DRUGS INTO DRUG CLASSES ****
# GLIMEPIRIDE-PIOGLITAZONE

# Create a function that will transfer values from combo drugs into two drug classes

def classify_drug(row):
    if row['glimepiride-pioglitazone'] == 'Down' or row['glimepiride-pioglitazone'] == 'Up':
        return 'Adjusted'
    elif row['glimepiride-pioglitazone'] == 'Steady':
        return 'Steady'
    else:
        return 'No'

# creating a new column for the drug class and creating values with the function    

data['Sulfonylureas'] = data.apply(classify_drug, axis = 1)
data['Thiazolidinediones'] = data.apply(classify_drug, axis = 1)



In [18]:
# **** CONVERTING COMBO-DRUGS INTO DRUG CLASSES ****
# METFORMIN-PIOGLITAZONE

# Create a function that will transfer values from combo drugs into two drug classes

def classify_drug(row):
    if row['metformin-pioglitazone'] == 'Down' or row['metformin-pioglitazone'] == 'Up':
        return 'Adjusted'
    elif row['metformin-pioglitazone'] == 'Steady':
        return 'Steady'
    else:
        return 'No'

# creating a new column for the drug class and creating values with the function    

data['Metformin'] = data.apply(classify_drug, axis = 1)
data['Thiazolidinediones'] = data.apply(classify_drug, axis = 1)



In [19]:
# **** CONVERTING COMBO-DRUGS INTO DRUG CLASSES ****
# METFORMIN-ROSIGLITAZONE

# Create a function that will transfer values from combo drugs into two drug classes

def classify_drug(row):
    if row['metformin-rosiglitazone'] == 'Down' or row['metformin-rosiglitazone'] == 'Up':
        return 'Adjusted'
    elif row['metformin-rosiglitazone'] == 'Steady':
        return 'Steady'
    else:
        return 'No'

# creating a new column for the drug class and creating values with the function    

data['Metformin'] = data.apply(classify_drug, axis = 1)
data['Thiazolidinediones'] = data.apply(classify_drug, axis = 1)



In [20]:
# REMOVE DRUGS AND LEAVE DRUG CLASSES BEHIND

data = data.drop(['repaglinide', 'nateglinide'], axis=1)
data = data.drop(['pioglitazone', 'rosiglitazone', 'troglitazone'], axis=1)
data = data.drop(['chlorpropamide', 'glimepiride', 'acetohexamide', 'glipizide',
                  'glyburide', 'tolbutamide', 'tolazamide'], axis=1)
data = data.drop(['acarbose', 'miglitol'], axis=1)
data = data.drop(['glyburide-metformin', 'glipizide-metformin',
                 'glimepiride-pioglitazone', 'metformin-rosiglitazone',
                 'metformin-pioglitazone'], axis=1)
data = data.drop(['metformin'], axis = 1)
data = data.drop(['insulin'], axis = 1)

In [21]:
# CHANGE DRUG CLASSES TO CATEGORICAL

data['Meglitinides'] = data['Meglitinides'].astype('category')
data['Thiazolidinediones'] = data['Thiazolidinediones'].astype('category')
data['Sulfonylureas'] = data['Sulfonylureas'].astype('category')
data['AG_Inhibitors'] = data['AG_Inhibitors'].astype('category')
data['Metformin'] = data['Metformin'].astype('category')
data['Insulin'] = data['Insulin'].astype('category')


In [22]:
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 99343 entries, 0 to 101765
Data columns (total 27 columns):
 #   Column                    Non-Null Count  Dtype   
---  ------                    --------------  -----   
 0   gender                    99343 non-null  category
 1   age                       99343 non-null  category
 2   admission_type_id         99343 non-null  category
 3   discharge_disposition_id  99343 non-null  category
 4   admission_source_id       99343 non-null  category
 5   time_in_hospital          99343 non-null  int64   
 6   num_lab_procedures        99343 non-null  int64   
 7   num_procedures            99343 non-null  int64   
 8   num_medications           99343 non-null  int64   
 9   number_outpatient         99343 non-null  int64   
 10  number_emergency          99343 non-null  int64   
 11  number_inpatient          99343 non-null  int64   
 12  diag_1                    99323 non-null  category
 13  diag_2                    98987 non-null  category

## Aggregating Values

In [23]:
# REPLACING VALUES IN ADMISSION_TYPE TO MISSING
# replacing "Newborn, Not Available, NULL and Not Mapped" with 'Missing'
# replacing codes to more readable labels

print(data.admission_type_id.value_counts())

admission_type_map = {1:'Emergency', 2: 'Urgent', 3:'Elective', 4: 'Missing',
                      5: 'Missing', 6: 'Missing', 7: 'Trauma Centre', 8 : 'Missing'}

# map method in Pandas overwrites all categories.  MUST use a complete dictionary

print("\n")

data['admission_type_id'] = data['admission_type_id'].map(admission_type_map)


print(data.admission_type_id.value_counts())

admission_type_id
1    52371
3    18668
2    18132
6     5207
5     4617
8      320
7       18
4       10
Name: count, dtype: int64


admission_type_id
Emergency        52371
Elective         18668
Urgent           18132
Missing          10154
Trauma Centre       18
Name: count, dtype: int64


In [24]:
# REPLACE DATA IN ADMISSION_SOURCE TO MISSING
# ********* DUE TO VALUE TYPE, MISSING WILL BE CODED 999 ***************
# replacing "Not Available x 2, NULL, Not Mapped, Unknown/Invalid"
# Sick Baby, Extramural Birth due to incorrect coding

data['admission_source_id'] = data['admission_source_id'].replace(9, 999)
data['admission_source_id'] = data['admission_source_id'].replace(13, 999)
data['admission_source_id'] = data['admission_source_id'].replace(14, 999)
data['admission_source_id'] = data['admission_source_id'].replace(15, 999)
data['admission_source_id'] = data['admission_source_id'].replace(17, 999)
data['admission_source_id'] = data['admission_source_id'].replace(20, 999)
data['admission_source_id'] = data['admission_source_id'].replace(21, 999)

print(data['admission_source_id'].value_counts())

admission_source_id
7      55850
1      29168
999     6857
4       3118
6       2239
2       1081
5        806
3        185
8         15
22        12
10         8
11         2
25         2
Name: count, dtype: int64


## Replacing Values / Correcting Errors

In [25]:
# REPLACING VALUES IN DISCHARGE DISPOSITION TO MISSING
# ********* DUE TO VALUE TYPE, MISSING WILL BE CODED 999 ***************
# replacing "Neonate discharged" to "Missing" due to entry error
# replacing "NULL, Not Mapped, "Unknown/Invalid" to "Missing"

data['discharge_disposition_id'] = data['discharge_disposition_id'].replace(10, 999)
data['discharge_disposition_id'] = data['discharge_disposition_id'].replace(18, 999)
data['discharge_disposition_id'] = data['discharge_disposition_id'].replace(25, 999)
data['discharge_disposition_id'] = data['discharge_disposition_id'].replace(26, 999)

# can't use map method because it will overwrite all the values

data['discharge_disposition_id'].value_counts()

discharge_disposition_id
1      60234
3      13954
6      12902
999     4686
2       2128
22      1993
5       1184
4        815
7        623
23       412
28       139
8        108
15        63
24        48
9         21
17        14
16        11
27         5
12         3
Name: count, dtype: int64

In [26]:
# Check which records are coded as "Normal Delivery"

data[data['admission_source_id'] == 11]

# The record for the patient '70-80' is clearly an error

Unnamed: 0,gender,age,admission_type_id,discharge_disposition_id,admission_source_id,time_in_hospital,num_lab_procedures,num_procedures,num_medications,number_outpatient,...,A1Cresult,change,diabetesMed,readmitted,Meglitinides,Thiazolidinediones,Sulfonylureas,AG_Inhibitors,Metformin,Insulin
83510,Female,[20-30),Emergency,1,11,3,1,2,4,0,...,,No,Yes,NO,No,No,No,No,No,Steady
85588,Female,[70-80),Elective,1,11,2,10,6,19,1,...,,Ch,Yes,NO,No,No,No,No,No,Steady


In [27]:
# VIEWING THE INCORRECT RECORD
# Need to re-code the incorrect file admission_source 
# from "Normal Delivery" to "Missing"
# Correction done in next cell

data.iloc[85588]

gender                         Female
age                           [60-70)
admission_type_id           Emergency
discharge_disposition_id            3
admission_source_id                 7
time_in_hospital                    4
num_lab_procedures                 46
num_procedures                      1
num_medications                     8
number_outpatient                   0
number_emergency                    0
number_inpatient                    2
diag_1                            518
diag_2                            295
diag_3                            276
number_diagnoses                    9
max_glu_serum                     NaN
A1Cresult                         NaN
change                             No
diabetesMed                        No
readmitted                         NO
Meglitinides                       No
Thiazolidinediones                 No
Sulfonylureas                      No
AG_Inhibitors                      No
Metformin                          No
Insulin     

In [28]:
# replacing file 85588 due to incorrect coding

data.at[85588, 'admission_source_id'] = 999

data.iloc[85588]

gender                         Female
age                           [60-70)
admission_type_id           Emergency
discharge_disposition_id            3
admission_source_id                 7
time_in_hospital                    4
num_lab_procedures                 46
num_procedures                      1
num_medications                     8
number_outpatient                   0
number_emergency                    0
number_inpatient                    2
diag_1                            518
diag_2                            295
diag_3                            276
number_diagnoses                    9
max_glu_serum                     NaN
A1Cresult                         NaN
change                             No
diabetesMed                        No
readmitted                         NO
Meglitinides                       No
Thiazolidinediones                 No
Sulfonylureas                      No
AG_Inhibitors                      No
Metformin                          No
Insulin     

In [29]:
# REPLACING VALUES

# gender - replace "Unknown/Invalid" with "missing"

print(data.gender.value_counts())

data['gender'] = data['gender'].replace("Unknown/Invalid", "Missing")

print('\nReplacement done.\n')

print(data.gender.value_counts())

gender
Female             53454
Male               45886
Unknown/Invalid        3
Name: count, dtype: int64

Replacement done.

gender
Female     53454
Male       45886
Missing        3
Name: count, dtype: int64


In [30]:
# RECODING VALUES IN ADMISSION_SOURCE COLUMN TO NEW ATTRIBUTE
# 'Other' category includes 15 records 'Court/Law Enforcement'
# and 1 record 'Normal Delivery'

# Map for original values to new category values

conditions = [(data['admission_source_id'].isin([1, 2, 3])),
              (data['admission_source_id'].isin([4, 5, 6, 10, 18, 19, 22, 25])),
              (data['admission_source_id'].isin([7])),
              (data['admission_source_id'].isin([999])),
              (data['admission_source_id'].isin([8, 11]))]

# Names of new categories in order that matches the above conditions

new_labels = ['Referral', 'Transfer', 'Emerg_Dept', 'Missing', 'Other']

# Use Numpy Select to create a new column with the mapped categories

data['Admission_Source'] = np.select(conditions, new_labels, default = 'Unknown')

print(data)


        gender      age admission_type_id discharge_disposition_id  \
0       Female   [0-10)           Missing                      999   
1       Female  [10-20)         Emergency                        1   
2       Female  [20-30)         Emergency                        1   
3         Male  [30-40)         Emergency                        1   
4         Male  [40-50)         Emergency                        1   
...        ...      ...               ...                      ...   
101761    Male  [70-80)         Emergency                        3   
101762  Female  [80-90)         Emergency                        4   
101763    Male  [70-80)         Emergency                        1   
101764  Female  [80-90)            Urgent                        3   
101765    Male  [70-80)         Emergency                        1   

       admission_source_id  time_in_hospital  num_lab_procedures  \
0                        1                 1                  41   
1                      

In [31]:
# VERIFYING ADMISSION_SOURCE WAS CORRECTLY RELABELED

print(data['Admission_Source'].unique())
print(data['Admission_Source'].value_counts())

['Referral' 'Emerg_Dept' 'Transfer' 'Missing' 'Other']
Admission_Source
Emerg_Dept    55850
Referral      30434
Missing        6858
Transfer       6185
Other            16
Name: count, dtype: int64


In [32]:
# REMOVE ORIGINAL DISCHARGE ATTRIBUTE 

data = data.drop(['admission_source_id'], axis=1)
#data('Admission_Source').astype('category')
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 99343 entries, 0 to 101765
Data columns (total 27 columns):
 #   Column                    Non-Null Count  Dtype   
---  ------                    --------------  -----   
 0   gender                    99343 non-null  category
 1   age                       99343 non-null  category
 2   admission_type_id         99343 non-null  object  
 3   discharge_disposition_id  99343 non-null  category
 4   time_in_hospital          99343 non-null  int64   
 5   num_lab_procedures        99343 non-null  int64   
 6   num_procedures            99343 non-null  int64   
 7   num_medications           99343 non-null  int64   
 8   number_outpatient         99343 non-null  int64   
 9   number_emergency          99343 non-null  int64   
 10  number_inpatient          99343 non-null  int64   
 11  diag_1                    99323 non-null  category
 12  diag_2                    98987 non-null  category
 13  diag_3                    97924 non-null  category

In [33]:
## CLEANING UP data - THIS SHOULD BE IN THE EARLIER CODE !!!

#data = data.drop(['metformin', 'insulin'], axis = 1)
data['admission_type_id'] = data['admission_type_id'].astype('category')
data['Admission_Source'] = data['Admission_Source'].astype('category')

## Generate Exploratory Data Analysis & Export Data

In [34]:
# FINAL STEP - Run the EDA report

profile = ProfileReport(data, title="Diabetic Data EDA Profile Report Nov 1st_2")
profile.to_file("EDA_report_Nov01.html")

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

In [35]:
# Pass data to next module

data.to_csv('Mod_1_data_to_pass.csv', index = False)