## Connecting to SQL Database and Loading data table in as dataframe


In [77]:
# Import Dependencies
import numpy as np
import matplotlib.pyplot as plt
from google.colab import files
import io
import pandas as pd

import warnings
warnings.filterwarnings('ignore')

from pathlib import Path
from collections import Counter

In [78]:
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
from sklearn.linear_model import LogisticRegression
from imblearn.over_sampling import SMOTE

In [79]:
# Installing packages to use postgresql
!pip install ipython-sql
!pip install sqlalchemy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [80]:
# dependencies necessary for connecting to sql database
import os
from sqlalchemy import create_engine
from config import db_password

In [81]:
db_string = f"postgresql://root:{db_password}@unc-capstone-db.chbhjul7q0jr.us-east-2.rds.amazonaws.com/cleaning_database_beta"

In [82]:
engine = create_engine(db_string)

In [83]:
!pip install ipython-sql

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [84]:
%load_ext sql

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [85]:
%sql postgresql://root:{db_password}@unc-capstone-db.chbhjul7q0jr.us-east-2.rds.amazonaws.com/cleaning_database_beta

'Connected: root@cleaning_database_beta'

In [86]:
%%sql 
SELECT * FROM updated_animal_data1 LIMIT 5

 * postgresql://root:***@unc-capstone-db.chbhjul7q0jr.us-east-2.rds.amazonaws.com/cleaning_database_beta
5 rows affected.


animal_id,state,age,sex,animal_type,breed_class,color,weight_lbs,temperature,heart_rate_bpm,resp_rate_bpm,mm,crt,mentation,vomiting,diarrhea,inappetence,lethargic,lameness,muscle_pain,joint_swelling,reported_weight_loss,skin_condition,is_4dx_tested
A803419,TX,2 years,Intact Male,Dog,Small,Black/White,7,101,146,137,Pale,UTO,BAR,,,Mild,Moderate,,Mild,,,Normal,Not Tested
A800463,TX,7 months,Intact Female,Dog,Medium,Tan/Black,45,102,116,38,Light Pink,>2 sec,Dull/Depressed,Mild,Mild,Mild,,,,,,Irritation,Not Tested
A803414,TX,2 years,Intact Female,Dog,Large,Black/Tricolor,63,102,161,44,Pink,1-2 sec,BAR,Mild,,,Moderate,,,,,Normal,Not Tested
A803417,TX,1 month,Intact Female,Dog,Medium,Brown/Tan,39,101,129,49,Pink,1-2 sec,BAR,Mild,Mild,,Moderate,Present,,,,Bruising,Not Tested
A803412,TX,5 months,Intact Male,Dog,Small,Brown/Black,21,102,165,128,Pale,UTO,QAR,,,,Moderate,,,,,Normal,Not Tested


In [87]:
# Convert sql data table into pandas dataframe
animal_Data_df = pd.read_sql('SELECT * FROM updated_animal_data1', engine)
animal_Data_df.head()

Unnamed: 0,animal_id,state,age,sex,animal_type,breed_class,color,weight_lbs,temperature,heart_rate_bpm,...,vomiting,diarrhea,inappetence,lethargic,lameness,muscle_pain,joint_swelling,reported_weight_loss,skin_condition,is_4dx_tested
0,A803419,TX,2 years,Intact Male,Dog,Small,Black/White,7,101,146,...,,,Mild,Moderate,,Mild,,,Normal,Not Tested
1,A800463,TX,7 months,Intact Female,Dog,Medium,Tan/Black,45,102,116,...,Mild,Mild,Mild,,,,,,Irritation,Not Tested
2,A803414,TX,2 years,Intact Female,Dog,Large,Black/Tricolor,63,102,161,...,Mild,,,Moderate,,,,,Normal,Not Tested
3,A803417,TX,1 month,Intact Female,Dog,Medium,Brown/Tan,39,101,129,...,Mild,Mild,,Moderate,Present,,,,Bruising,Not Tested
4,A803412,TX,5 months,Intact Male,Dog,Small,Brown/Black,21,102,165,...,,,,Moderate,,,,,Normal,Not Tested


In [88]:
# checking columns and data types
animal_Data_df.dtypes

animal_id               object
state                   object
age                     object
sex                     object
animal_type             object
breed_class             object
color                   object
weight_lbs               int64
temperature              int64
heart_rate_bpm           int64
resp_rate_bpm           object
mm                      object
crt                     object
mentation               object
vomiting                object
diarrhea                object
inappetence             object
lethargic               object
lameness                object
muscle_pain             object
joint_swelling          object
reported_weight_loss    object
skin_condition          object
is_4dx_tested           object
dtype: object

## Pre-processing data for supervised learning

In [89]:
# Removing columns that we do not need for model
u1_animalData_df = animal_Data_df.drop(['animal_id', 'state', 'sex', 'animal_type', 'breed_class', 'color'], axis=1)
print(u1_animalData_df.shape[0])
u1_animalData_df.head()

49509


Unnamed: 0,age,weight_lbs,temperature,heart_rate_bpm,resp_rate_bpm,mm,crt,mentation,vomiting,diarrhea,inappetence,lethargic,lameness,muscle_pain,joint_swelling,reported_weight_loss,skin_condition,is_4dx_tested
0,2 years,7,101,146,137,Pale,UTO,BAR,,,Mild,Moderate,,Mild,,,Normal,Not Tested
1,7 months,45,102,116,38,Light Pink,>2 sec,Dull/Depressed,Mild,Mild,Mild,,,,,,Irritation,Not Tested
2,2 years,63,102,161,44,Pink,1-2 sec,BAR,Mild,,,Moderate,,,,,Normal,Not Tested
3,1 month,39,101,129,49,Pink,1-2 sec,BAR,Mild,Mild,,Moderate,Present,,,,Bruising,Not Tested
4,5 months,21,102,165,128,Pale,UTO,QAR,,,,Moderate,,,,,Normal,Not Tested


In [90]:
# Remove the `Not Tested` 4Dx status
not_Tested_mask = u1_animalData_df['is_4dx_tested'] != 'Not Tested'
tested_df = u1_animalData_df.loc[not_Tested_mask]

print(tested_df.shape[0])
print(tested_df.columns)
tested_df.head(10)

16979
Index(['age', 'weight_lbs', 'temperature', 'heart_rate_bpm', 'resp_rate_bpm',
       'mm', 'crt', 'mentation', 'vomiting', 'diarrhea', 'inappetence',
       'lethargic', 'lameness', 'muscle_pain', 'joint_swelling',
       'reported_weight_loss', 'skin_condition', 'is_4dx_tested'],
      dtype='object')


Unnamed: 0,age,weight_lbs,temperature,heart_rate_bpm,resp_rate_bpm,mm,crt,mentation,vomiting,diarrhea,inappetence,lethargic,lameness,muscle_pain,joint_swelling,reported_weight_loss,skin_condition,is_4dx_tested
6,1 year,15,102,183,44,Pink,1-2 sec,QAR,Mild,Mild,,,,,,,Normal,Negative
8,2 years,72,100,138,21,Pink,>2 sec,QAR,Mild,,,,,,,Present,Normal,Negative
10,2 days,63,102,160,50,Pink,1-2 sec,QAR,,,Mild,,,,,,Normal,Negative
11,2 days,59,102,157,18,Pink,>2 sec,Dull/Depressed,Mild,Mild,Moderate,,,,Mild,,Normal,Negative
19,6 years,42,104,110,40,Light Pink,1-2 sec,Dull/Depressed,,Moderate,Mild,,,Mild,Mild,,Normal,Positive
20,2 months,90,100,161,131,Pink,>2 sec,Anxious/Agitated,,,,,,,,,Normal,Negative
24,4 years,19,102,88,150,Pink,<1 sec,Dull/Depressed,,,Moderate,,,,,,Bruising,Negative
25,2 years,71,102,147,39,Pink,<1 sec,BAR,Mild,,Mild,Mild,,,,,Normal,Negative
32,4 months,47,99,93,39,Pink,1-2 sec,QAR,,Mild,Mild,,,Mild,,,Normal,Negative
35,9 years,69,105,178,145,Pink,1-2 sec,QAR,Mild,,Mild,Mild,,,Mild,Present,Normal,Positive


In [91]:
# Checking the values in the RR column - before converting to numerical
tested_df['resp_rate_bpm'].unique()

array(['44', '21', '50', '18', '40', '131', '150', '39', '145', '141',
       '29', '31', '52', '32', '60', '24', '19', '33', '57', '121', '123',
       '42', '59', '55', '25', '58', '48', '126', '49', '56', '144', '53',
       '138', '22', '139', '20', '148', '122', '120', '41', '37', '129',
       '128', '46', '125', '38', '27', '34', '142', '54', '136', '28',
       '147', '36', '137', '26', '47', '43', '146', '45', '30', '124',
       '133', '127', '143', '35', '23', '51', '149', '135', '134', '130',
       '132', '140'], dtype=object)

In [92]:
# Converting RR column to integer
tested_df['resp_rate_bpm'] = tested_df['resp_rate_bpm'].astype('int')

tested_df.dtypes

age                     object
weight_lbs               int64
temperature              int64
heart_rate_bpm           int64
resp_rate_bpm            int64
mm                      object
crt                     object
mentation               object
vomiting                object
diarrhea                object
inappetence             object
lethargic               object
lameness                object
muscle_pain             object
joint_swelling          object
reported_weight_loss    object
skin_condition          object
is_4dx_tested           object
dtype: object

In [93]:
# Converting the age column to just numbers - years
# First, splitting the column into two new colummns - one for number and the other for the string (years, months, etc.)
tested_df[['age_num', 'age_str']] = tested_df['age'].apply(lambda x: pd.Series(str(x).split(" ")))

# Setting the original age column equal to the age_num column - i.e. changing the data in the age column to just the numbers
tested_df['age'] = tested_df['age_num']

# Changing the data type of the age column to float
tested_df['age'] = tested_df['age'].astype('float')

In [94]:
# Checking the column to see what the unique string values are
tested_df["age_str"].unique()

array(['year', 'years', 'days', 'months', 'month', 'weeks', 'week', 'day'],
      dtype=object)

In [95]:
# Using the string column to change the number value - converting all ages to years
tested_df.loc[tested_df.age_str == 'days', ['age']] = tested_df['age'] / 365
tested_df.loc[tested_df.age_str == 'day', ['age']] = tested_df['age'] / 365
tested_df.loc[tested_df.age_str == 'months', ['age']] = tested_df['age'] / 12
tested_df.loc[tested_df.age_str == 'month', ['age']] = tested_df['age'] / 12
tested_df.loc[tested_df.age_str == 'weeks', ['age']] = tested_df['age'] / 52
tested_df.loc[tested_df.age_str == 'week', ['age']] = tested_df['age'] / 52

print(tested_df.shape)
tested_df.head()

(16979, 20)


Unnamed: 0,age,weight_lbs,temperature,heart_rate_bpm,resp_rate_bpm,mm,crt,mentation,vomiting,diarrhea,inappetence,lethargic,lameness,muscle_pain,joint_swelling,reported_weight_loss,skin_condition,is_4dx_tested,age_num,age_str
6,1.0,15,102,183,44,Pink,1-2 sec,QAR,Mild,Mild,,,,,,,Normal,Negative,1,year
8,2.0,72,100,138,21,Pink,>2 sec,QAR,Mild,,,,,,,Present,Normal,Negative,2,years
10,0.005479,63,102,160,50,Pink,1-2 sec,QAR,,,Mild,,,,,,Normal,Negative,2,days
11,0.005479,59,102,157,18,Pink,>2 sec,Dull/Depressed,Mild,Mild,Moderate,,,,Mild,,Normal,Negative,2,days
19,6.0,42,104,110,40,Light Pink,1-2 sec,Dull/Depressed,,Moderate,Mild,,,Mild,Mild,,Normal,Positive,6,years


In [96]:
# Dropping the extra (created) columns - age_num and age_str
final_animalData_df = tested_df.drop(['age_str', 'age_num'], axis=1)
print(final_animalData_df.columns)
print(final_animalData_df.dtypes)
print(final_animalData_df.shape)
final_animalData_df.head()

Index(['age', 'weight_lbs', 'temperature', 'heart_rate_bpm', 'resp_rate_bpm',
       'mm', 'crt', 'mentation', 'vomiting', 'diarrhea', 'inappetence',
       'lethargic', 'lameness', 'muscle_pain', 'joint_swelling',
       'reported_weight_loss', 'skin_condition', 'is_4dx_tested'],
      dtype='object')
age                     float64
weight_lbs                int64
temperature               int64
heart_rate_bpm            int64
resp_rate_bpm             int64
mm                       object
crt                      object
mentation                object
vomiting                 object
diarrhea                 object
inappetence              object
lethargic                object
lameness                 object
muscle_pain              object
joint_swelling           object
reported_weight_loss     object
skin_condition           object
is_4dx_tested            object
dtype: object
(16979, 18)


Unnamed: 0,age,weight_lbs,temperature,heart_rate_bpm,resp_rate_bpm,mm,crt,mentation,vomiting,diarrhea,inappetence,lethargic,lameness,muscle_pain,joint_swelling,reported_weight_loss,skin_condition,is_4dx_tested
6,1.0,15,102,183,44,Pink,1-2 sec,QAR,Mild,Mild,,,,,,,Normal,Negative
8,2.0,72,100,138,21,Pink,>2 sec,QAR,Mild,,,,,,,Present,Normal,Negative
10,0.005479,63,102,160,50,Pink,1-2 sec,QAR,,,Mild,,,,,,Normal,Negative
11,0.005479,59,102,157,18,Pink,>2 sec,Dull/Depressed,Mild,Mild,Moderate,,,,Mild,,Normal,Negative
19,6.0,42,104,110,40,Light Pink,1-2 sec,Dull/Depressed,,Moderate,Mild,,,Mild,Mild,,Normal,Positive


## Defining our Target

In [97]:
# Creating features
X = final_animalData_df.drop(columns='is_4dx_tested')
X = pd.get_dummies(X)

# Target
y = final_animalData_df['is_4dx_tested']

X.head()

Unnamed: 0,age,weight_lbs,temperature,heart_rate_bpm,resp_rate_bpm,mm_Light Pink,mm_Pale,mm_Pink,crt_1-2 sec,crt_<1 sec,...,joint_swelling_Mild,joint_swelling_Moderate,joint_swelling_None,reported_weight_loss_None,reported_weight_loss_Present,skin_condition_Bruising,skin_condition_Irritation,skin_condition_Normal,skin_condition_Petechia,skin_condition_Petechiae
6,1.0,15,102,183,44,0,0,1,1,0,...,0,0,1,1,0,0,0,1,0,0
8,2.0,72,100,138,21,0,0,1,0,0,...,0,0,1,0,1,0,0,1,0,0
10,0.005479,63,102,160,50,0,0,1,1,0,...,0,0,1,1,0,0,0,1,0,0
11,0.005479,59,102,157,18,0,0,1,0,0,...,1,0,0,1,0,0,0,1,0,0
19,6.0,42,104,110,40,1,0,0,1,0,...,1,0,0,1,0,0,0,1,0,0


In [98]:
X.describe()

Unnamed: 0,age,weight_lbs,temperature,heart_rate_bpm,resp_rate_bpm,mm_Light Pink,mm_Pale,mm_Pink,crt_1-2 sec,crt_<1 sec,...,joint_swelling_Mild,joint_swelling_Moderate,joint_swelling_None,reported_weight_loss_None,reported_weight_loss_Present,skin_condition_Bruising,skin_condition_Irritation,skin_condition_Normal,skin_condition_Petechia,skin_condition_Petechiae
count,16979.0,16979.0,16979.0,16979.0,16979.0,16979.0,16979.0,16979.0,16979.0,16979.0,...,16979.0,16979.0,16979.0,16979.0,16979.0,16979.0,16979.0,16979.0,16979.0,16979.0
mean,2.677442,52.787738,100.941987,138.631898,65.820779,0.129218,0.139996,0.730785,0.479298,0.211909,...,0.167796,0.070263,0.761941,0.841157,0.158843,0.150716,0.024442,0.799635,0.016373,0.008834
std,3.062319,31.285347,1.586213,29.759678,44.605111,0.335451,0.346994,0.443565,0.499586,0.408673,...,0.373696,0.255598,0.425908,0.365541,0.365541,0.357782,0.154421,0.400285,0.12691,0.093578
min,0.0,5.0,99.0,88.0,18.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.666667,29.0,100.0,113.0,32.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
50%,2.0,51.0,101.0,138.0,47.0,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
75%,4.0,73.0,102.0,164.0,123.0,0.0,0.0,1.0,1.0,0.0,...,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
max,20.0,200.0,105.0,190.0,150.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [99]:
# Check the balance of the target variable
y.value_counts()

Negative    14853
Positive     2126
Name: is_4dx_tested, dtype: int64

## Scaling the data and Splitting our data into Training and Testing sets

In [100]:
# Splitting data into testing and training sets

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
print(Counter(y_train))
print(Counter(y_test))

Counter({'Negative': 11177, 'Positive': 1557})
Counter({'Negative': 3676, 'Positive': 569})


In [101]:
# Creating a StandardScaler instance.
scaler = StandardScaler()
# Fitting the Standard Scaler with the training data.
X_scaler = scaler.fit(X_train)

# Scaling the data.
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## Random Oversampling Model

In [102]:
# Resampling the data with the RandomOverSampler
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(X_train, y_train)

Counter(y_resampled)

Counter({'Negative': 11177, 'Positive': 11177})

In [103]:
# Training the model with resampled data
model = LogisticRegression(solver='lbfgs', random_state=42)
model.fit(X_resampled, y_resampled)

LogisticRegression(random_state=42)

In [104]:
# Calculating an accuracy score
y_pred = model.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

0.9647538013160939

In [105]:
# Creating the confusion matrix
Confusion_matrix = confusion_matrix(y_test, y_pred)
cm_df = pd.DataFrame(Confusion_matrix, index=["Actual Negative", "Actual Positive"], columns=["Predicted Negative", "Predicted Positive"])
cm_df

Unnamed: 0,Predicted Negative,Predicted Positive
Actual Negative,3559,117
Actual Positive,22,547


In [106]:
# Displaying the classification report
report_df = pd.DataFrame(classification_report_imbalanced(y_test, y_pred, output_dict=True)).transpose()
report_df

Unnamed: 0,pre,rec,spe,f1,geo,iba,sup
Negative,0.993856,0.968172,0.961336,0.980846,0.964748,0.931374,3676.0
Positive,0.823795,0.961336,0.968172,0.887267,0.964748,0.930102,569.0
avg_pre,0.971061,0.971061,0.971061,0.971061,0.971061,0.971061,0.971061
avg_rec,0.967256,0.967256,0.967256,0.967256,0.967256,0.967256,0.967256
avg_spe,0.962252,0.962252,0.962252,0.962252,0.962252,0.962252,0.962252
avg_f1,0.968303,0.968303,0.968303,0.968303,0.968303,0.968303,0.968303
avg_geo,0.964748,0.964748,0.964748,0.964748,0.964748,0.964748,0.964748
avg_iba,0.931204,0.931204,0.931204,0.931204,0.931204,0.931204,0.931204
total_support,4245.0,4245.0,4245.0,4245.0,4245.0,4245.0,4245.0


## SMOTE Oversampling Model

In [107]:
# Resampling data with SMOTE method
X_resampled, y_resampled = SMOTE(random_state=42, sampling_strategy='auto').fit_resample(
    X_train, y_train)

Counter(y_resampled)

Counter({'Negative': 11177, 'Positive': 11177})

In [108]:
# Training the logistic regression model using the SMOTE resampled data
model_SMOTE = LogisticRegression(solver='lbfgs', random_state=42)
model_SMOTE.fit(X_resampled, y_resampled)

LogisticRegression(random_state=42)

In [109]:
# Calculating the accuracy score
y_pred = model_SMOTE.predict(X_test)
balanced_accuracy_score(y_test, y_pred)

0.9497545949501922

In [110]:
# Creating the confusion matrix
cm_SMOTE = confusion_matrix(y_test, y_pred)
cm_df2 = pd.DataFrame(cm_SMOTE, index=["Actual Negative", "Actual Positive"], columns=["Predicted Negative", "Predicted Positive"])
cm_df2

Unnamed: 0,Predicted Negative,Predicted Positive
Actual Negative,3649,27
Actual Positive,53,516


In [111]:
# Displaying the classification table
report_df2 = pd.DataFrame(classification_report_imbalanced(y_test, y_pred, output_dict=True)).transpose()
report_df2

Unnamed: 0,pre,rec,spe,f1,geo,iba,sup
Negative,0.985683,0.992655,0.906854,0.989157,0.948785,0.907917,3676.0
Positive,0.950276,0.906854,0.992655,0.928058,0.948785,0.89247,569.0
avg_pre,0.980937,0.980937,0.980937,0.980937,0.980937,0.980937,0.980937
avg_rec,0.981154,0.981154,0.981154,0.981154,0.981154,0.981154,0.981154
avg_spe,0.918355,0.918355,0.918355,0.918355,0.918355,0.918355,0.918355
avg_f1,0.980967,0.980967,0.980967,0.980967,0.980967,0.980967,0.980967
avg_geo,0.948785,0.948785,0.948785,0.948785,0.948785,0.948785,0.948785
avg_iba,0.905847,0.905847,0.905847,0.905847,0.905847,0.905847,0.905847
total_support,4245.0,4245.0,4245.0,4245.0,4245.0,4245.0,4245.0


## Uploading the updated table to SQL

In [112]:
# Using label encoding to transform data in table that is better formatted for analysis in R
# printing the classes for each variable to show what level will be assigned - the idex in the list is the level assigned to that descriptor
og_final_animalData = final_animalData_df.copy()

le = LabelEncoder()
final_animalData_df['mm'] = le.fit_transform(final_animalData_df['mm'])
print(le.classes_)

final_animalData_df['crt'] = le.fit_transform(final_animalData_df['crt'])
print(le.classes_)

final_animalData_df['mentation'] = le.fit_transform(final_animalData_df['mentation'])
print(le.classes_)

final_animalData_df['diarrhea'] = le.fit_transform(final_animalData_df['diarrhea'])
print(le.classes_)

final_animalData_df['vomiting'] = le.fit_transform(final_animalData_df['vomiting'])
print(le.classes_)

final_animalData_df['inappetence'] = le.fit_transform(final_animalData_df['inappetence'])
print(le.classes_)

final_animalData_df['lethargic'] = le.fit_transform(final_animalData_df['lethargic'])
print(le.classes_)

final_animalData_df['muscle_pain'] = le.fit_transform(final_animalData_df['muscle_pain'])
print(le.classes_)

final_animalData_df['lameness'] = le.fit_transform(final_animalData_df['lameness'])
print(le.classes_)

final_animalData_df['reported_weight_loss'] = le.fit_transform(final_animalData_df['reported_weight_loss'])
print(le.classes_)

final_animalData_df['joint_swelling'] = le.fit_transform(final_animalData_df['joint_swelling'])
print(le.classes_)

final_animalData_df['skin_condition'] = le.fit_transform(final_animalData_df['skin_condition'])
print(le.classes_)

final_animalData_df['is_4dx_tested'] = le.fit_transform(final_animalData_df['is_4dx_tested'])
print(le.classes_)


# Adding code to write the cleaned dataframe to a new table in our PostgreSQL database
final_animalData_df.to_sql(name='ml_cleaned_animalData', con=engine)

final_animalData_df.head(20)

['Light Pink' 'Pale' 'Pink']
['1-2 sec' '<1 sec' '>2 sec' 'UTO']
['Anxious/Agitated' 'BAR' 'Dull/Depressed' 'Obtunded' 'QAR']
['Chronic' 'Mild' 'Moderate' 'None']
['Chronic' 'Mild' 'Moderate' 'None']
['Mild' 'Moderate' 'None' 'Severe']
['Mild' 'Moderate' 'None']
['Mild' 'Moderate' 'None']
['None' 'Present']
['None' 'Present']
['Mild' 'Moderate' 'None']
['Bruising' 'Irritation' 'Normal' 'Petechia' 'Petechiae']
['Negative' 'Positive']


Unnamed: 0,age,weight_lbs,temperature,heart_rate_bpm,resp_rate_bpm,mm,crt,mentation,vomiting,diarrhea,inappetence,lethargic,lameness,muscle_pain,joint_swelling,reported_weight_loss,skin_condition,is_4dx_tested
6,1.0,15,102,183,44,2,0,4,1,1,2,2,0,2,2,0,2,0
8,2.0,72,100,138,21,2,2,4,1,3,2,2,0,2,2,1,2,0
10,0.005479,63,102,160,50,2,0,4,3,3,0,2,0,2,2,0,2,0
11,0.005479,59,102,157,18,2,2,2,1,1,1,2,0,2,0,0,2,0
19,6.0,42,104,110,40,0,0,2,3,2,0,2,0,0,0,0,2,1
20,0.166667,90,100,161,131,2,2,0,3,3,2,2,0,2,2,0,2,0
24,4.0,19,102,88,150,2,1,2,3,3,1,2,0,2,2,0,0,0
25,2.0,71,102,147,39,2,1,1,1,3,0,0,0,2,2,0,2,0
32,0.333333,47,99,93,39,2,0,4,3,1,0,2,0,0,2,0,2,0
35,9.0,69,105,178,145,2,0,4,1,3,0,0,0,2,0,1,2,1
