In [None]:
# Import dependencies
import pandas as pd
from path import Path

from sklearn import tree
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

from sqlalchemy import create_engine
from sqlalchemy.orm import Session
import psycopg2
from config import db_password

In [None]:
# Import input dataset to dataframe
# The connection URL has to exist before the engine is built from it.
# "postgresql://" is the dialect name accepted by every SQLAlchemy release;
# the older "postgres://" alias is rejected from SQLAlchemy 1.4 onwards.
db_string = f"postgresql://postgres:{db_password}@127.0.0.1:5432/XXXXXXX"
engine = create_engine(db_string)

# Pull the whole table into a DataFrame and preview the first rows
animals_df = pd.read_sql("select * from \"XXXXXX\"", con=engine)
animals_df.head(10)

In [None]:
# Create session (link) from Python to the database
# The session is bound to the `engine` built in the data-loading cell.
# NOTE(review): none of the cells visible here use `session` afterwards;
# confirm it is still needed, and close it once finished if so.
session = Session(engine)

In [None]:
# Generate a categorical variable list
# Columns whose dtype is "object" are treated as categorical. The mask must be
# built from animals_df itself (no other frame exists in this notebook).
animals_cat = animals_df.dtypes[animals_df.dtypes == "object"].index.tolist()

# Check the number of unique values in each column
# (each unique value becomes one column when the data is one-hot encoded)
animals_df[animals_cat].nunique()

In [None]:
# Create a OneHotEncoder instance
# The encoder is left at its default (sparse) output and densified explicitly
# below: the keyword that requests dense output was renamed from `sparse` to
# `sparse_output` across scikit-learn releases, while `.toarray()` works on all.
enc = OneHotEncoder()

# Fit and transform the OneHotEncoder using the categorical variable list
# Reusing animals_df's index keeps the encoded rows aligned with their source
# rows when the two frames are later joined on index.
encode_df = pd.DataFrame(
    enc.fit_transform(animals_df[animals_cat]).toarray(),
    index=animals_df.index,
)

# Add the encoded variable names to the DataFrame
# `get_feature_names_out` is the current API; `get_feature_names` only exists
# on older scikit-learn releases, so fall back to it when necessary.
if hasattr(enc, "get_feature_names_out"):
    encode_df.columns = enc.get_feature_names_out(animals_cat)
else:
    encode_df.columns = enc.get_feature_names(animals_cat)
encode_df.head()

In [None]:
# Merge one-hot encoded features and drop the originals
# The join is on row index, so encode_df must share animals_df's index.
animals_df = animals_df.merge(encode_df, left_index=True, right_index=True)
# `columns=` names the axis explicitly; passing the axis positionally
# (`drop(labels, 1)`) is no longer accepted by pandas 2.x.
animals_df = animals_df.drop(columns=animals_cat)
# NOTE: this cell rebinds animals_df, so it is not safe to run twice in a row
# without re-running the loading cell first.
animals_df.head()

In [None]:
# Feature matrix: every column except the target.
# drop() returns a new frame, so animals_df itself is left untouched.
X = animals_df.drop(columns=["animal_id_outcome"])
X.head()

In [None]:
# Target vector, reshaped into a single-column 2-D array
target_column = animals_df["animal_id_outcome"]
y = target_column.values.reshape(-1, 1)
y[:5]

In [None]:
# Hold out a test partition; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Report the shape of each partition, one per line
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

In [None]:
# Fit the scaler on the training partition only, so nothing about the
# test partition influences the scaling parameters
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)  # fit() returns the fitted scaler itself

# Apply the same fitted scaling to both partitions
X_train_scaled, X_test_scaled = (
    X_scaler.transform(features) for features in (X_train, X_test)
)

In [None]:
# Create the decision tree classifier instance
# A fixed random_state makes the fitted tree reproducible: scikit-learn
# permutes the candidate features at every split, so an unseeded tree can
# differ between runs. The seed matches the one used for train_test_split.
model = tree.DecisionTreeClassifier(random_state=0)

In [None]:
# Train the classifier on the scaled training features and their labels
model = model.fit(X=X_train_scaled, y=y_train)

In [None]:
# Make predictions from test data
predictions = model.predict(X_test_scaled)
# Preview only the first few predictions rather than echoing the whole array
# (same preview size as the target vector shown earlier).
predictions[:5]

In [None]:
# Calculate confusion matrix
# Take the label set from the fitted model so the matrix always has one
# row/column per class the classifier knows, in a fixed order, regardless of
# which classes happen to appear in the test partition.
class_labels = model.classes_
cm = confusion_matrix(y_test, predictions, labels=class_labels)

# Create a DataFrame from the confusion matrix
# Row/column captions are derived from the class labels instead of assuming
# exactly two classes; for a 0/1 target this yields "Actual 0", "Actual 1",
# "Predicted 0", "Predicted 1" as before.
animals_cm_df = pd.DataFrame(
    cm,
    index=[f"Actual {label}" for label in class_labels],
    columns=[f"Predicted {label}" for label in class_labels],
)
animals_cm_df

In [None]:
# Fraction of test samples whose predicted label matches the true label
acc_score = accuracy_score(y_true=y_test, y_pred=predictions)

In [None]:
# Show the evaluation summary: confusion matrix, accuracy, per-class report
print("Confusion Matrix")
display(animals_cm_df)

print(f"Accuracy Score : {acc_score}")

print("Classification Report")
report_text = classification_report(y_true=y_test, y_pred=predictions)
print(report_text)