 ## Basic Setup

In [None]:
# Initial Imports
from pathlib import Path
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
# Read the cleaned CSV from the Resources folder into a DataFrame
data = Path('../Resources/Displacement_Risk_Clean.csv')
df = pd.read_csv(filepath_or_buffer=data)

# Preview the first ten rows as the cell output
df.head(n=10)

## Preprocess the Dataset

In [None]:
# Work on a copy so the originally loaded frame `df` is left unchanged
df_encoded = df.copy()

# Replace the target column's labels with the codes produced by LabelEncoder
le = LabelEncoder()
df_encoded["VULNERABLE Pop"] = le.fit_transform(df["VULNERABLE Pop"])

# Preview the first ten rows of the encoded frame
df_encoded.head(n=10)

In [None]:
# Expand each listed categorical column into indicator (dummy) columns
df_encoded = pd.get_dummies(
    data=df_encoded,
    columns=[
        "GENTRIFICATION Typology",
        "DEMOGRAPHIC Change",
        "HOUSING_MARKET",
        "DISPLACEMENT RISK",
        "Quadrant_2019",
    ],
)

# Preview the first ten rows after dummy encoding
df_encoded.head(n=10)

In [None]:
# Features: every column except the target
X = df_encoded.drop(columns=["VULNERABLE Pop"])

# Target: the label-encoded "VULNERABLE Pop" column
y = df_encoded["VULNERABLE Pop"]

In [None]:
# Split features and target into training and testing subsets.
# `stratify=y` keeps the class proportions of the target the same in both
# subsets; `random_state=1` makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=1
)

# Show (rows, columns) of the training features as the cell output
X_train.shape

 ## Perform Simple Logistic Regression

In [None]:
# Initialize the logistic regression model: lbfgs solver, at most 200
# iterations, and a fixed seed for reproducibility
classifier = LogisticRegression(random_state=1, max_iter=200, solver='lbfgs')

In [None]:
# Train the classifier on the training subset (the fitted estimator is the
# cell output)
classifier.fit(X=X_train, y=y_train)

In [None]:
# Predict the target for the held-out test features
y_pred = classifier.predict(X_test)

# Put predictions next to the true labels, then discard the index inherited
# from `y_test` so rows are numbered from 0
results = pd.DataFrame({"Prediction": y_pred, "Actual": y_test})
results = results.reset_index(drop=True)

# Preview the first ten prediction/actual pairs
results.head(n=10)

## Assess Performance of Logistic Regression Model

In [None]:
# Determine the accuracy score of the model on the test subset
print(accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
# Build the confusion matrix from the true test labels and the predictions,
# then print it
matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(matrix)

In [None]:
# Build the text classification report for the test predictions and print it
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)