# <span style="font-size: 50%;">Level 6 Data Science - Software Engineering</span><br>Topic 8 - Linear Discriminant Analysis (LDA)

First, let's install the necessary libraries. The versions listed in `requirements.txt` are the ones that we have tested, and we use `pip install -q` to install them quietly, without showing the installation output.

In [None]:
%pip install -q -r ../requirements.txt

In [None]:
# Keep the notebook output readable by silencing noise from the libraries.
import logging
import warnings

# Suppress every warning; in this notebook they are mostly deprecation notices.
warnings.filterwarnings(action="ignore")

# The matplotlib font manager logs messages that are not relevant for this
# notebook, so only let its errors through.
font_manager_logger = logging.getLogger("matplotlib.font_manager")
font_manager_logger.setLevel(logging.ERROR)

In [None]:
from pandas import read_csv

# Load the HR employee-attrition dataset from the course's public dataset repository.
data = read_csv('https://raw.githubusercontent.com/BPP-Digital-Advanced-Data-Analytics/public_datasets/main/WA_Fn-UseC_-HR-Employee-Attrition.csv')

# Encode the target as integers: 'Yes' -> 1, 'No' -> 0.
# map() states the encoding explicitly instead of relying on replace() to
# downcast the object column, which newer pandas releases deprecate.
# Any label other than 'Yes'/'No' becomes NaN, so astype(int) still fails
# loudly rather than letting an unexpected label reach the model.
data['Attrition'] = data['Attrition'].map({'Yes': 1, 'No': 0}).astype(int)

In [None]:
from pycaret.classification import setup
from sklearn import set_config

# Turn on scikit-learn's metadata routing before the experiment is created.
set_config(enable_metadata_routing=True)

# Features we want to exclude because they are not useful for the prediction.
ignored_columns = ["EmployeeCount", "EmployeeNumber", "Over18", "StandardHours"]

# Initialise the PyCaret classification experiment on our dataframe.
s = setup(
    data=data,
    target="Attrition",  # the feature that we want to predict
    ignore_features=ignored_columns,
    session_id=123,  # fixed session id, so repeated runs use the same value
)

In [None]:
from pycaret.classification import create_model, plot_model, predict_model

thresh = 0.5  # CHANGE THIS above and below 0.5 but ensure it is greater than 0 and less than 1

# Enforce the range stated above, so that an out-of-range value is reported
# immediately with a clear message instead of being passed on to the model.
if not 0 < thresh < 1:
    raise ValueError(f"thresh must be greater than 0 and less than 1, got {thresh!r}")

# Train a Linear Discriminant Analysis classifier with 5-fold cross-validation,
# using `thresh` as the probability threshold for the class label.
lda = create_model(
    'lda',
    probability_threshold=thresh,
    fold=5,
)

# Show the confusion matrix for the trained model.
plot_model(lda, plot='confusion_matrix')

# No data is passed here; per the PyCaret documentation, predict_model then
# scores the hold-out set that setup() put aside.
holdout_pred = predict_model(lda)

In [None]:
# Plot the precision-recall curve of the LDA model.
plot_model(lda, plot="pr")

In [None]:
# Plot the feature importance of the LDA model.
plot_model(lda, plot="feature")

In [None]:
from pycaret.classification import check_fairness

# Run PyCaret's fairness check on the LDA model, treating Gender as the
# sensitive feature.
check_fairness(lda, sensitive_features=["Gender"])