## Discriminant Analysis

- a classification method

In [None]:
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

from metrics import classification_summary
from pandas_util import normalize_columns

In [None]:
# Load the riding-mower data set into a DataFrame.
mower_csv = "./datasets/dmba/RidingMowers.csv"
mower_df = pd.read_csv(mower_csv)
# normalize_columns is a project helper whose return value is ignored here, so it
# presumably rewrites the column names in place -- TODO confirm in pandas_util.
normalize_columns(mower_df)
# Last expression of the cell: shows the first rows as a quick sanity check.
mower_df.head()

In [None]:
# Split the mower data into the class label (y) and the remaining predictor
# columns (X).
y = mower_df["ownership"]
X = mower_df.drop(columns=["ownership"])

# Fit a linear discriminant model; fit() returns the estimator itself, so the
# construction and the fit can be chained.
da_reg = LinearDiscriminantAnalysis().fit(X, y)

# Report the fitted linear decision rule.
print("Coefficients:", da_reg.coef_)
print("Intercept:", da_reg.intercept_)

In [None]:
# Fit a fresh discriminant model on the current X and y.
da_reg = LinearDiscriminantAnalysis().fit(X, y)

# Compute the model outputs for every training row.
scores = da_reg.decision_function(X)
predicted_labels = da_reg.predict(X)
# Column 1 of predict_proba belongs to da_reg.classes_[1]; presumably that is
# the owner class -- TODO confirm against da_reg.classes_.
owner_probability = da_reg.predict_proba(X)[:, 1]

# Attach the outputs to a copy so mower_df itself is left untouched.
result_df = mower_df.copy()
result_df["decision_function"] = scores
result_df["prediction"] = predicted_labels
result_df["p(owner)"] = owner_probability
result_df.head()

In [None]:
# Scatter plot of the rows in result_df: income on the x-axis, lot size on the
# y-axis.
result_df.plot.scatter(x="income", y="lot_size")

## Classifying more than two classes

In [None]:
# Load the accidents data set.
accidents_csv = "./datasets/dmba/accidents.csv"
accidents_df = pd.read_csv(accidents_csv)
# Project helper, return value ignored; presumably renames columns in place --
# TODO confirm in pandas_util.
normalize_columns(accidents_df)

# "max_sev" is the class label; every other column is used as a predictor.
# Note: X and y are rebound here and no longer refer to the mower data.
y = accidents_df["max_sev"]
X = accidents_df.drop(columns=["max_sev"])

lda_reg = LinearDiscriminantAnalysis()
# Kept as the final expression so the notebook displays the fitted estimator.
lda_reg.fit(X, y)

In [None]:
print("Coefficients and intercepts")
# Build one table with a column per class: the first row ("constant") holds the
# intercepts and the following rows hold the coefficient of each predictor.
# DataFrame.append was removed in pandas 2.0, so the two parts are stacked with
# pd.concat instead.
fct = pd.concat(
    [
        pd.DataFrame(
            [lda_reg.intercept_], columns=lda_reg.classes_, index=["constant"]
        ),
        pd.DataFrame(
            lda_reg.coef_.T,
            columns=lda_reg.classes_,
            # Label rows with the columns the model was fitted on (X) rather
            # than assuming the target is the last column of accidents_df.
            index=X.columns,
        ),
    ]
)
fct

In [None]:
# Summarise how well the fitted model classifies the rows it was trained on.
training_predictions = lda_reg.predict(X)
classification_summary(y_true=y, y_pred=training_predictions)

## Classification scores, membership probabilities and classifications for three-class injury training dataset

In [None]:
# Side-by-side table for every training row: predicted and actual class, the
# per-class decision scores, and the per-class membership probabilities.
result = pd.concat(
    [
        pd.DataFrame({"classification": lda_reg.predict(X), "actual": y}),
        pd.DataFrame(
            lda_reg.decision_function(X),
            columns=[f"Score {cls}" for cls in lda_reg.classes_],
            # Reuse X's index so the column-wise concat aligns row for row even
            # when the data does not carry a default RangeIndex.
            index=X.index,
        ),
        pd.DataFrame(
            lda_reg.predict_proba(X),
            columns=[f"Propensity {cls}" for cls in lda_reg.classes_],
            index=X.index,
        ),
    ],
    axis=1,
)
# Use fully qualified option names: the bare "precision" key is ambiguous on
# newer pandas versions (it also matches "styler.format.precision") and raises
# OptionError.
pd.set_option("display.precision", 2)
pd.set_option("display.chop_threshold", 0.01)
print(result.head())