In [None]:
# ===== Pacotes / Packages =====
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import accuracy_score, confusion_matrix, ConfusionMatrixDisplay

In [None]:
# ===== 1. Read the data from an .XLSX workbook =====
# Run either this cell or the .CSV cell, depending on the input file format.
dados = pd.read_excel(io="NAME.xlsx")
print(dados.head(n=5))  # preview of the first five rows

In [None]:
# === Read the data from a .CSV file ===
# The field separator is passed explicitly so it is easy to change
# (e.g. to ';') for files exported with a different delimiter.
dados = pd.read_csv(filepath_or_buffer='NAME.csv', sep=',')
print(dados.head(n=5))  # preview of the first five rows

In [None]:
# ===== 2. Split the table into class labels and numeric predictors =====
# The class must be in the first column; every other column is a predictor.
classe = dados.iloc[:, 0].astype(str)  # force string/categorical labels
X = dados.iloc[:, 1:]  # predictors

# Ensure predictors are numeric: values that cannot be parsed become NaN.
X = X.apply(pd.to_numeric, errors='coerce')

# Remove constant columns.
X = X.loc[:, X.nunique() > 1]

# Remove columns that contain NA (including those introduced by the coercion).
X = X.dropna(axis=1)

# Recombine with the class, keeping the class in the FIRST column.
# The previous version appended "Classe" as the last column, so re-running
# this cell made `dados.iloc[:, 0]` pick up the first predictor as the label.
# With the class first, the cell can be re-run and yields the same result.
dados = pd.concat(
    [classe.rename("Classe"), X.drop(columns="Classe", errors="ignore")],
    axis=1,
)

# ===== Train/test split =====
# 70/30 split, stratified so each class keeps its proportion in both parts;
# the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, classe, test_size=0.3, random_state=1234, stratify=classe
)

print("Train:", X_train.shape)
print("Test:", X_test.shape)

In [None]:
# ===== Fit the LDA model on the training partition =====
lda = LDA().fit(X_train, y_train)

# ===== Predict both partitions =====
pred_train, pred_test = lda.predict(X_train), lda.predict(X_test)

# Report the accuracy on each partition, training first.
for rotulo, y_real, y_previsto in (
    ("Training Accuracy:", y_train, pred_train),
    ("Test Accuracy:", y_test, pred_test),
):
    print(rotulo, accuracy_score(y_real, y_previsto))

In [None]:
# ===== 6. Discrimination chart =====
# Project every observation in X (training and test rows alike) onto the
# discriminant axes of the fitted model.
X_lda = lda.transform(X)
num_ld = X_lda.shape[1]
tem_dois_eixos = num_ld >= 2

# Collect the available axes first, then append the class label column.
eixos = {"LD1": X_lda[:, 0]}
if tem_dois_eixos:
    eixos["LD2"] = X_lda[:, 1]
lda_df = pd.DataFrame({**eixos, "Classe": classe})

fig, ax = plt.subplots(figsize=(8,6))
if tem_dois_eixos:
    # Two or more axes: scatter of the first two, coloured by class.
    sns.scatterplot(data=lda_df, x="LD1", y="LD2", hue="Classe",
                    alpha=0.7, s=60, ax=ax)
    ax.set_title("Discrimination via LDA (LD1 vs LD2)")
else:
    # A single axis: compare the per-class density along LD1.
    sns.kdeplot(data=lda_df, x="LD1", hue="Classe", fill=True, alpha=0.5, ax=ax)
    ax.set_title("Discrimination via LDA (just LD1)")
plt.show()

In [None]:
# ===== 7. Loadings =====
# Per-variable coefficients of each discriminant axis, read from the fitted
# model's `scalings_`; rows are paired with the columns of X below.
loadings = lda.scalings_
num_ld = loadings.shape[1]

if num_ld >= 2:
    # Name the index before reset_index() so the label column is always
    # called "Variavel", even when X.columns carries its own name (in which
    # case reset_index() would not produce a column named "index").
    loadings_df = (
        pd.DataFrame(loadings[:, :2], columns=["LD1", "LD2"], index=X.columns)
        .rename_axis("Variavel")
        .reset_index()
    )

    # One arrow per variable from the origin to its (LD1, LD2) coefficients,
    # with the variable name written at the arrow tip.
    plt.figure(figsize=(8,6))
    for row in loadings_df.itertuples(index=False):
        plt.arrow(0, 0, row.LD1, row.LD2,
                  color="blue", alpha=0.7, head_width=0.02)
        plt.text(row.LD1, row.LD2, row.Variavel,
                 color="red", ha="center", va="center")
    plt.axhline(0, color="gray", linestyle="--")
    plt.axvline(0, color="gray", linestyle="--")
    plt.title("Loadings das variáveis (LD1 vs LD2)")
    plt.xlabel("LD1")
    plt.ylabel("LD2")
    plt.grid(True, linestyle="--", alpha=0.6)
    plt.show()

else:
    # Single discriminant axis: horizontal bars sorted from the largest to
    # the smallest coefficient.
    loadings_df = pd.DataFrame({
        "LD1": loadings[:, 0],
        "Variavel": X.columns
    }).sort_values("LD1", ascending=False)

    plt.figure(figsize=(8,6))
    # The label column is "Variavel"; the previous y="Feature" referenced a
    # column that does not exist in loadings_df and made seaborn raise.
    sns.barplot(data=loadings_df, y="Variavel", x="LD1", color="steelblue")
    plt.title("Feature loadings (just LD1)")
    plt.show()


In [None]:
# ---- Confusion matrix for the test set ----
# Use every class label found in the full data set, in sorted order, so the
# matrix keeps a row/column even for a class absent from the test partition.
rotulos = np.unique(classe)
cm = confusion_matrix(y_test, pred_test, labels=rotulos)

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=rotulos)
disp.plot(cmap="Blues", values_format="d")  # "d": show counts as integers
plt.title("Confusion Matrix - Test")
plt.show()