In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the iris CSV into a DataFrame and preview its first three rows.
df = pd.read_csv(filepath_or_buffer='../DATA/iris.csv')
df.head(n=3)

In [None]:
# Summary of the loaded DataFrame (output of pandas `DataFrame.info`).
df.info()

In [None]:
# Descriptive statistics for the DataFrame's columns.
df.describe()

In [None]:
# Number of rows per species label.
df.loc[:, 'species'].value_counts()

In [None]:
# Bar chart of the number of rows for each species.
sns.countplot(data=df, x='species')

In [None]:
# Petal length against petal width, coloured by species.
sns.scatterplot(
    data=df,
    x='petal_length',
    y='petal_width',
    hue='species',
)

In [None]:
# Pairwise plots of the numeric columns, coloured by species.
sns.pairplot(data=df, hue='species')

In [None]:
# Annotated correlation matrix of every column except the species label.
sns.heatmap(
    data=df.drop(columns='species').corr(),
    annot=True,
)

In [None]:
# Separate the target column (species) from the feature columns.
y = df.loc[:, 'species']
X = df.drop(columns='species')

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=101,
)

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Learn the scaling parameters from the training features only, then apply
# that same fitted transformation to both the training and the test split.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
# Logistic regression with the SAGA solver and a generous iteration cap.
# SAGA shuffles the data while fitting, so random_state is pinned to make
# repeated runs (and therefore the grid-search result) reproducible.
log_model = LogisticRegression(solver='saga', max_iter=5000, random_state=101)

In [None]:
# Hyperparameter search space. Keys carry the `estimator__` prefix because
# they are written for a wrapper that exposes the base model as `estimator`.
penalty = ['l1', 'l2', 'elasticnet']
l1_ratio = np.linspace(0, 1, 20)
C = np.logspace(0, 10, 20)

# One sub-grid per penalty type; only elasticnet additionally searches l1_ratio.
param_grid = [
    {
        'estimator__penalty': [kind],
        'estimator__C': C,
        **({'estimator__l1_ratio': l1_ratio} if kind == 'elasticnet' else {}),
    }
    for kind in penalty
]

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.multiclass import OneVsRestClassifier

In [None]:
# Grid search (5-fold CV) over a one-vs-rest wrapper around the base model.
model = GridSearchCV(
    estimator=OneVsRestClassifier(estimator=log_model),
    param_grid=param_grid,
    cv=5,
)

In [None]:
# Run the grid search on the training split.
model.fit(X_train, y_train)

In [None]:
# Print the best parameter combination found by the search.
print("Лучшие параметры:", model.best_params_)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, ConfusionMatrixDisplay

In [None]:
# Predict labels for the held-out test features.
y_predicted = model.predict(X_test)

In [None]:
# Display the predictions.
y_predicted

In [None]:
# Confusion matrix of true test labels versus predictions.
cm = confusion_matrix(y_true=y_test, y_pred=y_predicted)
cm

In [None]:
# Accuracy of the predictions on the test split.
accuracy_score(y_true=y_test, y_pred=y_predicted)

In [None]:
# Take the tick labels from the fitted search object rather than a hard-coded
# list, so they always match the classes the classifier was trained on.
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=model.classes_)
disp.plot(cmap='Blues')
plt.title('Confusion Matrix')

plt.show()

In [None]:
# Text report of the per-class classification metrics on the test split.
print(classification_report(y_true=y_test, y_pred=y_predicted))

In [None]:
from sklearn.metrics import roc_curve, auc

In [None]:
def plot_multiclass_roc(clf, X_test, y_test, n_classes, class_labels=None, figsize=(5, 5)):
    """Plot a one-vs-rest ROC curve (with its AUC) for every class.

    Parameters
    ----------
    clf : object
        Fitted classifier exposing ``decision_function``.
    X_test : array-like
        Features passed to ``clf.decision_function``.
    y_test : array-like
        True labels. They are one-hot encoded with ``pd.get_dummies`` and
        column ``i`` of that encoding is paired with column ``i`` of the
        scores (presumably both follow sorted label order - verify for
        custom estimators).
    n_classes : int
        Number of distinct classes expected in ``y_test``.
    class_labels : sequence, optional
        Legend names, one per class. Any indexable sequence works (list,
        tuple, NumPy array, ``clf.classes_``). When ``None`` or empty the
        class index is shown instead.
    figsize : tuple, optional
        Figure size forwarded to ``plt.subplots``.

    Raises
    ------
    ValueError
        If the number of distinct labels in ``y_test`` differs from
        ``n_classes``.
    """
    # Per-class confidence scores from the classifier.
    y_score = np.asarray(clf.decision_function(X_test))
    if y_score.ndim == 1:
        # Binary estimators return a single column (score of the second
        # class); mirror it so that every class has its own score column.
        y_score = np.column_stack([-y_score, y_score])

    # Sanity output about the inputs.
    print("Unique labels in y_test:", np.unique(y_test))
    print("Shape of y_score:", y_score.shape)

    # One-hot encode the true labels.
    y_test_dummies = pd.get_dummies(y_test, drop_first=False).values
    if y_test_dummies.shape[1] != n_classes:
        raise ValueError(f"Expected {n_classes} classes, but got {y_test_dummies.shape[1]}")

    # ROC points and AUC per class, computed before any figure is created.
    fpr = dict()
    tpr = dict()
    roc_auc = dict()
    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test_dummies[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Explicit None/length test: truth-testing an array-like (as in
    # ``if class_labels``) raises ValueError for multi-element arrays.
    has_labels = class_labels is not None and len(class_labels) > 0

    # Draw the chance diagonal and one curve per class.
    fig, ax = plt.subplots(figsize=figsize)
    ax.plot([0, 1], [0, 1], 'k--', label='Random Classifier', linewidth=2)
    for i in range(n_classes):
        label = class_labels[i] if has_labels else i
        ax.plot(fpr[i], tpr[i], label=f'ROC curve (area = {roc_auc[i]:.2f}) for {label}')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC Curves for Multiclass')
    ax.legend(loc='lower right')
    ax.grid(alpha=0.4)
    plt.show()

In [None]:
# Plot the ROC curves. Class count and legend names come from the fitted
# search object so each curve is labelled with its actual class name
# (a plain list is passed because the plotting helper truth-tests it).
plot_multiclass_roc(
    model,
    X_test,
    y_test,
    n_classes=len(model.classes_),
    class_labels=model.classes_.tolist(),
)