<a href="https://colab.research.google.com/github/UtwoA/Introduction_to_ML/blob/main/ml3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

In [17]:
# Location of the comma-separated input file.
file_path = 'iris-1.txt'

# Parse the file into a DataFrame, decoding its bytes as Windows-1251.
df = pd.read_csv(
    file_path,
    encoding='windows-1251',
    sep=',',
)

In [20]:
# Give the Russian-named columns English names. The frame is modified in
# place, so `df` keeps referring to the same object afterwards.
df.rename(
    inplace=True,
    columns=dict([
        ('Длина чашелистика', 'sepal length'),
        ('Ширина чашелистика', 'sepal width'),
        ('Длина лепестка', 'petal length'),
        ('Ширина лепестка', 'petal width'),
        ('Класс', 'species'),
    ]),
)


In [21]:
# A label column called 'Iris plant' is also mapped to 'species'.
df.rename(inplace=True, columns={'Iris plant': 'species'})

# The four measurement columns used as model features.
numeric_cols = [
    'sepal length',
    'sepal width',
    'petal length',
    'petal width',
]

# Convert each measurement column to float: go through text first so that a
# decimal comma can be replaced by a decimal point before parsing.
for col in numeric_cols:
    df[col] = (
        df[col]
        .astype(str)
        .str.replace(',', '.')
        .astype(float)
    )

In [22]:
# Feature matrix (the numeric measurement columns) and target (the species label).
X, y = df[numeric_cols], df['species']

In [23]:
# Hold out 30% of the rows as a test set. The split is stratified on the
# target and uses a fixed seed so that it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.3,
)

In [24]:
# Two-step model: standardise the features, then classify with an SVM.
# The step names ('scaler', 'classifier') are what later cells refer to.
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('classifier', SVC()),
    ]
)

In [25]:
# Hyper-parameter candidates, one sub-grid per SVC kernel. Every key carries
# the 'classifier__' prefix, matching the name of the SVC step in the pipeline.
param_grid = []

# Linear kernel: only C is varied.
param_grid.append({
    'classifier__kernel': ['linear'],
    'classifier__C': [0.1, 1, 10, 100],
})

# RBF kernel: C together with gamma (named presets and explicit values).
param_grid.append({
    'classifier__kernel': ['rbf'],
    'classifier__C': [0.1, 1, 10, 100],
    'classifier__gamma': ['scale', 'auto', 0.1, 0.01],
})

# Polynomial kernel: degree, C and gamma.
param_grid.append({
    'classifier__kernel': ['poly'],
    'classifier__degree': [2, 3, 4],
    'classifier__C': [0.1, 1, 10],
    'classifier__gamma': ['scale', 'auto'],
})

# Sigmoid kernel: C and gamma.
param_grid.append({
    'classifier__kernel': ['sigmoid'],
    'classifier__C': [0.1, 1, 10],
    'classifier__gamma': ['scale', 'auto'],
})

In [26]:
# Exhaustive search over param_grid for the pipeline, scored by accuracy with
# 5-fold cross-validation; n_jobs=-1 requests all available processors.
grid_search = GridSearchCV(
    pipeline,
    param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)

In [27]:
# Run the search on the training split only.
grid_search.fit(X=X_train, y=y_train)

# Report the winning parameter combination and its cross-validation score.
print(
    "Лучшие параметры модели:",
    grid_search.best_params_,
)
print(
    "Лучшая точность кросс-валидации:",
    grid_search.best_score_,
)

Лучшие параметры модели: {'classifier__C': 100, 'classifier__kernel': 'linear'}
Лучшая точность кросс-валидации: 0.980952380952381


In [28]:
# Keep a handle on the best pipeline found by the grid search; the following
# cells use it for prediction and for inspecting the fitted SVC step.
best_model = grid_search.best_estimator_

In [29]:
# Predict labels for the training split first, then for the held-out test split.
y_train_pred, y_test_pred = (
    best_model.predict(features) for features in (X_train, X_test)
)

In [30]:
def print_metrics(y_true, y_pred, dataset_name, average='macro'):
    """Print accuracy, precision, recall and F1-score for one set of predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        dataset_name: Text inserted into the header line to identify the data
            the predictions were made on.
        average: Averaging mode forwarded to ``precision_score``,
            ``recall_score`` and ``f1_score`` and echoed in the printed
            labels. The default ``'macro'`` reproduces the previously
            hard-coded behaviour and output exactly.

    Returns:
        None. The report is written to standard output and closed with a
        line of 40 dashes.
    """
    print(f"Метрики для {dataset_name}:")
    # Accuracy takes no averaging mode, so it is reported on its own.
    print("Accuracy:", accuracy_score(y_true, y_pred))

    # The remaining metrics share one call shape; the tuple order fixes the
    # order of the printed lines.
    averaged_metrics = (
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1-score", f1_score),
    )
    for label, metric in averaged_metrics:
        print(f"{label} ({average}):", metric(y_true, y_pred, average=average))

    print("-" * 40)

In [31]:
# Report the same metrics for the training split and then the test split,
# so the two can be compared directly.
print_metrics(
    y_true=y_train,
    y_pred=y_train_pred,
    dataset_name="обучающей выборки",
)
print_metrics(
    y_true=y_test,
    y_pred=y_test_pred,
    dataset_name="тестовой выборки",
)

Метрики для обучающей выборки:
Accuracy: 1.0
Precision (macro): 1.0
Recall (macro): 1.0
F1-score (macro): 1.0
----------------------------------------
Метрики для тестовой выборки:
Accuracy: 0.9333333333333333
Precision (macro): 0.9444444444444445
Recall (macro): 0.9333333333333332
F1-score (macro): 0.9326599326599326
----------------------------------------


In [32]:
# Per-class precision / recall / F1 breakdown for the held-out test split.
print("Classification report (тестовая выборка):")
print(
    classification_report(
        y_true=y_test,
        y_pred=y_test_pred,
    )
)

Classification report (тестовая выборка):
                 precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        15
Iris-versicolor       0.83      1.00      0.91        15
 Iris-virginica       1.00      0.80      0.89        15

       accuracy                           0.93        45
      macro avg       0.94      0.93      0.93        45
   weighted avg       0.94      0.93      0.93        45



In [33]:
# Pull the fitted SVC out of the best pipeline by its step name and show how
# many support vectors it kept for each class.
svc_step = best_model.named_steps['classifier']
print(
    "Число опорных векторов по классам:",
    svc_step.n_support_,
)

Число опорных векторов по классам: [2 3 4]
