<a href="https://colab.research.google.com/github/UtwoA/Introduction_to_ML/blob/main/ml3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [34]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report


In [35]:
# Labels for the columns of the data file, in file order.
column_names = [
    'name',
    'hobby',
    'age',
    'educational_level',
    'marital_status',
    'class',
]

# header=None tells pandas the file has no header row, so the labels above
# are applied to the columns instead.
df = pd.read_csv('hayes-roth.data', names=column_names, header=None)

# Plain-text preview of the top of the frame.
print("Первые 5 строк датасета:")
print(df.head())

Первые 5 строк датасета:
   name  hobby  age  educational_level  marital_status  class
0    92      2    1                  1               2      1
1    10      2    1                  3               2      2
2    83      3    1                  4               1      3
3    61      2    4                  2               2      3
4   107      1    1                  3               4      3


In [36]:
# Predictor columns; 'name' and 'class' are deliberately not part of this list.
feature_cols = ['hobby', 'age', 'educational_level', 'marital_status']

# Feature matrix (selected columns) and target vector ('class' column).
X, y = df[feature_cols], df['class']

In [37]:
# Split settings: 30% of the rows go to the test part, the shuffle is seeded
# for reproducibility, and stratify=y keeps class proportions similar in
# both parts.
split_options = dict(test_size=0.3, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [38]:
# Two-step estimator: scale the features, then classify with an SVM.
# The step names ('scaler', 'classifier') are the prefixes used when tuning
# step parameters, e.g. 'classifier__C'.
# NOTE(review): StandardScaler treats every feature as numeric; if these
# integer-coded columns are actually categorical, one-hot encoding may be a
# better fit — confirm against the dataset description.
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),   # feature standardisation
        ('classifier', SVC()),          # SVM classifier
    ]
)

In [39]:
# One sub-grid per kernel, so each kernel is only combined with the
# hyperparameters listed alongside it. The 'classifier__' prefix routes each
# setting to the 'classifier' step of the pipeline.
param_grid = [
    # Linear kernel: only C is tuned.
    {
        'classifier__kernel': ['linear'],
        'classifier__C': [0.1, 1, 10, 100],
    },
    # RBF kernel: C and gamma.
    {
        'classifier__kernel': ['rbf'],
        'classifier__C': [0.1, 1, 10, 100],
        'classifier__gamma': ['scale', 'auto', 0.1, 0.01],
    },
    # Polynomial kernel: degree, C and gamma.
    {
        'classifier__kernel': ['poly'],
        'classifier__degree': [2, 3, 4],
        'classifier__C': [0.1, 1, 10],
        'classifier__gamma': ['scale', 'auto'],
    },
    # Sigmoid kernel: C and gamma.
    {
        'classifier__kernel': ['sigmoid'],
        'classifier__C': [0.1, 1, 10],
        'classifier__gamma': ['scale', 'auto'],
    },
]

# Exhaustive search over the grid with 5-fold cross-validation, ranked by
# accuracy; n_jobs=-1 asks for all available processors.
grid_search = GridSearchCV(
    pipeline,
    param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
)

In [40]:
# Run the search on the training split only; fit() returns the search object
# itself, so the winning pipeline can be read straight off the result.
# With the default refit=True, best_estimator_ has been refit on all of X_train.
best_model = grid_search.fit(X_train, y_train).best_estimator_

# Report the winning hyperparameters and their mean cross-validated score.
print("\nЛучшие параметры модели:", grid_search.best_params_)
print("Лучшая точность кросс-валидации:", grid_search.best_score_)


Лучшие параметры модели: {'classifier__C': 10, 'classifier__gamma': 0.1, 'classifier__kernel': 'rbf'}
Лучшая точность кросс-валидации: 0.8473684210526315


In [41]:
# Predict labels for the training and test splits, in that order.
y_train_pred, y_test_pred = (
    best_model.predict(features) for features in (X_train, X_test)
)

In [42]:
def print_metrics(y_true, y_pred, dataset_name):
    """Print accuracy and macro-averaged precision, recall and F1 for one split.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        dataset_name: Text inserted into the header line to identify the split.

    Returns:
        None. The report is written to stdout: a blank line, a header, one
        line per metric, and a 40-character dashed separator.
    """
    macro = {'average': 'macro'}
    # (label, metric function, extra keyword arguments), in print order.
    report_rows = (
        ("Accuracy:", accuracy_score, {}),
        ("Precision (macro):", precision_score, macro),
        ("Recall (macro):", recall_score, macro),
        ("F1-score (macro):", f1_score, macro),
    )

    print(f"\nМетрики для {dataset_name}:")
    # Each metric is computed right before its line is printed, so the
    # evaluation order matches the print order.
    for label, metric, extra in report_rows:
        print(label, metric(y_true, y_pred, **extra))
    print("-" * 40)

In [43]:
# Report the same metrics for the training split and then the test split, so
# the two can be compared side by side.
print_metrics(
    y_true=y_train,
    y_pred=y_train_pred,
    dataset_name="обучающей выборки",
)
print_metrics(
    y_true=y_test,
    y_pred=y_test_pred,
    dataset_name="тестовой выборки",
)


Метрики для обучающей выборки:
Accuracy: 0.9130434782608695
Precision (macro): 0.9284946236559138
Recall (macro): 0.9243386243386243
F1-score (macro): 0.924508240297714
----------------------------------------

Метрики для тестовой выборки:
Accuracy: 0.825
Precision (macro): 0.85
Recall (macro): 0.85
F1-score (macro): 0.8494623655913979
----------------------------------------


In [44]:
# Per-class precision / recall / F1 breakdown for the test split.
# The header is printed before the report is computed, then the report text.
print("\nClassification report (тестовая выборка):")
test_report = classification_report(y_test, y_test_pred)
print(test_report)


Classification report (тестовая выборка):
              precision    recall  f1-score   support

           1       0.80      0.75      0.77        16
           2       0.75      0.80      0.77        15
           3       1.00      1.00      1.00         9

    accuracy                           0.82        40
   macro avg       0.85      0.85      0.85        40
weighted avg       0.83      0.82      0.82        40

