<a href="https://colab.research.google.com/github/argONNY/Data-processing-and-visualization-tools/blob/main/laba5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer, precision_score
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Normalizer

# Candidate locations of the Faults.NNA table, tried in order: two remote
# mirrors first, then a copy stored under /content.
urls = [
    'https://archive.ics.uci.edu/ml/machine-learning-databases/00198/Faults.NNA',
    'https://archive.ics.uci.edu/ml/machine-learning-databases/steel-plates-faults/Faults.NNA'
]
local_path = '/content/Faults.NNA'

# Maps every source that failed to the exception it raised, so the reason for
# a failed load can be inspected instead of being silently discarded.
load_errors = {}

data = None
for source in [*urls, local_path]:
    try:
        # The file is tab-separated and has no header row.
        data = pd.read_csv(source, delimiter='\t', header=None)
    except Exception as exc:
        # Best-effort fallback: remember why this source failed and try the
        # next one. `Exception` (not a bare `except`) lets KeyboardInterrupt
        # and SystemExit propagate, so the cell can still be interrupted.
        load_errors[source] = exc
    else:
        break

if data is None:
    # Every source failed; the local path is always the last one tried, so its
    # error is chained as the direct cause of the FileNotFoundError.
    raise FileNotFoundError("Файл не найден") from load_errors[local_path]

# Feature / target selection by position: the table is read without a header,
# so columns 14-16 are given readable names and column 27 is used as the label.
X = data.iloc[:, [14, 15, 16]].set_axis(
    ['Edges_Index', 'Empty_Index', 'Square_Index'], axis=1
)
y = data.iloc[:, 27]

print("Распределение классов:")
print(y.value_counts())
# The mean equals the share of class 1 only while the label is coded as 0/1.
print(f"\nДоля класса 1 (Pastry): {y.mean():.4f}")

# Preprocessing lives inside the pipeline so that it is re-fitted on the
# training part of every fold. Fitting the imputer on the full dataset before
# cross-validation would let the held-out fold influence the imputed values.
# `X` is intentionally left untransformed; `model` is the complete pipeline.
imputer = SimpleImputer(strategy='median')
# Normalizer rescales each sample (row), not each feature, by its max-norm.
normalizer = Normalizer(norm='max')
classifier = LogisticRegression(max_iter=1000, random_state=42, class_weight='balanced')
model = make_pipeline(imputer, normalizer, classifier)

skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# Binary labels are scored on the positive class; anything else falls back to
# the support-weighted average over classes.
average = 'binary' if len(np.unique(y)) == 2 else 'weighted'
precision_scorer = make_scorer(precision_score, average=average, zero_division=0)

scores = cross_val_score(model, X, y, cv=skf, scoring=precision_scorer)

for i, score in enumerate(scores, 1):
    print(f"Фолд {i}: {score:.4f}")

print(f"Средняя precision: {scores.mean():.4f}")
print(f"Стандартное отклонение: {scores.std():.4f}")
print(f"Min precision: {scores.min():.4f}")
print(f"Max precision: {scores.max():.4f}")

Распределение классов:
27
0    1783
1     158
Name: count, dtype: int64

Доля класса 1 (Pastry): 0.0814
Фолд 1: 0.1000
Фолд 2: 0.1875
Фолд 3: 0.1385
Фолд 4: 0.1111
Фолд 5: 0.1481
Фолд 6: 0.1379
Фолд 7: 0.1923
Фолд 8: 0.1346
Фолд 9: 0.1014
Фолд 10: 0.1132
Средняя precision: 0.1365
Стандартное отклонение: 0.0310
Min precision: 0.1000
Max precision: 0.1923
