In [5]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the data
data = pd.read_csv('gym_membership.csv')

# Encode categorical (object-dtype) columns as integer codes. Each fitted
# encoder is kept in `label_encoders` so codes can be mapped back to the
# original labels with `inverse_transform`.
label_encoders = {}
for column in data.select_dtypes(include=['object']).columns:
    label_encoders[column] = LabelEncoder()
    data[column] = label_encoders[column].fit_transform(data[column])

# Separate the features from the target variable
X = data.drop('personal_training', axis=1)
y = data['personal_training']

# Split BEFORE scaling so that no statistics of the held-out rows leak into
# preprocessing. Same test_size / random_state as before, so the row
# assignment to train and test is expected to be unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Fit the scaler on the training rows only, then apply it to everything else.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept for any later cell that still reads `X_scaled`; it is now scaled with
# training-set statistics rather than full-dataset statistics.
X_scaled = scaler.transform(X)

# Create the Random Forest model
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# Cross-validation: wrap scaler + model in a pipeline and feed it the unscaled
# training rows, so every fold refits its own scaler on that fold's training
# part only. cross_val_score works on clones, so `rf_model` stays unfitted.
cv_pipeline = make_pipeline(StandardScaler(), rf_model)
cv_scores = cross_val_score(cv_pipeline, X_train_raw, y_train, cv=5)

# Report the cross-validation results
print(f'Cross-Validation Scores: {cv_scores}')
print(f'Average Cross-Validation Score: {cv_scores.mean():.2f}')

# NOTE(review): the recorded run of this cell reports 1.00 on every fold and
# on the test set. A perfect score like that usually means some feature column
# encodes the target directly -- TODO: inspect the columns of `X` (and
# `rf_model.feature_importances_` after fitting) and drop any such column.

# Fit on the full training set and evaluate on the held-out test set
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f'Random Forest Accuracy: {accuracy_rf:.2f}')


Cross-Validation Scores: [1. 1. 1. 1. 1.]
Average Cross-Validation Score: 1.00
Random Forest Accuracy: 1.00
