In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import SMOTE
from sklearn.metrics import f1_score, classification_report

df_train = pd.read_csv('train.csv', index_col=0)
df_test = pd.read_csv('test.csv', index_col=0)

mapping = {'2':2, '3':3, '4':4,'больше': 5}
df_train['doors_count'] = df_train['doors_count'].map(mapping)
df_train['person_count'] = df_train['person_count'].map(mapping)
df_test['doors_count'] = df_test['doors_count'].map(mapping)
df_test['person_count'] = df_test['person_count'].map(mapping)

X = df_train.drop(["class"], axis = 1)
y = df_train["class"]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
sm = SMOTE(random_state=42)
X_train_res, y_train_res = sm.fit_resample(X_train, y_train)
rf_clf.fit(X_train_res, y_train_res)

y_pred_rf = rf_clf.predict(X_test)
f1_rf = f1_score(y_test, y_pred_rf, average='macro')
print("F1 мера Random Forest:", f1_rf)
print(classification_report(y_test, y_pred_rf))

X_test_df = df_test.drop(["class"], axis=1)
df_test["class"] = rf_clf.predict(X_test_df)
df_test.sort_index().to_csv("submit.csv")


F1 мера Random Forest: 0.9284635955831608
              precision    recall  f1-score   support

           0       1.00      0.97      0.98       187
           1       0.92      0.97      0.94        70
           2       0.86      0.86      0.86         7
           3       0.87      1.00      0.93        13

    accuracy                           0.97       277
   macro avg       0.91      0.95      0.93       277
weighted avg       0.97      0.97      0.97       277

