In [182]:
import numpy as np
import pandas as pd
import catboost as cb
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import KMeansSMOTE
from sklearn.metrics import f1_score, classification_report
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from xgboost import XGBClassifier
import lightgbm as lgb
from sklearn.neural_network import MLPClassifier

In [183]:
# 读取数据集
df_train = pd.read_csv("train_10000.csv")
df_val = pd.read_csv("validate_1000.csv")

In [184]:
# 缺失值处理
df_train = df_train.fillna(df_train.mean())
df_val = df_val.fillna(df_val.mean())

In [185]:
# 切分数据集
X_train = np.array(df_train.drop(["label", "sample_id"], axis=1))
y_train = np.array(df_train["label"])

X_val = np.array(df_val.drop(["label", "sample_id"], axis=1))
y_val = np.array(df_val["label"])

In [186]:
# 标准化数据
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.fit_transform(X_val)

In [187]:
# 过采样数据
kmeans_smote = KMeansSMOTE()
X_train_resampled, y_train_resampled = kmeans_smote.fit_resample(X_train_scaled, y_train)

In [188]:
mlp_model = MLPClassifier()

In [189]:
mlp_model.fit(X_train_resampled, y_train_resampled)

In [190]:
y_val_pred = mlp_model.predict(X_val_scaled)
macro_f1 = f1_score(y_val, y_val_pred, average='macro')
print(macro_f1)
print(classification_report(y_val, y_val_pred))

0.7989386908030213
              precision    recall  f1-score   support

           0       0.59      0.91      0.71       176
           1       0.72      0.52      0.60       166
           2       0.69      0.66      0.67       171
           3       0.98      0.92      0.95       169
           4       0.99      0.94      0.96       156
           5       0.99      0.81      0.89       162

    accuracy                           0.79      1000
   macro avg       0.82      0.79      0.80      1000
weighted avg       0.82      0.79      0.80      1000



In [191]:
y_train_pred = mlp_model.predict(X_train_scaled)
macro_f1 = f1_score(y_train, y_train_pred, average='macro')
print(macro_f1)
print(classification_report(y_train, y_train_pred))

1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5144
           1       1.00      1.00      1.00      1062
           2       1.00      1.00      1.00      1613
           3       1.00      1.00      1.00       884
           4       1.00      1.00      1.00       554
           5       1.00      1.00      1.00       743

    accuracy                           1.00     10000
   macro avg       1.00      1.00      1.00     10000
weighted avg       1.00      1.00      1.00     10000

