# In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier  # 替换GBM为RF
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Load the dataset.
# NOTE: the workbook path is relative to the working directory; make sure it is correct.
data = pd.read_excel('hechuan ube pre 3.xlsx')

# Split the data into features (X) and target variable (Y).
# 'ID', 'los' and 'elos' are excluded from the features; 'elos' is the target.
X = data.drop(['ID', 'los', 'elos'], axis=1)
Y = data['elos']

# Convert Y to a 0/1 label, choosing the threshold by how many distinct values it has.
# Thresholding an already two-valued target at its median collapses every row
# into class 0 whenever the larger value is the majority (median == max), which
# would leave the classifiers with a single class to fit.
if Y.nunique() > 2:
    # More than two distinct values: label rows above the median as 1.
    Y = (Y > Y.median()).astype(int)
else:
    # At most two distinct values: the larger value becomes 1, the smaller 0.
    Y = (Y > Y.min()).astype(int)

# Hold out 10% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.1,
    random_state=42,
)

# Standardize the features: the scaler is fitted on the training rows only and
# then applied unchanged to both the training and the test partition.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build the three classifiers and fit each one on the standardized training data.

# Random forest (used here in place of the earlier GBM model).
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train_scaled, Y_train)

# L1-penalised (lasso) logistic regression.
lasso_lr_model = LogisticRegression(
    penalty='l1',
    solver='liblinear',
    C=0.1,
    random_state=42,
).fit(X_train_scaled, Y_train)

# XGBoost classifier.
xgb_model = XGBClassifier(
    n_estimators=50,
    max_depth=3,
    random_state=42,
).fit(X_train_scaled, Y_train)

# Calculate feature importances: one five-row table per model.
def _top_five(scores, score_column):
    """Pair each column of X with its score and return the five highest-scoring rows.

    scores: one value per column of X, in the same order as X.columns.
    score_column: name given to the score column of the returned DataFrame.
    """
    table = pd.DataFrame({'Feature': X.columns, score_column: scores})
    return table.sort_values(by=score_column, ascending=False).head(5)


# RF (replaces the previous GBM model)
feature_importance_rf = _top_five(rf_model.feature_importances_, 'Importance')

# Lasso Logistic Regression: ranked by absolute coefficient value.
# Note: coef_ is indexed with [0] to take its first row of coefficients.
feature_importance_lasso_lr = _top_five(np.abs(lasso_lr_model.coef_[0]), 'Coefficient')

# XGB
feature_importance_xgb = _top_five(xgb_model.feature_importances_, 'Importance')

# Plot the top features of each model side by side (one row, three panels).
plt.figure(figsize=(10, 6))

# Each entry: (table to draw, column holding the bar heights, panel title).
# The value column name doubles as the y-axis label.
panel_specs = (
    (feature_importance_rf, 'Importance', 'RF Feature Importance'),
    (feature_importance_lasso_lr, 'Coefficient', 'Lasso LR Coefficient'),
    (feature_importance_xgb, 'Importance', 'XGB Feature Importance'),
)

for panel_no, (top_features, value_column, panel_title) in enumerate(panel_specs, start=1):
    plt.subplot(1, 3, panel_no)
    plt.bar(top_features['Feature'], top_features[value_column])
    plt.xlabel('Feature')
    plt.ylabel(value_column)
    plt.title(panel_title)
    plt.xticks(rotation=90)  # draw the feature names vertically

plt.tight_layout()
plt.show()

# Print the three top-5 tables in the same order as the plot panels: RF, Lasso LR, XGB.
for top_table in (
    feature_importance_rf,
    feature_importance_lasso_lr,
    feature_importance_xgb,
):
    print(top_table)