In [1]:
import xgboost as xgb
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.metrics import r2_score, auc, accuracy_score

In [2]:
df = pd.read_csv('nndb_flat_out.csv')
one_hot_encoder = OneHotEncoder()
foodgroup_encoded = one_hot_encoder.fit_transform(df[['foodgroup']]).toarray()

In [3]:
feature_columns = ['energy_kcal', 'protein_g', 'fat_g','carb_g','sugar_g','fiber_g','vita_mcg'] 
X = df[feature_columns]
y = foodgroup_encoded.argmax(axis=1)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.9, random_state=42)
#print(y_train)
#print(X_train.shape, X_test.shape)

In [5]:
# 转换为 DMatrix 格式
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test)

In [6]:
# 设置参数
params = {
    'objective': 'multi:softmax',  # 多分类问题
    'num_class': 25,  # 类别数量
    'max_depth': 4,  # 树的最大深度
    'eta': 0.1,  # 学习率
    'seed': 42
}


In [7]:
# 训练模型
num_round = 50  # 迭代次数
bst = xgb.train(params, dtrain, num_boost_round=num_round)

In [8]:
# 预测
preds = bst.predict(dtest)
#print(preds)

In [9]:
# 计算准确率
accuracy = accuracy_score(y_test, preds)
print(f"模型准确率: {accuracy:.2f}")


模型准确率: 0.57


In [10]:
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier

# 创建模型
model = XGBClassifier()

# 定义参数网格
param_grid = {
    'max_depth': [3, 4, 5],
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.1, 0.3, 0.5]
}

# 使用网格搜索
grid_search = GridSearchCV(model, param_grid, scoring='accuracy', cv=3)
grid_search.fit(X_train, y_train)

# 输出最佳参数
print("最佳参数组合：", grid_search.best_params_)


最佳参数组合： {'learning_rate': 0.1, 'max_depth': 4, 'n_estimators': 100}
