In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
# Load the pickled test features, training features, and training labels
# (read in that order).
# NOTE(review): pd.read_pickle can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
X_test, X_train, y_train = map(
    pd.read_pickle,
    ('./input/X_test.pkl', './input/X_train.pkl', './input/y_train.pkl'),
)

In [2]:
from sklearn.preprocessing import MinMaxScaler, minmax_scale as mm

# Learn the per-feature min/max from the training features ONLY, then apply
# that same mapping to both sets. Scaling each set with its own min/max (as
# two independent minmax_scale calls would) maps the same raw value to
# different scaled values in train and test, so thresholds learned on the
# training data would not transfer to the test data.
# Test values may fall slightly outside [0, 1] when they exceed the range
# seen in training; that is expected.
feature_scaler = MinMaxScaler().fit(X_train)
X_train = feature_scaler.transform(X_train)
X_test = feature_scaler.transform(X_test)

In [3]:
import warnings 
# Suppress every warning category for the rest of the session.
# NOTE(review): together with error_score=0 in the grid search below, this
# hides warnings that would otherwise be emitted when a parameter combination
# fails to fit — consider narrowing the filter.
warnings.filterwarnings("ignore")
from sklearn.model_selection import GridSearchCV

# Helper that grid-searches hyperparameters and returns the best refitted model.
def train_optimal_classifier(clf, param, X=None, y=None):
    """Run a 5-fold grid search over ``param`` and return the best estimator.

    Parameters
    ----------
    clf : estimator
        Unfitted scikit-learn compatible classifier to tune.
    param : dict
        Parameter grid passed to ``GridSearchCV`` as ``param_grid``.
    X : array-like, optional
        Training features. Defaults to the notebook-level ``X_train`` so
        existing two-argument calls keep working.
    y : array-like, optional
        Training labels. Defaults to the notebook-level ``y_train``.

    Returns
    -------
    estimator
        ``best_estimator_`` of the search, i.e. the classifier refitted on all
        of ``X``/``y`` with the best parameter combination (``refit=True``).

    Notes
    -----
    Candidates are ranked by weighted F1. A parameter combination whose fit
    raises is scored 0 (``error_score=0``) instead of aborting the search.
    The best parameters and best mean cross-validated score are printed.
    """
    # Fall back to the notebook globals only when the caller passes nothing,
    # so the function no longer depends on hidden state when data is supplied.
    if X is None:
        X = X_train
    if y is None:
        y = y_train

    grid_search = GridSearchCV(
        clf,
        param_grid=param,
        cv=5,
        verbose=0,
        scoring='f1_weighted',
        error_score=0,
        refit=True,
        n_jobs=-1,
    )
    grid_search.fit(X, y)
    print("Best parameters")
    print(grid_search.best_params_)
    print('best F1: ')
    print(grid_search.best_score_)
    return grid_search.best_estimator_

In [4]:
# Tune a random forest over number of trees, tree depth, and max_features.
# The call is the cell's last expression, so the refitted best estimator it
# returns is displayed as the cell output.
param_dic = dict(
    n_estimators=[50, 200, 500, 1000],
    max_depth=[3, 7],
    max_features=[0.7, 0.9],
)
rf = RandomForestClassifier(n_jobs=-1)
train_optimal_classifier(rf, param_dic)

Best parameters
{'max_features': 0.7, 'n_estimators': 50, 'max_depth': 3}
best F1: 
0.592272414475


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=3, max_features=0.7, max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=50, n_jobs=-1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)

In [5]:
# Refit the random forest with the best hyperparameters reported by the grid
# search (n_estimators=50, max_depth=3, max_features=0.7) and predict the test
# set. The previous value n_estimators=20 was not among the searched values
# and did not match the reported optimum.
rf = RandomForestClassifier(n_estimators=50, max_depth=3, max_features=0.7)
rf.fit(X_train, y_train)
pred = rf.predict(X_test)

In [6]:
# Load the test-set student IDs. Because explicit column names are passed
# (student id, grant amount), the first line of the file is read as data
# rather than as a header.
data = pd.read_csv(
    "./input/studentID_test.txt",
    names=['学生id', '助学金金额'],
    encoding='utf-8',
)

In [7]:
# Store the random-forest predictions in the grant-amount column.
# Assign the flat array directly instead of wrapping it in a DataFrame: a
# DataFrame is aligned on the index and silently fills NaN when the number of
# predictions does not match the number of student IDs, whereas an array of
# the wrong length raises immediately.
data['助学金金额'] = np.ravel(pred)

In [8]:
# Write the student IDs with the random-forest predictions to a CSV file.
# NOTE(review): with the pandas default index=True the row index is written as
# an extra unnamed first column — confirm the expected submission format.
data.to_csv('./Result/RandomForest_Result.csv')

In [12]:
# Tune an XGBoost classifier over number of trees, tree depth, learning rate,
# and per-tree column subsampling. The call is the cell's last expression, so
# the refitted best estimator it returns is displayed as the cell output.
param_dic = dict(
    n_estimators=[50, 200, 500, 1000],
    max_depth=[3, 7, 9],
    learning_rate=[0.05, 0.1, 0.2, 0.3],
    colsample_bytree=[0.7, 0.9],
)
gbdt_cf = xgb.XGBClassifier(n_jobs=-1)
train_optimal_classifier(gbdt_cf, param_dic)

Best parameters
{'n_estimators': 200, 'learning_rate': 0.05, 'colsample_bytree': 0.7, 'max_depth': 3}
best F1: 
0.353738438358


XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=0.7, gamma=0, learning_rate=0.05, max_delta_step=0,
       max_depth=3, min_child_weight=1, missing=None, n_estimators=200,
       n_jobs=-1, nthread=-1, objective='multi:softprob', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=0, silent=True,
       subsample=1)

In [13]:
# Refit XGBoost with the best hyperparameters reported by the grid search,
# then predict the test set.
gbdt_best_params = {
    "n_estimators": 200,
    "max_depth": 3,
    "learning_rate": 0.05,
    "colsample_bytree": 0.7,
}
gbdt_cf = xgb.XGBClassifier(**gbdt_best_params)
gbdt_cf.fit(X_train, y_train)
pred_gbdt = gbdt_cf.predict(X_test)

In [14]:
# Store the XGBoost predictions in the grant-amount column and write them out.
# Assign the flat array directly instead of wrapping it in a DataFrame: a
# DataFrame is aligned on the index and silently fills NaN when the number of
# predictions does not match the number of student IDs, whereas an array of
# the wrong length raises immediately.
data['助学金金额'] = np.ravel(pred_gbdt)
# NOTE(review): with the pandas default index=True the row index is written as
# an extra unnamed first column — confirm the expected submission format.
data.to_csv('./Result/GBDT_Result.csv')