In [20]:
import pandas as pd # pandas -- data reading and manipulation 
import xgboost as xgb

In [21]:
# dataframe - python pandas object that holds data and gives you access to methods to manipulate that data 
df = pd.read_csv('student-mat.csv', sep=';')

In [22]:
# manipulating G3 (end of year performance) to separate numerical performance into 
# average, average, below average students. So this line is creating a new column in our dataframe that stores
# what type of student this person is 
df['G3 perf'] = df['G3'].map(lambda x: 0 if x > 15 else 1 if x > 10 else 2)

In [23]:
feature_cols = ['sex', 'age', 'address', 'famsize', 'Pstatus', 'Medu', 'Fedu','Mjob',
                'Fjob', 'reason', 'guardian', 'traveltime', 'studytime', 'failures', 'schoolsup',
               'famsup', 'paid', 'activities', 'nursery', 'higher', 'internet', 'romantic', 'famrel',
               'freetime', 'goout', 'Dalc', 'Walc', 'health', 'absences']
numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']

# df = df.select_dtypes(include=numerics)

train = df.sample(frac=0.8)
test = df.drop(train.index)

train_features = train[feature_cols].select_dtypes(include=numerics).as_matrix()
train_labels = train['G3 perf'].as_matrix()

test_features = test[feature_cols].select_dtypes(include=numerics).as_matrix()
test_labels = test['G3 perf'].as_matrix()

In [24]:
dtrain = xgb.DMatrix(train_features, label=train_labels)
dtest = xgb.DMatrix(test_features, label=test_labels)

In [28]:
params = {'bst:max_depth':4, 'bst:eta':.01, 'objective':'multi:softmax', 'num_class': 3}
params['eval_metric'] = 'merror'
evallist  = [(dtrain, 'train'), (dtest, 'eval')]

num_round = 40
bst = xgb.train(params, dtrain, num_round, evallist)

[0]	train-merror:0.306962	eval-merror:0.531646
[1]	train-merror:0.272152	eval-merror:0.481013
[2]	train-merror:0.218354	eval-merror:0.468354
[3]	train-merror:0.151899	eval-merror:0.455696
[4]	train-merror:0.123418	eval-merror:0.468354
[5]	train-merror:0.129747	eval-merror:0.481013
[6]	train-merror:0.120253	eval-merror:0.443038
[7]	train-merror:0.10443	eval-merror:0.443038
[8]	train-merror:0.101266	eval-merror:0.443038
[9]	train-merror:0.098101	eval-merror:0.468354
[10]	train-merror:0.091772	eval-merror:0.481013
[11]	train-merror:0.091772	eval-merror:0.481013
[12]	train-merror:0.060127	eval-merror:0.481013
[13]	train-merror:0.056962	eval-merror:0.493671
[14]	train-merror:0.060127	eval-merror:0.493671
[15]	train-merror:0.050633	eval-merror:0.493671
[16]	train-merror:0.047468	eval-merror:0.481013
[17]	train-merror:0.050633	eval-merror:0.506329
[18]	train-merror:0.041139	eval-merror:0.481013
[19]	train-merror:0.041139	eval-merror:0.468354
[20]	train-merror:0.041139	eval-merror:0.468354
[21