In [197]:
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd

In [198]:
# Read only the first 20,000 rows of the CSV so the experiments stay quick.
dataset = pd.read_csv(
    "xgb_n_tmp.csv",
    nrows=20000,
)

In [199]:
# Shuffle the rows once, with a fixed seed so that a fresh
# "Restart Kernel -> Run All" yields the same row order every time.
# The former mid-cell `dataset.head(4)` was removed: only the last expression
# of a cell is displayed, so its result was silently discarded.
dataset = dataset.sample(frac=1, random_state=100).reset_index(drop=True)

# Single feature column and the target column used by every model below.
X = dataset["gene"]
Y = dataset["label"]


In [200]:
# Hold out a fraction `test_size` of the rows for evaluation; the fixed seed
# keeps the split repeatable between runs.
seed = 100
test_size = 0.20
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=test_size,
    random_state=seed,
)

In [201]:
# Gradient-boosted tree classifier trained on the single feature column.
model = xgb.XGBClassifier(
    n_estimators=1000,
    max_depth=8,
    learning_rate=0.05,
    subsample=0.3,
)
# The estimator needs a 2-D (n_samples, n_features) array. Convert the Series
# to NumPy before reshaping: `Series[:, np.newaxis]` is deprecated in
# pandas 1.x and raises in pandas 2.x.
model.fit(np.asarray(X_train).reshape(-1, 1), y_train)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
       colsample_bytree=1, gamma=0, learning_rate=0.05, max_delta_step=0,
       max_depth=8, min_child_weight=1, missing=None, n_estimators=1000,
       n_jobs=1, nthread=None, objective='binary:logistic', random_state=0,
       reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
       silent=True, subsample=0.3)

In [202]:
# Predict on the held-out rows. The Series is converted to a NumPy column
# vector first, because `Series[:, np.newaxis]` is deprecated in pandas 1.x
# and raises in pandas 2.x.
y_pred = model.predict(np.asarray(X_test).reshape(-1, 1))
# Round every prediction to the nearest integer before scoring.
predictions = [round(value) for value in y_pred]

In [203]:
# Fraction of held-out rows whose rounded prediction equals the true label,
# reported as a percentage.
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (100.0 * accuracy))

Accuracy: 48.83%


In [205]:
from sklearn.svm import SVC  

import matplotlib.pyplot as plt
from sklearn import svm, datasets
%matplotlib inline

# RBF-kernel SVM baseline on the same single feature column.
svclassifier = SVC(
    C=2.0,
    cache_size=900,
    class_weight=None,
    coef0=0.0,
    decision_function_shape='ovr',
    degree=3,
    gamma='auto',
    kernel='rbf',
    max_iter=-1,
    probability=False,
    random_state=None,
    shrinking=True,
    tol=0.01,
    verbose=False,
)
# Convert each Series to a NumPy column vector before fitting/predicting:
# `Series[:, np.newaxis]` is deprecated in pandas 1.x and raises in pandas 2.x.
svclassifier.fit(np.asarray(X_train).reshape(-1, 1), y_train)
y_pred = svclassifier.predict(np.asarray(X_test).reshape(-1, 1))
print(accuracy_score(y_test, y_pred))

0.49


In [None]:
from sklearn import svm
from sklearn.model_selection import GridSearchCV
def svc_param_selection(X, y, nfolds, X_test, y_test):
    """Grid-search ``C`` and ``gamma`` for an RBF SVC and report test accuracy.

    Parameters
    ----------
    X, y
        Training features and labels handed to ``GridSearchCV.fit``.
    nfolds
        Forwarded unchanged as the ``cv`` argument of ``GridSearchCV``.
    X_test, y_test
        Held-out features and labels used for the printed accuracy.

    Returns
    -------
    dict
        The ``best_params_`` of the fitted grid search.
    """
    Cs = [0.001, 0.01, 0.1, 1, 10]
    gammas = [0.001, 0.01, 0.1, 1]
    param_grid = {'C': Cs, 'gamma': gammas}
    grid_search = GridSearchCV(svm.SVC(kernel='rbf'), param_grid, cv=nfolds)
    grid_search.fit(X, y)
    # Score the search that was just fitted. The previous code predicted with
    # the notebook-global `svclassifier`, so the printed accuracy was unrelated
    # to the tuned model and the function failed on a fresh kernel.
    y_pred = grid_search.predict(X_test)
    print(accuracy_score(y_test, y_pred))
    return grid_search.best_params_
# Pass NumPy column vectors: `Series[:, np.newaxis]` is deprecated in
# pandas 1.x and raises in pandas 2.x.
# Use 5-fold CV rather than 100: the grid has 5 x 4 = 20 candidates, so 100
# folds would mean 2000 SVC fits, which is impractically slow here.
svc_param_selection(
    np.asarray(X_train).reshape(-1, 1),
    y_train,
    5,
    np.asarray(X_test).reshape(-1, 1),
    y_test,
)