# In [1]:
import numpy as np
from sklearn.svm import LinearSVC
from sklearn import svm
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost.sklearn import XGBClassifier

# Randomly split the indices 0..n-1 into k folds of (nearly) equal size.
def get_splits(n, k):
    """Randomly partition the indices ``0 .. n-1`` into ``k`` folds.

    Every index appears in exactly one fold. Fold sizes differ by at most
    one: the first ``n % k`` folds hold ``n // k + 1`` indices and the
    remaining folds hold ``n // k``. Randomness comes from NumPy's global
    random state, so ``np.random.seed`` makes the result reproducible.

    Args:
        n: Number of indices to split (non-negative integer).
        k: Number of folds (positive integer).

    Returns:
        A list of ``k`` lists of Python ints.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``n`` is negative.
    """
    if k < 1:
        raise ValueError("k must be a positive integer, got %r" % (k,))
    if n < 0:
        raise ValueError("n must be non-negative, got %r" % (n,))

    # Permute *all* indices in one step so the n % k leftover indices are
    # randomised too, instead of always being appended to the first folds.
    shuffled = np.random.permutation(n)

    # array_split tolerates uneven division and hands the extra elements to
    # the leading folds.
    return [fold.tolist() for fold in np.array_split(shuffled, k)]

def my_cross_val(method, X, y, k):
    """Estimate a classifier's error rate with k-fold cross-validation.

    The sample indices are randomly partitioned into ``k`` folds by
    ``get_splits``. Each fold is used once as the test set while the
    classifier is fitted on the remaining ``k - 1`` folds.

    Args:
        method: Name of the classifier to evaluate. One of 'LinearSVC',
            'SVC', 'LogisticRegression', 'RandomForestClassifier' or
            'XGBClassifier'.
        X: 2-D array indexed as ``X[rows, :]``; one row per sample. Any
            number of feature columns is accepted.
        y: 1-D array of labels, one per row of ``X``.
        k: Number of folds.

    Returns:
        A 1-D float array of length ``k`` holding the misclassification
        rate (``1 - accuracy``) measured on each fold.

    Raises:
        ValueError: If ``method`` is not a supported name, or if ``k`` is
            not between 2 and the number of samples (every fold must be
            non-empty and leave some data for training).
    """
    # Factories are lazy so that only the requested estimator is built.
    factories = {
        'LinearSVC': lambda: LinearSVC(max_iter=2000),
        # C sets how strongly misclassified training points are penalised.
        'SVC': lambda: svm.SVC(gamma='scale', C=10),
        'LogisticRegression': lambda: LogisticRegression(
            penalty='l2', solver='lbfgs', multi_class='multinomial'),
        'RandomForestClassifier': lambda: RandomForestClassifier(
            max_depth=20, random_state=0, n_estimators=500),
        'XGBClassifier': lambda: XGBClassifier(max_depth=5),
    }
    if method not in factories:
        raise ValueError(
            "unknown method %r; expected one of %s"
            % (method, sorted(factories)))

    n_samples = X.shape[0]
    if not 2 <= k <= n_samples:
        raise ValueError(
            "k must be between 2 and the number of samples (%d), got %r"
            % (n_samples, k))

    clf = factories[method]()
    folds = get_splits(n_samples, k)

    errors = np.empty(k)
    for i, test_idx in enumerate(folds):
        # Train on every fold except the held-out one. Indexing X and y
        # directly keeps their own dtype and column count.
        train_idx = [idx
                     for j, fold in enumerate(folds) if j != i
                     for idx in fold]

        clf.fit(X[train_idx, :], y[train_idx])
        y_pred = clf.predict(X[test_idx, :])
        errors[i] = 1 - np.mean(y_pred == y[test_idx])

    return errors

def my_train_test(method, X, y, pi, k):
    """Estimate a classifier's error rate over repeated random splits.

    For each of ``k`` repetitions the samples are randomly divided into a
    training part and a test part, the classifier is fitted on the
    training part and its misclassification rate is measured on the rest.

    Args:
        method: Name of the classifier to evaluate. One of 'LinearSVC',
            'SVC', 'LogisticRegression', 'RandomForestClassifier' or
            'XGBClassifier'.
        X: 2-D array indexed as ``X[rows, :]``; one row per sample.
        y: 1-D array of labels, one per row of ``X``.
        pi: Fraction of the samples used for training in every split.
            ``None`` selects 0.75.
        k: Number of random splits to evaluate.

    Returns:
        A 1-D float array of length ``k`` holding the misclassification
        rate (``1 - accuracy``) of each split.

    Raises:
        ValueError: If ``method`` is not a supported name, or if ``pi``
            leaves the training set or the test set empty.
    """
    # Factories are lazy so that only the requested estimator is built.
    factories = {
        'LinearSVC': lambda: LinearSVC(max_iter=2000),
        # C sets how strongly misclassified training points are penalised.
        'SVC': lambda: svm.SVC(gamma='scale', C=10),
        'LogisticRegression': lambda: LogisticRegression(
            penalty='l2', solver='lbfgs', multi_class='multinomial'),
        'RandomForestClassifier': lambda: RandomForestClassifier(
            max_depth=20, random_state=0, n_estimators=500),
        'XGBClassifier': lambda: XGBClassifier(max_depth=5),
    }
    if method not in factories:
        raise ValueError(
            "unknown method %r; expected one of %s"
            % (method, sorted(factories)))

    if pi is None:
        pi = 0.75

    n_samples = X.shape[0]
    n_train = int(round(n_samples * pi))
    if not 0 < n_train < n_samples:
        raise ValueError(
            "pi=%r gives %d training samples out of %d; both the training "
            "and the test set must be non-empty" % (pi, n_train, n_samples))

    clf = factories[method]()

    errors = []
    for _ in range(k):
        # One permutation yields disjoint train/test index sets.
        order = np.random.permutation(n_samples)
        train_idx = order[:n_train]
        test_idx = order[n_train:]

        clf.fit(X[train_idx, :], y[train_idx])
        y_pred = clf.predict(X[test_idx, :])
        errors.append(1 - np.mean(y_pred == y[test_idx]))

    return np.array(errors, dtype=float)