<a href="https://colab.research.google.com/github/darshan-hindocha/lab/blob/main/boosting_and_custom_kernel_SVM_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Image Dataset can be found [here](https://drive.google.com/file/d/146Rmq_3LJfyEsPNCvLhw8rCLP_Oxavje/view?usp=sharing)

Movie Review Dataset can be found [here](https://drive.google.com/file/d/1XECfdzosU6p_koY39jnvMAO6GdHnAfCt/view?usp=sharing)

In [None]:

from zipfile import ZipFile
file_name = 'image_dataset.zip'

# Unpack the image archive into the current working directory.
# The handle is deliberately not called `zip`: that name would stay bound after
# the `with` block and shadow the builtin `zip` for every later cell.
with ZipFile(file_name, 'r') as archive:
  archive.extractall()
  print('Done')

Done


#### Image feature extraction code

In [None]:
import numpy as np
import cv2
import glob
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import preprocessing
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold
import nltk
# Fetch the NLTK corpora used by this notebook (the cell output shows the
# download is skipped when the package is already up to date).
nltk.download('wordnet')
nltk.download('stopwords')
from nltk.corpus import stopwords
# English stop-word list; it is stemmed further down before being used for
# the TF-IDF features.
stopwordlist = stopwords.words('english')
import string
from nltk.stem import PorterStemmer

# Module-level Porter stemmer, used when stemming the stop words.
porter = PorterStemmer()
    
def obtain_dataset(folder_name):
    """Load every PNG under ``folder_name/<class>/`` as a HOG feature vector.

    Parameters
    ----------
    folder_name : str
        Directory whose immediate sub-directories are the class folders.

    Returns
    -------
    (X, y) : tuple of np.ndarray
        ``X`` has one row of HOG features per image; ``y`` holds the integer
        class id of each row. Ids are assigned by sorting the class folder
        names, so two dataset folders with the same class names (e.g. a train
        and a test folder) get the same name -> id mapping.

    Raises
    ------
    ValueError
        If no PNG is found, or if an image cannot be decoded.
    """
    import os  # local import: the notebook's import cell does not provide `os`

    hog = cv2.HOGDescriptor()
    #code for obtaining hog feature for one image file name
    # im = cv2.imread(image_filename)
    # h = hog.compute(image)
    # use this to read all images in the three directories and obtain the set of features X and train labels Y
    # you can assume there are three different classes in the image dataset

    # Sorted so the sample order (and therefore any seeded split downstream)
    # does not depend on the filesystem's enumeration order.
    image_filenames = sorted(glob.glob(os.path.join(folder_name, '*', '*.png')))
    if not image_filenames:
        raise ValueError(f"no PNG images found under {folder_name!r}")

    # The class is the image's parent directory, whatever the depth of folder_name.
    class_of = {name: os.path.basename(os.path.dirname(name)) for name in image_filenames}
    labels = {cls: idx for idx, cls in enumerate(sorted(set(class_of.values())))}

    X = []
    y = []
    for image_filename in image_filenames:
        im = cv2.imread(image_filename)
        if im is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError(f"could not read image file: {image_filename}")
        # Flatten: depending on the OpenCV version compute() returns (n,) or (n, 1).
        X.append(np.asarray(hog.compute(im)).ravel())
        y.append(labels[class_of[image_filename]])

    # Stack without assuming a fixed image count or descriptor length;
    # vstack raises if the images produced descriptors of different lengths.
    X = np.vstack(X)
    y = np.array(y).reshape(-1)

    return (X,y)

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


#### Boosting classifier class

In [None]:
class BoostingClassifier:
  """AdaBoost-style ensemble of decision trees trained on weighted resamples.

  Each round draws ``samplesize`` training rows (with replacement, according to
  the current sample weights), fits a decision tree on them, scores the tree on
  the whole training set and turns its error rate ``e`` into a vote weight
  ``alpha = log((1 - e) / e)``. Misclassified rows are then up-weighted by
  ``exp(alpha)`` and correctly classified rows down-weighted by ``exp(-alpha)``.
  Prediction is an alpha-weighted vote over the class labels seen in ``fit``.

  Parameters
  ----------
  feature_reduction : bool
      Apply PCA to the features before boosting.
  scaling : bool
      Standardise the features before (optional) PCA.
  samplesize : int
      Number of rows drawn for each weak learner.
  num_classifiers : int
      Number of weak learners.
  max_tree_depth : int
      ``max_depth`` of each decision tree.
  max_features : int, float or str
      ``max_features`` of each decision tree.
  """

  def __init__(self, feature_reduction = True,scaling=False, samplesize=22,num_classifiers=14,max_tree_depth=10,max_features='sqrt'):
    self.samplesize = samplesize
    self.num_weak_classifiers = num_classifiers
    self.tree_depth = max_tree_depth
    # Honour the constructor argument (it used to be overwritten with 'sqrt').
    self.n_features = max_features
    # Error rates are clipped to [epsilon, 1 - epsilon] so alpha stays finite.
    self.epsilon = 0.001
    self.feature_reduction = feature_reduction
    self.scale = scaling

  def choose_samples(self,N):
    """Draw ``samplesize`` of the ``N`` training rows according to the current weights."""
    sample_indices = np.random.choice(N,size = self.samplesize, replace = True, p = self.weights)
    return self.Xtrain[sample_indices,:], self.ytrain[sample_indices]

  def fit_and_run_model(self,X_bag, y_bag,i):
    """Fit weak learner ``i`` on the bag, store its alpha, return the misclassification mask.

    The mask is a boolean array over the full training set.
    """
    self.classifiers[i] = DecisionTreeClassifier(max_depth = self.tree_depth,max_features=self.n_features,min_samples_split=5)
    self.classifiers[i].fit(X_bag,y_bag)

    y_pred = self.classifiers[i].predict(self.Xtrain)
    misclassified = (y_pred!=self.ytrain)

    # NOTE(review): this is the plain (unweighted) error rate on the training
    # set, as in the original code; textbook AdaBoost uses the weighted error.
    # Clipping keeps a perfect (or perfectly wrong) tree from producing
    # alpha = +/-inf, which would turn every sample weight into NaN.
    total_error = np.clip(np.mean(misclassified), self.epsilon, 1 - self.epsilon)

    self.model_alphas[i] = np.log((1-total_error)/total_error)
    return misclassified

  def reweight(self, misclassified, clf_performance):
    """Up-weight misclassified rows, down-weight the rest, then renormalise."""
    self.weights[misclassified] = self.weights[misclassified]*np.exp(clf_performance)

    correctly_classified = np.invert(misclassified)
    self.weights[correctly_classified] = self.weights[correctly_classified]*np.exp(-clf_performance)

    #normalise
    self.weights = self.weights/self.weights.sum()

  def fit(self, X,y):
    """Train the ensemble on features ``X`` (n_samples, n_features) and labels ``y``."""
    self.Xtrain = np.array(X)
    self.ytrain = np.array(y).reshape(-1)
    N = self.Xtrain.shape[0]

    # Labels seen in training; predict() votes over exactly these.
    self.classes_ = np.unique(self.ytrain)

    self.classifiers = np.empty((self.num_weak_classifiers),dtype=object)
    self.model_alphas = np.empty((self.num_weak_classifiers))

    self.weights = (1/N)*np.ones(N) #equal weights

    if self.scale:
      #Standardise
      self.scaler = preprocessing.StandardScaler().fit(self.Xtrain)
      self.Xtrain = self.scaler.transform(self.Xtrain)
    if self.feature_reduction:
      #PCA
      self.pca = PCA()
      self.Xtrain = self.pca.fit_transform(self.Xtrain,self.ytrain)

    for i in range(self.num_weak_classifiers):
      #choose samples for a weak classifier
      X_bag,y_bag = self.choose_samples(N)

      #run the model, get the performance alpha and the misclassified vector
      misclassified = self.fit_and_run_model(X_bag,y_bag,i)

      #reweight the weights for the next round
      self.reweight(misclassified,self.model_alphas[i])

  def cross_validation(self,X,y,feature_reduction,Scaled,TreeDepth,SampleSize,WeakClassifiers):
    """Return the mean accuracy of a shuffled 10-fold CV for the given settings.

    The settings overwrite the ones passed to the constructor.
    """
    self.samplesize = SampleSize
    self.num_weak_classifiers = WeakClassifiers
    self.tree_depth = TreeDepth
    self.feature_reduction = feature_reduction
    self.scale = Scaled

    X = np.asarray(X)
    y = np.asarray(y)

    # KFold only accepts n_splits positionally; shuffle and random_state must
    # be passed by keyword.
    kfold = KFold(n_splits=10, shuffle=True, random_state=3)
    acc = []
    # enumerate splits
    for train, validate in kfold.split(X):
      self.fit(X[train],y[train])
      pred = self.predict(X[validate])
      acc.append(accuracy_score(y[validate], pred))
    return np.mean(acc)

  def predict(self, X,Transform=True):
    """Predict a label for each row of ``X`` by alpha-weighted voting.

    ``Transform`` applies the scaler / PCA fitted in ``fit``; pass ``False``
    when ``X`` has already been transformed.
    """
    if Transform:
      if self.scale: X = self.scaler.transform(X)
      if self.feature_reduction: X = self.pca.transform(X)
    X = np.asarray(X)

    # One vote column per class seen in fit(). With a fixed three-column
    # table, a binary problem whose alphas are negative could "win" with the
    # empty third column and return a label that does not exist.
    votes = np.zeros((X.shape[0], len(self.classes_)))
    for alpha, clf in zip(self.model_alphas, self.classifiers):
      member_pred = np.asarray(clf.predict(X))
      votes += alpha * (member_pred[:, None] == self.classes_[None, :])

    return self.classes_[np.argmax(votes, axis=1)]

#### Hyper-Parameter Tuning



```
from google.colab import files 
# Grid search over the boosting hyper-parameters on the image features.
# Requires Xtrain / ytrain from the train-test split cell.
columns = ['PCA','Scaled','TreeDepth','SampleSize','No.WeakClassifiers','Accuracy']
# Rows are collected in a plain list: DataFrame.append was removed in pandas 2.0
# and re-building the frame on every iteration is quadratic anyway.
rows = []
counter = 0
for sample_size in reversed(range(4,40,2)):
  for num_of_classifiers in reversed(range(2,25,3)):
    for tree_depth in reversed(range(2,20,4)):
      for feature_reduction in [1,0]:
        for scaled in [1,0]:
          new_row = {'PCA':feature_reduction,'Scaled':scaled,'TreeDepth':tree_depth,'SampleSize':sample_size,'No.WeakClassifiers':num_of_classifiers}
          try:
            bc = BoostingClassifier()
            new_row['Accuracy'] = bc.cross_validation(Xtrain,ytrain,feature_reduction,scaled,tree_depth,sample_size,num_of_classifiers)
          except Exception as err:
            # Best-effort sweep: log the failure for this configuration and carry on.
            new_row['Accuracy'] = err

          rows.append(new_row)
          counter+=1
          if counter%200 == 0:
            # Periodic checkpoint so a disconnected Colab session loses little work.
            df = pd.DataFrame(rows, columns=columns)
            df.to_csv(f"backboostingtuning{counter}.csv")
            files.download(f"backboostingtuning{counter}.csv")
df = pd.DataFrame(rows, columns=columns)
```



#### Obtain the train,test split

In [None]:
# Build the HOG feature matrix and integer labels for every image in the dataset folder.
train_folder_name='image_dataset'
(X_train, Y_train) = obtain_dataset(train_folder_name)
# Keep 80% of the samples for training and hold out the rest; the fixed
# random_state makes the split repeatable.
Xtrain, Xtest, ytrain, ytest = train_test_split(X_train, Y_train, train_size=0.8, random_state=123)

#### Train the boosting classifier and evaluate it on the train-test split

In [None]:
# Fit the boosting ensemble with its default hyper-parameters on the image
# training split and report accuracy on the held-out split.
bc = BoostingClassifier()
bc.fit(Xtrain, ytrain)
y_pred = bc.predict(Xtest)
print('accuracy', accuracy_score(ytest, y_pred))

accuracy 0.43333333333333335


### Test function that will be called to evaluate.

In [None]:
def test_func_boosting_image(image_dataset_train, image_dataset_test):
    """Train the default boosting classifier on one image folder and score it on another.

    Both arguments are dataset folders in the layout expected by
    ``obtain_dataset``. Prints the test accuracy and returns the predicted
    labels for the test folder.
    """
    train_features, train_labels = obtain_dataset(image_dataset_train)
    test_features, test_labels = obtain_dataset(image_dataset_test)

    booster = BoostingClassifier()
    booster.fit(train_features, train_labels)
    predictions = booster.predict(test_features)

    print('accuracy', accuracy_score(test_labels, predictions))
    return predictions

# Task 2

Classify the above dataset using a Support Vector Machine (SVM).


### Kernels

In [None]:
# Default bandwidth for the kernels below; it is bound into each function's
# signature when the function is defined.
sig = 3

def _pairwise_distances(X2,X1):
  """Euclidean distance between every row of X2 and every row of X1, shape (len(X2), len(X1))."""
  # Local import: scipy is not part of the notebook's import cell.
  from scipy.spatial.distance import cdist
  # One vectorised call replaces a Python-level lambda invocation per sample pair.
  return cdist(np.asarray(X2, dtype=float), np.asarray(X1, dtype=float), metric='euclidean')

def exp_kernel(X2,X1,sigma=sig):
  """Gram matrix with entries exp(-||a - b|| / (2 * sqrt(sigma))), a in X2, b in X1.

  NOTE(review): the textbook exponential kernel divides by 2 * sigma**2; the
  original 2 * sigma**0.5 scaling is kept unchanged here -- confirm the intent.
  """
  return np.exp(-_pairwise_distances(X2, X1)/(2*(sigma**0.5)))

def laplacian_kernel(X2,X1,sigma=sig):
  """Gram matrix with entries exp(-||a - b|| / sigma), a in X2, b in X1."""
  return np.exp(-_pairwise_distances(X2, X1)/sigma)

def rational_quadratic_kernel(X2,X1,sigma=sig):
  """Gram matrix with entries 1 - ||a - b||^2 / (||a - b||^2 + sigma)."""
  sq_dist = _pairwise_distances(X2, X1)**2
  return 1 - sq_dist/(sq_dist+sigma)

def log_kernel(X2,X1,sigma=sig):
  """Gram matrix with entries -log(||a - b||^sigma + 1)."""
  return - np.log((_pairwise_distances(X2, X1)**sigma)+1)

def spline_kernel(X2,X1):
  """Gram matrix of the spline kernel.

  Each entry is the product over features of
  1 + a*b + a*b*m - ((a + b)/2)*m^2 + m^3/3, with m = min(a, b).
  """
  X2 = np.asarray(X2, dtype=float)
  X1 = np.asarray(X1, dtype=float)
  hXX = np.empty((X2.shape[0], X1.shape[0]))
  # Loop over the rows of X2 only; each row is compared with all of X1 at once,
  # which avoids an (n, m, features) intermediate array.
  for i, a in enumerate(X2):
    c = np.minimum(a, X1)
    hXX[i] = np.prod(1 + a*X1 + a*X1*c - (((a+X1)/2)*(c**2)) + ((c**3)/3), axis=1)
  return hXX

def cauchy_kernel(X2,X1,sigma=sig):
  """Gram matrix with entries 1 / (1 + ||a - b||^2 / sigma^2)."""
  return 1/(1+(_pairwise_distances(X2, X1)**2)/(sigma**2))





```
from google.colab import files 
# Random search over kernel / PCA / scaling / sigma for the image SVM.
columns = ['PCA','Scaled','Kernel','Sigma','Accuracy']
# Rows are collected in a list: DataFrame.append was removed in pandas 2.0.
rows = []
counter = 0
for kern in reversed(['linear', 'rbf','sigmoid','poly',exp_kernel,laplacian_kernel,rational_quadratic_kernel,log_kernel,spline_kernel,cauchy_kernel]):
  # Built-in (string) kernels and spline_kernel take no sigma, so a single
  # draw per configuration is enough for them.
  sigma_free = isinstance(kern,str) or kern is spline_kernel
  kern_name = kern if isinstance(kern,str) else kern.__name__
  for feature_reduction in [1,0]:
    for scaled in [1,0]:
      # The loop variable is not called `sig`, so the module-level default
      # bandwidth is not overwritten by the search.
      for sigma_value in np.random.uniform(0.1,10,size=5):
        # Only `sigma` has a default, so __defaults__ is a 1-tuple. The old
        # (_,_,sig) form depended on IPython's `_` variable being defined.
        if not sigma_free: kern.__defaults__ = (sigma_value,)
        new_row = {'PCA':feature_reduction,'Scaled':scaled,'Kernel':kern_name,'Sigma':sigma_value}
        try:
          sv = SVMClassifier()
          new_row['Accuracy'] = sv.cross_validation(Xtrain,ytrain,feature_reduction,scaled,'image',kern)
        except Exception as err:
          # Best-effort sweep: log the failure for this configuration and carry on.
          new_row['Accuracy'] = err

        rows.append(new_row)
        counter+=1
        if counter%20 == 0:
          df = pd.DataFrame(rows, columns=columns)
          df.to_csv(f"svmtuning{counter}.csv")
          files.download(f"svmtuning{counter}.csv")
        if sigma_free: break
df = pd.DataFrame(rows, columns=columns)
```



In [None]:
from sklearn import svm

class SVMClassifier:
  """Support-vector classifier with optional standardisation and PCA.

  ``fit_image`` / ``fit_text`` differ only in their default kernel and
  preprocessing; the matching ``predict_*`` method applies the same
  preprocessing before predicting.
  """

  def __init__(self,kern='linear',feature_reduction=True, Scaled=True):
    # These are placeholders: every fit_* call replaces them with its own arguments.
    self.kern = kern
    self.feature_reduction = feature_reduction
    self.scale = Scaled

  def _fit(self, X, y, kern, feature_reduction, Scaled):
    """Shared training routine: store settings, preprocess, fit an ``svm.SVC``."""
    self.kern = kern
    self.feature_reduction = feature_reduction
    self.scale = Scaled

    self.Xtrain = X
    self.ytrain = y

    if self.scale:
      #Standardise
      self.scaler = preprocessing.StandardScaler().fit(self.Xtrain)
      self.Xtrain = self.scaler.transform(self.Xtrain)
    if self.feature_reduction:
      #PCA
      self.pca = PCA()
      self.Xtrain = self.pca.fit_transform(self.Xtrain,self.ytrain)

    self.clf = svm.SVC(kernel=self.kern)
    self.clf.fit(self.Xtrain, self.ytrain)

  def _predict(self, X, Transform):
    """Shared prediction routine; ``Transform`` applies the fitted scaler / PCA first."""
    if Transform:
      if self.scale: X = self.scaler.transform(X)
      if self.feature_reduction: X = self.pca.transform(X)
    return self.clf.predict(X)

  def fit_image(self,X,y,kern=log_kernel,feature_reduction=True, Scaled=False):
    """Train on image features (defaults: log kernel, PCA, no scaling)."""
    self._fit(X, y, kern, feature_reduction, Scaled)

  def fit_text(self, X,y,kern='rbf',feature_reduction=False, Scaled=False):
    """Train on text features (defaults: RBF kernel, no PCA, no scaling)."""
    self._fit(X, y, kern, feature_reduction, Scaled)

  def predict_image(self, X, Transform=True):
    """Predict labels for image features."""
    return self._predict(X, Transform)

  def predict_text(self, X,Transform=True):
    """Predict labels for text features."""
    return self._predict(X, Transform)

  def cross_validation(self,X,y,feature_reduction,scaled,data,kern):
    """Return the mean accuracy of a shuffled 10-fold CV for the given settings.

    ``data`` selects the fit/predict pair and must be ``'image'`` or ``'text'``.

    Raises
    ------
    ValueError
        If ``data`` is neither ``'image'`` nor ``'text'``.
    """
    # Compare by value: `is` on string literals tests object identity.
    if data == 'image':
      fit, predict = self.fit_image, self.predict_image
    elif data == 'text':
      fit, predict = self.fit_text, self.predict_text
    else:
      raise ValueError(f"data must be 'image' or 'text', got {data!r}")

    # KFold only accepts n_splits positionally; shuffle and random_state must
    # be passed by keyword.
    kfold = KFold(n_splits=10, shuffle=True, random_state=3)
    acc = []
    # enumerate splits
    for train, validate in kfold.split(X):
      # Forward the requested settings explicitly: fit_* would otherwise fall
      # back to its own defaults and the configuration being evaluated would
      # never actually be fitted.
      fit(X[train],y[train],kern=kern,feature_reduction=feature_reduction,Scaled=scaled)
      pred = predict(X[validate])
      acc.append(accuracy_score(y[validate], pred))

    return np.mean(acc)

### Hyperparameter tuning for SVM Image Classification

```
from google.colab import files 
# Sweep kernel / PCA / scaling for the image SVM.
columns = ['PCA','Scaled','Kernel','Sigma','Accuracy']
# Rows are collected in a list: DataFrame.append was removed in pandas 2.0.
rows = []
counter = 0
for kern in reversed(['linear', 'rbf','sigmoid','poly',exp_kernel,laplacian_kernel,rational_quadratic_kernel,log_kernel,spline_kernel,cauchy_kernel]):
  kern_name = kern if isinstance(kern,str) else kern.__name__
  for feature_reduction in [1,0]:
    for scaled in [1,0]:
      # NOTE(review): `sig` is only written to the 'Sigma' column here; the
      # kernels keep whatever sigma default they currently have -- confirm
      # that this is the value they were evaluated with.
      for sig in [0.6]:
        new_row = {'PCA':feature_reduction,'Scaled':scaled,'Kernel':kern_name,'Sigma':sig}
        try:
          sv = SVMClassifier()
          new_row['Accuracy'] = sv.cross_validation(Xtrain,ytrain,feature_reduction,scaled,'image',kern)
        except Exception as err:
          # Best-effort sweep: log the failure for this configuration and carry on.
          new_row['Accuracy'] = err

        rows.append(new_row)
        counter+=1
        print('Kernel:',kern_name)
        if counter%4 == 0:
          df = pd.DataFrame(rows, columns=columns)
          df.to_csv("svmimagetuning.csv")
df = pd.DataFrame(rows, columns=columns)
          
```



#### Train the SVM classifier and evaluate it on the train-test split

In [None]:
# Fit an SVM with the custom log kernel on the raw HOG features (no PCA, no
# scaling) and report accuracy on the held-out image split.
sc = SVMClassifier()
sc.fit_image(Xtrain, ytrain,kern=log_kernel,feature_reduction = False, Scaled=False)
y_pred = sc.predict_image(Xtest)
print('accuracy', accuracy_score(ytest, y_pred))

accuracy 0.43333333333333335


### Test function

In [None]:
def test_func_svm_image(image_dataset_train, image_dataset_test):
    """Train the image SVM (``fit_image`` defaults) on one folder and score it on another.

    Both arguments are dataset folders in the layout expected by
    ``obtain_dataset``. Prints the test accuracy and returns the predicted
    labels for the test folder.
    """
    train_features, train_labels = obtain_dataset(image_dataset_train)
    test_features, test_labels = obtain_dataset(image_dataset_test)

    classifier = SVMClassifier()
    classifier.fit_image(train_features, train_labels)
    predictions = classifier.predict_image(test_features)

    print('accuracy', accuracy_score(test_labels, predictions))
    return predictions

# Task 3

In this task, you need to perform sentiment analysis on the provided dataset. The dataset consists of movie reviews, each labelled with a sentiment that is either positive or negative. You need to train and cross-validate a boosting-based classifier on the dataset provided. The method will be evaluated against an external test set.

#### Process the text and obtain a bag of words-based features 

In [None]:
from nltk.corpus import stopwords
import re
stopwordlist = stopwords.words('english')

# Normalise each stop word the same way stemming_tokeniser treats review text
# (strip punctuation, lower-case, split, then stem) and keep EVERY resulting
# fragment. Keeping only the first fragment turned "don't" into "don" and left
# the "t" token out of the stop list, which is the kind of mismatch
# scikit-learn reports as inconsistent stop_words.
# dict.fromkeys removes duplicates while preserving order.
stemmed_stopwords = list(dict.fromkeys(
  porter.stem(token)
  for word in stopwordlist
  for token in re.sub(r"[^A-Za-z0-9\-]", " ", word).lower().split()
))

def label_y(row):
  """Encode a row's 'sentiment' field: 'negative' -> 0, 'positive' -> 1, anything else -> None."""
  sentiment = row['sentiment']
  if sentiment == 'negative':
    return 0
  return 1 if sentiment == 'positive' else None


def stemming_tokeniser(review):
  """Split a review into lower-cased, Porter-stemmed tokens.

  Every character other than ASCII letters, digits and '-' is treated as a
  separator.
  """
  import re
  from nltk.stem import PorterStemmer

  stemmer = PorterStemmer()
  cleaned = re.sub(r"[^A-Za-z0-9\-]", " ", review)
  return [stemmer.stem(token) for token in cleaned.lower().split()]


def extract_bag_of_words(filename, n_features=1000,stpwrds=stemmed_stopwords, vectorizer=None, return_vectorizer=False):
  """Read a review CSV and return TF-IDF features with 0/1 sentiment labels.

  Parameters
  ----------
  filename : str
      CSV file with a 'review' text column and a 'sentiment' column.
  n_features : int
      Maximum vocabulary size when a new vectorizer is fitted.
  stpwrds : list of str
      Stop words handed to a newly fitted vectorizer.
  vectorizer : fitted TfidfVectorizer, optional
      When given, the reviews are only *transformed* with it, so the columns
      match the data it was fitted on. Use this for validation / test files;
      fitting a fresh vectorizer per file yields feature columns that do not
      correspond to each other. ``n_features`` and ``stpwrds`` are then unused.
  return_vectorizer : bool
      When True, return ``(X, y, vectorizer)`` instead of ``(X, y)``.

  Returns
  -------
  (X, y) or (X, y, vectorizer)
      ``X`` is a dense (n_reviews, n_terms) array; ``y`` holds the output of
      ``label_y`` for each row.
  """
  from sklearn.feature_extraction.text import TfidfVectorizer
  df = pd.read_csv(filename)

  y = np.array(df.apply(label_y, axis=1))
  reviews = df['review'].tolist()

  if vectorizer is None:
    vectorizer = TfidfVectorizer(tokenizer= stemming_tokeniser,max_df=0.95, min_df=3,max_features=n_features,stop_words=stpwrds)
    X = vectorizer.fit_transform(reviews).toarray()
  else:
    # Reuse the vocabulary and IDF weights learnt elsewhere (typically on the training file).
    X = vectorizer.transform(reviews).toarray()

  if return_vectorizer:
    return (X, y, vectorizer)
  return (X,y)


In [None]:
from sklearn.model_selection import train_test_split
# Build TF-IDF features and 0/1 sentiment labels for the movie-review training file.
train_file_name='movie_review_train.csv'
(X_train, Y_train) = extract_bag_of_words(train_file_name)
# 80/20 split with a fixed seed. This rebinds Xtrain/Xtest/ytrain/ytest, which
# held the image data earlier in the notebook.
Xtrain, Xtest, ytrain, ytest = train_test_split(X_train, Y_train, train_size=0.8, random_state=123)

  'stop_words.' % sorted(inconsistent))


### Hyperparameter tuning Boosting for text classification 

```
from google.colab import files 
# Grid search over the boosting hyper-parameters on the text features.
columns = ['TreeDepth','SampleSize','No.WeakClassifiers','Accuracy']
# Rows are collected in a list: DataFrame.append was removed in pandas 2.0.
rows = []
counter = 0
for n_classifiers in range(5,40,4):
  for samples in range(50,1200,100):
    for trees in range(1,45,5):
      new_row = {'TreeDepth':trees,'SampleSize':samples,'No.WeakClassifiers':n_classifiers}
      try:
        bc = BoostingClassifier()
        new_row['Accuracy'] = bc.cross_validation(Xtrain, ytrain,feature_reduction=False,Scaled=False,WeakClassifiers=n_classifiers,SampleSize=samples,TreeDepth=trees)
      except Exception as err:
        # Best-effort sweep: log the failure for this configuration and carry on.
        new_row['Accuracy'] = err

      rows.append(new_row)
      counter+=1
      if counter%20 == 0:
        # Periodic checkpoint so a disconnected Colab session loses little work.
        df = pd.DataFrame(rows, columns=columns)
        df.to_csv(f"textboosting{counter}.csv")
        files.download(f"textboosting{counter}.csv")
df = pd.DataFrame(rows, columns=columns)
```



In [None]:
# Boosting on the TF-IDF features without PCA or scaling: 40 trees of depth up
# to 40, each fitted on 1000 resampled reviews.
bc = BoostingClassifier(feature_reduction=False,scaling=False,num_classifiers=40,samplesize=1000,max_tree_depth=40)
bc.fit(Xtrain, ytrain)
y_pred = bc.predict(Xtest)

# Accuracy on the held-out 20% of the review data.
print('accuracy', accuracy_score(ytest, y_pred))

accuracy 0.773


### Test function 

In [None]:
def test_func_boosting_text(text_dataset_train, text_dataset_test):
    """Train the boosting classifier on one review CSV and score it on another.

    The TF-IDF vocabulary and IDF weights are learnt from the training file
    only and then applied to the test file, so both feature matrices share the
    same columns. Calling ``extract_bag_of_words`` once per file would fit two
    unrelated vocabularies.

    Prints the test accuracy and returns the predicted labels for the test file.
    """
    from sklearn.feature_extraction.text import TfidfVectorizer

    train_df = pd.read_csv(text_dataset_train)
    test_df = pd.read_csv(text_dataset_test)

    # Same vectorizer settings as extract_bag_of_words' defaults.
    vectorizer = TfidfVectorizer(tokenizer=stemming_tokeniser, max_df=0.95, min_df=3,
                                 max_features=1000, stop_words=stemmed_stopwords)
    X_train = vectorizer.fit_transform(train_df['review'].tolist()).toarray()
    X_test = vectorizer.transform(test_df['review'].tolist()).toarray()
    Y_train = np.array(train_df.apply(label_y, axis=1))
    Y_test = np.array(test_df.apply(label_y, axis=1))

    bc = BoostingClassifier(feature_reduction = False,scaling=False, samplesize=950,num_classifiers=37,max_tree_depth=41)
    bc.fit(X_train, Y_train)
    y_pred = bc.predict(X_test)
    print('accuracy', accuracy_score(Y_test, y_pred))
    return y_pred

# Task 4

Classify the above movie review dataset using a Support Vector Machine (SVM).

In [None]:
# Cross-validated accuracy on the text training split, requested with the log
# kernel and PCA but without scaling.
sc = SVMClassifier()
acc = sc.cross_validation(Xtrain,ytrain,feature_reduction=True,scaled=False,data='text',kern=log_kernel)
print(acc)

In [None]:
# your code for svm based training of the dataset
# Text SVM with the custom log kernel (fit_text's default preprocessing),
# scored on the held-out review split.
sc = SVMClassifier()
sc.fit_text(Xtrain, ytrain,kern=log_kernel)
y_pred = sc.predict_text(Xtest)
print('accuracy', accuracy_score(ytest, y_pred))

accuracy 0.853


In [None]:
# Text SVM with the built-in linear kernel, no PCA.
sc = SVMClassifier()
sc.fit_text(Xtrain, ytrain,kern='linear',feature_reduction=False)
y_pred = sc.predict_text(Xtest)
print('Linear Kernel Accuracy', accuracy_score(ytest, y_pred))

accuracy 0.856


In [None]:
# Text SVM with the built-in RBF kernel, no PCA.
sc = SVMClassifier()
sc.fit_text(Xtrain, ytrain,kern='rbf',feature_reduction=False)
y_pred = sc.predict_text(Xtest)
print('RBF Kernel Accuracy', accuracy_score(ytest, y_pred))

RBF Kernel Accuracy 0.857


In [None]:
# Text SVM with the built-in polynomial kernel, no PCA.
sc = SVMClassifier()
sc.fit_text(Xtrain, ytrain,kern='poly',feature_reduction=False)
y_pred = sc.predict_text(Xtest)
print('Poly Kernel Accuracy', accuracy_score(ytest, y_pred))

Poly Kernel Accuracy 0.831


In [None]:
# Text SVM with the built-in sigmoid kernel, no PCA.
sc = SVMClassifier()
sc.fit_text(Xtrain, ytrain,kern='sigmoid',feature_reduction=False)
y_pred = sc.predict_text(Xtest)
print('Sigmoid Kernel Accuracy', accuracy_score(ytest, y_pred))

Sigmoid Kernel Accuracy 0.855


In [None]:
# Text SVM with the custom Laplacian kernel (passed as a callable), no PCA.
sc = SVMClassifier()
sc.fit_text(Xtrain, ytrain,kern=laplacian_kernel,feature_reduction=False)
y_pred = sc.predict_text(Xtest)
print('Laplacian Kernel Accuracy', accuracy_score(ytest, y_pred))

In [None]:
# Text SVM with the custom spline kernel (passed as a callable), no PCA.
sc = SVMClassifier()
sc.fit_text(Xtrain, ytrain,kern=spline_kernel,feature_reduction=False)
y_pred = sc.predict_text(Xtest)
print('Spline Kernel Accuracy', accuracy_score(ytest, y_pred))

In [None]:
# Text SVM with the custom Cauchy kernel (passed as a callable), no PCA.
sc = SVMClassifier()
sc.fit_text(Xtrain, ytrain,kern=cauchy_kernel,feature_reduction=False)
y_pred = sc.predict_text(Xtest)
print('Cauchy Kernel Accuracy', accuracy_score(ytest, y_pred))

### Test function 

In [None]:
def test_func_svm_text(text_dataset_train, text_dataset_test):
    """Train the text SVM (``fit_text`` defaults) on one review CSV and score it on another.

    The TF-IDF vocabulary and IDF weights are learnt from the training file
    only and then applied to the test file, so both feature matrices share the
    same columns. Calling ``extract_bag_of_words`` once per file would fit two
    unrelated vocabularies.

    Prints the test accuracy and returns the predicted labels for the test file.
    """
    from sklearn.feature_extraction.text import TfidfVectorizer

    train_df = pd.read_csv(text_dataset_train)
    test_df = pd.read_csv(text_dataset_test)

    # Same vectorizer settings as extract_bag_of_words' defaults.
    vectorizer = TfidfVectorizer(tokenizer=stemming_tokeniser, max_df=0.95, min_df=3,
                                 max_features=1000, stop_words=stemmed_stopwords)
    X_train = vectorizer.fit_transform(train_df['review'].tolist()).toarray()
    X_test = vectorizer.transform(test_df['review'].tolist()).toarray()
    Y_train = np.array(train_df.apply(label_y, axis=1))
    Y_test = np.array(test_df.apply(label_y, axis=1))

    sc = SVMClassifier()
    sc.fit_text(X_train, Y_train)
    y_pred = sc.predict_text(X_test)
    print('accuracy', accuracy_score(Y_test, y_pred))
    return y_pred