Searching for the best hyperparameters for our `AdaBoostClassifier` using
`GridSearchCV`.
We'll try to improve on the default values of the following parameters:

   `algorithm`, default: 'SAMME.R',

   `learning_rate`, default: 1.0,

   `n_estimators`, default: 50,


In [None]:
# Baseline hyperparameters for the classifier; these are the values listed
# as defaults in the notes above.
param_dict = dict(
    algorithm='SAMME.R',
    learning_rate=1.0,
    n_estimators=50,
)

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import MaxAbsScaler
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier
import numpy as np
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
from pathlib import Path

# Show arrays with three decimals for the rest of the session.
# np.printoptions() is a context manager and changes nothing unless used in a
# `with` block; set_printoptions() is the call that applies the setting globally.
np.set_printoptions(precision=3)

In [None]:
# The shard files share one path prefix and differ only by a numeric suffix
# (1 through 9) appended directly to that prefix, followed by ".csv".
base = Path().resolve().parents[1] / r'data/subsets/gender_final_small'
locations = [f'{base}{suffix}.csv' for suffix in range(1, 10)]

# Stack every shard row-wise (original note: 9000 names sampled at random).
X = pd.concat([pd.read_csv(csv_path) for csv_path in locations], axis=0)

# Separate the target: pop() returns the "Gender" column and removes it from X.
y = X.pop("Gender")

In [None]:
# Encode each name as binary presence flags of its character n-grams
# (lengths 2 through 10), then pass the result through MaxAbsScaler so the
# feature matrix ends up as floats.
vectorizer = CountVectorizer(
    analyzer='char',
    ngram_range=(2, 10),
    decode_error='replace',
    binary=True,
)
scaler = MaxAbsScaler()

# Both transformers are fitted on the full "Name" column.
X = scaler.fit_transform(vectorizer.fit_transform(X['Name']))

In [None]:
# Build the AdaBoost classifier from the baseline settings in param_dict.
# Only these three entries are forwarded, each under its own keyword.
abc_clf = AdaBoostClassifier(
    **{
        option: param_dict[option]
        for option in ("algorithm", "learning_rate", "n_estimators")
    }
)