In [1]:
import sys
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegressionCV
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier


from sklearn.utils.testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning

ignore_warnings(category=ConvergenceWarning)



In [2]:
mod_path = '/Users/ishareef7/Springboard/Capstone2'
sys.path.append(mod_path)
from src.features import build_dataset

In [3]:
dataset = build_dataset.get_dataset_echonest()
X_train, X_test, y_train, y_test = build_dataset.process_dataset(dataset)

In [4]:
weights =  dict(pd.Series(y_train).value_counts())

## Multinomial Logistic Regression Model

In [5]:
lgr_multi = LogisticRegressionCV(cv = 5, class_weight = weights, multi_class = 'multinomial',
                          random_state = 7, max_iter = 1000)
_ = lgr_multi.fit(X_train, y_train)



In [6]:
print("Multinomial Logistic Regression Test Set Accuracy =", lgr_multi.score(X_test,y_test))

Multinomial Logistic Regression Test Set Accuracy = 0.5475638051044084


## One vs. All Logistic Regression Model

In [7]:
lgr_ovr = LogisticRegressionCV(cv = 5, class_weight = weights, multi_class = 'ovr',
                          random_state = 7, max_iter = 1000)
_ = lgr_ovr.fit(X_train, y_train)









In [8]:
print("One v Rest Logistic Regression Test Set Accuracy =", lgr_ovr.score(X_test,y_test))

One v Rest Logistic Regression Test Set Accuracy = 0.5491105955143079


## One vs. Rest Support Vector Classifier

In [None]:
param_grid = {'kernel': ['linear', 'poly', 'rbf'],
              'gamma':['scale'],
              'class_weight':[weights],
              'degree':[2,3,4,5]
             }

svc = SVC(random_state = 7)
svc_cv = GridSearchCV(svc, cv =5, param_grid = param_grid)
_ = svc_cv.fit(X_train,y_train)

In [None]:
print("Support Vector Classifier Test Set Accuracy =", svc_cv.score(X_test,y_test))

## K-Nearest Neighbors Classifier

In [None]:
param_grid = {'n_neighbors': [5, 10, 15, 20]}

kn = KNeighborsClassifier(random_state = 7)
kn_cv = GridSearchCV(svc, cv =5, param_grid = param_grid)
_ = kn_cv.fit(X_train,y_train)

In [None]:
print("K Nearest Neighbors Test Set Accuracy =", kn_cv.score(X_test,y_test))

## Gradient Boosting Classifier

In [None]:
parameters = {"max_features":["log2","sqrt"],
              "n_estimators":[250,500,750,1000,1250]
             }
gbm = GradientBoostingClassifier(random_state = 7)
gbm_cv = GridSearchCV(, parameters, cv=5)
_ = gbm_cv.fit(X_train,y_train)

In [None]:
print("Gradient Boosting Test Set Accuracy =", gbm_cv.score(X_test,y_test))