In [13]:
import numpy as np
import pandas as pd
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [14]:
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV

In [15]:
# Standalone PCA: it is fitted further down only to inspect the
# explained-variance spectrum and is NOT part of the model being tuned.
pca = PCA()
# Estimator handed to GridSearchCV. Despite the name `pipe`, this is a plain
# random forest (not a sklearn Pipeline), so grid keys are the forest's own
# parameter names. The fixed seed makes the forest -- and therefore the grid
# search -- repeatable; 69 matches the seed used for the train/test split.
pipe = RandomForestClassifier(random_state=69)

In [16]:
# Read the pump feature table and the pump target table from CSV files
# located in the relative "Features" directory.
df_pump_features = pd.read_csv(
    "Features/df_pump_feature.csv",
)
df_pump_target = pd.read_csv(
    "Features/df_pump_target.csv",
)

In [17]:
# Feature matrix: the feature table is used as-is (same object, no copy).
X = df_pump_features
# Target vector: flatten the target table's values into a 1-D array.
y = np.ravel(df_pump_target.values)

In [18]:
# Hold out 20% of the samples for the final evaluation; the fixed seed keeps
# the partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=69)

In [None]:
# Hyper-parameter grid for the random forest. `pipe` is a bare
# RandomForestClassifier, so the keys are plain parameter names (no
# 'step__param' prefixes as a Pipeline would need).
param_grid = [
    {
        'bootstrap': [False, True],
        'criterion': ['gini', 'entropy'],
        'n_estimators': [80, 90, 100, 110, 130],
        'max_features': [2, 3, 4, 5, 6, 7, 8, 9, 10, 15],
    },
]

# The target is a class label (0/1 in the classification report below), so a
# classification metric replaces the regression metric
# 'neg_mean_squared_error'. For 0/1 labels MSE equals the error rate, so
# 'accuracy' ranks the candidates identically while making best_score_
# readable as "fraction correct" instead of a negative number.
# NOTE(review): the classes look imbalanced (support 275 vs 2248); consider
# 'balanced_accuracy' or 'f1_macro' if the minority class matters most.
search = GridSearchCV(
    pipe,
    param_grid,
    cv=5,
    scoring='accuracy',
    return_train_score=True,
    refit=True,   # best_estimator_ is retrained on all of X_train
    n_jobs=-1,    # 200 candidates x 5 folds: fit in parallel on all cores
)
search.fit(X_train, y_train)

# Show a ranked summary of the candidates instead of dumping the raw
# cv_results_ dict, which is unreadable for 200 parameter combinations.
cv_summary = (
    pd.DataFrame(search.cv_results_)
    .sort_values('rank_test_score')
    [['params', 'mean_test_score', 'std_test_score',
      'mean_train_score', 'rank_test_score']]
)
print(cv_summary.head(10).to_string(index=False))
print("Best parameter (CV score=%0.3f):" % search.best_score_)
print(search.best_params_)
print(search.best_estimator_)

In [None]:
# Plot the PCA spectrum of the training features.
pca.fit(X_train)

# 1-based component numbers shared by both panels.
component_idx = np.arange(1, pca.n_components_ + 1)

fig, (ax0, ax1) = plt.subplots(nrows=2, sharex=True, figsize=(6, 6))

# Top panel: variance explained by each individual component.
ax0.plot(component_idx, pca.explained_variance_ratio_, '+', linewidth=2)
ax0.set_ylabel('PCA explained variance ratio')

# Only a Pipeline exposes `named_steps`. The tuned estimator here is a plain
# RandomForestClassifier, so looking up named_steps['pca'] unconditionally
# raises AttributeError. Draw the "chosen n_components" marker only when the
# best estimator really contains a 'pca' step.
chosen_pca = getattr(search.best_estimator_, 'named_steps', {}).get('pca')
if chosen_pca is not None:
    ax0.axvline(chosen_pca.n_components,
                linestyle=':', label='n_components chosen')
    ax0.legend(prop=dict(size=12))

# Bottom panel (previously left blank): cumulative explained variance.
ax1.plot(component_idx, np.cumsum(pca.explained_variance_ratio_),
         '-', linewidth=2)
ax1.set_ylabel('Cumulative explained variance')
ax1.set_xlabel('n_components')

fig.tight_layout()
plt.show()

In [9]:
from sklearn.metrics import classification_report

# Predict on the held-out test split with the fitted search object (which
# delegates to its refit best estimator) and print per-class metrics.
y_pred = search.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.71      0.17      0.28       275
           1       0.91      0.99      0.95      2248

    accuracy                           0.90      2523
   macro avg       0.81      0.58      0.61      2523
weighted avg       0.89      0.90      0.87      2523

