## Display settings

In [1]:
%%javascript
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

<IPython.core.display.Javascript object>

In [2]:
from IPython.core.display import HTML
HTML("""
<style>
.output_png {
    display: table-cell;
    text-align: center;
    vertical-align: middle;
}
</style>
""")

## Prerequisites

In [3]:
import os
import glob
import warnings 
warnings.filterwarnings('ignore')
from copy import deepcopy
import time
import datetime
from collections import defaultdict
from typing import Tuple, List

# Data wrangling
from dfply import *
import numpy as np 
import pandas as pd 
import pickle
from scipy.signal import argrelextrema
import statistics as stat

# Data visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

# Price pattern detection
from price_detection_tools import (import_, 
                                   screener,
                                   add_label,
                                   get_index,
                                   get_target,
                                   get_features)

# Data preprocessing
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Modeling
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

# Metrics
from sklearn.metrics import (confusion_matrix,
                             classification_report,
                             accuracy_score,
                             precision_recall_curve)

# Tuning
from sklearn.model_selection import (cross_val_score,
                                     GridSearchCV,
                                     RepeatedKFold)

%matplotlib inline
sns.set()
plt.style.use('seaborn-whitegrid')

## Tools

In [4]:
def tune_model(X: pd.DataFrame, 
               y: pd.DataFrame,
               estimator, 
               scoring, 
               param_grid: dict,
               cv: int=None):
    cv = RepeatedKFold(n_splits=5, n_repeats=5, random_state=7) 
    model = GridSearchCV(estimator=estimator,
                         param_grid=param_grid,
                         scoring=scoring,
                         cv=cv,
                         refit=True,
                         verbose=3,
                         n_jobs=-1)
    model_result = model.fit(X,y)
    
    print()
    print(f"Best score: {round(model_result.best_score_ * 100, 2)}")
    print(f"Best params: {model_result.best_params_}")
    print()
    
    return model_result

## Data

#### Import detected patterns

In [5]:
df = import_('merged_patterns.csv')

In [6]:
features, target = (df >> get_features, df >> get_target)

### Data Split

In [7]:
X_train, X_test, y_train, y_test = train_test_split(features, target,
                                                    test_size=0.20,
                                                    random_state=7,
                                                    shuffle=True)

#### Plot captured patterns

In [None]:
for i in features.values:
    plt.plot(i, marker='o')
    plt.show()

## Data Preprocessing

In [8]:
scaler = MinMaxScaler()
scaler = scaler.fit(features)
features = scaler.transform(features)

In [9]:
pickle.dump(scaler, open('scaler.pkl', 'wb'))

## Model training and tuning

In [10]:
scaler = pickle.load(open('scaler.pkl', 'rb'))

In [13]:
X_test[features] = scaler.transform(X_test)

ValueError: Array conditional must be same shape as self

### k-Nearest Neigbors

In [None]:
knn = KNeighborsClassifier()

In [None]:
knn_params = [{
    'n_neighbors': np.arange(1, 35),
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan'],
}]

In [None]:
tuned_knn = tune_model(features, target,
                         estimator=knn,
                         scoring='accuracy',
                         param_grid=knn_params)

### Support Vector Machines

In [None]:
svm = SVC(kernel='rbf')

In [None]:
svm_params = {
    'C': [0.1, 1, 100, 1000],
    'gamma': [0.0001, 0.001, 0.005, 0.1, 1, 3, 5]
}

In [None]:
tuned_svm = tune_model(features, target,
                       estimator=svm,
                       scoring='accuracy',
                       param_grid=svm_params)

## All Data

In [None]:
path = '/Users/d.e.magno/Datasets/stocks_copy/'
all_files = glob.glob(os.path.join(path, "*.csv"))

In [None]:
all_patterns = pd.DataFrame()
for filename in all_files:
    df = pd.read_csv(filename)
    patterns = (df >>
                 screener >>
                 add_label(df))
    all_patterns = pd.concat([all_patterns, patterns]).reset_index(drop=True)

In [None]:
!pip install pycaret

In [None]:
import pycaret