### Using SVM - Support Vector Machine to predict Stock prices

In [32]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, accuracy_score

# matplotlib
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# numpy
import numpy as np

# pandas
import pandas as pd
import pandas_flavor as pf
import os

In [33]:
# base dir
BASE_DIR = os.path.dirname(os.path.abspath('__file__'))
DATA_DIR = os.path.join(BASE_DIR, 'data')

### Functions to clean data

In [34]:
import datetime as dt
import time

@pf.register_dataframe_method
def convert_to_time_stamp(date: str) -> float:
    '''
        receive a string date and return a timestamp date
        params: date: str
        return: ts: float
    '''
    d = dt.datetime.strptime(date, '%Y-%m-%d')
    ts = time.mktime(d.timetuple())
    return ts


# test
# ts = convert_to_time_stamp('2022-02-07')
# print(ts, dt.datetime.fromtimestamp(ts))


  def convert_to_time_stamp(date: str) -> float:


### Importing Dataset


In [37]:
df = pd.read_csv(os.path.join(DATA_DIR, 'abev3.csv'))

# splitting data
# get all columns except the last one
# remove the column ticker because it contains text
# df['date_ts'] = df['date'].convert_to_time_stamp()
df.insert(2, 'date_ts', df['date'].apply(convert_to_time_stamp))
# df['date_ts'] = df['date'].apply(convert_to_time_stamp)
X = df.iloc[:, 2:-1]

# get just the last column
y = df.iloc[:, -1]
print(df.head())
print(X)
print(y)

  ticker        date       date_ts   open   high    low    volume  adj_close  \
0  ABEV3  2023-02-14  1.676344e+09  13.15  13.36  12.99  29701200  13.000000   
1  ABEV3  2020-07-13  1.594609e+09  14.91  14.95  13.98  36972600  12.410870   
2  ABEV3  2020-02-27  1.582772e+09  14.90  14.97  14.28  91108200  12.854115   
3  ABEV3  2021-04-01  1.617246e+09  15.39  15.40  14.99  15974100  13.714002   
4  ABEV3  2021-10-22  1.634872e+09  14.83  15.23  14.54  29599100  13.714002   

   close  
0   13.0  
1   14.0  
2   14.5  
3   15.0  
4   15.0  
          date_ts   open       high        low    volume  adj_close
0    1.676344e+09  13.15  13.360000  12.990000  29701200  13.000000
1    1.594609e+09  14.91  14.950000  13.980000  36972600  12.410870
2    1.582772e+09  14.90  14.970000  14.280000  91108200  12.854115
3    1.617246e+09  15.39  15.400000  14.990000  15974100  13.714002
4    1.634872e+09  14.83  15.230000  14.540000  29599100  13.714002
..            ...    ...        ...        ..

 ### Splitting the dataset into Training and Test set

In [38]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

In [39]:
print(X_train)

          date_ts   open       high        low    volume  adj_close
658  1.592795e+09  13.70  14.180000  13.680000  24735500  12.508382
566  1.623035e+09  19.32  19.820000  19.190001  27901200  17.919630
294  1.617851e+09  15.20  15.670000  15.140000  19194000  14.125422
77   1.603336e+09  13.41  13.570000  13.260000  21189900  12.029677
352  1.639019e+09  16.10  16.110001  15.690000  19725400  14.454558
..            ...    ...        ...        ...       ...        ...
707  1.644203e+09  14.43  14.510000  14.270000  21182100  13.574749
192  1.608520e+09  15.65  15.790000  15.310000  24879600  14.031122
629  1.583982e+09  12.48  12.680000  11.390000  36313400  10.265562
559  1.616036e+09  15.25  15.440000  15.160000  20329000  13.988282
684  1.599793e+09  12.37  12.380000  12.040000  25286500  10.673347

[558 rows x 6 columns]


In [40]:
print(y_train)

658    14.11
566    19.60
294    15.45
77     13.57
352    15.81
       ...  
707    14.29
192    15.42
629    11.58
559    15.30
684    12.04
Name: close, Length: 558, dtype: float64


### Feature Scaling

In [41]:
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [42]:
print(X_train)

[[-1.38523575 -0.75008963 -0.59594687 -0.62188262 -0.20259496 -0.8018873 ]
 [-0.25910283  2.79176312  2.99122454  2.82414518  0.02582363  2.66988472]
 [-0.45215419  0.19524475  0.3517278   0.29122092 -0.60243752  0.23558001]
 ...
 [-1.71342305 -1.51896176 -1.54998192 -2.05407921  0.63279928 -2.24084532]
 [-0.51972216  0.22675602  0.20544211  0.30372888 -0.52054249  0.14759357]
 [-1.12461641 -1.58828607 -1.74078905 -1.64756076 -0.16283799 -1.97921659]]


In [43]:
print(X_test)

[[-1.06348348 -1.53786828 -1.54998192 -1.44742868 -0.26881088 -1.69483759]
 [-0.41354392  0.4914497   0.43441091  0.53513175 -1.07118013  0.3352983 ]
 [ 1.50409957  0.78135166  0.65065835  0.74777239 -0.05420982  0.92447354]
 ...
 [-0.8382569   0.08180504  0.10367847  0.1849006  -0.25812484 -0.14781425]
 [ 0.85737752 -0.4034669  -0.52598452 -0.52807086 -0.21546005 -0.2822796 ]
 [-1.24366475 -0.81311156 -0.93303969 -1.01589312  1.44068834 -1.33083202]]


In [51]:
print(y_test.shape)

(186,)


### Training the SVM model on the training set

In [52]:
classifier = SVC(kernel='linear', random_state=0)
classifier.fit(X_train, y_train)

ValueError: Unknown label type: 'continuous'