# Classification vs Regression ML Models

In [None]:
???

Classification Models | ![Classification models](src/classification.png)
-|-
Regression Models | ![Regression models](src/regression.png)

## Data loading

In [1]:
import pandas as pd

# Load the stock table from Excel; the first column (`Date`) is parsed as
# datetimes and becomes the row index.
df = pd.read_excel(
    io='../../data/data_stock_apple.xlsx',
    index_col=0,
    parse_dates=['Date'],
)
df

Unnamed: 0_level_0,Open,High,Low,Close,Volume,change_tomorrow,change_tomorrow_direction
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2007-01-09,3.087500,3.320714,3.041071,3.306071,3349298400,4.567032,UP
2007-01-10,3.383929,3.492857,3.337500,3.464286,2952880000,-1.252610,DOWN
...,...,...,...,...,...,...,...
2023-06-28,187.929993,189.899994,187.600006,189.250000,51216800,0.179332,UP
2023-06-29,189.080002,190.070007,188.940002,189.589996,46347300,2.258084,UP


## Feature selection

Create two target variables:

1. `target_categorical`
2. `target_numerical`

In [2]:
# Target for the classification models.
target_categorical = df['change_tomorrow_direction']
# Target for the regression models.
target_numerical = df['change_tomorrow']

And select the explanatory variables:

In [3]:
# Feature columns used as model input.
explanatory = df.loc[:, ['Open', 'High', 'Low', 'Close', 'Volume']]

## Machine Learning System

Algorithm: K-Nearest Neighbors (KNN)

### ML classification model

#### Fit the mathematical equation

In [4]:
from sklearn.neighbors import KNeighborsClassifier

# Default hyperparameters; fitted on the full dataset (no train/test split).
model_kn_c = KNeighborsClassifier()
model_kn_c.fit(explanatory, target_categorical)

#### Calculate predictions

In [5]:
# Predicted label for every row the model was fitted on.
model_kn_c.predict(explanatory)

array(['DOWN', 'DOWN', 'DOWN', ..., 'UP', 'UP', 'DOWN'], dtype=object)

#### Compare predictions to reality

In [6]:
# Score computed on the same data used for fitting (in-sample).
model_kn_c.score(explanatory, target_categorical)

0.6845912707981674

#### Compare predictions to reality in a `DataFrame`

In [7]:
# Real direction next to the KNN classifier's prediction, row by row.
df_pred_classification = target_categorical.to_frame().assign(
    prediction_classification=model_kn_c.predict(explanatory)
)
df_pred_classification

Unnamed: 0_level_0,change_tomorrow_direction,prediction_classification
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2007-01-09,UP,DOWN
2007-01-10,DOWN,DOWN
...,...,...
2023-06-28,UP,UP
2023-06-29,UP,DOWN


### ML regression model

#### Fit the mathematical equation

In [8]:
from sklearn.neighbors import KNeighborsRegressor

# Default hyperparameters; fitted on the full dataset (no train/test split).
model_kn_r = KNeighborsRegressor()
model_kn_r.fit(explanatory, target_numerical)

#### Calculate predictions

In [9]:
# Predicted numerical value for every row the model was fitted on.
model_kn_r.predict(explanatory)

array([-0.75379311, -0.75379311, -0.29096649, ...,  0.0259851 ,
       -0.52048583,  0.27665005])

#### Compare predictions to reality

In [10]:
# Score computed on the same data used for fitting (in-sample).
model_kn_r.score(explanatory, target_numerical)

0.18789010182513333

#### Compare predictions to reality in a `DataFrame`

In [11]:
# Real change next to the KNN regressor's prediction, row by row.
df_pred_regression = target_numerical.to_frame().assign(
    prediction_regression=model_kn_r.predict(explanatory)
)
df_pred_regression

Unnamed: 0_level_0,change_tomorrow,prediction_regression
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2007-01-09,4.567032,-0.753793
2007-01-10,-1.252610,-0.753793
...,...,...
2023-06-28,0.179332,-0.520486
2023-06-29,2.258084,0.276650


#### Join regression and classification `DataFrame`

In [12]:
# Place both comparison tables side by side, aligned on the shared index.
pd.concat(
    objs=[df_pred_classification, df_pred_regression],
    axis='columns',
)

Unnamed: 0_level_0,change_tomorrow_direction,prediction_classification,change_tomorrow,prediction_regression
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2007-01-09,UP,DOWN,4.567032,-0.753793
2007-01-10,DOWN,DOWN,-1.252610,-0.753793
...,...,...,...,...
2023-06-28,UP,UP,0.179332,-0.520486
2023-06-29,UP,DOWN,2.258084,0.276650


## Other algorithms

### Decision Tree

#### Regression 

In [13]:
from sklearn.tree import DecisionTreeRegressor

# Seed the estimator: scikit-learn permutes features at every split, so
# without `random_state` tied splits can produce a different tree per run.
model_dt_r = DecisionTreeRegressor(random_state=42)

model_dt_r.fit(X=explanatory, y=target_numerical)
# Not displayed: a cell only shows the value of its last expression.
model_dt_r.predict(X=explanatory)
# In-sample score (same data as used for fitting).
model_dt_r.score(X=explanatory, y=target_numerical)

1.0

#### Classification

In [14]:
from sklearn.tree import DecisionTreeClassifier

# Seed the estimator: scikit-learn permutes features at every split, so
# without `random_state` tied splits can produce a different tree per run.
model_dt_c = DecisionTreeClassifier(random_state=42)

model_dt_c.fit(X=explanatory, y=target_categorical)
# Not displayed: a cell only shows the value of its last expression.
model_dt_c.predict(X=explanatory)
# In-sample score (same data as used for fitting).
model_dt_c.score(X=explanatory, y=target_categorical)

1.0

### Random Forest

#### Regression 

In [15]:
from sklearn.ensemble import RandomForestRegressor

# Seed the forest: bootstrapping and feature sampling are random, so without
# `random_state` the score and predictions change on every re-run.
model_rf_r = RandomForestRegressor(random_state=42)

model_rf_r.fit(X=explanatory, y=target_numerical)
# Not displayed: a cell only shows the value of its last expression.
model_rf_r.predict(X=explanatory)
# In-sample score (same data as used for fitting).
model_rf_r.score(X=explanatory, y=target_numerical)

0.8441657364976928

#### Classification

In [16]:
from sklearn.ensemble import RandomForestClassifier

# Seed the forest: bootstrapping and feature sampling are random, so without
# `random_state` the score and predictions change on every re-run.
model_rf_c = RandomForestClassifier(random_state=42)

model_rf_c.fit(X=explanatory, y=target_categorical)
# Not displayed: a cell only shows the value of its last expression.
model_rf_c.predict(X=explanatory)
# In-sample score (same data as used for fitting).
model_rf_c.score(X=explanatory, y=target_categorical)

1.0

## Compare all models in a `DataFrame`

### Regression models

In [17]:
list_model_regression = [model_kn_r, model_dt_r, model_rf_r]
# Materialise the names as a list: a lazy `map` object is exhausted after a
# single pass and would be left behind as an empty iterator for later cells.
list_model_regression_names = [type(model).__name__ for model in list_model_regression]
list_model_regression_predictions = [model.predict(X=explanatory) for model in list_model_regression]

# One column per model, built directly on the original index (no transpose).
df_pred_regression = pd.DataFrame(
    dict(zip(list_model_regression_names, list_model_regression_predictions)),
    index=df.index)

# Put the real values first so each prediction can be read against them.
df_pred_regression.insert(0, 'target_numerical', target_numerical)
df_pred_regression

Unnamed: 0_level_0,target_numerical,KNeighborsRegressor,DecisionTreeRegressor,RandomForestRegressor
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2007-01-09,4.567032,-0.753793,4.567032,3.257692
2007-01-10,-1.252610,-0.753793,-1.252610,-0.499195
...,...,...,...,...
2023-06-28,0.179332,-0.520486,0.179332,0.372556
2023-06-29,2.258084,0.276650,2.258084,1.840134


### Classification models

In [18]:
list_model_classification = [model_kn_c, model_dt_c, model_rf_c]
# Materialise the names as a list: a lazy `map` object is exhausted after a
# single pass and would be left behind as an empty iterator for later cells.
list_model_classification_names = [type(model).__name__ for model in list_model_classification]
list_model_classification_predictions = [model.predict(X=explanatory) for model in list_model_classification]

# One column per model, built directly on the original index (no transpose).
df_pred_classification = pd.DataFrame(
    dict(zip(list_model_classification_names, list_model_classification_predictions)),
    index=df.index)

# Put the real labels first so each prediction can be read against them.
df_pred_classification.insert(0, 'target_categorical', target_categorical)
df_pred_classification

Unnamed: 0_level_0,target_categorical,KNeighborsClassifier,DecisionTreeClassifier,RandomForestClassifier
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2007-01-09,UP,DOWN,UP,UP
2007-01-10,DOWN,DOWN,DOWN,DOWN
...,...,...,...,...
2023-06-28,UP,UP,UP,UP
2023-06-29,UP,DOWN,UP,UP
