# Anomaly Detection

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

# fix_yahoo_finance is used to fetch data 
import yfinance as yf
yf.pdr_override()

In [13]:
# input
symbol = 'TCS.NS'
start = '2007-01-01'
end = '2018-12-28'

# Read data 
dataset = yf.download(symbol,start,end)

# View Columns
dataset.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2007-01-02,307.350006,314.5,304.0,312.125,195.329224,1545060
2007-01-03,313.25,322.0,309.787994,320.037994,200.281219,4384972
2007-01-04,321.25,321.25,312.524994,314.763,196.980026,3970700
2007-01-05,316.25,317.149994,313.274994,315.75,197.597763,2718740
2007-01-08,314.75,314.75,303.75,310.950012,194.593872,4951892


In [14]:
# Create more data
dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)
dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)
dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)
dataset['Return'] = dataset['Adj Close'].pct_change()
dataset = dataset.dropna()
dataset.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Increase_Decrease,Buy_Sell_on_Open,Buy_Sell,Return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2007-01-03,313.25,322.0,309.787994,320.037994,200.281219,4384972,0,1,0,0.025352
2007-01-04,321.25,321.25,312.524994,314.763,196.980026,3970700,0,0,1,-0.016483
2007-01-05,316.25,317.149994,313.274994,315.75,197.597763,2718740,1,0,0,0.003136
2007-01-08,314.75,314.75,303.75,310.950012,194.593872,4951892,0,0,1,-0.015202
2007-01-09,311.975006,315.462006,306.312012,314.100006,196.565216,4020568,0,0,0,0.010131


In [15]:
dataset.shape

(2948, 10)

In [16]:
X = dataset[['Open', 'High', 'Low', 'Volume']].values
y = dataset['Buy_Sell'].values

In [17]:
from sklearn import metrics
from sklearn.model_selection import train_test_split

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [19]:
from sklearn.ensemble import IsolationForest

model = IsolationForest()

In [20]:
model.fit(X_test)

IsolationForest(bootstrap=False, contamination=0.1, max_features=1.0,
        max_samples='auto', n_estimators=100, n_jobs=1, random_state=None,
        verbose=0)

In [21]:
y_pred = model.predict(X_test)

In [22]:
y_pred

array([ 1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,
        1,  1,  1, -1,  1, -1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1, -1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1, -1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1, -1,  1, -1,  1,  1,  1,
        1,  1, -1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1, -1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1, -1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,  1,  1,  1,  1,
        1, -1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,  1,  1,
       -1,  1,  1,  1,  1

In [23]:
print('Anomaly Detection Score:') 
sklearn_score_anomalies = model.decision_function(X_test)
score = [-1*s + 0.5 for s in sklearn_score_anomalies]
print(score)

Anomaly Detection Score:
[0.4679160812006987, 0.5701248838319002, 0.47906664069218496, 0.4514877564016711, 0.4503077088488223, 0.45672692455698577, 0.5242171021345607, 0.45888693094016353, 0.4533294012718132, 0.44927881330987074, 0.4857028954748148, 0.576310074525217, 0.4372678211841018, 0.4197848298849878, 0.4631524933151848, 0.4430200919479935, 0.44791367490782635, 0.45178921525269206, 0.4605162443659075, 0.5096288617961426, 0.4113478248680753, 0.45566944307152857, 0.5019071622931114, 0.4160736880454377, 0.5113678825420678, 0.418177567423355, 0.4257173474024783, 0.561872401824704, 0.4195865132592607, 0.4537879723442949, 0.48864906991543466, 0.44485548085841403, 0.45679392046671535, 0.44482689933361674, 0.4253306033586121, 0.44083965915268114, 0.5298610579793895, 0.43146089192352505, 0.4404913715878247, 0.47585195291510196, 0.44092360809867365, 0.43329102735693675, 0.4287651075911705, 0.4118793836400282, 0.576482252655612, 0.4822627591114662, 0.4201307470334096, 0.42384348232583147, 0