In [85]:
import matplotlib 
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
import pandas as pd
import plotly.graph_objects as go
from sklearn.ensemble import IsolationForest

In [33]:
# Read the sales records from the CSV file (path is relative to the notebook's working directory).
sales_csv = 'ShampooSale.csv'
df = pd.read_csv(sales_csv)

In [34]:
# Discard every row that contains at least one missing value.
# Note: the surviving rows keep their original index labels.
df = df.dropna()

In [35]:
# Parse the 'Datetime' column into pandas datetimes and keep the result in a
# separate 'datetime_col' column, leaving the source column as it was read.
df = df.assign(datetime_col=pd.to_datetime(df['Datetime']))

In [36]:
# Preview the first five rows to sanity-check the load and the parsed dates.
df.head(n=5)

Unnamed: 0,Datetime,Sales,datetime_col
0,2018-01-01,2660,2018-01-01
1,2018-02-01,1459,2018-02-01
2,2018-03-01,1831,2018-03-01
3,2018-04-01,1193,2018-04-01
4,2018-05-01,1803,2018-05-01


In [87]:
# Line chart of the raw sales series over time, before any outlier detection.
sales_line = go.Scatter(
    x=df['datetime_col'],
    y=df['Sales'],
    mode='lines+markers',
    name='Sales',
)
fig0 = go.Figure(data=[sales_line])
fig0.show()

## Fit a Model to Identify Outliers

In [58]:
# Fit an Isolation Forest on the single 'Sales' feature.
# - contamination=0.05 tells the model to treat roughly 5% of the rows as outliers.
# - random_state pins the forest's internal randomness; without it the flagged
#   rows and anomaly scores can differ on every "Restart & Run All".
model = IsolationForest(contamination=0.05, random_state=42)
model.fit(df[['Sales']])

IsolationForest(contamination=0.05)

In [59]:
# Label each row with the fitted model: -1 marks an outlier, 1 marks an inlier.
model.predict(df.loc[:, ['Sales']])

array([ 1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,  1,
        1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1, -1,  1,
        1,  1])

In [88]:
# Raw anomaly score for each row; the lower (more negative) the score,
# the more abnormal the model considers that row.
model.score_samples(df.loc[:, ['Sales']])

array([-0.4359473 , -0.48344067, -0.41342791, -0.5410684 , -0.43212101,
       -0.46524918, -0.4670131 , -0.45365542, -0.41588917, -0.53387337,
       -0.45643702, -0.4126814 , -0.42792225, -0.4787083 , -0.75937848,
       -0.44889173, -0.41811719, -0.43828816, -0.44594893, -0.45767906,
       -0.4407094 , -0.4687558 , -0.44057957, -0.46119348, -0.44560071,
       -0.45560644, -0.46738459, -0.446988  , -0.48254017, -0.4531014 ,
       -0.57236008, -0.46762021, -0.68499858, -0.55720481, -0.5761015 ,
       -0.64263994])

In [89]:
# Attach the model's verdict and anomaly score to each row.
#
# The results are assigned as plain NumPy arrays on purpose: array assignment
# is positional, so row i of the frame always receives prediction i. Wrapping
# the predictions in a new pd.Series would give them a fresh 0..n-1 index, and
# pandas would then align on index labels -- silently mislabelling rows (and
# leaving NaN at the end) whenever the frame's index has gaps, e.g. after
# dropna() has removed rows.
sales_features = df[['Sales']]
df['outliers'] = np.where(model.predict(sales_features) == -1, 'yes', 'no')
df['score'] = model.score_samples(sales_features)

In [92]:
# Show only the rows that the model flagged as outliers.
df.loc[df['outliers'] == 'yes']

Unnamed: 0,Datetime,Sales,datetime_col,outliers,score
14,2019-03-01,50,2019-03-01,yes,-0.759378
32,2020-09-01,6820,2020-09-01,yes,-0.684999


In [93]:
# Histogram of the anomaly scores, to see how they are distributed across rows.
fig1 = go.Figure()
fig1.add_trace(go.Histogram(x=df['score']))
fig1.show()

## Create Plot with Outliers

In [86]:
# Sales over time with the flagged outliers drawn on top as separate markers.
is_outlier = df['outliers'] == 'yes'

sales_trace = go.Scatter(
    x=df['datetime_col'],
    y=df['Sales'],
    mode='lines+markers',
    name='Sales',
)
outlier_trace = go.Scatter(
    x=df.loc[is_outlier, 'datetime_col'],
    y=df.loc[is_outlier, 'Sales'],
    mode='markers',
    name='Outlier',
)

fig2 = go.Figure(data=[sales_trace, outlier_trace])
fig2.show()