In [151]:
import pandas as pd
import numpy as np
from datetime import timedelta
import datetime
from dateutil.parser import parse
import yfinance as yf
from math import log, sqrt, pi, exp
from scipy.stats import norm
import seaborn as sns
from sklearn.ensemble import IsolationForest
import matplotlib.pyplot as plt

Assumptions for the model -

1. For time period if the options are traded on same day then time of 1 day is taken (max difference of 0.002)
2. If spot price is not present then we have taken average of close, open, high and low of stock price. (max change of 5%)
3. We have used constant risk rate of interest. (max change of 0.2% variation in a year)


In [8]:
df = pd.read_csv('Adani_Enterprises.csv')
end_date = df['Date'][df.shape[0]-1]
start_date = datetime.datetime.strptime(end_date, '%Y-%m-%d') - timedelta(365)
start_date = start_date.strftime('%Y-%m-%d')
index = df[df['Date']==start_date].index[0]
df1= df[index:]
ret = df1['Close'].pct_change()[1:]
daily_vol = ret.std()
vol = daily_vol*252**0.5

In [11]:
df2 = pd.read_csv('Adani_Options.csv')
df2.head()

Unnamed: 0,Symbol,Date,Expiry,Option Type,Strike Price,Open,High,Low,Close,LTP,Settle Price,No. of contracts,Turnover in Lacs,Premium Turnover in Lacs,Open Int,Change in OI,Underlying Value
0,ADANIENT,25-Jun-2020,25-Jun-2020,CE,107.5,0.0,0.0,0.0,35.85,0.0,161.65,0,0.0,0.0,0,0,161.65
1,ADANIENT,25-Jun-2020,25-Jun-2020,CE,105.0,0.0,0.0,0.0,49.75,0.0,161.65,0,0.0,0.0,0,0,161.65
2,ADANIENT,25-Jun-2020,25-Jun-2020,CE,112.5,0.0,0.0,0.0,31.15,0.0,161.65,0,0.0,0.0,0,0,161.65
3,ADANIENT,25-Jun-2020,25-Jun-2020,CE,115.0,0.0,0.0,0.0,44.2,0.0,161.65,0,0.0,0.0,0,0,161.65
4,ADANIENT,25-Jun-2020,25-Jun-2020,CE,117.5,0.0,0.0,0.0,26.7,0.0,161.65,0,0.0,0.0,0,0,161.65


In [12]:
time=[]

for i in range(df2.shape[0]):
    if (parse(df2['Expiry'][i]) - parse(df2['Date'][i])).days == 0:
        time.append(1/365)
    else:
        time.append(((parse(df2['Expiry'][i]) - parse(df2['Date'][i])).days)/365)
    
df2['Time'] = time

In [None]:
df = yf.download("ADANIENT.NS", start="2020-01-01", end="2021-05-20",group_by="ticker") 
df.reset_index(inplace=True)

for i in range(df2.shape[0]):
    if df2['Underlying Value'][i] == '-':
        try:
            s=df[str(parse(df2['Date'][i]).date().strftime('%d-%m-%Y')) == df['Date']]
            df2['Stock Price'][i] = float((s.Close.item() + s.Open.item() + s.High.item() + s.Low.item())/4)
        except:
            df2['Stock Price'][i] = float(0.0)
            continue
    else:
        df2['Stock Price'][i] = float(df2['Underlying Value'][i])

In [89]:
df2 = df2[df2['Stock Price'] !=0]

In [18]:
r = 0.06   # risk free rate

## Black-Scholes Model

\begin{equation*}
d1 = (ln(S/K) + (r+σ^2/2)t)/σ\sqrt{t}\\
d2 = d1 - σ\sqrt{t}
\end{equation*}

C : call option price<br/>
P : Put option price<br/>
S : Stock price<br/>
K : Strike price<br/>
r : risk free rate<br/>
t : time of maturity in year<br/>
N = Normal distribution

Delta: δ measures how much the option price would change in relation to changes in the underlying stock price. A delta of 0.5 means the option would change 50 cents for every 1 dollar the stock moves (δ is the first derivative of the price).

Gamma: γ measures how fast the δ will change when the stock price changes. A high number means this is a very ‘active’ option, and could gain or loss value quickly (this is the second derivative of the price).

In [98]:
df2.reset_index(drop=True, inplace=True)
for i in range(df2.shape[0]):
    df2['d1'][i] = (np.log(df2['Stock Price'][i]/df2['Strike Price'][i])+(r+vol**2/2.)*df2['Time'][i])/vol*np.sqrt(df2['Time'][i])
    df2['d2'][i] = df2['d1'][i]-vol*np.sqrt(df2['Time'][i])

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [118]:
df2['delta'] = norm.cdf(df2['d1'])
df2['gamma'] = norm.pdf(df2['d1'])/(df2['Stock Price']*vol*np.sqrt(df2['Time']))
df2['Current_Strike_diff'] = df2['Stock Price'] - df2['Strike Price']

In [122]:
df2.head()

Unnamed: 0,Symbol,Date,Expiry,Option Type,Strike Price,Open,High,Low,Close,LTP,...,Open Int,Change in OI,Underlying Value,Time,Stock Price,d1,d2,delta,gamma,Current_Strike_diff
0,ADANIENT,25-Jun-2020,25-Jun-2020,CE,107.5,0.0,0.0,0.0,35.85,0.0,...,0,0,161.65,0.00274,161.65,0.040751,0.013288,0.516253,0.08979,54.15
1,ADANIENT,25-Jun-2020,25-Jun-2020,CE,105.0,0.0,0.0,0.0,49.75,0.0,...,0,0,161.65,0.00274,161.65,0.043098,0.015635,0.517188,0.089781,56.65
2,ADANIENT,25-Jun-2020,25-Jun-2020,CE,112.5,0.0,0.0,0.0,31.15,0.0,...,0,0,161.65,0.00274,161.65,0.036215,0.008753,0.514445,0.089806,49.15
3,ADANIENT,25-Jun-2020,25-Jun-2020,CE,115.0,0.0,0.0,0.0,44.2,0.0,...,0,0,161.65,0.00274,161.65,0.034023,0.00656,0.513571,0.089813,46.65
4,ADANIENT,25-Jun-2020,25-Jun-2020,CE,117.5,0.0,0.0,0.0,26.7,0.0,...,0,0,161.65,0.00274,161.65,0.031877,0.004415,0.512715,0.089819,44.15


In [132]:
df2.drop(['Symbol', 'Date', 'Expiry', 'Option Type','Open','High', 'Low', 'Close', 'LTP','No. of contracts',
          'Turnover in Lacs', 'Premium Turnover in Lacs', 'Open Int','Change in OI', 'Underlying Value','d1','d2', 
          'Stock Price'],axis=1, inplace=True)

In [133]:
df2.head()

Unnamed: 0,Strike Price,Settle Price,Time,Stock Price,delta,gamma,Current_Strike_diff
0,107.5,161.65,0.00274,161.65,0.516253,0.08979,54.15
1,105.0,161.65,0.00274,161.65,0.517188,0.089781,56.65
2,112.5,161.65,0.00274,161.65,0.514445,0.089806,49.15
3,115.0,161.65,0.00274,161.65,0.513571,0.089813,46.65
4,117.5,161.65,0.00274,161.65,0.512715,0.089819,44.15


In [147]:
clf=IsolationForest(n_estimators=100, max_samples='auto', \
                        max_features=df2.shape[1], bootstrap=False, n_jobs=-1, random_state=42, verbose=0)
clf.fit(df2)
pred = clf.predict(df2)
df=pd.DataFrame()
df= df2
df['anomaly']=pred
outliers=df.loc[df['anomaly']==-1]
outliers.shape[0]

  pickler.file_handle.write(chunk.tostring('C'))


2457

In [144]:
df2['Anomaly'] = pred

In [None]:
plt.plot

In [154]:
df2.to_csv('Adani_Options_Anomaly.csv')