# Stock Options Analysis

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os 
import yfinance as yf
sns.set_style("whitegrid")

## Data

Let us try to study the Black-Scholes Model, which require the information

i. ***Stock Price (S)***: the corresponding stock prices for each option.\

ii.***Strike Price (K)***: data under the column strike_price.

iii. ***Time to Expiry (T)***: subtracting the current date from the expiration date (exdate). Convert the resulting timedelta to years.

iv. ***Risk-free Interest Rate (r)***: determine an appropriate risk-free interest rate to use in the model. 

v. ***Volatility (σ)***: implied volatility, available in your data. 

### Load options data and download stock data

In [15]:
current_dir = os.getcwd()
# print("Current directory:", current_dir)

# Read the CSV file into a DataFrame
options_data = pd.read_csv('../data/AMD_options_data.csv')

# Check if stock data file exists
stock_data_file = '../data/AMD_stock_data.csv'
if os.path.exists(stock_data_file):
    # If file exists, load it directly
    stock_data = pd.read_csv(stock_data_file)
else:
    # Download stock data
    ticker_symbol = 'AMD'
    start_date = options_data['date'].min()
    end_date = (pd.to_datetime(options_data['date'].max()) + pd.Timedelta(days=1)).date()

    stock_data = yf.download(ticker_symbol, start=start_date, end=end_date)

    # Save stock data
    stock_data.to_csv(stock_data_file, index=True)

print(options_data.info())
print(stock_data.info())

  options_data = pd.read_csv('../data/AMD_options_data.csv')


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1386118 entries, 0 to 1386117
Data columns (total 38 columns):
 #   Column            Non-Null Count    Dtype  
---  ------            --------------    -----  
 0   secid             1386118 non-null  int64  
 1   date              1386118 non-null  object 
 2   symbol            1386118 non-null  object 
 3   symbol_flag       1386118 non-null  int64  
 4   exdate            1386118 non-null  object 
 5   last_date         1263749 non-null  object 
 6   cp_flag           1386118 non-null  object 
 7   strike_price      1386118 non-null  int64  
 8   best_bid          1386118 non-null  float64
 9   best_offer        1386118 non-null  float64
 10  volume            1386118 non-null  int64  
 11  open_interest     1386118 non-null  int64  
 12  impl_volatility   1166644 non-null  float64
 13  delta             1166644 non-null  float64
 14  gamma             1166644 non-null  float64
 15  vega              1166644 non-null  float64
 16  

### Marge data

In [16]:

# Select relevant columns
selected_columns = ['date', 'exdate', 'cp_flag', 'strike_price', 'best_bid', 'best_offer', 
                    'impl_volatility', 'delta', 'gamma', 'vega', 'theta']

options_data = options_data[selected_columns].copy()
options_data['date'] = pd.to_datetime(options_data['date']).dt.date
print(options_data.info())

stock_data = stock_data[['Date', 'Adj Close', 'Volume']].copy()
stock_data = stock_data.rename(columns={'Date':'date'})
stock_data['date'] = pd.to_datetime(stock_data['date']).dt.date
print(stock_data.info())

merged_data_file = '../data/AMD_merged_data.csv'
if os.path.exists(merged_data_file):
    merged_data = pd.read_csv(merged_data_file)
else:
    # Merge datasets based on date using inner join
    merged_data = pd.merge(options_data, stock_data, on='date')
    merged_data.to_csv('../data/AMD_merged_data.csv', index=False)


print(merged_data.info())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1386118 entries, 0 to 1386117
Data columns (total 11 columns):
 #   Column           Non-Null Count    Dtype  
---  ------           --------------    -----  
 0   date             1386118 non-null  object 
 1   exdate           1386118 non-null  object 
 2   cp_flag          1386118 non-null  object 
 3   strike_price     1386118 non-null  int64  
 4   best_bid         1386118 non-null  float64
 5   best_offer       1386118 non-null  float64
 6   impl_volatility  1166644 non-null  float64
 7   delta            1166644 non-null  float64
 8   gamma            1166644 non-null  float64
 9   vega             1166644 non-null  float64
 10  theta            1166644 non-null  float64
dtypes: float64(7), int64(1), object(3)
memory usage: 116.3+ MB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 252 entries, 0 to 251
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   date 

In [17]:
print(merged_data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1386118 entries, 0 to 1386117
Data columns (total 13 columns):
 #   Column           Non-Null Count    Dtype  
---  ------           --------------    -----  
 0   date             1386118 non-null  object 
 1   exdate           1386118 non-null  object 
 2   cp_flag          1386118 non-null  object 
 3   strike_price     1386118 non-null  int64  
 4   best_bid         1386118 non-null  float64
 5   best_offer       1386118 non-null  float64
 6   impl_volatility  1166644 non-null  float64
 7   delta            1166644 non-null  float64
 8   gamma            1166644 non-null  float64
 9   vega             1166644 non-null  float64
 10  theta            1166644 non-null  float64
 11  Adj Close        1386118 non-null  float64
 12  Volume           1386118 non-null  int64  
dtypes: float64(8), int64(2), object(3)
memory usage: 137.5+ MB
None


### Clean data

In [18]:
data = merged_data.dropna().copy()

# Convert date columns to datetime
data['date'] = pd.to_datetime(data['date'])
data['exdate'] = pd.to_datetime(data['exdate'])


# Example of outlier removal for numeric columns (you can adjust the threshold based on your data)
numeric_columns = ['strike_price', 'best_bid', 'best_offer', 'impl_volatility', 'delta', 'gamma', 'vega', 'theta', 'Adj Close', 'Volume']
for col in numeric_columns:
    Q1 = merged_data[col].quantile(0.25)
    Q3 = merged_data[col].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    outlier_data = merged_data[(merged_data[col] >= lower_bound) & (merged_data[col] <= upper_bound)]

# Check the cleaned data
print(outlier_data.info())

<class 'pandas.core.frame.DataFrame'>
Index: 1362240 entries, 0 to 1386117
Data columns (total 13 columns):
 #   Column           Non-Null Count    Dtype  
---  ------           --------------    -----  
 0   date             1362240 non-null  object 
 1   exdate           1362240 non-null  object 
 2   cp_flag          1362240 non-null  object 
 3   strike_price     1362240 non-null  int64  
 4   best_bid         1362240 non-null  float64
 5   best_offer       1362240 non-null  float64
 6   impl_volatility  1146412 non-null  float64
 7   delta            1146412 non-null  float64
 8   gamma            1146412 non-null  float64
 9   vega             1146412 non-null  float64
 10  theta            1146412 non-null  float64
 11  Adj Close        1362240 non-null  float64
 12  Volume           1362240 non-null  int64  
dtypes: float64(8), int64(2), object(3)
memory usage: 145.5+ MB
None


In [19]:
outlier_data.describe()

Unnamed: 0,strike_price,best_bid,best_offer,impl_volatility,delta,gamma,vega,theta,Adj Close,Volume
count,1362240.0,1362240.0,1362240.0,1146412.0,1146412.0,1146412.0,1146412.0,1146412.0,1362240.0,1362240.0
mean,163891.5,36.87146,38.25431,0.6528334,-0.009500788,0.009288305,16.46931,-18.83926,84.79102,84933060.0
std,109593.1,59.91825,61.12931,0.3339461,0.5884128,0.01368124,21.52126,32.05152,16.42251,26111190.0
min,5000.0,0.0,0.01,0.08018,-0.999928,0.0,0.000516,-1014.567,55.94,26018900.0
25%,85000.0,0.85,1.19,0.48112,-0.426511,0.001694,2.104176,-21.16417,72.45,66557800.0
50%,130000.0,10.95,11.75,0.555801,0.001512,0.004522,7.876283,-9.864411,84.64,83493600.0
75%,215000.0,46.15,48.45,0.6948887,0.3884873,0.010945,22.1693,-3.971274,96.93,100609400.0
max,600000.0,485.3,491.55,2.999406,0.999996,0.356821,153.6658,8.13967,123.34,153129800.0


In [20]:
data.describe()

Unnamed: 0,date,exdate,strike_price,best_bid,best_offer,impl_volatility,delta,gamma,vega,theta,Adj Close,Volume
count,1166644,1166644,1166644.0,1166644.0,1166644.0,1166644.0,1166644.0,1166644.0,1166644.0,1166644.0,1166644.0,1166644.0
mean,2022-08-19 11:53:22.749768448,2023-02-01 21:27:38.430850816,159062.9,29.72666,30.89889,0.6532276,-0.0109308,0.009271234,16.46618,-18.84714,84.75904,86393470.0
min,2022-02-28 00:00:00,2022-03-04 00:00:00,5000.0,0.0,0.01,0.08018,-0.999928,0.0,0.000516,-1014.567,55.94,26018900.0
25%,2022-05-18 00:00:00,2022-08-26 00:00:00,83000.0,0.63,0.8,0.481462,-0.4304412,0.001695,2.109112,-21.16794,72.45,66557800.0
50%,2022-08-15 00:00:00,2022-12-30 00:00:00,125000.0,7.85,8.4,0.556272,0.00149,0.004514,7.883527,-9.868769,84.64,83806700.0
75%,2022-11-16 00:00:00,2023-04-21 00:00:00,205000.0,35.05,36.8,0.6954575,0.3862302,0.010918,22.19063,-3.972248,96.93,101672600.0
max,2023-02-28 00:00:00,2025-06-20 00:00:00,600000.0,482.5,486.55,2.999406,0.999996,0.356821,153.6658,8.13967,123.34,225394100.0
std,,,107353.2,51.61338,52.85049,0.3339506,0.5886337,0.01365108,21.4892,32.03529,16.35851,29005680.0


In [6]:
data.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1386118 entries, 0 to 1386117
Data columns (total 13 columns):
 #   Column           Non-Null Count    Dtype         
---  ------           --------------    -----         
 0   date             1386118 non-null  datetime64[ns]
 1   exdate           1386118 non-null  datetime64[ns]
 2   cp_flag          1386118 non-null  object        
 3   strike_price     1386118 non-null  int64         
 4   best_bid         1386118 non-null  float64       
 5   best_offer       1386118 non-null  float64       
 6   impl_volatility  1166644 non-null  float64       
 7   delta            1166644 non-null  float64       
 8   gamma            1166644 non-null  float64       
 9   vega             1166644 non-null  float64       
 10  theta            1166644 non-null  float64       
 11  Adj Close        1386118 non-null  float64       
 12  Volume           1386118 non-null  int64         
dtypes: datetime64[ns](2), float64(8), int64(2), object(1)
mem

In [22]:
data.head()

Unnamed: 0,date,exdate,cp_flag,strike_price,best_bid,best_offer,impl_volatility,delta,gamma,vega,theta,Adj Close,Volume,dte,moneyness
0,2022-02-28,2022-03-04,P,143000,18.3,21.7,0.919996,-0.931618,0.011129,1.700479,-71.04977,123.339996,124666100,4,0.000863
1,2022-02-28,2022-03-04,P,144000,19.65,22.55,1.009123,-0.921234,0.011298,1.898582,-86.91409,123.339996,124666100,4,0.000857
2,2022-02-28,2022-03-04,P,145000,20.45,23.7,1.0292,-0.926204,0.010543,1.805097,-84.34332,123.339996,124666100,4,0.000851
3,2022-02-28,2022-03-04,P,146000,21.3,24.75,1.032943,-0.933979,0.009645,1.653858,-77.66636,123.339996,124666100,4,0.000845
4,2022-02-28,2022-03-04,P,147000,23.5,25.75,1.347105,-0.879729,0.011528,2.587893,-158.5352,123.339996,124666100,4,0.000839


In [21]:

# Step 3: Feature Engineering, compute days to expiration
data['dte'] = (data['exdate'] - data['date']).dt.days
data['moneyness'] = data['Adj Close'] / data['strike_price']



In [None]:
# Convert date column to datetime if not already in datetime format
merged_data['date'] = pd.to_datetime(merged_data['date'])

# Filter rows for the end of February 28, 2022, and the beginning of March 1, 2022
filtered_rows = merged_data[(merged_data['date'] == '2022-02-28 23:59:59') | (merged_data['date'] == '2022-03-01 00:00:00')]

print(filtered_rows)


In [8]:
import yfinance as yf

# Define the stock symbol for which you want to fetch options data
stock_symbol = 'AMD'  # Example: Apple Inc.

# Create a Ticker object for the stock symbol
ticker = yf.Ticker(stock_symbol)

# Get options data
options_data = ticker.options

# Print the available expiration dates for options
print("Available expiration dates for options:")
print(options_data)


Available expiration dates for options:
('2024-04-05', '2024-04-12', '2024-04-19', '2024-04-26', '2024-05-03', '2024-05-10', '2024-05-17', '2024-05-24', '2024-06-21', '2024-07-19', '2024-08-16', '2024-09-20', '2024-10-18', '2024-11-15', '2024-12-20', '2025-01-17', '2025-03-21', '2025-06-20', '2025-08-15', '2025-10-17', '2025-11-21', '2025-12-19', '2026-01-16', '2026-06-18', '2026-12-18')


In [None]:
df.info()

In [None]:
df2.info()

In [None]:
df.head()

In [None]:
df.describe()

In [None]:
df.info()

In [None]:
df.head()

In [None]:
import yfinance as yf

data = yf.download('AMD', start=df['date'].min(), end=df['date'].max())
# Print data
data

In [None]:
df['date'].min()

In [None]:
df['date'].max()

In [None]:
import math

import numpy as np
import QuantLib as ql


In [None]:
import sys
print(sys.path)


In [None]:
import pkg_resources

# Get a list of all installed packages
installed_packages = pkg_resources.working_set

# Print each package name
for package in installed_packages:
    print(package)


In [None]:
import sys

# Print out all packages in the current Anaconda environment
for line in sys.path:
    print(line)
