# In [2]:
import pandas as pd
import yfinance as yf
import pyarrow.parquet as pq
import pyarrow as pa
from scipy.stats import norm
import numpy as np
from datetime import datetime
import requests

def get_risk_free_rate():
    """Return the risk-free rate used for pricing, as a decimal fraction.

    The value is currently a fixed placeholder (0.044, i.e. 4.4%).
    """
    # TODO: replace the placeholder with a live lookup, e.g. an HTTP request
    # to a rate service that returns JSON containing a 'rate' field.
    placeholder_rate = 0.044
    return placeholder_rate

def black_scholes_greeks(S, K, T, r, sigma, option_type='call'):
    """Compute Black-Scholes vega and theta for a European option.

    The formulas contain no dividend term. All arithmetic goes through
    NumPy/SciPy, so scalars and equally-shaped arrays are both accepted.

    Args:
        S: Spot price of the underlying.
        K: Strike price.
        T: Time to expiry, in the same time unit that ``r`` and ``sigma``
            are quoted in.
        r: Risk-free rate as a decimal fraction.
        sigma: Volatility as a decimal fraction.
        option_type: ``'call'`` selects the call theta; any other value is
            treated as a put.

    Returns:
        Tuple ``(vega, theta)``. Vega is identical for calls and puts; theta
        is expressed per unit of ``T``.
    """
    root_t = np.sqrt(T)
    d1 = (np.log(S / K) + (r + 0.5 * sigma ** 2) * T) / (sigma * root_t)
    d2 = d1 - sigma * root_t

    density = norm.pdf(d1)
    discounted_carry = r * K * np.exp(-r * T)

    # Vega and the time-decay term do not depend on the option type.
    vega = S * density * root_t
    time_decay = - (S * density * sigma) / (2 * root_t)

    if option_type == 'call':
        theta = time_decay - discounted_carry * norm.cdf(d2)
    else:
        theta = time_decay + discounted_carry * norm.cdf(-d2)

    return vega, theta

def pull_options_data(assets, start_date, end_date):
    """Download the option chains for ``assets`` and return one combined frame.

    For each ticker, every expiration listed by ``ticker.options`` is fetched
    and its calls and puts are annotated with bookkeeping columns
    ('Asset', 'Expiration', 'Option Type', 'Volatility', 'Risk-Free Rate',
    'Dividend Yield', 'date', 'Option Index') before being stacked.

    Args:
        assets: Iterable of ticker symbols.
        start_date: Currently unused; accepted so existing callers keep working.
        end_date: Currently unused; accepted so existing callers keep working.

    Returns:
        A DataFrame with one row per contract. An empty DataFrame is returned
        when nothing was downloaded (instead of ``pd.concat`` raising on an
        empty list).
    """
    # Loop-invariant values: resolve them once so every row of a run shares
    # the same rate and snapshot date, and a future live rate lookup is not
    # repeated for every chain.
    risk_free_rate = get_risk_free_rate()
    snapshot_date = datetime.now().strftime('%Y-%m-%d')

    frames = []
    for asset in assets:
        ticker = yf.Ticker(asset)
        for expiration in ticker.options:
            chain = ticker.option_chain(expiration)
            expiry_code = expiration.replace('-', '')
            for option_type in ['calls', 'puts']:
                df = getattr(chain, option_type)
                if df.empty:
                    # Nothing to annotate for this side of the chain.
                    continue
                df['Asset'] = asset
                df['Expiration'] = expiration
                df['Option Type'] = option_type[:-1]  # 'calls' -> 'call', 'puts' -> 'put'
                df['Volatility'] = df['impliedVolatility']
                df['Risk-Free Rate'] = risk_free_rate
                df['Dividend Yield'] = 0.02  # Placeholder, replace with actual dividend yield
                df['date'] = snapshot_date

                # Strike in thousandths, zero-padded to 8 digits. Round before
                # the integer cast: plain truncation turns float products such
                # as 2.3 * 1000 == 2299.9999999999995 into 2299.
                strike_code = (
                    (df['strike'] * 1000).round().astype('int64').astype(str).str.zfill(8)
                )
                prefix = f"{asset}{expiry_code}{option_type[0].upper()}"
                df['Option Index'] = prefix + strike_code

                # reset_index() (without drop) keeps the per-chain row position
                # as an 'index' column, preserving the existing output schema.
                frames.append(df.reset_index())

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

def save_to_parquet(df, file_path):
    """Write ``df`` to ``file_path`` as a Parquet file.

    Args:
        df: DataFrame to persist.
        file_path: Destination. When it is a filesystem path, missing parent
            directories are created first so a fresh checkout without the
            output directory does not fail on write.
    """
    import os  # local import: only needed for the directory check below

    if isinstance(file_path, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(file_path))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    table = pa.Table.from_pandas(df)
    pq.write_table(table, file_path)

def calculate_greeks(df, spot_col='lastPrice'):
    """Compute vega, theta and moneyness for every contract in ``df``.

    Args:
        df: Frame with the columns 'strike', 'Expiration', 'date',
            'Risk-Free Rate', 'Volatility', 'Option Type', 'Option Index',
            'Asset' and the column named by ``spot_col``.
        spot_col: Column used as the underlying spot price ``S``. Defaults to
            'lastPrice' for backward compatibility.
            NOTE(review): in a yfinance option chain 'lastPrice' appears to be
            the contract's own last traded price (the premium), not the
            underlying's price - confirm, and pass a column holding the
            underlying spot if so.

    Returns:
        DataFrame with columns 'Option Index', 'Asset', 'Option Type',
        'Moneyness', 'Strike', 'Maturity', 'Vega', 'Theta', one row per input
        row in the same order. The columns are present even for empty input.
    """
    columns = ['Option Index', 'Asset', 'Option Type', 'Moneyness',
               'Strike', 'Maturity', 'Vega', 'Theta']
    if df.empty:
        return pd.DataFrame(columns=columns)

    spot = df[spot_col].to_numpy(dtype=float)
    strike = df['strike'].to_numpy(dtype=float)
    rate = df['Risk-Free Rate'].to_numpy(dtype=float)
    vol = df['Volatility'].to_numpy(dtype=float)

    # Parse the date columns once for the whole frame rather than per row.
    days_left = (pd.to_datetime(df['Expiration']) - pd.to_datetime(df['date'])).dt.days
    years_left = days_left.to_numpy(dtype=float) / 365

    # Anything that is not a 'call' is priced as a put, matching the pricer.
    is_call = (df['Option Type'] == 'call').to_numpy()

    vega = np.full(len(df), np.nan)
    theta = np.full(len(df), np.nan)
    for rows, option_type in ((is_call, 'call'), (~is_call, 'put')):
        if rows.any():
            vega[rows], theta[rows] = black_scholes_greeks(
                spot[rows], strike[rows], years_left[rows],
                rate[rows], vol[rows], option_type,
            )

    # Calls are in the money above the strike, puts below it; exact equality
    # is at the money, everything else (including NaN prices) is OTM.
    in_the_money = np.where(is_call, spot > strike, spot < strike)
    moneyness = np.where(spot == strike, 'ATM',
                         np.where(in_the_money, 'ITM', 'OTM'))

    return pd.DataFrame({
        'Option Index': df['Option Index'].to_numpy(),
        'Asset': df['Asset'].to_numpy(),
        'Option Type': df['Option Type'].to_numpy(),
        'Moneyness': moneyness,
        'Strike': df['strike'].to_numpy(),
        'Maturity': df['Expiration'].to_numpy(),
        'Vega': vega,
        'Theta': theta,
    }, columns=columns)

def main():
    """Download option chains, persist them, then compute and persist Greeks.

    Both outputs are written as Parquet files under ``./data`` and a
    confirmation line is printed after each write.
    """
    tickers = ['AAPL', 'GOOGL', 'MSFT']
    window_start, window_end = '2023-01-01', '2023-12-31'
    options_file_path = './data/options_data.parquet'
    greeks_file_path = './data/greeks_data.parquet'

    # Stage 1: raw option chains.
    chains = pull_options_data(tickers, window_start, window_end)
    save_to_parquet(chains, options_file_path)
    print(f"Options data saved to {options_file_path}")

    # Stage 2: Greeks derived from the downloaded chains.
    greeks = calculate_greeks(chains)
    save_to_parquet(greeks, greeks_file_path)
    print(f"Greeks data saved to {greeks_file_path}")

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

# Output:
# Options data saved to options_data.parquet
# Greeks data saved to greeks_data.parquet
