# Skew Generation File

This file generates the skews of the Volsurface for predicting equity returns.

Please do not rerun, as it would create some new file paths.

In [1]:
import numpy as np
import pandas as pd

# Assuming the data is already loaded into a DataFrame called 'df'
df = pd.read_csv('VNO_option_data.csv')

# Define a function to find the closest delta value
def find_closest_delta(df, target_delta):
    return df.iloc[(df['delta'] - target_delta).abs().argsort()[:1]]

# Initialize list to store results
results = []

# Group by TICKER and date
grouped = df.groupby(['ticker', 'date'])

for (ticker, date), group in grouped:
    # Filter calls and puts
    call_atm = find_closest_delta(group[(group['cp_flag'] == 'C')], 50)
    put_atm = find_closest_delta(group[(group['cp_flag'] == 'P')], -50)
    
    call_25 = find_closest_delta(group[(group['cp_flag'] == 'C')], 25)
    put_25 = find_closest_delta(group[(group['cp_flag'] == 'P')], -25)

    # Calculate ATM IV as average of call and put near delta 50 and -50
    atm_iv = (call_atm['impl_volatility'].values[0] + put_atm['impl_volatility'].values[0]) / 2
    
    # Calculate RR (Risk Reversal) as Call IV (near delta 25) - Put IV (near delta -25)
    rr = call_25['impl_volatility'].values[0] - put_25['impl_volatility'].values[0]
    
    # Append results
    results.append([ticker, date, atm_iv, rr])

# Create the resulting DataFrame
result_df = pd.DataFrame(results, columns=['TICKER', 'date', 'ATM IV', 'RR'])

# Display the resulting DataFrame
result_df.head()


Unnamed: 0,TICKER,date,ATM IV,RR
0,VNO,2002-08-02,0.390422,-0.038674
1,VNO,2002-08-05,0.389641,-0.074775
2,VNO,2002-08-06,0.381769,-0.102353
3,VNO,2002-08-07,0.398877,-0.07672
4,VNO,2002-08-08,0.398386,-0.069558


In [2]:
len(result_df)

5307

In [6]:
import os
import numpy as np
import pandas as pd

# Define a function to find the closest delta value
def find_closest_delta(df, target_delta):
    return df.iloc[(df['delta'] - target_delta).abs().argsort()[:1]]

# Function to process each file and return a DataFrame with results
def process_file(file_path):
    df = pd.read_csv(file_path)
    
    # Initialize list to store results
    results = []

    # Group by TICKER and date
    grouped = df.groupby(['ticker', 'date'])

    for (ticker, date), group in grouped:
        # Filter calls and puts
        call_atm = find_closest_delta(group[(group['cp_flag'] == 'C')], 50)
        put_atm = find_closest_delta(group[(group['cp_flag'] == 'P')], -50)

        call_25 = find_closest_delta(group[(group['cp_flag'] == 'C')], 25)
        put_25 = find_closest_delta(group[(group['cp_flag'] == 'P')], -25)

        # Calculate ATM IV as average of call and put near delta 50 and -50
        atm_iv = (call_atm['impl_volatility'].values[0] + put_atm['impl_volatility'].values[0]) / 2

        # Calculate RR (Risk Reversal) as Call IV (near delta 25) - Put IV (near delta -25)
        rr = call_25['impl_volatility'].values[0] - put_25['impl_volatility'].values[0]

        # Append results
        results.append([ticker, date, atm_iv, rr])

    # Create the resulting DataFrame for this file
    return pd.DataFrame(results, columns=['TICKER', 'date', 'ATM IV', 'RR'])

# Directory where the files are stored
directory_path = 'C:/Users/KARAN JESWANI/Sem/Sem3/Systematic trading/option_data/option_data'

# Initialize an empty list to store all results
all_results = []

# Loop through each file in the directory
for filename in os.listdir(directory_path):
    if filename.endswith(".csv"):  # Process only CSV files
        file_path = os.path.join(directory_path, filename)
        
        # Process the file and get the results DataFrame
        file_results = process_file(file_path)
        
        # Append the results to the list
        all_results.append(file_results)

# Concatenate all the DataFrames into a single DataFrame
final_result_df = pd.concat(all_results, ignore_index=True)

# Save the final DataFrame to a CSV file
final_result_df.to_csv('combined_results.csv', index=False)

# Display the final DataFrame
final_result_df.head()

KeyboardInterrupt: 

In [7]:
all_results

[     TICKER        date    ATM IV        RR
 0       AAL  2013-12-17  0.447346  0.027771
 1       AAL  2013-12-18  0.419270 -0.001097
 2       AAL  2013-12-19  0.421457 -0.026108
 3       AAL  2013-12-20  0.411252 -0.037721
 4       AAL  2013-12-23  0.399073 -0.022857
 ...     ...         ...       ...       ...
 2438    AAL  2023-08-25  0.319537 -0.028895
 2439    AAL  2023-08-28  0.303341 -0.030842
 2440    AAL  2023-08-29  0.305571 -0.032263
 2441    AAL  2023-08-30  0.303324 -0.026125
 2442    AAL  2023-08-31  0.296986 -0.025564
 
 [2443 rows x 4 columns],
      TICKER        date    ATM IV        RR
 0      AAPL  2000-08-31  0.574545 -0.027703
 1      AAPL  2000-09-01  0.572235 -0.017502
 2      AAPL  2000-09-05  0.578295 -0.022779
 3      AAPL  2000-09-06  0.597924 -0.016349
 4      AAPL  2000-09-07  0.592598 -0.020317
 ...     ...         ...       ...       ...
 5781   AAPL  2023-08-25  0.199907 -0.034107
 5782   AAPL  2023-08-28  0.201908 -0.032893
 5783   AAPL  2023-08-29  0

In [1]:
import os
import numpy as np
import pandas as pd

# Function to process each file and return a DataFrame with results
def process_file(file_path):
    df = pd.read_csv(file_path)
    
    # Initialize list to store results
    results = []

    # Group by TICKER and date
    grouped = df.groupby(['ticker', 'date'])

    for (ticker, date), group in grouped:
        # Filter for exact deltas
        call_atm = group[(group['cp_flag'] == 'C') & (group['delta'] == 50)]
        put_atm = group[(group['cp_flag'] == 'P') & (group['delta'] == -50)]
        
        call_25 = group[(group['cp_flag'] == 'C') & (group['delta'] == 25)]
        put_25 = group[(group['cp_flag'] == 'P') & (group['delta'] == -25)]

        # Ensure all required rows are found
        if call_atm.empty or put_atm.empty or call_25.empty or put_25.empty:
            continue  # Skip this group if any of the required deltas are missing

        # Calculate ATM IV as average of call and put at delta 50 and -50
        atm_iv = (call_atm['impl_volatility'].values[0] + put_atm['impl_volatility'].values[0]) / 2

        # Calculate RR (Risk Reversal) as Call IV (delta 25) - Put IV (delta -25)
        rr = call_25['impl_volatility'].values[0] - put_25['impl_volatility'].values[0]

        # Append results
        results.append([ticker, date, atm_iv, rr])

    # Create the resulting DataFrame for this file
    return pd.DataFrame(results, columns=['TICKER', 'date', 'ATM IV', 'RR'])

# Directory where the files are stored
directory_path = 'option_data'

# Initialize an empty list to store all results
all_results = []

# Loop through each file in the directory
for filename in os.listdir(directory_path):
    if filename.endswith(".csv"):  # Process only CSV files
        file_path = os.path.join(directory_path, filename)
        
        # Process the file and get the results DataFrame
        file_results = process_file(file_path)
        
        # Append the results to the list
        all_results.append(file_results)

# Concatenate all the DataFrames into a single DataFrame
final_result_df = pd.concat(all_results, ignore_index=True)

# Save the final DataFrame to a CSV file
final_result_df.to_csv('combined_results.csv', index=False)

# Display the final DataFrame
final_result_df.head()


  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)
  df = pd.read_csv(file_path)


Unnamed: 0,TICKER,date,ATM IV,RR
0,CL,2000-08-31,0.363433,-0.006201
1,CL,2000-09-01,0.358156,-0.011888
2,CL,2000-09-05,0.366865,-0.010889
3,CL,2000-09-06,0.350661,-0.019246
4,CL,2000-09-07,0.348214,-0.011531
