# Download VIX Futures data

In [None]:
# Step by step describtion
# 1. download the .csv file from the CBOE Webpage
# 2. "clean" the data = only dates, maturity and closing prices (settlement maybe?) needed
# 3. apply the formula given in the appendix of the paper to calculate the return
# 4. compare the result to the his given results

In [1]:
import requests
import pandas as pd
import io
import re
import numpy as np

In [2]:
# Regular expression pattern to match various date formats
date_pattern = re.compile(r'(\d{1,2}/\d{1,2}/\d{4})|(\d{2}/\d{2}/\d{4})|(\d{4}-\d{1,2}-\d{2})|(\d{2}-[A-Za-z]{3}-\d{4})')

# Function to standardize date format to 'YYYY-MM-DD'
def standardize_date(date_str):
    match = date_pattern.search(date_str)
    if match:
        return pd.to_datetime(match.group()).strftime('%Y-%m-%d')
    else:
        return date_str  # Keep the original value for invalid date formats



In [6]:
# 1. download the .csv files from the CBOE Webpage
# define changing parts in the URL
list_exp_years = ['04','05','06','07','08','09','10','11','12','13']
list_exp_mon = ['F','G','H','J','K','M','N','Q','U','V','X','Z']
counter = -1
check_counter = -1000
expiration_dates = []
for i in range(len(list_exp_years)):
    for j in range(len(list_exp_mon)):
        # Define the URL of the CSV file you want to download
        url = "https://cdn.cboe.com/resources/futures/archive/volume-and-price/CFE_" + list_exp_mon[j] + list_exp_years[i] + "_VX.csv"
        try:
            # Send an HTTP GET request to the URL
            response = requests.get(url)
            
            # Check if the request was successful (status code 200)
            if response.status_code == 200:
                content = response.text.splitlines()
                if "Trade Date" not in content[0]:
                    # Skip the first row and create the DataFrame
                    df = pd.read_csv(io.StringIO('\n'.join(content[1:])), usecols=range(11))
                else:
                    # Create the DataFrame without skipping the first row
                    df = pd.read_csv(io.StringIO(response.text), usecols=range(11))
                print("Success")
                print(url)
                df['Trade Date'] = df['Trade Date'].apply(standardize_date)
                expiration_dates.append(df['Trade Date'].iloc[-1])
                counter = counter + 1
                if counter <= 0:
                    merged_df = df
            else:
                print(f"Failed to retrieve the file. Status Code: {response.status_code}")
        except requests.exceptions.RequestException as e:
            print("An error occurred: {e}")
        # Merge the DataFrames based on the "Trade Date" column
        #if counter <= 0:
        #    merged_df = df
        if counter == 1:
            merged_df = pd.merge(merged_df[['Trade Date', 'Futures', 'Settle']], df[['Trade Date', 'Futures', 'Settle']], on='Trade Date', how='outer', suffixes=('_1', '_2'))
            merged_df = merged_df.rename(columns={'Settle_1': merged_df.iloc[0]['Futures_1'], 'Settle_2': merged_df.iloc[0]['Futures_2']})
            merged_df = merged_df.drop(columns = ['Futures_1', 'Futures_2'])
            print(merged_df.head())
        elif counter > 1:
            if counter == check_counter:
                print(check_counter)
            else:
                colnames = merged_df.columns.tolist()
                name = df.iloc[0]['Futures']
                merged_df = pd.merge(merged_df[colnames], df[['Trade Date', 'Futures', 'Settle']], on='Trade Date', how='outer')
                merged_df = merged_df.rename(columns={'Settle': name})
                merged_df = merged_df.drop(columns = ['Futures'])
                check_counter = counter
            
# Print the merged DataFrame
print(merged_df)

Failed to retrieve the file. Status Code: 403
Failed to retrieve the file. Status Code: 403
Failed to retrieve the file. Status Code: 403
Failed to retrieve the file. Status Code: 403
Success
https://cdn.cboe.com/resources/futures/archive/volume-and-price/CFE_K04_VX.csv
Success
https://cdn.cboe.com/resources/futures/archive/volume-and-price/CFE_M04_VX.csv
   Trade Date  K (May 04)  M (Jun 04)
0  2004-03-26       203.2       201.6
1  2004-03-29       198.0       197.7
2  2004-03-30       196.2       197.9
3  2004-03-31       197.5       198.5
4  2004-04-01       196.2       196.1
Success
https://cdn.cboe.com/resources/futures/archive/volume-and-price/CFE_N04_VX.csv
Success
https://cdn.cboe.com/resources/futures/archive/volume-and-price/CFE_Q04_VX.csv
Success
https://cdn.cboe.com/resources/futures/archive/volume-and-price/CFE_U04_VX.csv
Success
https://cdn.cboe.com/resources/futures/archive/volume-and-price/CFE_V04_VX.csv
Success
https://cdn.cboe.com/resources/futures/archive/volume-and-

Success
https://cdn.cboe.com/resources/futures/archive/volume-and-price/CFE_F12_VX.csv
Success
https://cdn.cboe.com/resources/futures/archive/volume-and-price/CFE_G12_VX.csv
Success
https://cdn.cboe.com/resources/futures/archive/volume-and-price/CFE_H12_VX.csv
Success
https://cdn.cboe.com/resources/futures/archive/volume-and-price/CFE_J12_VX.csv
Success
https://cdn.cboe.com/resources/futures/archive/volume-and-price/CFE_K12_VX.csv
Success
https://cdn.cboe.com/resources/futures/archive/volume-and-price/CFE_M12_VX.csv
Success
https://cdn.cboe.com/resources/futures/archive/volume-and-price/CFE_N12_VX.csv
Success
https://cdn.cboe.com/resources/futures/archive/volume-and-price/CFE_Q12_VX.csv
Success
https://cdn.cboe.com/resources/futures/archive/volume-and-price/CFE_U12_VX.csv
Success
https://cdn.cboe.com/resources/futures/archive/volume-and-price/CFE_V12_VX.csv
Success
https://cdn.cboe.com/resources/futures/archive/volume-and-price/CFE_X12_VX.csv
Success
https://cdn.cboe.com/resources/futu

In [244]:
merged_df.to_csv("merged_df.csv", index=False)

In [7]:
def interpolate_returns(dataset, dates, T):
    # Convert expiration_dates to a Pandas Series
    dates = pd.Series(dates)
    
    results = []
    for i in range(2000): #2320
        # Step 1: Calculate the maturity date
        maturity_date = pd.to_datetime(dataset['Trade Date'].iloc[i]) + pd.DateOffset(days=int(T * 30))
        #print("Maturity date", maturity_date)
        # Step 2: Find the two closest dates to the maturity date
        closest_dates_index = (pd.to_datetime(dates) - maturity_date).abs().argsort().values
        #print("indices",closest_dates_index)
        #print(i)
        close = 0
        index1 = closest_dates_index[close]+1
        #print(dataset.iloc[i, index1],dataset.iloc[i+1, index1])
        while pd.isna(dataset.iloc[i, index1]) | pd.isna(dataset.iloc[i+1, index1]):
            #print("Value ", dataset.iloc[i, index1], "or ", dataset.iloc[i+1, index1], "is NaN.")
            #print("Maturity Date: ", maturity_date)
            #print("Closest expiration: ", pd.to_datetime(dates[index1-1]))
            #print("Current Date: ", dataset['Trade Date'].iloc[i])
            #print("close: ", i, dataset.iloc[i, closest_dates_index[close]+1], dataset.iloc[i+1, closest_dates_index[close]+1])
            close = close + 1
            index1 = closest_dates_index[close]+1
        close1 = close+1
        index2 = closest_dates_index[close1]+1
        #print(dataset.iloc[i, index2],dataset.iloc[i+1, index2])
        while pd.isna(dataset.iloc[i, index2]) | pd.isna(dataset.iloc[i+1, index2]):
            #print("Value ", dataset.iloc[i, index2], "or ", dataset.iloc[i+1, index2], "is NaN.")
            #print("Maturity Date: ", maturity_date)
            #print("2nd closest expiration: ",pd.to_datetime(dates[index2-1]))
            #print("Current Date: ", dataset['Trade Date'].iloc[i])
            #print("close: ", i, dataset.iloc[i, closest_dates_index[close1]+1], dataset.iloc[i+1, closest_dates_index[close1]+1])
            close1 = close1 + 1
            index2 = closest_dates_index[close1]+1
        #print(dataset.iloc[i, index2],dataset.iloc[i+1, index2])        
        #print("dateindex", closest_dates_index)
        #closest_dates = date_series[(date_series - maturity_date).abs().argsort()[:2]]

        # Step 3: Get the prices for the closest dates
        #at time t
        closest_price1_t = dataset.iloc[i, index1]
        closest_price2_t = dataset.iloc[i, index2]
        #at time t+1
        closest_price1_t1 = dataset.iloc[i+1, index1]
        closest_price2_t1 = dataset.iloc[i+1, index2]
        #print("1st", pd.to_datetime(dates[closest_dates_index[0]]))
        #print("2nd", pd.to_datetime(dates[closest_dates_index[1]]))
        # Calculate the time in months (S1 is always the earlier month of the two dates)
        if pd.to_datetime(dates[index1-1]) < pd.to_datetime(dates[index2-1]):
            S1 = abs(pd.to_datetime(dataset['Trade Date'].iloc[i]) - pd.to_datetime(dates[index1-1])).days / 30
            S2 = abs(pd.to_datetime(dataset['Trade Date'].iloc[i]) - pd.to_datetime(dates[index2-1])).days / 30
        else:
            #print(maturity_date, pd.to_datetime(dates[closest_dates_index[0]]),pd.to_datetime(dates[closest_dates_index[1]]))
            S2 = abs(pd.to_datetime(dataset['Trade Date'].iloc[i]) - pd.to_datetime(dates[index1-1])).days / 30
            S1 = abs(pd.to_datetime(dataset['Trade Date'].iloc[i]) - pd.to_datetime(dates[index2-1])).days / 30
        #print("S1: ", S1, "S2: ", S2, "T: ", T)
        #apply interpolation formula given
        #print("weight 1: ",(T-S1)/(S2-S1))
        #print("weight 2: ",(S2-T)/(S2-S1))
        #print(closest_price1_t1,closest_price2_t1,closest_price1_t,closest_price2_t)
        #if (S2-S1) <0.1:
            #print("1st: ",S2-S1)
            #print(S2, S1, maturity_date)
        #elif (((T-S1)/(S2-S1))*closest_price1_t + ((S2-T)/(S2-S1))*closest_price2_t) < 0.1:
            #print("2nd: ",(((T-S1)/(S2-S1))*closest_price1_t + ((S2-T)/(S2-S1))*closest_price2_t))
            #print(closest_price1_t, closest_price2_t)
        r = (((T-S1)/(S2-S1))*closest_price1_t1 + ((S2-T)/(S2-S1))*closest_price2_t1)/(((T-S1)/(S2-S1))*closest_price1_t + ((S2-T)/(S2-S1))*closest_price2_t)-1

        #results.append({
        #    'Maturity Date': maturity_date,
        #    'Closest Dates': dates[closest_dates_index],
        #    'Closest Prices': [closest_price1, closest_price2],
        #    'Time to Maturity (Months)': [S1, S2]
        #})
        results.append(round(r*100,3))

    return results


In [8]:
merged_df.replace(0, np.nan, inplace=True)

results1 = interpolate_returns(merged_df, expiration_dates, 1)
results2 = interpolate_returns(merged_df, expiration_dates, 2)
results3 = interpolate_returns(merged_df, expiration_dates, 3)
results4 = interpolate_returns(merged_df, expiration_dates, 4)
results5 = interpolate_returns(merged_df, expiration_dates, 5)
results6 = interpolate_returns(merged_df, expiration_dates, 6)


In [9]:
# Create a dataframe where the returns are stored
trade_dates = merged_df['Trade Date'].iloc[1:2001]

# Create a new DataFrame with the "Date" column
new_df = pd.DataFrame({'Date': trade_dates})

# Display the new DataFrame
new_df['1 month'] = results1
new_df['2 month'] = results2
new_df['3 month'] = results3
new_df['4 month'] = results4
new_df['5 month'] = results5
new_df['6 month'] = results6
print(new_df)


            Date  1 month  2 month  3 month  4 month  5 month  6 month
1     2004-03-29   -1.391   -2.069   -1.754   -2.057   -2.154   -0.683
2     2004-03-30    0.861   -0.224    0.499    0.795    1.305    0.584
3     2004-03-31    0.050    0.431    0.151    0.906    1.333    0.668
4     2004-04-01   -1.580   -0.993   -1.463   -0.210    0.475    0.350
5     2004-04-02   -1.377   -2.906   -1.581   -1.201   -0.481   -0.776
...          ...      ...      ...      ...      ...      ...      ...
1996  2012-02-29    0.031   -0.455   -0.693   -0.590   -0.399   -0.491
1997  2012-03-01   -3.388   -2.560   -2.293   -2.324   -2.562   -2.309
1998  2012-03-02    1.726    1.766    2.232    2.563    2.552    2.341
1999  2012-03-05   -0.574    0.123    0.383    0.601    0.945    0.780
2000  2012-03-06    9.132    5.284    3.430    1.959    1.034    1.194

[2000 rows x 7 columns]


In [266]:
new_df.to_csv("VIX_FUT_RET_PROV.csv", index=False)