In [1]:
import pandas as pd
from pathlib import Path

# settings
Since my dataset contains 1-month USDRUB derivatives, I assume a maturity of 30 days; I use the USD OIS rates as the USD interest rate. With a 360-day year convention, this implies an annualization factor of $360/30 = 12$.

In [2]:
# root of the data directory, relative to the notebook location
DATA_PATH = Path("..") / "data"

# name of the raw input file, expected under DATA_PATH / "raw"
filename_raw_data = "usdrub-data-hf.ftr"

# number of contract periods per year: 360-day year over a 30-day maturity
days_in_year, days_to_maturity = 360, 30
rates_ann_factor = days_in_year / days_to_maturity

# raw data
The raw data is a collection of USDRUB spot and forward prices and FX option contracts plus the USD OIS rate, all stacked in long form and stored as a `.ftr` (Feather/pyarrow) file with columns `['date', 'contract', 'value']`. The (contract, value) pairs are:
| contract  | value
| --------- | -----
| spot      | USDRUB spot rate
| forward   | USDRUB 1-month forward rate
| v_atm     | volatility of 1-month ATM contracts, in percent.
| v_25r     | quote of 25-delta risk reversal, in percent
| v_10r     | quote of 10-delta risk reversal, in percent
| v_25b     | quote of 25-delta butterfly spread (market strangle), in percent
| v_10b     | quote of 10-delta butterfly spread (market strangle), in percent
| r_base    | risk-free rate in the base currency, in percent.

In [3]:
# read the long-form raw quotes (columns: date, contract, value)
raw_file = DATA_PATH / "raw" / filename_raw_data
data = pd.read_feather(raw_file)

# report how many distinct timestamps the sample covers
n_unique_dates = len(data["date"].unique())
print(f"number of unique dates: {n_unique_dates}")

# show every contract quoted at one example timestamp
(
    data
    .query("date == '2022-02-28 16:44:00+01:00'")
    .reset_index(drop=True)
    .style.format(precision=2)
)

number of unique dates: 57307


Unnamed: 0,date,contract,value
0,2022-02-28 16:44:00+01:00,spot,109.31
1,2022-02-28 16:44:00+01:00,r_base,0.24
2,2022-02-28 16:44:00+01:00,v_atm,85.48
3,2022-02-28 16:44:00+01:00,v_25r,21.32
4,2022-02-28 16:44:00+01:00,v_10r,46.03
5,2022-02-28 16:44:00+01:00,v_25b,3.77
6,2022-02-28 16:44:00+01:00,v_10b,14.67
7,2022-02-28 16:44:00+01:00,forward,112.44


We need to introduce one new variable, the interest rate for the counter currency, and to convert all relevant values (interest rates and the option contract quotes expressed as volatilities) from percent to fractions of 1.

In [4]:
# guard against re-running this cell on already-processed data: `data` is
# overwritten below, so a second run would silently divide the percent
# quotes by 100 once more
if "r_counter" in set(data["contract"]):
    raise RuntimeError(
        "'r_counter' is already present in `data`; re-run the loading cell "
        "before re-running this one"
    )

# pivot for easier access: one row per date, one column per contract
data_wide = data.pivot(index="date", columns="contract", values="value")

# from percent to fractions of 1 where relevant
pct_columns = ["r_base", "v_atm", "v_25r", "v_10r", "v_25b", "v_10b"]
data_wide[pct_columns] = data_wide[pct_columns] / 100

# calculate interest rate for the counter currency by solving the forward
# pricing relation below for r_counter; both rates are annualized, hence the
# division by the annualization factor `ann` (= rates_ann_factor):
#   f = s * (1 + r_counter / ann) / (1 + r_base / ann)
base_growth = 1 + data_wide["r_base"] / rates_ann_factor
data_wide["r_counter"] = (
    data_wide["forward"] / data_wide["spot"] * base_growth - 1
) * rates_ann_factor

# unpivot back to the long form (date, contract, value)
data = data_wide.reset_index().melt(
    id_vars="date", value_name="value", var_name="contract"
)

# show every contract at one example timestamp, now including r_counter
(
    data
    .query("date == '2022-02-28 16:44:00+01:00'")
    .reset_index(drop=True)
    .style.format(precision=4)
)

Unnamed: 0,date,contract,value
0,2022-02-28 16:44:00+01:00,forward,112.4394
1,2022-02-28 16:44:00+01:00,r_base,0.0024
2,2022-02-28 16:44:00+01:00,spot,109.3052
3,2022-02-28 16:44:00+01:00,v_10b,0.1467
4,2022-02-28 16:44:00+01:00,v_10r,0.4603
5,2022-02-28 16:44:00+01:00,v_25b,0.0377
6,2022-02-28 16:44:00+01:00,v_25r,0.2132
7,2022-02-28 16:44:00+01:00,v_atm,0.8548
8,2022-02-28 16:44:00+01:00,r_counter,0.3465


In [5]:
# persist the processed long-form data under data/processed/
processed_file = DATA_PATH / "processed" / "data.ftr"
data.to_feather(processed_file)