In [1]:
import pandas as pd
import plotly.express as px
import numpy as np
import plotly.graph_objects as go

In [2]:
# Short key -> full descriptive title for each futures series.
# Other cells use the keys to select columns of the price table and the
# titles as figure titles.
datasets = dict(
    crude="Crude Oil WTI Futures Historical Data",
    feeder="Feeder Cattle Futures Historical Data",
    lean_hogs="Lean Hogs Futures Historical Data",
    cattle="Live Cattle Futures Historical Data",
    london_coffee="London Robusta Coffee Futures Historical Data",
    lumber="Lumber Futures Historical Data",
    oats="Oats Futures Historical Data",
    orange="Orange Juice Futures Historical Data",
    us_cocoa="US Cocoa Futures Historical Data",
    us_coffee_c="US Coffee C Futures Historical Data",
    soybean_oil="US Soybean Oil Futures Historical Data",
    soybeans="US Soybeans Futures Historical Data",
    sugar_11="Sugar #11 Futures Historical Data",
    wheat="Wheat Futures Historical Data",
)


In [3]:
# Load the combined price table; later cells read one column per key of `datasets`.
# (Plain string literal: the path has no placeholders, so no f-string is needed.)
df = pd.read_csv("data/clean/all.csv")

In [4]:
# Overlay every future's price series on a single line chart.
price_traces = [
    go.Scatter(x=df.index, y=df[key], mode='lines', name=key)
    for key in datasets
]
fig = go.Figure(data=price_traces)
fig.update_layout(
    yaxis=dict(tick0=0, dtick=250),  # y-axis ticks every 250, starting at 0
    title="All Future Prices",
    showlegend=True,
)

# Red horizontal guide lines at y = 0, 1000, ..., 9000, drawn from the first
# to the last index value.
x_start, x_end = df.index[0], df.index[-1]
for level in range(0, 10000, 1000):
    fig.add_shape(
        type="line",
        x0=x_start,
        y0=level,
        x1=x_end,
        y1=level,
        line=dict(color="red", width=1),
    )

fig.show()

In [5]:
# Line chart of per-step log returns (first difference of log price) for every future.
fig = go.Figure()
for key in datasets:
    # The first element of each series is NaN because diff() has no predecessor.
    log_returns = np.log(df[key]).diff()
    fig.add_trace(
        go.Scatter(x=df.index, y=log_returns, mode='lines', name=key)
    )
fig.update_layout(title="All Future Log Returns", showlegend=True)

fig.show()

In [6]:
# Histogram of log returns per future, shown one at a time via a dropdown.

# One dropdown button per future: selecting it shows only that future's
# histogram (visibility mask in trace order) and sets the figure title to the
# future's full name.
dropdown_buttons = [
    dict(
        label=selected,
        method="update",
        args=[
            {"visible": [other == selected for other in datasets]},
            {"title": datasets[selected]},
        ],
    )
    for selected in datasets
]
layout = go.Layout(updatemenus=[dict(buttons=dropdown_buttons)])

fig = go.Figure(layout=layout)
for position, key in enumerate(datasets):
    # Only the first histogram starts visible, so the initial view matches the
    # dropdown's default selection (its first button) instead of rendering an
    # empty plot until the user picks something.
    fig.add_trace(
        go.Histogram(x=np.log(df[key]).diff(), name=key, visible=(position == 0))
    )
fig.update_layout(title="All Future Log Return Distributions", showlegend=True)

fig.show()

In [7]:
# Augmented Dickey-Fuller test on every future's log returns to check stationarity.
from statsmodels.tsa.stattools import adfuller

# Threshold applied to the ADF p-value: at or above it the unit-root null is
# not rejected and the series is reported as non-stationary.
SIGNIFICANCE_LEVEL = 0.05

# Keys of the futures whose log returns did not pass the stationarity check.
unaccepted = set()
for k in datasets:
    # Log returns; dropna() removes the leading NaN produced by diff().
    data = np.log(df[k]).diff().dropna()
    result = adfuller(data)
    p_value = result[1]  # second element of adfuller's result tuple is the p-value

    # Per-series summary: "(mean, std)" of the log returns, rounded to 5 decimals.
    print(f"{k}:")
    mean = data.mean().round(5)
    std = data.std().round(5)
    print(f"({mean}, {std})")

    # Emit the verdict BEFORE the blank separator so it stays grouped with its
    # own series rather than appearing under the next series' header.
    if p_value >= SIGNIFICANCE_LEVEL:
        unaccepted.add(k)
        print(f"{k} is not stationary at 95% confidence")
    print()

crude:
(0.0001, 0.03652)

feeder:
(0.00038, 0.01222)

lean_hogs:
(0.00012, 0.02655)

cattle:
(0.00029, 0.01146)

london_coffee:
(0.00085, 0.0165)

lumber:
(0.00024, 0.04039)

oats:
(0.00021, 0.02356)

orange:
(0.00118, 0.01962)

us_cocoa:
(0.00079, 0.03338)

us_coffee_c:
(-0.00457, 0.01805)

soybean_oil:
(0.00021, 0.0185)

soybeans:
(4e-05, 0.01371)

sugar_11:
(0.00039, 0.01704)

wheat:
(1e-05, 0.02134)

