In [None]:
import pandas as pd
import numpy as np
import os, sys
import datetime
%matplotlib inline
import matplotlib.pyplot as plt
import plotly.plotly as py
import plotly.graph_objs as go
import zipfile
import urllib.request
from PIL import Image
def str_to_date(d):
    """Convert a 'YYYY-MM-DD' value into an integer of the form yyyymmdd.

    Args:
        d: Any object; it is passed through ``str()`` before parsing, so
            ``datetime.date`` instances work as well as strings.

    Returns:
        The date encoded as an int (e.g. '2019-03-05' -> 20190305), or
        ``None`` when the text does not match the '%Y-%m-%d' format
        (this includes ``None`` and NaN inputs).
    """
    try:
        dt = datetime.datetime.strptime(str(d),'%Y-%m-%d')
    except ValueError:
        # Only a parse failure means "not a date"; anything else (for example
        # KeyboardInterrupt) must propagate instead of being silently dropped.
        return None
    return dt.year * 10000 + dt.month * 100 + dt.day

# Make important folders
# exist_ok=True keeps the cell re-runnable when the folders already exist, while
# genuine failures (e.g. a permission error) raise here instead of being hidden.
TEMP_FOLDER = './temp_folder'
os.makedirs(TEMP_FOLDER, exist_ok=True)
SAVE_IMAGE_FOLDER = f'{TEMP_FOLDER}/gold'
os.makedirs(SAVE_IMAGE_FOLDER, exist_ok=True)


In [None]:
# Load one CME open-interest CSV per year (2013-2019 inclusive), drop rows that
# have no Open_Interest value, and stack everything into a single frame.
years = np.arange(2013, 2020)
yearly_frames = []
for y in years:
    df_temp = pd.read_csv(f'{TEMP_FOLDER}/cme_open_interest_{y}.csv')
    df_temp = df_temp[~df_temp.Open_Interest.isnull()]
    yearly_frames.append(df_temp)
# A single concat replaces the per-year DataFrame.append calls (removed in
# pandas 2.0); ignore_index=True yields a clean 0..n-1 index for the result.
df_oi = pd.concat(yearly_frames, ignore_index=True)
print(len(df_oi))
df_oi.tail()

In [None]:
# Read the ETF history and add `trade_date`: the `date` column run through
# str_to_date, which yields an int yyyymmdd key or None for unparsable values.
df_etf = pd.read_csv('./etf_cap_hist.csv').assign(
    trade_date=lambda frame: frame['date'].apply(str_to_date)
)
df_etf.tail()

In [None]:
# Show the distinct product descriptions that mention "gold" (case-insensitive).
descriptions = df_oi['Product_Description']
c1 = descriptions.notnull()
c2 = descriptions.str.contains('gold', case=False)
descriptions[c1 & c2].unique()

In [None]:
# Keep only the 'GOLD FUTURES' rows and the three columns used downstream.
# .copy() makes this an independent frame, so the column conversions that follow
# do not write into a slice of df_oi (which triggers SettingWithCopyWarning).
df_oi_gc = df_oi.loc[
    df_oi.Product_Description == 'GOLD FUTURES',
    ['trade_date','Open_Interest','Total_Volume'],
].copy()
def to_int(s):
    """Convert ``s`` to an int by way of ``float(str(s))``.

    Args:
        s: Any value; its string form is parsed as a float and truncated
            toward zero.

    Returns:
        The integer value, or ``None`` when the string form is not a finite
        number. The rejected value is printed so bad inputs stay visible.
    """
    try:
        return int(float(str(s)))
    except (ValueError, OverflowError):
        # ValueError: text that is not numeric, or NaN; OverflowError: +/-inf.
        # Other exceptions (e.g. KeyboardInterrupt) are deliberately not caught.
        print(s)
        return None
# Coerce the futures columns to integers: Open_Interest through to_int (None on
# failure), Total_Volume through a float -> int cast.
df_oi_gc['Open_Interest'] = df_oi_gc['Open_Interest'].apply(to_int)
df_oi_gc['Total_Volume'] = df_oi_gc['Total_Volume'].astype(float).astype(int)

# Join the GLD rows of the ETF history to the gold futures rows on trade_date.
df_etf_gc = df_etf[df_etf['symbol'] == 'GLD']
df_both = pd.merge(
    df_etf_gc[['trade_date', 'nav', 'shares']],
    df_oi_gc,
    how='inner',
    on='trade_date',
)

# Row-over-row fractional changes of NAV, shares outstanding and open interest.
for diff_col, src_col in (
    ('nav_diff', 'nav'),
    ('share_diff', 'shares'),
    ('oi_diff', 'Open_Interest'),
):
    df_both[diff_col] = df_both[src_col].pct_change()



In [None]:
def plot_pandas(df_in,x_column,num_of_x_ticks=20):
    """Bar-plot a DataFrame and label only a thinned-out set of x ticks.

    Duplicate rows are dropped, ``x_column`` is converted to strings (so it
    supplies tick labels rather than bars) and the frame is drawn with
    ``DataFrame.plot.bar``. Tick positions are counted back from the last row
    at a fixed stride so the most recent row is always labelled.

    Args:
        df_in: Frame to plot; it is not modified.
        x_column: Name of the column whose values become the tick labels.
        num_of_x_ticks: Target number of labelled ticks. When the frame has
            fewer rows than this, every row is labelled.

    Returns:
        The axes object returned by ``DataFrame.plot.bar``.
    """
    df_cl = df_in.drop_duplicates().copy()
    df_cl[x_column] = df_cl[x_column].apply(str)

    n = len(df_cl)
    # A stride of at least 1 avoids the zero slice step that used to raise
    # ValueError whenever the frame had fewer rows than num_of_x_ticks.
    step = max(n // num_of_x_ticks, 1)
    # Number of trailing rows the ticks are spread over (never more than n).
    covered = min(step * num_of_x_ticks, n)
    # Walk backwards from the last row, then restore ascending order.
    x_indices = list(range(n - 1, n - 1 - covered, -step))[::-1]
    x_labels = [str(t) for t in df_cl[x_column].iloc[x_indices]]

    ax = df_cl.plot.bar(figsize=(16,10))
    ax.set_xticks(x_indices)
    ax.set_xticklabels(x_labels, rotation='vertical')
    ax.grid()
    return ax

In [None]:
def multi_plot(df,x_column,save_image_folder,dates_per_plot=100):
    """Plot ``df`` in consecutive row chunks and save each chunk as a PNG.

    Each chunk of at most ``dates_per_plot`` rows is drawn with
    ``plot_pandas`` and written to ``{save_image_folder}/gold_{k}.png``,
    where ``k`` counts chunks from 1.

    Args:
        df: Frame to plot, sliced by position.
        x_column: Column passed to ``plot_pandas`` for the x tick labels.
        save_image_folder: Existing folder that receives the image files.
        dates_per_plot: Maximum number of rows per image; must be >= 1.

    Returns:
        List of the image paths written, in chunk order (empty for an empty
        frame).

    Raises:
        ValueError: If ``dates_per_plot`` is smaller than 1.
    """
    if dates_per_plot < 1:
        raise ValueError('dates_per_plot must be a positive integer')
    image_names = []
    # Stepping over row offsets covers every row exactly once. The previous
    # count expression evaluated to 0 pages whenever len(df) was an exact
    # multiple of dates_per_plot, so nothing was plotted in that case.
    for page, low_row in enumerate(range(0, len(df), dates_per_plot), start=1):
        df_sub = df.iloc[low_row:low_row + dates_per_plot]
        ax = plot_pandas(df_sub,x_column)
        image_name = f'{save_image_folder}/gold_{page}.png'
        ax.get_figure().savefig(image_name)
        image_names.append(image_name)
    return image_names

In [None]:
# Build the rolling signals used by the plots and the backtest.
OI_DIFF_CAP = .1   # largest absolute open-interest change kept per row
ROLL_WINDOW = 3    # number of rows summed into each rolling signal

# .copy() so the column assignments below act on an independent frame rather
# than on a slice of df_both.
df_both_2 = df_both[['trade_date','nav_diff','oi_diff']].copy()
# Symmetric cap: the previous lambda replaced every |oi_diff| > .1 with +.1,
# which turned large negative changes into positive ones. NaN stays NaN.
df_both_2['oi_diff'] = df_both_2['oi_diff'].clip(lower=-OI_DIFF_CAP, upper=OI_DIFF_CAP)
df_both_2['nav_roll'] = df_both_2['nav_diff'].rolling(ROLL_WINDOW).sum()
df_both_2['oi_roll'] = df_both_2['oi_diff'].rolling(ROLL_WINDOW).sum()
# Drop the rows for which the NAV rolling sum is undefined.
df_both_3 = df_both_2.loc[
    df_both_2['nav_roll'].notnull(),
    ['trade_date','nav_roll','oi_roll','nav_diff'],
]
multi_plot(df_both_3,'trade_date',SAVE_IMAGE_FOLDER,dates_per_plot=40)

In [None]:
# Backtest over the last `last_n` rows: trade only when the NAV and open-interest
# rolling sums are both at least .002 in magnitude and have opposite signs.
# The position takes the sign of nav_roll and earns that row's nav_diff.
# NOTE(review): nav_roll is a rolling sum that appears to include the same
# row's nav_diff, so the signal and the realised return overlap -- confirm
# this is intended (otherwise the signal should be lagged by one row).
pl = 1
pl_hist = []
trade_hist = []
last_n = 1000
df_b4 = df_both_3.iloc[-last_n:]
print(len(df_b4))

for row in df_b4.itertuples(index=False):
    nav_roll = float(row.nav_roll)
    oi_roll = float(row.oi_roll)
    nav_diff = float(row.nav_diff)
    # Written as positive conditions so a NaN signal fails them and is never
    # traded; previously a NaN oi_roll passed both skip tests.
    strong_enough = abs(nav_roll) >= .002 and abs(oi_roll) >= .002
    diverging = nav_roll * oi_roll < 0
    if not (strong_enough and diverging):
        continue
    sign = -1 if nav_roll < 0 else 1
    trade = nav_diff*sign
    pl = (1+trade) * pl   # compound the running profit-and-loss multiple
    trade_hist.append(trade)
    pl_hist.append(pl)

In [None]:
from scipy import stats

# Summary of the per-trade returns: final P&L multiple, geometric mean return,
# standard deviation, geometric-mean/std ratio, the geometric mean compounded
# over all trades, and arithmetic-mean/std ratio.
trade_returns = np.array(trade_hist)
pl_std = trade_returns.std()
pl_avg = trade_returns.mean()
pl_geomean = stats.gmean(trade_returns + 1) - 1
pl,pl_geomean,pl_std,pl_geomean/pl_std,(pl_geomean+1)**len(trade_hist),pl_avg/pl_std