In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
# Notebook-wide figure defaults: large canvas, high resolution.
plt.rcParams.update({
    "figure.figsize": (10, 6),  # width, height in inches
    "figure.dpi": 150,
})

In [3]:
import requests, zipfile, io
import os.path

# Fetch and unpack the CFTC disaggregated futures history (2006-2016) only
# when the extracted text file is not already on disk, so re-running the
# notebook does not hit the network again.
if not os.path.exists("../../data/F_Disagg06_16.txt"):
    url = "https://www.cftc.gov/files/dea/history/fut_disagg_txt_hist_2006_2016.zip"

    # Bound the wait and surface HTTP errors here; otherwise an error page
    # would be handed to ZipFile and fail later with an opaque BadZipFile.
    r = requests.get(url, timeout=60)
    r.raise_for_status()

    # The archive is held in memory; the context manager closes it once the
    # members are written. Presumably it contains F_Disagg06_16.txt, which
    # is the file the guard above checks for.
    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        z.extractall("../../data")

In [4]:
# Load the extracted report file. low_memory=False makes pandas read each
# column in one pass instead of in chunks, so dtypes are inferred consistently.
cot_path = "../../data/F_Disagg06_16.txt"
df = pd.read_csv(cot_path, low_memory=False)

# Animation Generator
This generates animated scatterplots by creating and saving several different scatterplots and then combining them into an animated GIF with a command line tool.

There are two cells below (after the one that clears out any previous animation files). The first cell creates graphs for a single subgroup over time; the second cell produces graphs showing all subgroups simultaneously over time.

After running one of those cells, the following bash cell will produce a gif file out of the generated graphs.

In [None]:
# Clear out previous graphs
!rm -r ./graphs
!mkdir ./graphs
!rm ./animated_chart.gif

In [None]:
# Animate the first five subgroups over time.
# Each frame draws, per subgroup, the last `trail_length` weekly totals of
# Prod/Merc long positions (x) against Swap long positions (y), with older
# points fading out. Frames are written to graphs/stepNNN.png.
date_col = 'Report_Date_as_YYYY-MM-DD'
prod_col = 'Prod_Merc_Positions_Long_All'
swap_col = 'Swap_Positions_Long_All'
trail_length = 15  # reports drawn per frame
frame_step = 4     # reports advanced between consecutive frames
my_dpi = 96

# Sorted report dates. unique() alone keeps file order, while groupby()
# sorts its keys, so sorting here makes position i refer to the same date in
# `dates` and in every series below.
dates = df[date_col].dropna().sort_values().unique()
colors = [(230, 25, 75), (60, 180, 75), (255, 225, 25), (0, 130, 200), (245, 130, 48), (70, 240, 240), (240, 50, 230), (250, 190, 190), (0, 128, 128), (230, 190, 255), (170, 110, 40), (255, 250, 200), (128, 0, 0), (170, 255, 195), (0, 0, 128), (128, 128, 128), (255, 255, 255), (0, 0, 0)]

subgroup_codes = df['CFTC_SubGroup_Code'].unique()[:5]
prod_merc_data = []
swap_data = []
for subgroup_code in subgroup_codes:
    subgroup = df[df['CFTC_SubGroup_Code'] == subgroup_code]
    # Per-date totals, placed on the shared date axis. Dates on which this
    # subgroup has no rows become NaN, which scatter() leaves undrawn.
    totals = subgroup.groupby(date_col)[[prod_col, swap_col]].sum().reindex(dates)

    # Scale each series by its own peak so every subgroup fits in [0, 1].
    prod_merc_data.append(totals[prod_col] / totals[prod_col].max())
    swap_data.append(totals[swap_col] / totals[swap_col].max())

# Alpha ramps cubically from 0 (oldest point in the trail) to 1 (newest).
fade = np.linspace(0, 1, trail_length) ** 3

# Start at the first full window: a smaller i would make the slice start
# negative, which wraps to the end of the series instead of clamping at 0.
for i in range(trail_length, len(dates) + 1, frame_step):
    fig = plt.figure(figsize=(480/my_dpi, 480/my_dpi), dpi=my_dpi)
    plt.xlim(0, 1)
    plt.ylim(0, 1)

    window = slice(i - trail_length, i)
    for prod_merc, swap, color in zip(prod_merc_data, swap_data, colors):
        # One RGBA row per point: constant RGB for the subgroup, fading alpha.
        rgba_colors = np.empty((trail_length, 4))
        rgba_colors[:, :3] = np.array(color) / 255
        rgba_colors[:, 3] = fade

        plt.scatter(prod_merc.iloc[window],
                    swap.iloc[window],
                    color=rgba_colors)

    # Title names every plotted subgroup and the newest date in the window.
    plt.title(', '.join(map(str, subgroup_codes)) + ' Prod/Merc vs. Swap ' + str(dates[i - 1]))
    plt.xlabel('Prod/Merc Positions')
    plt.ylabel('Swap Positions')

    # Zero-padded index keeps the frames in order when globbed by the shell.
    filename = 'graphs/step' + str(i).zfill(3) + '.png'
    plt.savefig(filename, dpi=my_dpi)
    plt.close(fig)

In [None]:
# Animate all subgroups over time.
# Each frame shows one dot per subgroup: the two-report average of Prod/Merc
# long positions (x) against Swap long positions (y), each scaled by that
# subgroup's largest single-report total. Frames go to graphs/stepNNN.png.
date_col = 'Report_Date_as_YYYY-MM-DD'

# Sorted so that consecutive entries are consecutive reports; unique() alone
# returns dates in file order.
dates = df[date_col].dropna().sort_values().unique()

subgroup_max_values = {}  # only its keys (the subgroup codes) are used below
prod_max_values = {}
swap_max_values = {}
for subgroup_code in df['CFTC_SubGroup_Code'].unique():
    subgroup = df[df['CFTC_SubGroup_Code'] == subgroup_code]
    by_date = subgroup.groupby(date_col)

    subgroup_max_values[subgroup_code] = by_date['Tot_Rept_Positions_Long_All'].sum().max()
    prod_max_values[subgroup_code] = by_date['Prod_Merc_Positions_Long_All'].sum().max()
    swap_max_values[subgroup_code] = by_date['Swap_Positions_Long_All'].sum().max()

my_dpi = 70

# Avg over two weeks at a time.
# The pair is unpacked into its own names so the `dates` array is not
# overwritten by the loop variable.
for i, (date1, date2) in enumerate(zip(dates, dates[1:])):
    fig = plt.figure(figsize=(300/my_dpi, 300/my_dpi), dpi=my_dpi)
    # x-limit extends past 1; presumably to leave room for the legend.
    plt.xlim(-0.02, 1.4)
    plt.ylim(-0.02, 1.02)

    current_data = df[df[date_col].isin([date1, date2])]
    for subgroup_code in sorted(subgroup_max_values.keys()):
        subgroup = current_data[current_data['CFTC_SubGroup_Code'] == subgroup_code]
        if len(subgroup) != 0:
            # Sum over both reports * 0.5 = mean per report, then normalize.
            prod_merc_data = subgroup['Prod_Merc_Positions_Long_All'].sum() * 0.5 / prod_max_values[subgroup_code]
            swap_data = subgroup['Swap_Positions_Long_All'].sum() * 0.5 / swap_max_values[subgroup_code]
        else:
            # Still plot a point so every subgroup keeps its legend entry
            # and its place in the colour cycle.
            prod_merc_data = [0]
            swap_data = [0]
        plt.scatter(prod_merc_data,
                    swap_data,
                    label=subgroup_code,
                    s=150,
                    alpha=0.7)

    plt.title('Prod/Merc vs. Swap ' + str(date1))
    plt.xlabel('Prod/Merc Positions')
    plt.ylabel('Swap Positions')
    plt.legend()

    # Zero-padded index keeps the frames in order when globbed by the shell.
    filename = 'graphs/step' + str(i).zfill(3) + '.png'
    plt.savefig(filename, dpi=my_dpi)
    plt.close(fig)

In [None]:
# Stitch the saved frames into one animated GIF with ImageMagick's `convert`.
# The shell expands the glob in sorted order, so the zero-padded step numbers
# determine frame order. `-delay 5` is in 1/100ths of a second per frame.
!convert -delay 5 ./graphs/*.png animated_chart.gif