In [1]:
import numpy as np
import pandas as pd
from plotly import __version__ as plotly_version
from plotly.offline import init_notebook_mode, iplot
from statsmodels.tsa.arima_process import arma_generate_sample
from scipy.signal import savgol_filter

print("Plotly version: " + plotly_version)

init_notebook_mode(connected=True)

Plotly version: 3.4.2


In [2]:
num_points = 1500

np.random.seed(13)
xs = arma_generate_sample([1, -1, 0, 0, 0], [1, -1, -1, -0.8, -0.3], nsample=num_points, sigma=0.5, burnin=100)

np.random.seed(10)
os = arma_generate_sample([1, -1, 0, 0, 0], [1, -1, -1, -0.8, -0.3], nsample=num_points, sigma=0.5, burnin=100) * 0.5

df = pd.date_range(start=pd.to_datetime('2014-01-01'), periods=1500, name='Date').to_frame(index=False)
df['A'] = xs + 20
df['B'] = xs + os + 20

In [3]:
df.head()

Unnamed: 0,Date,A,B
0,2014-01-01,12.333677,7.401696
1,2014-01-02,13.135447,8.013742
2,2014-01-03,13.885421,9.309362
3,2014-01-04,14.853525,11.225588
4,2014-01-05,15.735236,12.557178


In [4]:
# Defining everything:

min_x = 0
max_x = 60

min_y = 0
max_y = 80

A_mean = df['A'].mean()
B_mean = df['B'].mean()
black = "rgb(0,0,0)"

gray = "rgb(162, 162, 162)"
guide_line_style = {'color': gray, 'dash': 'dash'}

A_std = df['A'].std()
B_std = df['B'].std()
lower_range_x = A_mean - 2*A_std
upper_range_x = A_mean + 2*A_std
lower_range_y = B_mean - 2*B_std
upper_range_y = B_mean + 2*B_std

In [5]:
scatter = {
    'x': df['A'],
    'y': df['B'],
    'mode': 'markers',
    'marker': {'size': 6}
}


In [6]:
black_point = {
    'x' : [A_mean],
    'y' : [B_mean],
    'mode' : 'markers',
    'marker' : {'size': 16, 'color' : black}
} 


In [7]:
layout = {
    'title': 'Scatter plot of A vs B',
    'xaxis': {
        'title': 'Series A',
        'range': [0, 60]
        },
    'yaxis': {
        'title': 'Series B',
        'range': [0, 80],
        'tickvals': [0, 20, 40, 60, 80]
        },
    'showlegend': False
}

In [8]:
# the vertical set: two lines, one at x = mean of series A minus 2 times its standard deviation the other one at x = mean of series a plus 2 times its standard deviation.
# the horizontal set: two lines, one at y = mean of series B minus 2 times its standard deviation the other one at y = mean of series a plus 2 times its standard deviation.

lower_vertical = {
    'x': [lower_range_x, lower_range_x],
    'y': [min_y, max_y],
    'mode': 'lines',
    'line': guide_line_style
}

upper_vertical = {
    'x' : [upper_range_x, upper_range_x],
    'y' : [min_y, max_y],
    'mode' : 'lines',
    'line' : guide_line_style
}

lower_horizontal = {
    'x' : [min_x, max_x],
    'y': [lower_range_y, lower_range_y],
    'mode' : 'lines',
    'line' : guide_line_style    
}

upper_horizontal = {
    'x' : [min_x, max_x],
    'y': [upper_range_y, upper_range_y],
    'mode' : 'lines',
    'line' : guide_line_style    
}

In [9]:
figure = {
    'data':
        [scatter, black_point, lower_vertical, upper_vertical, lower_horizontal, upper_horizontal],
    'layout': layout
}

iplot(figure)

In [10]:
# bonus challenge:

import datetime
lagged_df = df.copy()
lagged_df['Date'] = lagged_df['Date'] + datetime.timedelta(days=1)

merged_df = pd.merge(df, lagged_df, on='Date', suffixes=('', '_lagged'))

merged_df.head()

Unnamed: 0,Date,A,B,A_lagged,B_lagged
0,2014-01-02,13.135447,8.013742,12.333677,7.401696
1,2014-01-03,13.885421,9.309362,13.135447,8.013742
2,2014-01-04,14.853525,11.225588,13.885421,9.309362
3,2014-01-05,15.735236,12.557178,14.853525,11.225588
4,2014-01-06,16.571813,13.540015,15.735236,12.557178


In [11]:
#defining everything again

second_A_mean = merged_df['A'].mean()
second_A_std = merged_df['A'].std()
A_lagged_mean = merged_df['A_lagged'].mean()
A_lagged_std = merged_df['A_lagged'].std()
second_lower_range_x = second_A_mean - 2*second_A_std
second_upper_range_x = second_A_mean + 2*second_A_std
second_lower_range_y = A_lagged_mean - 2*A_lagged_std
second_upper_range_y = A_lagged_mean + 2*A_lagged_std

In [12]:
layout = {
    'title': 'Scatter plot of A vs lagged A'
    'xaxis': {
        'title': 'Series A',
        'range': [min_x, max_x]
        },
    'yaxis': {
        'title': 'Series A lagged',
        'range': [min_y, max_y]
        },
    'showlegend': False
}

SyntaxError: invalid syntax (<ipython-input-12-4d3cea6f4165>, line 3)

In [None]:
second_scatter = {
    'x': merged_df['A'],
    'y': merged_df['A_lagged'],
    'mode': 'markers',
    'marker': {'size': 6}
}

second_black_point = {
    'x' : [A_mean],
    'y' : [A_lagged_mean],
    'mode' : 'markers',
    'marker' : {'size': 12, 'color' : pt_color_black}
}

second_lower_vertical = {
    'x': [lower_range_x, lower_range_x],
    'y': [min_y, max_y],
    'mode': 'lines',
    'line': guide_line_style
}

second_upper_vertical = {
    'x' : [upper_range_x, upper_range_x],
    'y' : [min_y, max_y],
    'mode' : 'lines',
    'line' : guide_line_style
}

second_lower_horizontal = {
    'x' : [min_x, max_x],
    'y': [lower_range_y, lower_range_y],
    'mode' : 'lines',
    'line' : guide_line_style    
}

second_upper_horizontal = {
    'x' : [min_x, max_x],
    'y': [upper_range_y, upper_range_y],
    'mode' : 'lines',
    'line' : guide_line_style    
}

In [None]:
figure = {
    'data':
        [second_scatter, second_black_point, lower_vertical, upper_vertical, lower_horizontal, upper_horizontal],
    'layout': layout
}

iplot(figure)