###### Data viz testing with Altair and the Alpha Vantage API in a notebook

In [1]:
# imports
import sys,os
import pandas as pd
import requests
import altair as alt
from altair.expr import datum
#from jupyter_datatables import init_datatables_mode


In [2]:
# Report library versions plus the virtualenv and interpreter this kernel runs on
environment_report = [
    f'Your Altair Version: {alt.__version__}',
    f'Your Pandas Version: {pd.__version__}',
    f'Using Virtualenv: {os.getenv("VIRTUAL_ENV")}',
    f'With Python Executable: {sys.executable}',
    f'And Python Version: {sys.version}',
]
# one line per entry, same text as printing each entry separately
print('\n'.join(environment_report))

Your Altair Version: 4.1.0
Your Pandas Version: 1.2.3
Using Virtualenv: C:\Users\jon.galloy\PycharmProjects\VENV\DATA_VIZ
With Python Executable: c:\Users\jon.galloy\PycharmProjects\VENV\DATA_VIZ\Scripts\python.exe
And Python Version: 3.7.9 (tags/v3.7.9:13c94747c7, Aug 17 2020, 18:58:18) [MSC v.1900 64 bit (AMD64)]


In [3]:
# Enable Altair's 'default' renderer so charts open here in the notebook rather
# than in a separate browser window.
# DataTables (which would replace the default pandas view of DataFrames) is
# currently switched off: its init call below is commented out, like its import.
alt.renderers.enable('default')
#init_datatables_mode()

RendererRegistry.enable('default')

In [4]:
# Alpha Vantage request settings.
# The API key is read from the ALPHAVANTAGE_API_KEY environment variable so the
# secret never lives in the notebook. Any key that has appeared in a shared copy
# of this notebook should be treated as leaked and rotated.
# symbol = input()
api_key = os.environ.get('ALPHAVANTAGE_API_KEY', '')
if not api_key:
    # Warn right here instead of letting the request cell fail with a confusing error.
    print('ALPHAVANTAGE_API_KEY is not set; the Alpha Vantage request is expected to fail.',
          file=sys.stderr)
base_url = 'https://www.alphavantage.co/query?'
# Query-string parameters: daily adjusted time series for the BB ticker
params = {'function': 'TIME_SERIES_DAILY_ADJUSTED',
          'symbol': 'BB',
          'apikey': api_key}

In [5]:
# Call the API and decode the JSON body once into response_dict.
# The payload is nested: a metadata entry plus the time-series entry. `header`
# is the key of the time-series entry, used below to skip the metadata.
response = requests.get(base_url, params=params, timeout=30)  # don't hang forever on a stalled connection
response.raise_for_status()  # surface HTTP-level failures immediately
response_dict = response.json()
# Look the key up by name instead of unpacking "exactly two keys": an error or
# rate-limit payload has a different shape and would otherwise fail with an
# opaque unpacking error.
header = next((key for key in response_dict if key.startswith('Time Series')), None)
if header is None:
    raise ValueError(f'Alpha Vantage response contains no time series: {response_dict}')

In [6]:
# Build the price table: one row per date key of the time-series entry
daily_series = response_dict[header]
df = pd.DataFrame.from_dict(daily_series, orient='index')
# Column labels are reduced to their second space-separated word, giving the
# bare names seen in the table (open, high, low, close, adjusted, ...)
df_cols = [label.split(' ')[1] for label in df.columns]
df.columns = df_cols
# Copy the index (the date strings supplied by the API) into a 'date' column
df = df.assign(date=df.index)
df

Unnamed: 0,open,high,low,close,adjusted,volume,dividend,split,date
2021-03-25,9.31,9.97,9.2536,9.88,9.88,14363928,0.0000,1.0,2021-03-25
2021-03-24,10.21,10.22,9.545,9.58,9.58,12636058,0.0000,1.0,2021-03-24
2021-03-23,10.69,10.69,10.02,10.07,10.07,12245514,0.0000,1.0,2021-03-23
2021-03-22,11.0,11.34,10.66,10.71,10.71,7903615,0.0000,1.0,2021-03-22
2021-03-19,10.83,11.17,10.69,10.95,10.95,10519101,0.0000,1.0,2021-03-19
...,...,...,...,...,...,...,...,...,...
2020-11-05,4.78,4.92,4.74,4.86,4.86,2597311,0.0000,1.0,2020-11-05
2020-11-04,4.7,4.76,4.575,4.68,4.68,3113912,0.0000,1.0,2020-11-04
2020-11-03,4.53,4.69,4.52,4.67,4.67,2781730,0.0000,1.0,2020-11-03
2020-11-02,4.5,4.55,4.45,4.48,4.48,2864480,0.0000,1.0,2020-11-02


In [7]:
# Build df1: the same data as df but with a fresh 0, 1, 2, ... integer index.
# drop=True discards the old date-string index instead of inserting it as a
# column; the dates remain available in the 'date' column.
df1 = df.reset_index(drop=True)
df1

Unnamed: 0,open,high,low,close,adjusted,volume,dividend,split,date
0,9.31,9.97,9.2536,9.88,9.88,14363928,0.0000,1.0,2021-03-25
1,10.21,10.22,9.545,9.58,9.58,12636058,0.0000,1.0,2021-03-24
2,10.69,10.69,10.02,10.07,10.07,12245514,0.0000,1.0,2021-03-23
3,11.0,11.34,10.66,10.71,10.71,7903615,0.0000,1.0,2021-03-22
4,10.83,11.17,10.69,10.95,10.95,10519101,0.0000,1.0,2021-03-19
...,...,...,...,...,...,...,...,...,...
95,4.78,4.92,4.74,4.86,4.86,2597311,0.0000,1.0,2020-11-05
96,4.7,4.76,4.575,4.68,4.68,3113912,0.0000,1.0,2020-11-04
97,4.53,4.69,4.52,4.67,4.67,2781730,0.0000,1.0,2020-11-03
98,4.5,4.55,4.45,4.48,4.48,2864480,0.0000,1.0,2020-11-02


In [8]:
# Scatter of the daily open price over time.
# The columns are still plain 'object' (string) columns; the ':T' / ':Q' type
# suffixes tell Altair how to read them, so no pandas datetime conversion is done.
open_points = alt.Chart(df1).mark_circle(size=50)
chart = (
    open_points
    .encode(
        alt.X('date:T'),
        alt.Y('open:Q'),
        tooltip=['open', 'close', 'high', 'low'],
    )
    .properties(width=1200)
    .interactive()
)
chart

In [9]:
# Daily open price as a dashed orange line with a point marker on every row
dashed_orange = dict(point=True, color='orange', strokeDash=[5, 2])
chart = (
    alt.Chart(df1)
    .mark_line(**dashed_orange)
    .encode(
        alt.X('date:T'),
        alt.Y('open:Q'),
        tooltip=['open'],
    )
    .properties(width=1200)
    .interactive()
)
chart

In [10]:
# Overlay the daily open (orange) and close (blue) prices as dashed lines with points.
# The charts are named open_chart / close_chart so that Python's builtin open()
# is not shadowed for the rest of the kernel session.
open_chart = alt.Chart(df1).mark_line().encode(
    x='date:T',
    y='open:Q',
    tooltip=['open']
).interactive()

close_chart = alt.Chart(df1).mark_point().encode(
    x='date:T',
    y='close:Q',
    tooltip=['close']
).properties(width=1200).interactive()

# The mark is re-specified on each chart here; `+` layers the two charts.
open_chart.mark_line(point=True,strokeDash=[6,2],color='orange') + close_chart.mark_line(point=True,strokeDash=[4,1],color='blue')

In [11]:
# Trend line: mean of 'open' over a window reaching 15 rows either side of each row
windowed = alt.Chart(df1).transform_window(
    rolling_mean='mean(open)',
    frame=[-15, 15],
)
line = windowed.mark_line(
    color='orange',
    strokeWidth=2,
    strokeDash=[10, 5],
    strokeCap='square',
).encode(
    alt.X('date:T'),
    alt.Y('rolling_mean:Q'),
)

# Raw daily open prices as purple diamonds, with open/close in the tooltip
open_axis = alt.Axis(title='Open Price')
points = (
    alt.Chart(df1)
    .mark_point(shape='diamond', size=60, fontStyle='italic', color='purple')
    .encode(
        alt.X('date:T'),
        alt.Y('open:Q', axis=open_axis),
        tooltip=['open', 'close'],
    )
    .properties(width=1200)
    .interactive()
)

# points first, then the rolling-mean line layered on top
points + line

In [12]:
# Plot two columns of the same frame against a shared date axis:
# both layers derive from one base chart that only fixes the x encoding.
base = alt.Chart(df.reset_index()).encode(x='date:T')

open_layer = base.mark_line(color='blue').encode(y='open:Q', tooltip=['open'])
close_layer = base.mark_line(color='orange').encode(y='close:Q', tooltip=['close'])

alt.layer(open_layer, close_layer).properties(width=1200).interactive()

In [13]:
# Prepare the table that pandas `melt` will reshape from wide to long in the
# cells below (used for the multi-series charts further down).
# Only the four price columns are kept; the dates stay in the index, which is
# labelled 'date' so that reset_index() later produces a 'date' column.
# rename_axis returns a new frame. Assigning to meltData.index.name instead
# renames the Index object in place, and that object can be shared with df,
# which would break df.reset_index() when earlier cells are re-run.
meltData = df.drop(columns=['adjusted','split','dividend','volume','date']).rename_axis('date')

In [14]:
# Display the wide table: open/high/low/close columns, indexed by 'date'
meltData

Unnamed: 0_level_0,open,high,low,close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-03-25,9.31,9.97,9.2536,9.88
2021-03-24,10.21,10.22,9.545,9.58
2021-03-23,10.69,10.69,10.02,10.07
2021-03-22,11.0,11.34,10.66,10.71
2021-03-19,10.83,11.17,10.69,10.95
...,...,...,...,...
2020-11-05,4.78,4.92,4.74,4.86
2020-11-04,4.7,4.76,4.575,4.68
2020-11-03,4.53,4.69,4.52,4.67
2020-11-02,4.5,4.55,4.45,4.48


In [15]:
# Wide -> long: move the date index back into a column, then melt every price
# column into (date, variable, value) rows
price_table = meltData.reset_index()
myData = price_table.melt(id_vars='date')
myData

Unnamed: 0,date,variable,value
0,2021-03-25,open,9.31
1,2021-03-24,open,10.21
2,2021-03-23,open,10.69
3,2021-03-22,open,11.0
4,2021-03-19,open,10.83
...,...,...,...
395,2020-11-05,close,4.86
396,2020-11-04,close,4.68
397,2020-11-03,close,4.67
398,2020-11-02,close,4.48


In [16]:
# One line per price series (open/high/low/close), coloured by the 'variable' column
(
    alt.Chart(myData)
    .mark_line()
    .encode(
        alt.X('date:T'),
        alt.Y('value:Q'),
        alt.Color('variable:N'),
    )
    .properties(width=1200)
    .interactive()
)

In [17]:
# Select the first pair of columns to take a difference of: open and close
df_difs = df1[['open','close']]

In [18]:
# Select the second pair of columns to take a difference of: high and low
df_hilo_Diff = df1[['high', 'low']]

In [19]:
# Display the selected open/close columns
df_difs

Unnamed: 0,open,close
0,9.31,9.88
1,10.21,9.58
2,10.69,10.07
3,11.0,10.71
4,10.83,10.95
...,...,...
95,4.78,4.86
96,4.7,4.68
97,4.53,4.67
98,4.5,4.48


In [20]:
# Display the selected high/low columns
df_hilo_Diff

Unnamed: 0,high,low
0,9.97,9.2536
1,10.22,9.545
2,10.69,10.02
3,11.34,10.66
4,11.17,10.69
...,...,...
95,4.92,4.74
96,4.76,4.575
97,4.69,4.52
98,4.55,4.45


In [21]:
# Row-wise differences for the open/close pair and the high/low pair.
# The columns are 'object' (string) columns, so they are converted to numeric
# (float64) first - otherwise the arithmetic won't work.
# diff(axis=1) subtracts the previous column from each column, so the result
# lands in the second column of each pair: 'close' holds close - open and
# 'low' holds low - high (the first column of each pair becomes NaN).
# Both frames are built straight from df1 so this cell gives the same result
# however many times it is run; diffing an already-diffed frame would yield NaN.
df_difs = df1[['open', 'close']].apply(pd.to_numeric).diff(axis=1)
df_hilo_Diff = df1[['high', 'low']].apply(pd.to_numeric).diff(axis=1)

In [22]:
# Pull out the individual pandas Series used to build the bar-chart frames:
#   df_diff       - the 'close' column of df_difs
#   df_hilo_Diffs - the 'low' column of df_hilo_Diff
#   df_date       - the 'date' column of df1
df_diff = df_difs['close']
df_hilo_Diffs = df_hilo_Diff['low']
df_date = df1['date']

In [23]:
# Display the date Series
df_date

0     2021-03-25
1     2021-03-24
2     2021-03-23
3     2021-03-22
4     2021-03-19
         ...    
95    2020-11-05
96    2020-11-04
97    2020-11-03
98    2020-11-02
99    2020-10-30
Name: date, Length: 100, dtype: object

In [24]:
# Display the open/close difference Series
df_diff

0     0.57
1    -0.63
2    -0.62
3    -0.29
4     0.12
      ... 
95    0.08
96   -0.02
97    0.14
98   -0.02
99   -0.06
Name: close, Length: 100, dtype: float64

In [25]:
# Display the high/low difference Series
df_hilo_Diffs

0    -0.7164
1    -0.6750
2    -0.6700
3    -0.6800
4    -0.4800
       ...  
95   -0.1800
96   -0.1850
97   -0.1700
98   -0.1000
99   -0.1678
Name: low, Length: 100, dtype: float64

In [26]:
# Combine the Series created above into two-column DataFrames (date, diff):
# df_posneg carries the open/close difference, df_hilo the high/low difference
df_posneg = pd.DataFrame({'date': df_date, 'diff': df_diff})
df_hilo = pd.DataFrame({'date': df_date, 'diff': df_hilo_Diffs})

In [27]:
# Display the date / open-close difference frame
df_posneg

Unnamed: 0,date,diff
0,2021-03-25,0.57
1,2021-03-24,-0.63
2,2021-03-23,-0.62
3,2021-03-22,-0.29
4,2021-03-19,0.12
...,...,...
95,2020-11-05,0.08
96,2020-11-04,-0.02
97,2020-11-03,0.14
98,2020-11-02,-0.02


In [28]:
# Display the date / high-low difference frame
df_hilo

Unnamed: 0,date,diff
0,2021-03-25,-0.7164
1,2021-03-24,-0.6750
2,2021-03-23,-0.6700
3,2021-03-22,-0.6800
4,2021-03-19,-0.4800
...,...,...
95,2020-11-05,-0.1800
96,2020-11-04,-0.1850
97,2020-11-03,-0.1700
98,2020-11-02,-0.1000


In [29]:


# Bar chart of the daily open/close difference: black bars where diff > 0, red otherwise
bar_colour = alt.condition(
    alt.datum.diff > 0,
    alt.value("black"),  # The positive color
    alt.value("red"),  # The negative color
)
dollar_axis = alt.Axis(format='$', title='Change in $')
chart2 = (
    alt.Chart(df_posneg)
    .mark_bar()
    .encode(
        x="date:T",
        y=alt.Y('diff:Q', axis=dollar_axis),
        tooltip=['diff:Q', 'date:T'],
        color=bar_colour,
    )
    .properties(width=1200, height=600, title='Net Difference Daily Open/Close')
    .interactive()
)
# Title styling is applied on display only; chart2 itself is left unconfigured
title_style = dict(
    fontSize=20,
    font='Palatino Linotype',
    fontStyle='italic',
    anchor='start',
    color='steelblue',
)
chart2.configure_title(**title_style)


In [30]:
# Bar chart of the daily high/low difference: steelblue where diff > 0, darkgreen otherwise.
# NOTE(review): the diff values shown above are all negative (low - high), so
# the "positive" colour does not appear to be reached - confirm this is intended.
bar_colour = alt.condition(
    alt.datum.diff > 0,
    alt.value("steelblue"),  # The positive color
    alt.value("darkgreen"),  # The negative color
)
chart3 = (
    alt.Chart(df_hilo)
    .mark_bar()
    .encode(
        x="date:T",
        y="diff:Q",
        tooltip=['diff', 'date:T'],
        color=bar_colour,
    )
    .properties(width=1200, title='Net diff of Daily Hi/Lo')
    .interactive()
)
# Title styling is applied on display only; chart3 itself is left unconfigured
title_style = dict(
    fontSize=20,
    font='Palatino Linotype',
    fontStyle='italic',
    anchor='start',
    color='steelblue',
)
chart3.configure_title(**title_style)

In [31]:
# Area graph of the high/low difference over time
area_style = dict(
    color='green',
    opacity=0.5,
    line={'color': 'darkgreen'},
)
chart4 = (
    alt.Chart(df_hilo)
    .mark_area(**area_style)
    .encode(
        alt.X('date:T'),  # date on the x-axis
        alt.Y('diff:Q'),  # difference on the y-axis
    )
    .properties(width=1200, title='yo')
    .interactive()
)
chart4

In [32]:
# Area graph of the melted price table, one coloured area per price series
area_style = dict(
    color='green',
    opacity=0.5,
    line={'color': 'darkgreen'},
)
chart5 = (
    alt.Chart(myData)
    .mark_area(**area_style)
    .encode(
        alt.X('date:T'),
        alt.Y('value:Q'),
        alt.Color('variable:N'),
    )
    .properties(width=1200, title='yo')
    .interactive()
)
chart5

In [None]:
# Centre-stacked area chart with a legend-bound multi-selection on 'variable':
# selected series are drawn at full opacity, the rest are faded to 0.2
selection = alt.selection_multi(fields=['variable'], bind='legend')
series_opacity = alt.condition(selection, alt.value(1), alt.value(0.2))

(
    alt.Chart(myData)
    .mark_area()
    .encode(
        x=alt.X('date:T', axis=alt.Axis(domain=False, tickSize=0)),
        y=alt.Y('value:Q', stack='center', axis=None),
        color=alt.Color('variable:N', scale=alt.Scale(scheme='set1')),
        opacity=series_opacity,
    )
    .add_selection(selection)
    .properties(width=1200)
)

In [40]:
# Line-plus-points chart of every price series, with the value in the tooltip
(
    alt.Chart(myData)
    .mark_line(point=True)
    .encode(
        alt.X('date:T'),
        alt.Y('value:Q'),
        alt.Color('variable:N'),
        alt.Tooltip('value:Q'),
    )
    .properties(width=1200)
    .interactive()
)