In [2]:
##### import dependancies

import os
import numpy as np
import pandas as pd
import pickle
import quandl
from datetime import datetime

# import plotting information 

import plotly.offline as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
py.init_notebook_mode(connected=True)

# to get started I am using this post https://blog.patricktriest.com/analyzing-cryptocurrencies-python/ 

## Quandl Helper Function 
We're using pickle to serialize and save the download so we don re download the same data each time we run the script. This function will return the data as a Panda datafram 

[QUANDL](https://blog.quandl.com/api-for-bitcoin-data) is a free API that allows free information of 30+ diferent crypto currencies. Quandl also allows access to forex information 



In [3]:
# we will be using Quandl's free data API (Application Programming Interface)
# https://blog.quandl.com/api-for-bitcoin-data

def get_quandl_data(quandl_id): # Essentially try the cache, if read IO error load from quandl API
    '''Download and cache Quandl dataseries'''
    cache_path = '{}.pkl'.format(quandl_id).replace('/','-')
    
    try:
        f = open(cache_path, 'rb') # Opening binary I/O stream 
        df = pickle.load(f)   # depickles byte information
        print('Loaded {} from cache'.format(quandl_id))
        
    except (OSError, IOError) as e:
        print('Downloading {} from Quandl'.format(quandl_id))
        df = quandl.get(quandl_id, returns="pandas") #gets info from quandl 
        df.to_pickle(cache_path) # 
        print('Cached {} at {}'.format(quandl_id, cache_path))
        
    return df


# I am adding a delete path helper function so i can reload data or get rid of data I dont want
def delete_cache(path_to_delete): # Pass in exchange name in quandl
    
    # MORE EFFICIENT ALTERNATIVE
    # As a more efficient alternative, you could append the current date to each
    # cache filename, and then re-configure the script to download only the most 
    # recent data and to concatenate these updates onto the already downloaded data.
    
    quandl_id = 'BCHARTS/{}USD'.format(path_to_delete) # creates full path name in cache 
    cache_path = '{}.pkl'.format(quandl_id).replace('/','-')
    
    try:
        f = open(cache_path, 'rb') #rb = read bytes 
        os.remove(cache_path)
        print('Deleted {} from cache path'.format(quandl_id))
        
    except (OSError, IOError) as e:
        print('The {} path was not deleted'.format(quandl_id))
       

In [9]:
exchange_names = ['KRAKEN','COINBASE','ITBIT','BITSTAMP']

print("Do you want to delete/ reload any exchange data from cache? (y/n)")
answer = input()
answer = answer.lower()

if answer == 'y':
    print("Which cache would you like to delete?\n 1: Kraken,\n 2: Coinbase,\n 3: Itbit,\n 4: Bitstamp?")
    answer_2 = int(input())
    
    try:
        if 1 == answer_2: delete_cache('KRAKEN')
        elif 2 == answer_2: delete_cache('COINBASE')
        elif 3 == answer_2: delete_cache('ITBIT')
        elif 1 == answer_2: delete_cache('BITSTAMP')
            
    except ValueError as e:
        print("Your chioce is not available")

Do you want to delete/ reload any exchange data from cache? (y/n)
y
Which cache would you like to delete?
 1: Kraken,
 2: Coinbase,
 3: Itbit,
 4: Bitstamp?
4


In [10]:
# make a btc price aggregate from 3 different exchanges

exchange_data = {}

for exchange in exchange_names:
    ex = 'BCHARTS/{}USD'.format(exchange)
    temp_dataframe = get_quandl_data(ex)
    exchange_data[exchange] = temp_dataframe

Downloading BCHARTS/KRAKENUSD from Quandl
Cached BCHARTS/KRAKENUSD at BCHARTS-KRAKENUSD.pkl
Downloading BCHARTS/COINBASEUSD from Quandl
Cached BCHARTS/COINBASEUSD at BCHARTS-COINBASEUSD.pkl
Downloading BCHARTS/ITBITUSD from Quandl
Cached BCHARTS/ITBITUSD at BCHARTS-ITBITUSD.pkl
Loaded BCHARTS/BITSTAMPUSD from cache


In [11]:
temp = list(exchange_data.values()) #must tern temp into iteritable object for plotly

In [12]:
# Make basic plot of BTC USD
# print(temp[0]['Weighted Price'])

plot_num = 3
btc_trace = go.Scatter(x=temp[plot_num].index, 
                       y=temp[plot_num]['Weighted Price'])

                      
py.iplot([btc_trace])

# Name: Weighted Price, Length: 3383, dtype: float64 (3) GOOD
# Name: Weighted Price, Length: 1216, dtype: float64 (2) TRASH
# Name: Weighted Price, Length: 330, dtype: float64  (1) TRASH
# Name: Weighted Price, Length: 2535, dtype: float64 (0) kraken


## Bitcoin Price Agregate 

Bitcion price is not ever totally accurate on each exchange. So i will oull data from 3 different exchanges and then turn it into an aggregate price for use in analysis.


In [13]:
# combine all data into single dataframe with date column
# I can re use this for the weighted volume as well

def combine_df_on_col(df, label, column):
    
    # add somethign to resample everything the same 
    
    
    combined_df = {} # this will be our return value wrapped in the panda Data Frame 
    
    for i in range(len(df)):
        combined_df[label[i]] = df[i][column]
     
    return pd.DataFrame(combined_df)

# pd.DataFrame(series_dict) comand to return the combines data set on the column
    

# NaN Values
## How I will deal with NaN values from data samples at different rates 


In [14]:
btc_df_usd = combine_df_on_col(list(exchange_data.values()),list(exchange_data.keys()),'Weighted Price')

In [15]:
print(btc_df_usd.isnull().sum()) # shows how many NaN values are in each columns

# print(len(exchange_data['BITSTAMP'])-len(exchange_data['ITBIT'])) Each NaN is from not sampling correctly

KRAKEN       847
COINBASE    2200
ITBIT       1436
BITSTAMP      33
dtype: int64


In [18]:
# What to do with all these NaN values?

# I think it would be better to replace them with nearest neighbore values? 
# average the two neighbores that have values
# gonna replace this with fillna() function that averages its neighbores 

# using ffill which propogates last valid operation forward... might not work 

# Jk i dont like the ffill method, gonna use interpolation 
# df.interpolate(method='linear', limit_direction='forward', axis=0) nope, maybe download scipy
# below seems to be the best for calculating the avergag 


btc_df_usd.replace(0, np.nan, inplace=True )


In [16]:
def df_scatter(df, title, seperate_y_axis=False, y_axis_label='', scale='linear', initial_hide=False):
    '''Generate a scatter plot of the entire dataframe'''
    label_arr = list(df)
    series_arr = list(map(lambda col: df[col], label_arr))
    
    layout = go.Layout(
        title=title,
        legend=dict(orientation="h"),
        xaxis=dict(type='date'),
        yaxis=dict(
            title=y_axis_label,
            showticklabels= not seperate_y_axis,
            type=scale
        )
    )
    
    y_axis_config = dict(
        overlaying='y',
        showticklabels=False,
        type=scale )
    
    visibility = True
    
    # Form Trace For Each Series
    trace_arr = []
    for index, series in enumerate(series_arr):
        trace = go.Scatter(
            x=series.index, 
            y=series, 
            name=label_arr[index],
            visible=visibility
        )
        
        trace_arr.append(trace)

    fig = go.Figure(data=trace_arr, layout=layout)
    py.iplot(fig)

In [17]:
df_scatter(btc_df_usd, 'Bitcoin Price (USD) By Exchange')

# Make Aggregate Price data 

In [18]:
btc_df_usd['avg_btc_price_usd'] = btc_df_usd.mean(axis=1) 

In [19]:
btc_trace = go.Scatter(x=btc_df_usd.index, y=btc_df_usd['avg_btc_price_usd'])
py.iplot([btc_trace])

# SMA (Simple Moving Average)
# Technical Analysis Indicator Looking Back over a Period of Time 
- Use to help identify support and reistance levels for traders
- Usually used in 20, 50, 100, 200 day periods 
- Helps smooth out price data do understand trends 
- 50 and 200 day signals are most commonly used 
- Its impossible to predict the future, but technical anaylysis indicators are useful 

### SMA Equation 
\begin{equation*}
SMA = \frac{A_1+A_2+...+A_n}{n}
\end{equation*}
### WHERE 

A = Average in perions n,

n = number of time periods


In [20]:
# SMA using pandas rolling window code

n = {20,50,100,200}

for i,v in enumerate(n):
    name = 'sma_{}'.format(str(v))
    btc_df_usd[name] = btc_df_usd.rolling(window=v)['avg_btc_price_usd'].mean()



In [21]:
df_scatter(btc_df_usd, 'Bitcoin Price (USD) By Exchange')

# TO DELETE COLUMNS del btc_df_usd[]


# Bollinger Bands
## An indicator of price volitility
- Not a standalone trading system
- Based off of SMA above 
- Breakouts provide no insight for a BUY/SELL indicator
- Purely a sign of volitility
- trend lines calculated 2 standard deviations away from 20 day SMA
- We use smaler SMA to better understand volitility
- moves towards upper bands indicate overbought
- moves towards the lower bands indicate oversold 


## Bollinger Bands Equation 

\begin{equation*}
Bollinger_{upper} = SMA_{p} + x * \sigma_{SMA}  \\
Bollinger_{lower} = SMA_{p} - x * \sigma_{SMA} 
\end{equation*}

## Where

p = time period (usually 20, but could varry based off of time series variability),

x = multiplying factor (usually 2)





In [22]:
btc_df_usd['20_sma_std'] = btc_df_usd.rolling(window=20)['avg_btc_price_usd'].std()
btc_df_usd['2_std_20_sma_u'] = btc_df_usd['sma_20'] + (2 * btc_df_usd['20_sma_std']) # oreder of operations matter
btc_df_usd['2_std_20_sma_d'] = btc_df_usd['sma_20'] - (2 * btc_df_usd['20_sma_std'])


In [23]:
df_scatter(btc_df_usd, '2_std_20_sma_u')

# Relative Strength Index (RSI)

- gauge for primary direction of the trend (Momentum Indicator)
- looking to track bull and bear markets 
- bull markets or overbought markets may have a signal of 0.66 or higher
- bear markets or oversold may have a signal of 0.33 or lower
## RSI Equation
\begin{equation}
RSI_{step1}=100-\frac{100}{1+\frac{Average Gain}{Average Loss}}
\end{equation}

\begin{equation}
Standard Period k = 14
Average Gain = % gain in k periods
Average Loss = % loss in k periods
\end{equation}

\begin{equation}
RSI_{step2}=100-\frac{100}{1+\frac{(Previous Average Gain 13X) + Current Gain}{(Previous Average Loss 13X) + Current Loss}}
\end{equation}

Note** In the situation that there is an average loss of 0, the RSI is set to 100 by default to deal with the DBZ error. 


# Pseudo Code 
- Loop through each stock’s historical price data.
- Compute the price movement every day (up/down).
- Gather the average gain and loss over the last 14 days.
- Calculate the Relative Strength (RS) and Relative Strength Index (RSI).

# Sliding WIndow Alogrithm Algorithm
- improves time complexity for problems like this
- Based on the idea that you slide a window along the data set as you loop through it, this minimized the use of for loops. Time complexity should be O(n)

In [25]:
#Create new data frame with avg btc price
#declare variables
#review moving window algorithm "dynamic programming"
#find average gain 
#find average loss 
#plot 2 RSI and AVG Price on same window 

#IF average loss goes to 0 we default a 100 RSI value

# Variables in python do NOT need to be declares before hand and can change types 

avg_btc_price_t1 = list(btc_df_usd['avg_btc_price_usd']) # convert dataframe to a list
avg_btc_price_t2 = list(btc_df_usd['avg_btc_price_usd']) 
avg_btc_price_t2.insert(0,0) # NOTE: i set this equal to avg_btc_price_t2 before and the type came up as NoneType
btc_price_change = [(avg_btc_price_t2[k]-avg_btc_price_t1[k]) for k in range(len(avg_btc_price_t1))]


print([btc_price_change[i] for i in range(10)])
# 0 1 2 3 4 5 5 t1
# 1 2 3 4 5 5 0 t2   change = t2 - t1 DELETE LAST entry or just dont do it


rsi_k = 14
rs = []
rsi = []
up_price = []
down_price = []
avg_gain = []
avg_loss = []
change = []
i = 1
    
rsi = [abs(100 - (100/(1+rs[j]))) for j in range(len(rs))]  

df_dict = {
        'AvgGain' : avg_gain,
        'AvgLoss' : avg_loss,
        'RS' : rs,
        'RSI' : rsi
}

df = pd.DataFrame(df_dict, columns = ['Avg btc price', 'upPrices', 'downPrices', 'AvgGain','AvgLoss', 'RS', "RSI"])
df.to_csv("/Users/jonnymorsicato/Desktop/Data Science/crypto_analysis"+"_RSI.csv", index = False)


[-5.92923064836, 0.33843313401999975, 0.49652560020000003, 0.23975686706000054, -0.015484952920000516, 0.03042440589999984, -0.06042440590000009, -0.4880536932099995, -0.31802118577000016, 0.009114423129999771]


In [None]:
# current = avg_btc_price[1]
# previous = avg_btc_price[0]
    
# while i <= len(avg_btc_price) - rsi_k:
    
#     current = avg_btc_price[i]
#     previous = avg_btc_price[i-1]
#     up_price = []
#     down_price = []
    
#     for j in range(0,rsi_k): # calc change for k days and then find the avergage = sum(k_days_gain/k_days_lost)
    
#         change.append(current-previous)
       
# #         if  change >= 0:
# #             up_price.append(change)
# #             down_price.append(0) # placeholder 
# #         elif change < 0:
# #             down_price.append(change)
# #             up_price.append(0)
        
#         previous = current
#         current = avg_btc_price[i+j]
    
# #     avg_gain.append(sum(up_price)/rsi_k) # can replace this with list comprehension
# #     avg_loss.append(sum(down_price)/rsi_k) 
    
# #     if avg_loss[i-1] == 0:
# #         rs.append(avg_gain[i-1]/min(avg_loss)) # Assumption: use is to use the minimum loss 
# #     else: 
# #         rs.append(avg_gain[i-1]/avg_loss[i-1])
        
#     i+=1 # move window