In [None]:
# AUTHOR: Aditya Yele (ayele@cs.stonybrook.edu)
# Task: Effect of financial sector on Crypto-Currency and vice versa

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from utils import *
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import Imputer
from sklearn import preprocessing
import math
plt.style.use('fivethirtyeight')

In [None]:
# Helper functions and Initialization
%matplotlib inline

def plotAllData(label, x = None, y = None):
    plt.title(label)
    plt.plot(x, y)
    plt.show()
    
def plotBitCoin(bitCoin, date, fieldName):
    plotAllData(fieldName, date, bitCoin[fieldName])
    
def plotAllFields(data):
    columns = data.columns
    date = pd.to_datetime(data[columns[0]])
    for i in range(1, len(columns)):
        print ('Graph: ', columns[i])
        plt.title(columns[i])
        plt.plot(date, data[columns[i]])
        plt.show()

In [None]:
# Normalize data using Min Max scaling
def dataNormalization(x):
    #x = nData.values
    min_max_scaler = preprocessing.MinMaxScaler()
    x_scaled = min_max_scaler.fit_transform(x)
    #scaledData = pd.DataFrame(x_scaled, columns = list(nData.columns))
    return x_scaled

def normalize(x):
    mx_list = np.amax(x, axis = 0, keepdims = True)
    #print mx_list.shape
    mxAll = np.amax(mx_list)    
    nrm_const_list = np.reciprocal(mx_list)*mxAll
    #print nrm_const_list.shape
    x = x*nrm_const_list
    return x

def preproc_data(df, bt_ftrs):
    nw_df = df[['Date','Close']].copy()
    nw_df['Date'] = pd.to_datetime(nw_df['Date'])
    #print(nw_nvda_df.head())
    #print('Columns:', nw_nvda_df.columns)
    rw_data = nw_df.merge(bt_ftrs, left_on='Date', right_on='Date', how='inner')
    #nvda_rw_data.replace([np.inf, -np.inf], np.nan)
    #nvda_rw_data.dropna(axis=1, how='any')
    #print(nvda_rw_data.head())
    #nvda_rw_data=Imputer().fit_transform(nvda_rw_data)
    #print nvda_rw_data.max()
    x = rw_data[['btc_market_price','btc_total_bitcoins','btc_trade_volume','btc_n_transactions']].copy()
    y = rw_data[['Close']].copy()    
    #print(nvda_pred_data.head())
    #print(nvda_y.shape)
    x=Imputer().fit_transform(x,y)
    y = Imputer().fit_transform(y)
    #print np.amax(x, axis = 1)
    ###############################################################
#     mx_list = np.amax(x, axis = 0, keepdims = True)
#     print mx_list.shape
#     mxAll = np.amax(mx_list)    
#     nrm_const_list = np.reciprocal(mx_list)*mxAll
#     print nrm_const_list.shape
#     x = x*nrm_const_list
    ############################################################
    x = dataNormalization(x)
    y = dataNormalization(y)
    return x,y

def linreg(X,y,nm):
    x = normalize(X)
    y = normalize(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1)
    regression_model = LinearRegression()
    regression_model.fit(X_train, y_train)
    #print("score",regression_model.score(X_test, y_test))
    y_predict = regression_model.predict(X_test)
    print("error")
    error(y_predict, y_test)
    print ("sse",SSE(y_test,y_predict))
    print ("mape", float(MAPE(y_test, y_predict)))
    print (regression_model.coef_)
    #print (y_test.shape)
    #print (X_test.shape)
    #print (y_predict.shape)    
    plt.figure(figsize=(8,8),dpi=100)
    plt.title(nm.upper()+": Y vs Y_Prediction")
    plt.scatter(y_test,y_predict, label='Y_Prediction')
    #plt.plot(X_test[:,0],y_predict)    
    plt.xlabel('Actual', fontsize=18)
    plt.ylabel('Prediction', fontsize=18)
    a = np.linspace(0, max(y_predict), 1000)    
    plt.plot(a,a,color='r', label='Expected')
    plt.legend()
    plt.savefig(nm,bbox_inches="tight")    
    #fig = plt.figure()
    #ax = fig.add_subplot(111, projection='3d')

    #sp = ax.scatter(X_test[:,0],X_test[:,1],X_test[:,2], c=y_test, s=20)
    #plt.colorbar(sp)
    #sp = ax.plot(X_test[:0], X_test[:,1],X_test[:,2], c=y_predict, s=20 )
    #plt.colorbar(sp)
    plt.show()
    return regression_model
    #print("accuracy",accuracy_score(y_predict, y_test))
    #regression_model_mse = mean_squared_error(y_predict, y_test)
    #print("mse",regression_model_mse)
    #print("sqrt",math.sqrt(regression_model_mse))
    

In [None]:
from mpl_toolkits.mplot3d import Axes3D
#[np.isnan(nvda_x) == True]
#print t
#print tmp
#print np.all(np.isfinite(nvda_y))
#np.argwhere(np.isnan(nvda_x))
#print list(map(tuple, np.where(np.isnan(nvda_x))))
#print nvda_x
#nvda_x
#nvda_x=Imputer().fit_transform(nvda_x,nvda_y)
#nvda_y = Imputer().fit_transform(nvda_y)
#print(nvda_x)
print("#######################NVDIA################################")
nvda_x, nvda_y = preproc_data(nvda_Data, bt_ftrs)

print (nvda_x.shape)
print (nvda_y.shape)
nvda_model = linreg(nvda_x,nvda_y,"nvidia")
print("#######################################################")


bt_ftrs = btc_Data[['Date','btc_market_price','btc_total_bitcoins','btc_trade_volume','btc_n_transactions']].copy()
bt_ftrs['Date'] = pd.to_datetime(bt_ftrs['Date'])
print("#######################AMD################################")
amd_x, amd_y = preproc_data(amd_Data, bt_ftrs)
print (amd_x.shape)
print (amd_y.shape)
amd_model = linreg(amd_x,amd_y,"amd")
print("#######################################################")


bt_ftrs = btc_Data[['Date','btc_market_price','btc_total_bitcoins','btc_trade_volume','btc_n_transactions']].copy()
bt_ftrs['Date'] = pd.to_datetime(bt_ftrs['Date'])
print("#######################TSM################################")
tsm_x, tsm_y = preproc_data(tsm_Data, bt_ftrs)
print (tsm_x.shape)
print (tsm_y.shape)
tsm_model=linreg(tsm_x,tsm_y,"tsm")
print("#######################################################")



In [None]:
print (nvda_x.max())
print (nvda_y.max())

In [None]:
# Bitcoin Price
btc_Data = pd.read_csv('Data/bitcoin_price.csv')
dateData = pd.to_datetime(btc_Data['Date'])

In [None]:
btc_Data['100ma'] = btc_Data['Close'].rolling(window=100,min_periods=0).mean()

In [None]:
plotAllFields(btc_Data)

In [None]:
# Bit coin dataset
bData = pd.read_csv('Data/bitcoin_dataset.csv')
bData.columns

In [None]:
sns.pairplot(bData[bData.columns[[8, 9]]],palette='afmhot')

In [None]:
sns.pairplot(bData[['btc_hash_rate', 'btc_difficulty']],palette='afmhot')

In [None]:
plotAllFields(bData)

In [None]:
# bitcoin_cash_price.csv
bData = pd.read_csv('Data/bitcoin_cash_price.csv')
print('Columns:', bData.columns)

In [None]:
plotAllFields(bData)

In [None]:
sns.pairplot(bData[bData.columns[[1,2,3,4,5,6]]],palette='afmhot')

In [None]:
# bitcoin_cash_price.csv
amd_Data = pd.read_csv('asiccompanies/amd.us.txt')
print('Columns:', amd_Data.columns)

In [None]:
amd_Data['100ma'] = amd_Data['Close'].rolling(window=100,min_periods=0).mean()

In [None]:
plotAllFields(amd_Data)

In [None]:
# bitcoin_cash_price.csv
nvda_Data = pd.read_csv('asiccompanies/nvda.us.txt')
print('Columns:', nvda_Data.columns)
print(nvda_Data.tail())

In [None]:
nvda_Data['100ma'] = nvda_Data['Close'].rolling(window=100,min_periods=0).mean()

In [None]:
plotAllFields(nvda_Data)

In [None]:
# bitcoin_cash_price.csv
tsm_Data = pd.read_csv('asiccompanies/tsm.us.txt')
print('Columns:', tsm_Data.columns)

In [None]:
tsm_Data['100ma'] = tsm_Data['Close'].rolling(window=100,min_periods=0).mean()

In [None]:
plotAllFields(tsm_Data)

In [None]:
btc_Data = pd.read_csv('Data/bitcoin_dataset.csv')
bt_ftrs = btc_Data[['Date','btc_market_price','btc_total_bitcoins','btc_trade_volume','btc_n_transactions']].copy()
bt_ftrs['Date'] = pd.to_datetime(bt_ftrs['Date'])
print(bt_ftrs.tail())
#print('Columns:', bt_ftrs.columns)

In [None]:

#nw_nvda_df = nvda_Data[['Date','Close']].copy()
#nw_nvda_df['Date'] = pd.to_datetime(nw_nvda_df['Date'])
#print(nw_nvda_df.head())
#print('Columns:', nw_nvda_df.columns)

In [None]:
#nvda_rw_data = nw_nvda_df.merge(bt_ftrs, left_on='Date', right_on='Date', how='inner')
#nvda_rw_data.replace([np.inf, -np.inf], np.nan)
#nvda_rw_data.dropna(axis=1, how='any')
#print(nvda_rw_data.head())
#nvda_rw_data=Imputer().fit_transform(nvda_rw_data)
#print nvda_rw_data.max()
#nvda_x = nvda_rw_data[['btc_market_price','btc_total_bitcoins','btc_trade_volume','btc_n_transactions']].copy()
#nvda_y = nvda_rw_data[['Close']].copy()

#print(nvda_pred_data.head())
#print(nvda_y.shape)

In [None]:
#[np.isnan(nvda_x) == True]
#print t
#print tmp
#print np.all(np.isfinite(nvda_y))
#np.argwhere(np.isnan(nvda_x))
#print list(map(tuple, np.where(np.isnan(nvda_x))))
#print nvda_x
#nvda_x
#nvda_x=Imputer().fit_transform(nvda_x,nvda_y)
#nvda_y = Imputer().fit_transform(nvda_y)
#print(nvda_x)
print("#######################NVDIA################################")
nvda_x, nvda_y = preproc_data(nvda_Data, bt_ftrs)

print (nvda_x.shape)
print (nvda_y.shape)
nvda_model = linreg(nvda_x,nvda_y,"nvidia")
print("#######################################################")


#bt_ftrs = btc_Data[['Date','btc_market_price','btc_total_bitcoins','btc_trade_volume','btc_n_transactions']].copy()
#bt_ftrs['Date'] = pd.to_datetime(bt_ftrs['Date'])
print("#######################AMD################################")
amd_x, amd_y = preproc_data(amd_Data, bt_ftrs)
print (amd_x.shape)
print (amd_y.shape)
amd_model = linreg(amd_x,amd_y,"amd")
print("#######################################################")


#bt_ftrs = btc_Data[['Date','btc_market_price','btc_total_bitcoins','btc_trade_volume','btc_n_transactions']].copy()
#bt_ftrs['Date'] = pd.to_datetime(bt_ftrs['Date'])
print("#######################TSM################################")
tsm_x, tsm_y = preproc_data(tsm_Data, bt_ftrs)
print (tsm_x.shape)
print (tsm_y.shape)
tsm_model=linreg(tsm_x,tsm_y,"tsm")
print("#######################################################")



In [None]:
plt.plot(pd.to_datetime(nvda_Data['Date']), nvda_Data['Close'])
#btc_Data[['Date','btc_market_price','btc_total_bitcoins','btc_trade_volume','btc_n_transactions']]
plt.plot(pd.to_datetime(btc_Data['Date']), btc_Data['btc_market_price'])


In [4]:
gdp_data = pd.read_csv('gdp/greece.csv')
#print (gdp_data.columns)
#plt.plot(gdp_data['TIME'], gdp_data['Value'])
x = gdp_data['GDP']
#print (x)

In [5]:
btc_Data = pd.read_csv('ProcessedDataAll.csv')
#btc_Data['Date'] = pd.to_datetime(btc_Data['Date'])
#btc_Data['Date'].sort_values()
#btc_Data = btc_Data.sort_values(by='Date',ascending=True)
#print(btc_Data.head())
#print(len(btc_Data))
tmp = []
for i in range(0,len(btc_Data),3):
    #print(btc_Data.iloc[[i]])
    #print(btc_Data.iloc[[0]])
    rw1 = btc_Data.iloc[[i]]
    rw2 = btc_Data.iloc[[i]]
    rw3 = btc_Data.iloc[[i]]
    avg = (rw1['btc_market_price']+rw2['btc_market_price']+rw3['btc_market_price'])/float(3)
#     print (avg.values[0])
   
    tmp.append(avg.values[0])
# t = pd.DataFrame(tmp, columns=list('A'))
#print (len(tmp))

In [9]:
y=np.asarray(tmp)
print ("tmp", len(tmp))
y = y.reshape(-1,1)
#y=-1*y
x = x.reshape(-1,1)
#print("beforeX",max(x))
#print("beforeY",max(y))
x = x/float(max(x))
y = y/float(max(y))
# x=dataNormalization(x)
# y=dataNormalization(y)
#print("after",max(x))
#print("after",max(y))
# mxX = np.amax(x)   
# mxY = np.amax(y)
# x = x/float(mxX)
# y = y/float(mxY)
#x = x*(float(mxX/mxY))
#print (x.shape)
# print (y.shape)
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0, random_state=1)
regression_model = LinearRegression()
#print (X_train.shape)
#print (y_train.shape)
regression_model.fit(X_train, y_train)
print("score",regression_model.score(X_train, y_train))
y_predict = regression_model.predict(X_train)
print("error")
error(y_predict, y_train)
print ("sse",SSE(y_train,y_predict))
print ("mape", float(MAPE(y_train, y_predict)))
#print (regression_model.coef_)
#plt.scatter(y_train, y_predict)
#plt.scatter( X_train, y_train)
# ['01/01/2013','01/04/2013','01/07/2013','01/10/2013','01/01/2014','01/04/2014','01/07/2014','01/10/2014','01/01/2015','01/04/2015','01/07/2015','01/10/2015','01/01/2016','01/04/2016','01/07/2016','01/10/2016','01/01/2017']
#     dates.datestr2num('01/01/2013')
#plt.plot([20131,20132,20133,20134,20141,20142,20143,20144,20151,20152,20153,20154,20161,20162,20163,20164,20171], y)
#plt.show()
#plt.plot([20131,20132,20133,20134,20141,20142,20143,20144,20151,20152,20153,20154,20161,20162,20163,20164,20171], x)
#plt.plot(X_train, y_predic)
# print (y_predict.shape)
# print (y_test.shape)

plt.figure(figsize=(8,8),dpi=100)
plt.title("Greece Gdp and Bitcoin"+": Y vs Y_Prediction")
plt.scatter(y_train,y_predict, label='Y_Prediction')
plt.xlabel('Actual', fontsize=18)
plt.ylabel('Prediction', fontsize=18)
a = np.linspace(0, max(y_predict), 1000)    
plt.plot(a,a,color='r', label='Expected')
plt.legend()
plt.savefig("stk_grc",bbox_inches="tight")    
#return regression_model


tmp 17
score 0.136931403237
error
[ 175.89623671]
sse [ 1.12395911]
mape 175.8962367112159


In [None]:
plt.figure(figsize=(8,8),dpi=100)
plt.savefig('nvda',bbox_inches="tight")


############################Permutation Test##########################

In [None]:
tsm_Data = pd.read_csv('asiccompanies/tsm.us.txt')
print('Columns:', tsm_Data.columns)
nw_tsm_df = tsm_Data[['Date','Close']].copy()
nw_tsm_df['Date'] = pd.to_datetime(nw_tsm_df['Date'])

In [None]:
btc_Data = pd.read_csv('Data/bitcoin_dataset.csv')
bt_ftrs = btc_Data[['Date','btc_market_price']].copy()
bt_ftrs['Date'] = pd.to_datetime(bt_ftrs['Date'])

In [None]:
rw_data = nw_tsm_df.merge(bt_ftrs, left_on='Date', right_on='Date', how='inner')
x = rw_data[['btc_market_price']].copy()
y = rw_data[['Close']].copy() 
x=Imputer().fit_transform(x,y)
y = Imputer().fit_transform(y)
x = dataNormalization(x)
y = dataNormalization(y)
x = list(np.asarray(x).reshape(-1))
y = list(np.asarray(y).reshape(-1))
# print (len(x))
# print (len(y))
perm(x,y)

In [None]:
def perm(X,Y):
    T_obs = abs(np.mean(X) - np.mean(Y))
    perm = X + Y
    count = 0
    n_fact = 10000
    for _ in range(n_fact):
        tmp = np.random.permutation(perm)
        X_p = tmp[:len(X)]
        Y_p = tmp[len(X):]
        T_p = abs(np.mean(X_p) - np.mean(Y_p))
#         print(X_p, Y_p)
#         print(T_obs, T_p)
        if T_p > T_obs:
            count += 1
    p = count / n_fact
    print("Value of P:", p)

    if p <= 0.05:
        print("Reject Null")
    else:
        print("Accept Null")

