In [1]:
import pandas_datareader.data as web
from datetime import datetime
from datetime import timedelta
import pandas as pd
from collections import Counter

In [2]:
# Download window: 1 January 2017 up to yesterday, truncated to midnight.
start = datetime(2017, 1, 1)
current_date = datetime.now()
yesterday = current_date - timedelta(days=1)
# Keep only the calendar date of `yesterday`; the time-of-day part is dropped.
end = datetime.combine(yesterday.date(), datetime.min.time())

In [3]:
# Fetch BTC-USD quotes from the 'yahoo' source for the [start, end] window.
data = web.DataReader(
    name='BTC-USD',
    data_source='yahoo',
    start=start,
    end=end,
)

In [4]:
# Turn the 'Date' index into a regular column (later cells read data['Date']).
# Guarded and rebound rather than mutated in place, so re-running this cell
# does not insert an extra 'index' column.
if 'Date' not in data.columns:
    data = data.reset_index()

In [5]:
# Remove the 'Adj Close' column. Rebinding (no inplace=True) together with
# errors='ignore' makes the cell safe to re-run: a second execution is a no-op
# instead of a KeyError.
data = data.drop(columns=['Adj Close'], errors='ignore')

In [6]:
def calculate(index, frame=None):
    """Describe one day relative to the day before it.

    Parameters
    ----------
    index : int
        Positional row number (as used by ``iloc``) of the day to describe.
        The row at ``index - 1`` is used as the reference day, so ``0`` is
        rejected.
    frame : pandas.DataFrame, optional
        Table with ``Date``, ``Open``, ``High``, ``Low``, ``Close`` and
        ``Volume`` columns. When omitted, the notebook-level ``data`` frame
        is used, which keeps existing ``calculate(i)`` calls working.

    Returns
    -------
    list
        ``[date, close_ratio, candle_ratio, spread_ratio, volume_ratio]``.
        Every ratio is "current day / previous day" of, respectively: the
        close price, the absolute candle body ``|Close - Open|``, the absolute
        range ``|High - Low|`` and the volume.

    Raises
    ------
    ValueError
        If ``index`` is 0. ``iloc[index - 1]`` would then be ``iloc[-1]`` and
        silently compare the first row against the last one.

    Notes
    -----
    A previous day whose candle body, range or volume is zero makes the
    corresponding denominator zero; that case is not handled here.
    """
    if index == 0:
        raise ValueError("index 0 has no previous row to compare against")

    source = data if frame is None else frame
    current = source.iloc[index]
    previous = source.iloc[index - 1]

    close_ratio = current.Close / previous.Close
    volume_ratio = current.Volume / previous.Volume

    # Only the magnitude of the candle body / range is compared, not its sign.
    candle_ratio = abs(current.Close - current.Open) / abs(previous.Close - previous.Open)
    spread_ratio = abs(current.High - current.Low) / abs(previous.High - previous.Low)

    return [current.Date, close_ratio, candle_ratio, spread_ratio, volume_ratio]

In [7]:
# One calculate() record per day, starting at position 1 because the first
# row has no previous day to be compared with.
empty_list = [calculate(position) for position in range(1, len(data))]

In [8]:
# Column labels, in the same order as the list returned by calculate().
# 'close_precent' is spelled this way on purpose: calculator() reads the
# column under exactly that name.
header = [
    'date',
    'close_precent',
    'candle_percent',
    'spread_percent',
    'volume_percent',
]
df = pd.DataFrame(empty_list).set_axis(header, axis=1)

In [9]:
def calculator(x, y):
    """Return the Euclidean distance between two windows of ratio rows.

    Both arguments are DataFrames holding the columns ``close_precent``,
    ``candle_percent``, ``spread_percent`` and ``volume_percent``. Rows are
    paired by position (first with first, second with second, ...), the
    squared differences of the four columns are summed, and the square root
    of that total is returned.
    """
    columns = ['close_precent', 'candle_percent', 'spread_percent', 'volume_percent']

    # Renumber both windows from 0 so the subtraction below lines rows up by
    # position instead of by their original index labels.
    left = x.reset_index().drop(columns='index')
    right = y.reset_index().drop(columns='index')

    # Accumulate the per-column sums of squared differences, one column at a time.
    squared_total = 0
    for column in columns:
        squared_total = squared_total + ((left[column] - right[column]) ** 2).sum()

    return squared_total ** 0.5


def compare(window=7):
    """Rank historical stretches of ``df`` by similarity to the latest one.

    The last ``window`` rows of the notebook-level ``df`` form the reference.
    Every earlier slice ``df[end - window:end]`` with
    ``window <= end < len(df) - window`` is scored against it with
    ``calculator``.

    Parameters
    ----------
    window : int, default 7
        Number of consecutive rows per stretch. Must be at least 1.

    Returns
    -------
    pandas.DataFrame
        Two columns labelled ``0`` and ``1``: the ``date`` of the last row of
        each candidate stretch and its distance to the reference, sorted by
        distance in ascending order. Empty when ``df`` is too short to hold
        any candidate.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be at least 1")

    reference = df[-window:]
    return_list = []

    # NOTE(review): the upper bound also leaves out the stretch that ends right
    # where the reference starts (end == len(df) - window), although it would
    # not overlap. Kept as in the original; confirm whether that is intended.
    for endpoint in range(window, len(df) - window):
        candidate = df[endpoint - window:endpoint]
        datum = candidate.date.iloc[-1]
        return_list.append([datum, calculator(reference, candidate)])

    # Explicit column labels keep sort_values([1]) valid when nothing was collected.
    return_df = pd.DataFrame(return_list, columns=[0, 1])
    return return_df.sort_values([1])
    

In [10]:
# Rank historical 7-day windows by their distance to the most recent 7 days,
# smallest distance first.
df_ranking = compare()

In [11]:

def knn(rawdata, dataframe, neighbors=3):
    """Vote on the next day's direction using the top-ranked historical dates.

    Parameters
    ----------
    rawdata : pandas.DataFrame
        Price table with a ``Date`` and a ``Close`` column.
    dataframe : pandas.DataFrame
        Ranking whose first column holds the dates to look up. Its first
        ``neighbors`` rows are used as the neighbours.
    neighbors : int, default 3
        Maximum number of rows taken from ``dataframe``. If fewer rows are
        available, all of them are used.

    Returns
    -------
    list of tuple
        ``Counter.most_common()`` over the labels ``'Up'``, ``'Down'`` and
        ``'Equal'``, one label per neighbour, describing whether the close on
        the following calendar day was above, below or equal to the close on
        the neighbour's date.

    Raises
    ------
    IndexError
        If a neighbour's date, or the day after it, has no row in ``rawdata``.

    The labels, the raw next-day/current-day close ratios and the neighbour
    rows are also printed.
    """
    target_df = dataframe[:neighbors]
    return_list = []
    test_list = []

    # Iterate over the rows that actually exist rather than range(neighbors),
    # so asking for more neighbours than are available does not fail.
    for datum in target_df.iloc[:, 0]:
        next_datum = datum + timedelta(days=1)
        current_close = rawdata.loc[rawdata['Date'] == datum, 'Close'].iloc[0]
        next_close = rawdata.loc[rawdata['Date'] == next_datum, 'Close'].iloc[0]

        delta = next_close / current_close
        test_list.append(delta)
        if delta > 1:
            return_list.append('Up')
        elif delta < 1:
            return_list.append('Down')
        else:
            return_list.append('Equal')

    counter = Counter(return_list)
    print(return_list)
    print(test_list)
    print(target_df)
    return counter.most_common()

In [12]:
# Vote using the 5 best-ranked historical windows and print the tally.
print(knn(rawdata=data, dataframe=df_ranking, neighbors=5))

['Down', 'Up', 'Up', 'Down', 'Down']
[0.9937685079149213, 1.000134568068874, 1.0335025707019068, 0.9886311939770186, 0.9979577233091704]
              0         1
1444 2020-12-26  1.746635
177  2017-07-03  1.914969
144  2017-05-31  1.915814
1269 2020-06-30  1.916713
1002 2019-10-06  1.930521
[('Down', 3), ('Up', 2)]


In [None]:
# 01.06.2021 - Down (4:1) - medium
# 02.06.2021 - Up (5:0) - medium +
# 03.06.2021 - Down (4:1) - low
# 04.06.2021 - Down (3:2) - low