In [81]:
# get code from github
!git clone https://github.com/Wensi-Tang/OS-CNN.git
#change path to OS-CNN
%cd /content/OS-CNN

Cloning into 'OS-CNN'...
remote: Enumerating objects: 557, done.[K
remote: Counting objects: 100% (7/7), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 557 (delta 1), reused 4 (delta 0), pack-reused 550[K
Receiving objects: 100% (557/557), 49.18 MiB | 13.74 MiB/s, done.
Resolving deltas: 100% (166/166), done.
/content/OS-CNN


In [82]:
import torch
from os.path import dirname
from utils.dataloader.TSC_data_loader import TSC_data_loader
from Classifiers.OS_CNN.OS_CNN_easy_use import OS_CNN_easy_use
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn import preprocessing

In [83]:
cuda_available = torch.cuda.is_available()
print("Are we using GPU?: ", cuda_available)
if(cuda_available):
    print("the GPU name is: ",torch.cuda.get_device_name(0))

Are we using GPU?:  True
the GPU name is:  Tesla P100-PCIE-16GB


In [85]:
btc_df = pd.read_csv('https://drive.google.com/uc?export=download&id=1-FmXzXANW85DmNbKKKoDkZr7w3cZEZi1', parse_dates=['open_time'])
btc_df = btc_df.sort_values('open_time').reset_index(drop=True)
btc_df = btc_df[['open', 'high', 'low', 'close', 'volume']]
btc_df['label'] = (btc_df['open'] < btc_df['close']).astype(int)
btc_df.head()

Unnamed: 0,open,high,low,close,volume,label
0,36060.5,36060.5,35807.9,35815.6,10.423,0
1,35815.5,35939.5,35717.3,35722.0,9.339,0
2,35715.6,35849.5,35715.6,35723.4,3.884,1
3,35735.1,35735.1,35720.0,35727.6,1.684,0
4,35727.2,35850.0,35714.0,35831.6,3.474,1


In [86]:
train_df = btc_df.iloc[:int(len(btc_df)*0.8),:]
test_df = btc_df.iloc[int(len(btc_df)*0.8):,:]

In [87]:
def create_sequences(input_data: pd.DataFrame, target_column, sequence_length):
    sequences = []
    data_size = len(input_data)
    
    for i in tqdm(range(data_size - sequence_length)):
        seq = input_data[i:i+sequence_length]
        label = input_data.iloc[i + sequence_length][target_column]
        sequences.append((seq, label))
    return sequences

In [88]:
SEQUENCE_LENGTH = 30
train_sequences = create_sequences(train_df, 'label', SEQUENCE_LENGTH)
test_sequences = create_sequences(test_df, 'label', SEQUENCE_LENGTH)

100%|██████████| 102200/102200 [00:15<00:00, 6614.33it/s]
100%|██████████| 25528/25528 [00:04<00:00, 6367.03it/s]


In [89]:
train_sequences[0][0].dtypes, type(train_sequences[0][1])

(open      float64
 high      float64
 low       float64
 close     float64
 volume    float64
 label       int64
 dtype: object, numpy.float64)

In [91]:
X_train = np.float32(train_df.iloc[:,:-2].values)
y_train = np.int64(train_df.iloc[:,-1].values)

X_test = np.float32(test_df.iloc[:,:-2].values)
y_test = np.int64(test_df.iloc[:,-1].values)

print(f'train length: {len(X_train)}')
print(f'test length: {len(X_test)}')
print('unique train label',np.unique(y_train))
print('unique test label',np.unique(y_test))

train length: 102230
test length: 25558
train data shape (102230, 4)
train label shape (102230,)
test data shape (25558, 4)
test label shape (25558,)
unique train label [0 1]
unique test label [0 1]


In [None]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

In [92]:
Result_log_folder = './Results_of_OS_CNN/OS_CNN_result_iter_0/'
dataset_name = "btc"

In [103]:
# creat model and log save place,
model = OS_CNN_easy_use(
    Result_log_folder = Result_log_folder, # the Result_log_folder,
    dataset_name = dataset_name,           # dataset_name for creat log under Result_log_folder,
    device = "cuda:0",                # Gpu 
    max_epoch = 500,                        # In our expirement the number is 2000 for keep it same with FCN for the example dataset 500 will be enough,
    paramenter_number_of_layer_list = [8*128, 5*128*256 + 2*256*128],
    )

In [104]:
model.fit(X_train, y_train, X_test, y_test)

code is running on  cuda:0
epoch = 49 lr =  0.001
train_acc=	 0.8362809351462389 	 test_acc=	 0.772869551608107 	 loss=	 0.4696478545665741
log saved at:
./Results_of_OS_CNN/OS_CNN_result_iter_0/btc/btc_.txt
epoch = 99 lr =  0.001
train_acc=	 0.9144478137533014 	 test_acc=	 0.7423898583613742 	 loss=	 0.07084573060274124
log saved at:
./Results_of_OS_CNN/OS_CNN_result_iter_0/btc/btc_.txt
epoch = 149 lr =  0.0005
train_acc=	 0.9105252861195344 	 test_acc=	 0.905782925111511 	 loss=	 0.21111321449279785
log saved at:
./Results_of_OS_CNN/OS_CNN_result_iter_0/btc/btc_.txt
epoch = 199 lr =  0.00025
train_acc=	 0.9591802797613225 	 test_acc=	 0.9470615854135691 	 loss=	 0.42273613810539246
log saved at:
./Results_of_OS_CNN/OS_CNN_result_iter_0/btc/btc_.txt
epoch = 249 lr =  0.000125
train_acc=	 0.9507385307639636 	 test_acc=	 0.8961968855152985 	 loss=	 0.00851582083851099
log saved at:
./Results_of_OS_CNN/OS_CNN_result_iter_0/btc/btc_.txt
epoch = 299 lr =  0.0001
train_acc=	 0.9444292282108

In [105]:
y_predict = model.predict(X_test)

print('correct:',y_test)
print('predict:',y_predict)

correct: [0 1 0 ... 0 1 0]
predict: [0. 1. 0. ... 0. 1. 0.]


In [106]:
acc = accuracy_score(y_predict, y_test)
print(acc)

0.9711636278269035
