In [17]:
import os
import torch
from torch_geometric.data import Dataset, Data
import glob
import pandas as pd
import numpy as np

class CrypoDataset(Dataset):
    def __init__(self, root,transform=None, pre_transform=None):
        super(CrypoDataset, self).__init__(root, transform, pre_transform)

    @property
    def raw_file_names(self):
        crypto_path = glob.glob("./" + self.root + "/raw/*.csv")
        crypto_filenames = []
        for crypto_file in crypto_path:
            name = os.path.basename(crypto_file)
            crypto_filenames.append(name)
        return crypto_filenames


    @property
    def processed_file_names(self):
        data_rows = pd.read_csv(self.raw_paths[0]).shape[0]
        return [f'data_{i}.pt' for i in range(data_rows)]
        
    def download(self): 
        pass

    def process(self):
        file_no = 0
        data_rows = pd.read_csv(self.raw_paths[0]).shape[0]
        for i in range(data_rows):
            date_time = 0
            all_node_feats=[]
            label_y=[]
            for raw_path in self.raw_paths:
            # Read data from `raw_path`.
                data_file = pd.read_csv(raw_path)
                if(date_time == 0):
                    date_time=data_file.iloc[i]["Date"]
                daily_data = data_file.loc[data_file['Date'] == date_time]
                if(daily_data.shape[0]>0):
                    node_feats = self._get_node_features(daily_data)
                    all_node_feats.append(node_feats)
                    label_y.append(self._buy_or_not(data_file,date_time))
            edge_index = self.generateEdge(len(all_node_feats))
            all_node_feats = torch.tensor(all_node_feats, dtype=torch.float)
            label_y = torch.tensor(label_y, dtype=torch.float)
            data = Data(x=all_node_feats,edge_index=edge_index,y=label_y)
            
            if self.pre_filter is not None and not self.pre_filter(data):
                continue

            if self.pre_transform is not None:
                data = self.pre_transform(data)

            torch.save(data, os.path.join(self.processed_dir, 'data_{}.pt'.format(file_no)))
            file_no += 1

    def len(self):
        return len(self.processed_file_names)

    def get(self, idx):
        data = torch.load(os.path.join(self.processed_dir, 'data_{}.pt'.format(idx)))
        return data

    def _get_node_features(self, daily_data):
        all_node_feats = []
        all_node_feats.append(daily_data["Open"].item())
        all_node_feats.append(daily_data["High"].item())
        all_node_feats.append(daily_data["Low"].item())
        all_node_feats.append(daily_data["Close"].item())
        all_node_feats.append(daily_data["Volume"].item())
        return all_node_feats
    
    def _get_return_ratio(self,daily_data):
        return_ratio = []
        data = np.array(daily_data["Close"])
        for i in range(len(data)):
            if i == 0:
                return_ratio.append(0)
            else:
                return_ratio.append((data[i]-data[i-1])/data[i-1])
        return return_ratio
    
    def _buy_or_not(self,data_file,date_time):
        butOrNot=0
        data = np.array(data_file["Close"])
        data_row = data_file.loc[data_file['Date'] == date_time]
        if data_row.shape[0]>0:
            index = data_file.loc[data_file['Date'] == date_time].index[0]
            if(index < data_file.shape[0]-1 and data[index+1]>data[index]):
                butOrNot=1
        return butOrNot
    def generateEdge(self,n):
        edges = []
        for i in range(n):
            for j in range(n):
                edges.append([i,j])
        return torch.tensor(np.transpose(edges), dtype=torch.long)


In [18]:
CrypoDataset("./Dataset/Crypto/")

Processing...
Done!


CrypoDataset(730)

In [23]:
import os.path as osp

import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GATConv
from dataset import CrypoDataset
from torch_geometric.data import DataLoader
dataset = CrypoDataset(root="Dataset/Crypto/")

data = dataset[0]

# dataset = dataset.shuffle()
# train_dataset = dataset[:4]
# val_dataset = dataset[2:3]
# test_dataset = dataset[4:5]

# batch_size= 512
# train_loader = DataLoader(train_dataset, batch_size=batch_size)
# val_loader = DataLoader(val_dataset, batch_size=batch_size)
# test_loader = DataLoader(test_dataset, batch_size=batch_size)

class Net(torch.nn.Module):
    def __init__(self, in_channels, out_channels):
        super(Net, self).__init__()

        self.conv1 = GATConv(in_channels, 8, heads=8, dropout=0.6)
        # On the Pubmed dataset, use heads=8 in conv2.
        self.conv2 = GATConv(8 * 8, out_channels, heads=1, concat=False,
                             dropout=0.6)

    def forward(self, x, edge_index):
        x = F.dropout(x, p=0.6, training=self.training)
        x = F.elu(self.conv1(x, edge_index))
        x = F.dropout(x, p=0.6, training=self.training)
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=-1)

device = torch.device('cpu')


model = Net(dataset.num_features,2).to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=5e-4)


def train(data):
    model.train()
    optimizer.zero_grad()
    out = model(data.x, data.edge_index)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()


@torch.no_grad()
def test(data):
    model.eval()
    out, accs = model(data.x, data.edge_index), []
    for _, mask in data('train_mask', 'val_mask', 'test_mask'):
        acc = float((out[mask].argmax(-1) == data.y[mask]).sum() / mask.sum())
        accs.append(acc)
    return accs


for epoch in range(1, 201):
    train(data)
    train_acc, val_acc, test_acc = test(data)
    print(f'Epoch: {epoch:03d}, Train: {train_acc:.4f}, Val: {val_acc:.4f}, '
          f'Test: {test_acc:.4f}')

AttributeError: 'Data' object has no attribute 'train_mask'

In [19]:
dataset = CrypoDataset(root="Dataset/Crypto/")

In [22]:
dataset[0]

Data(edge_index=[2, 25], x=[5, 5], y=[5])

In [113]:
 data_file = pd.read_csv("./Dataset/Crypto/raw/BNB.csv")

In [114]:
 data_file

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,7/1/2019,32.1141,34.0500,31.3682,33.2777,1835906.620
1,7/2/2019,33.2722,34.0000,30.5600,31.9777,2017364.120
2,7/3/2019,31.9530,33.0500,31.6230,32.5220,1929917.650
3,7/4/2019,32.5219,34.3839,32.1503,33.0888,2194259.800
4,7/5/2019,33.0399,33.6497,32.4000,32.5566,1501826.740
...,...,...,...,...,...,...
725,6/25/2021,308.6000,314.8400,275.2400,280.8100,2711888.871
726,6/26/2021,280.8900,290.3300,264.2600,279.6500,2263064.363
727,6/27/2021,279.5800,291.7000,269.7200,290.8700,1539903.027
728,6/28/2021,290.8900,299.9600,283.5500,290.0000,1884580.156


In [115]:
date_time=data_file.iloc[0]["Date"]

In [116]:
date_time

'7/1/2019'

In [131]:
daily_data = data_file.loc[data_file['Open'] == 33.2722]

In [134]:
daily_data["Open"].item()

33.2722

In [10]:
   def generateEdge(n):
        edges = []
        for i in range(n):
            for j in range(n):
                edges.append([i,j])
        return edges

In [12]:
np.transpose(generateEdge(5))

array([[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4,
        4, 4, 4],
       [0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1,
        2, 3, 4]])