In [1]:
import networkx as nx
from itertools import permutations, combinations
import torch
import torch.nn as nn
import numpy as np


class SparseLinear(nn.Module):
    """Applies a linear transformation to the incoming data: :math:`y = xA^T + b`

    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        connectivity: user defined sparsity matrix
        bias: If set to ``False``, the layer will not learn an additive bias.
            Default: ``True``
        coalesce_device: device to coalesce the sparse matrix on
            Default: 'gpu'
        max_size (int): maximum number of entries allowed before chunking occurrs
            Default: 1e8

    Shape:
        - Input: :math:`(N, *, H_{in})` where :math:`*` means any number of
          additional dimensions and :math:`H_{in} = \text{in\_features}`
        - Output: :math:`(N, *, H_{out})` where all but the last dimension
          are the same shape as the input and :math:`H_{out} = \text{out\_features}`.

    Attributes:
        weight: the learnable weights of the module of shape
            :math:`(\text{out\_features}, \text{in\_features})`. The values are
            initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where
            :math:`k = \frac{1}{\text{in\_features}}`
        bias:   the learnable bias of the module of shape :math:`(\text{out\_features})`.
                If :attr:`bias` is ``True``, the values are initialized from
                :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
                :math:`k = \frac{1}{\text{in\_features}}`

    Examples:

        >>> m = nn.SparseLinear(20, 30)
        >>> input = torch.randn(128, 20)
        >>> output = m(input)
        >>> print(output.size())
        torch.Size([128, 30])
    """

    def __init__(
        self,
        in_features,
        out_features,
        connectivity,
        bias=True,
        coalesce_device="cuda",
        max_size=1e8,
    ):
        assert in_features < 2**31 and out_features < 2**31
        if connectivity is not None:
            assert isinstance(connectivity, torch.LongTensor) or isinstance(
                connectivity,
                torch.cuda.LongTensor,
            ), "Connectivity must be a Long Tensor"
            assert (
                connectivity.shape[0] == 2 and connectivity.shape[1] > 0
            ), "Input shape for connectivity should be (2,nnz)"
            assert (
                connectivity.shape[1] <= in_features * out_features
            ), "Nnz can't be bigger than the weight matrix"
        super(SparseLinear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.connectivity = connectivity
        self.max_size = max_size

        nnz = connectivity.shape[1]
        connectivity = connectivity.to(device=coalesce_device)
        indices = connectivity

        values = torch.empty(nnz, device=coalesce_device)

        self.register_buffer("indices", indices.cpu())
        self.weights = nn.Parameter(values.cpu())

        if bias:
            self.bias = nn.Parameter(torch.Tensor(out_features))
        else:
            self.register_parameter("bias", None)

        self.reset_parameters()

    def reset_parameters(self):
        bound = 1 / self.in_features**0.5
        nn.init.uniform_(self.weights, -bound, bound)
        if self.bias is not None:
            nn.init.uniform_(self.bias, -bound, bound)

    @property
    def weight(self):
        """returns a torch.sparse_coo_tensor view of the underlying weight matrix
        This is only for inspection purposes and should not be modified or used in any autograd operations
        """
        weight = torch.sparse_coo_tensor(
            self.indices,
            self.weights,
            (self.out_features, self.in_features),
        )
        return weight.coalesce().detach()

    def forward(self, inputs):
        output_shape = list(inputs.shape)
        output_shape[-1] = self.out_features

        if len(output_shape) == 1:
            inputs = inputs.view(1, -1)
        inputs = inputs.flatten(end_dim=-2)

        target = torch.sparse_coo_tensor(
            self.indices,
            self.weights,
            torch.Size([self.out_features, self.in_features]),
        )
        output = torch.sparse.mm(target, inputs.t()).t()

        if self.bias is not None:
            output += self.bias

        return output.view(output_shape)

    def extra_repr(self):
        return "in_features={}, out_features={}, bias={}, connectivity={}".format(
            self.in_features,
            self.out_features,
            self.bias is not None,
            self.connectivity,
        )


def separating_cliques(G):
    clique_1 = []
    clique_2 = []
    clique_3 = []
    clique_4 = []
    for clique in nx.enumerate_all_cliques(G):
        clique = set(clique)
        if len(clique) == 1:
            clique_1.append(clique)
        elif len(clique) == 2:
            clique_2.append(clique)
        elif len(clique) == 3:
            clique_3.append(clique)
        elif len(clique) == 4:
            clique_4.append(clique)
    return clique_1, clique_2, clique_3, clique_4


def get_connection(clique_last, clique_next):
    connection_list = [[], []]
    component_mapping = {i: x for i, x in enumerate(clique_last)}
    for i, clique in enumerate(clique_next):
        component = [set(x) for x in combinations(clique, len(clique) - 1)]
        index_next = i
        index_last = [
            list(component_mapping.keys())[list(component_mapping.values()).index(x)]
            for x in component
        ]
        for j in index_last:
            connection_list[0].append(j)
            connection_list[1].append(i)

    return connection_list


G = nx.Graph()
# Add 4 nodes
G.add_nodes_from([1, 2, 3, 4, 5])
# Add 4 edges
G.add_edges_from([(1, 2), (2, 3), (2, 4), (3, 4), (4, 5), (3, 5), (2, 5)])
G

<networkx.classes.graph.Graph at 0x7f536c7f3640>

In [2]:
clique_1, clique_2, clique_3, clique_4 = separating_cliques(G)

connection_1 = get_connection(clique_1, clique_2)
connection_2 = get_connection(clique_2, clique_3)
connection_3 = get_connection(clique_3, clique_4)

connection_2

[[1, 2, 4, 1, 3, 5, 2, 3, 6, 4, 5, 6], [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]]

In [32]:
clique_1

[{0},
 {1},
 {2},
 {3},
 {4},
 {5},
 {6},
 {7},
 {8},
 {9},
 {10},
 {11},
 {12},
 {13},
 {14},
 {15},
 {16},
 {17},
 {18},
 {19},
 {20},
 {21},
 {22},
 {23},
 {24},
 {25},
 {26},
 {27},
 {28},
 {29},
 {30},
 {31},
 {32},
 {33},
 {34},
 {35},
 {36},
 {37},
 {38},
 {39},
 {40},
 {41},
 {42},
 {43},
 {44},
 {45},
 {46},
 {47},
 {48},
 {49},
 {50},
 {51},
 {52},
 {53},
 {54},
 {55},
 {56},
 {57},
 {58},
 {59},
 {60},
 {61},
 {62},
 {63},
 {64},
 {65},
 {66},
 {67},
 {68},
 {69},
 {70},
 {71},
 {72},
 {73},
 {74},
 {75},
 {76},
 {77},
 {78},
 {79},
 {80},
 {81},
 {82},
 {83},
 {84},
 {85},
 {86},
 {87},
 {88},
 {89},
 {90},
 {91},
 {92},
 {93},
 {94},
 {95},
 {96},
 {97},
 {98},
 {99}]

In [3]:
len_input = len(np.unique(connection_1[0]))
len_output = len(np.unique(connection_1[1]))

sl = SparseLinear(
    in_features=len_input,
    out_features=len_output,
    connectivity=torch.tensor([connection_1[1], connection_1[0]], dtype=torch.int64),
)
x = torch.ones(1, len_input)
output = sl(x)
print(output)

tensor([[-0.4475,  0.5695, -0.4326,  0.2320, -0.7304,  0.2701,  0.0411]],
       grad_fn=<ViewBackward0>)


In [12]:
sl.weight.to_dense().numpy() @ x.numpy().T

array([[-0.06015173],
       [ 0.5625891 ],
       [-0.09905863],
       [ 0.12613186],
       [-0.22061317],
       [ 0.6008882 ],
       [ 0.        ]], dtype=float32)

In [13]:
sl.weight.to_dense().numpy()

array([[-0.06015173,  0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.06583074,  0.12741561,  0.1874524 ,  0.18189035,  0.        ],
       [ 0.        , -0.05709397,  0.        ,  0.        , -0.04196466],
       [-0.11316157,  0.        ,  0.30750433,  0.        , -0.06821091],
       [ 0.        , -0.01736058,  0.        , -0.20325258,  0.        ],
       [ 0.2300624 ,  0.3708258 ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  0.        ]],
      dtype=float32)

In [15]:
x

tensor([[1., 1., 1., 1., 1.]])

In [14]:
sl.weight

tensor(indices=tensor([[0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4],
                       [0, 0, 1, 2, 3, 1, 4, 5, 2, 4, 6, 3, 5, 6]]),
       values=tensor([-0.0602,  0.0658,  0.1274,  0.1875,  0.1819, -0.0571,
                      -0.0420, -0.1132,  0.3075, -0.0682, -0.0174, -0.2033,
                       0.2301,  0.3708]),
       size=(7, 5), nnz=14, layout=torch.sparse_coo)

In [5]:
num_batches = 3
x = torch.ones(3, len_input)
x[1, :] = 2
x[2, :] = 3

for i in range(len_input):
    x[:, i] = x[:, i] + i / 10

output = sl(x)
output

Shape of target: torch.Size([7, 5])
Shape of inputs.t(): torch.Size([5, 3])


RuntimeError: addmm: index out of column bound: 5 not between 1 and 5

In [3]:
import torch
import torch.nn as nn


class ConvFilter(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=2,
            stride=2,
            padding=0,  # No padding for exact 2x downsampling
        )

    def forward(self, x):
        # x shape: (batch_size, channels, height, width)
        return self.conv(x)


# Example usage
batch_size = 4
in_channels = 1
out_channels = 8
rows = 100
cols = 6

# Create model
conv1_tetrahedra = nn.Sequential(
    nn.Conv1d(
        in_channels=in_channels,
        out_channels=8,
        kernel_size=2,
        stride=2,
    ),
    nn.ReLU(),
)
# Create batch of images
x = torch.randn(batch_size, rows)
print(f"Input shape: {x.shape}")  # torch.Size([4, 3, 32, 32])

x = x.unsqueeze(1)

# Apply convolution
output = conv1_tetrahedra(x)
print(f"Output shape: {output.shape}")  # torch.Size([4, 16, 16, 16])

Input shape: torch.Size([4, 100])
Output shape: torch.Size([4, 8, 50])


In [24]:
output.shape

torch.Size([4, 8, 50])

In [26]:
output.flatten(start_dim=1).shape

torch.Size([4, 400])

In [4]:
import torch.nn.functional as F


class HNN(nn.Module):
    def __init__(
        self,
        num_nodes: int,
        num_edges: int,
        num_triangles: int,
        num_tetrahedra: int,
        nodes_to_edges_connections: tuple,
        edges_to_triangles_connections: tuple,
        triangles_to_tetrahedra_connections: tuple,
    ):
        """
        nodes_to_edges_connections: tuple of two lists, where the first list contains the indices of the edges
        and the second list contains the indices of the nodes connected to those edges, such that the i-th node
        in the first list is a member of the i-th edge in the second list.

        Same for edges_to_triangles_connections and triangles_to_tetrahedra_connections
        """
        super(HNN, self).__init__()
        self.sparse_layer_edges = SparseLinear(
            num_nodes,
            num_edges,
            connectivity=torch.tensor(
                [nodes_to_edges_connections[1], nodes_to_edges_connections[0]],
                dtype=torch.int64,
            ),
        )

        self.sparse_layer_triangles = SparseLinear(
            num_edges,
            num_triangles,
            connectivity=torch.tensor(
                [edges_to_triangles_connections[1], edges_to_triangles_connections[0]],
                dtype=torch.int64,
            ),
        )

        self.triangles_to_tetrahedra_connections = triangles_to_tetrahedra_connections

        if len(self.triangles_to_tetrahedra_connections[0]) != 0:
            self.sparse_layer_tetrahedra = SparseLinear(
                num_triangles,
                num_tetrahedra,
                connectivity=torch.tensor(
                    [
                        triangles_to_tetrahedra_connections[1],
                        triangles_to_tetrahedra_connections[0],
                    ],
                    dtype=torch.int64,
                ),
            )

        else:
            self.sparse_layer_tetrahedra = None

    def forward(self, x):
        x_s1 = F.relu(self.sparse_layer_edges(x))

        x_s2 = F.relu(self.sparse_layer_triangles(x_s1))

        if len(self.triangles_to_tetrahedra_connections[0]) != 0:
            x_s3 = F.relu(self.sparse_layer_tetrahedra(x_s2))

            return torch.cat([x_s1, x_s2, x_s3], 1)

        else:

            return torch.cat([x_s1, x_s2], 1)


hnn = HNN(
    num_nodes=len(clique_1),
    num_edges=len(clique_2),
    num_triangles=len(clique_3),
    num_tetrahedra=len(clique_4),
    nodes_to_edges_connections=connection_1,
    edges_to_triangles_connections=connection_2,
    triangles_to_tetrahedra_connections=connection_3,
)

x = torch.ones(1, len(clique_1))
output = hnn(x)
print(output)
print(output.shape)  # Should print the shape of the output tensor

tensor([[0.0000, 0.3031, 0.0000, 0.3026, 0.1994, 0.0000, 0.0000, 0.0000, 0.0000,
         0.0000, 0.0000, 0.0000]], grad_fn=<CatBackward0>)
torch.Size([1, 12])


In [47]:
len(clique_2), len(clique_3), len(clique_4)

(7, 4, 1)

In [48]:
max(connection_1[1]) + 1, max(connection_2[1]) + 1, max(connection_3[1]) + 1

(7, 4, 1)

In [5]:
from copy import deepcopy

import torch.nn as nn

from copy import deepcopy
from dataclasses import dataclass

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch
import torch.nn as nn


class SparseLinear(nn.Module):
    """Applies a linear transformation to the incoming data: :math:`y = xA^T + b`

    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        connectivity: user defined sparsity matrix
        bias: If set to ``False``, the layer will not learn an additive bias.
            Default: ``True``
        coalesce_device: device to coalesce the sparse matrix on
            Default: 'gpu'
        max_size (int): maximum number of entries allowed before chunking occurrs
            Default: 1e8

    Shape:
        - Input: :math:`(N, *, H_{in})` where :math:`*` means any number of
          additional dimensions and :math:`H_{in} = \text{in\_features}`
        - Output: :math:`(N, *, H_{out})` where all but the last dimension
          are the same shape as the input and :math:`H_{out} = \text{out\_features}`.

    Attributes:
        weight: the learnable weights of the module of shape
            :math:`(\text{out\_features}, \text{in\_features})`. The values are
            initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where
            :math:`k = \frac{1}{\text{in\_features}}`
        bias:   the learnable bias of the module of shape :math:`(\text{out\_features})`.
                If :attr:`bias` is ``True``, the values are initialized from
                :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
                :math:`k = \frac{1}{\text{in\_features}}`

    Examples:

        >>> m = nn.SparseLinear(20, 30)
        >>> input = torch.randn(128, 20)
        >>> output = m(input)
        >>> print(output.size())
        torch.Size([128, 30])
    """

    def __init__(
        self,
        in_features,
        out_features,
        connectivity,
        bias=True,
        coalesce_device="cuda",
        max_size=1e8,
    ):
        assert in_features < 2**31 and out_features < 2**31
        if connectivity is not None:
            assert isinstance(connectivity, torch.LongTensor) or isinstance(
                connectivity,
                torch.cuda.LongTensor,
            ), "Connectivity must be a Long Tensor"
            assert (
                connectivity.shape[0] == 2 and connectivity.shape[1] > 0
            ), "Input shape for connectivity should be (2,nnz)"
            assert (
                connectivity.shape[1] <= in_features * out_features
            ), "Nnz can't be bigger than the weight matrix"
        super(SparseLinear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.connectivity = connectivity
        self.max_size = max_size

        nnz = connectivity.shape[1]
        connectivity = connectivity.to(device=coalesce_device)
        indices = connectivity

        values = torch.empty(nnz, device=coalesce_device)

        self.register_buffer("indices", indices.cpu())
        self.weights = nn.Parameter(values.cpu())

        if bias:
            self.bias = nn.Parameter(torch.Tensor(out_features))
        else:
            self.register_parameter("bias", None)

        self.reset_parameters()

    def reset_parameters(self):
        bound = 1 / self.in_features**0.5
        nn.init.uniform_(self.weights, -bound, bound)
        if self.bias is not None:
            nn.init.uniform_(self.bias, -bound, bound)

    @property
    def weight(self):
        """returns a torch.sparse_coo_tensor view of the underlying weight matrix
        This is only for inspection purposes and should not be modified or used in any autograd operations
        """
        weight = torch.sparse_coo_tensor(
            self.indices,
            self.weights,
            (self.out_features, self.in_features),
        )
        return weight.coalesce().detach()

    def forward(self, inputs):
        output_shape = list(inputs.shape)
        output_shape[-1] = self.out_features

        if len(output_shape) == 1:
            inputs = inputs.view(1, -1)
        inputs = inputs.flatten(end_dim=-2)

        target = torch.sparse_coo_tensor(
            self.indices,
            self.weights,
            torch.Size([self.out_features, self.in_features]),
        )
        output = torch.sparse.mm(target, inputs.t()).t()

        if self.bias is not None:
            output += self.bias

        return output.view(output_shape)

    def extra_repr(self):
        return "in_features={}, out_features={}, bias={}, connectivity={}".format(
            self.in_features,
            self.out_features,
            self.bias is not None,
            self.connectivity,
        )


@dataclass
class GraphHomologicalStructure:
    nodes_to_edges_connections: tuple
    edges_to_triangles_connections: tuple
    triangles_to_tetrahedra_connections: tuple

    @property
    def num_nodes(self) -> int:
        return max(self.nodes_to_edges_connections[0]) + 1

    @property
    def num_edges(self) -> int:
        return max(self.edges_to_triangles_connections[0]) + 1

    @property
    def num_triangles(self) -> int:
        return (
            max(self.triangles_to_tetrahedra_connections[0]) + 1
            if self.triangles_to_tetrahedra_connections
            else 0
        )

    @property
    def num_tetrahedra(self) -> int:
        return (
            max(self.triangles_to_tetrahedra_connections[1]) + 1
            if self.triangles_to_tetrahedra_connections
            else 0
        )

    def get_nodes_to_edges_connections_tensor(self) -> torch.Tensor:
        return torch.tensor(
            [
                self.nodes_to_edges_connections[1],
                self.nodes_to_edges_connections[0],
            ],
            dtype=torch.int64,
        )

    def get_edges_to_triangles_connections_tensor(self) -> torch.Tensor:
        return torch.tensor(
            [
                self.edges_to_triangles_connections[1],
                self.edges_to_triangles_connections[0],
            ],
            dtype=torch.int64,
        )

    def get_triangles_to_tetrahedra_connections_tensor(self) -> torch.Tensor:
        return (
            torch.tensor(
                [
                    self.triangles_to_tetrahedra_connections[1],
                    self.triangles_to_tetrahedra_connections[0],
                ],
                dtype=torch.int64,
            )
            if self.triangles_to_tetrahedra_connections
            else torch.empty((2, 0), dtype=torch.int64)
        )

    def __deepcopy__(self, memo):
        return GraphHomologicalStructure(
            nodes_to_edges_connections=deepcopy(self.nodes_to_edges_connections, memo),
            edges_to_triangles_connections=deepcopy(
                self.edges_to_triangles_connections, memo
            ),
            triangles_to_tetrahedra_connections=deepcopy(
                self.triangles_to_tetrahedra_connections, memo
            ),
        )


class HNN(nn.Module):
    def __init__(
        self,
        homological_structure: GraphHomologicalStructure,
    ):
        super(HNN, self).__init__()
        self.homological_structure = homological_structure

        self.sparse_layer_edges = SparseLinear(
            homological_structure.num_nodes,
            homological_structure.num_edges,
            connectivity=self.homological_structure.get_nodes_to_edges_connections_tensor(),
        )

        self.sparse_layer_triangles = SparseLinear(
            self.homological_structure.num_edges,
            self.homological_structure.num_triangles,
            connectivity=self.homological_structure.get_edges_to_triangles_connections_tensor(),
        )

        if len(self.homological_structure.triangles_to_tetrahedra_connections[0]) != 0:
            self.sparse_layer_tetrahedra = SparseLinear(
                self.homological_structure.num_triangles,
                self.homological_structure.num_tetrahedra,
                connectivity=self.homological_structure.get_triangles_to_tetrahedra_connections_tensor(),
            )

        else:
            self.sparse_layer_tetrahedra = None

    def forward(self, x):
        x_s1 = F.relu(self.sparse_layer_edges(x))

        x_s2 = F.relu(self.sparse_layer_triangles(x_s1))

        if len(self.homological_structure.triangles_to_tetrahedra_connections[0]) != 0:
            x_s3 = F.relu(self.sparse_layer_tetrahedra(x_s2))

            return torch.cat([x_s1, x_s2, x_s3], 1)

        else:

            return torch.cat([x_s1, x_s2], 1)

In [6]:
class ConvolutedMixingHNN(nn.Module):
    @staticmethod
    def get_connections_for_convoluted_mixing_hnn(
        nodes_to_edges_connections: tuple,
        num_convolutional_channels: int,
    ) -> tuple:
        """
        This function modifies the connections for the convoluted mixing HNN.
        It expands the nodes_to_edges_connections to account for the convolutional channels.
        """
        new_nodes_to_edges_connections = ([], [])
        for connection_index in range(len(nodes_to_edges_connections[0])):
            node_index = nodes_to_edges_connections[0][connection_index]
            edge_index = nodes_to_edges_connections[1][connection_index]

            for channel in range(num_convolutional_channels):
                new_nodes_to_edges_connections[0].append(
                    node_index * num_convolutional_channels + channel
                )
                new_nodes_to_edges_connections[1].append(edge_index)

        return new_nodes_to_edges_connections

    def __init__(
        self,
        homological_structure: GraphHomologicalStructure,
        num_convolutional_channels: int,
        lighten: bool = False,
    ):
        super(ConvolutedMixingHNN, self).__init__()
        self.name = "hcnn"
        if lighten:
            self.name += "-lighten"

        self.homological_structure = homological_structure

        self.conv_layer_price_vol = nn.Sequential(
            nn.Conv1d(
                in_channels=1,
                out_channels=num_convolutional_channels,
                kernel_size=2,
                stride=2,
            ),
            nn.ReLU(),
        )

        convoluted_nodes_to_edges_connections = (
            self.get_connections_for_convoluted_mixing_hnn(
                homological_structure.nodes_to_edges_connections,
                num_convolutional_channels,
            )
        )

        self.convoluted_homological_structure = deepcopy(homological_structure)
        self.convoluted_homological_structure.nodes_to_edges_connections = (
            convoluted_nodes_to_edges_connections
        )

        self.hnn = HNN(self.convoluted_homological_structure)

        self.readout_layer = nn.Linear(
            in_features=homological_structure.num_edges
            + homological_structure.num_triangles
            + homological_structure.num_tetrahedra,
            out_features=3,
        )

    def forward(self, x):
        #  x.shape = (batch_size, 1, num_features) num_features è della dimensione di tutti i nodi (nodi nel senso di spazio-temporali, quindi vol1ask_lag0, vol1ask_lag1, ...) * 2 perche c'è price and volume

        # after conv_layer_price_vol -> x.shape = (batch_size, num_convolutional_channels, num_features // 2)
        x = self.conv_layer_price_vol(x)

        # after flatten -> # x.shape = (batch_size, num_convolutional_channels * num_features // 2)
        # Permute to have channels first, then flatten. so the columns will be feature_channel1, feature_channel2, ..., feature_channelN
        x = x.permute(0, 2, 1).flatten(start_dim=1)

        x = self.hnn(x)  # x.shape = (batch_size, num_classes)

        # after hnn -> x.shape = (batch_size, num_edges + num_triangles + num_tetrahedra)
        x = self.readout_layer(x)  # x.shape = (batch_size, num

        return x

In [9]:
connection_1

[[0, 1, 1, 2, 1, 3, 1, 4, 2, 3, 2, 4, 3, 4],
 [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6]]

In [10]:
cmhnn = ConvolutedMixingHNN(
    homological_structure=GraphHomologicalStructure(
        nodes_to_edges_connections=connection_1,
        edges_to_triangles_connections=connection_2,
        triangles_to_tetrahedra_connections=connection_3,
    ),
    num_convolutional_channels=8,
)

In [46]:
N_FEATURES = G.number_of_nodes() * 2
x = torch.randn(3, 1, N_FEATURES)
x

tensor([[[ 0.2990,  1.8206,  0.8336, -2.0264, -0.4700, -1.0977,  0.6162,
           0.7628,  0.2390,  1.5506]],

        [[ 1.5205, -0.3781,  1.6900,  1.0291,  0.2050,  0.5757,  0.8598,
           0.5767,  1.0066,  0.7828]],

        [[ 0.1221,  0.3215,  1.2858,  0.1554, -0.4019,  1.5746,  0.0791,
          -0.0583,  1.1217, -0.2726]]])

In [47]:
output = cmhnn(x)
print(output)

tensor([[-0.1222,  0.0877, -0.1908],
        [-0.0963,  0.1122, -0.1353],
        [-0.0119,  0.1761, -0.0889]], grad_fn=<AddmmBackward0>)


In [33]:
output.shape

torch.Size([3, 12])

In [7]:
import pandas as pd


class CustomWindowedDataset:
    def __init__(
        self,
        windows_limits: list[tuple[int, int]],
    ):
        self.windows_limits = windows_limits
        self.last_lag = windows_limits[-1][1]

        self.df = pd.DataFrame(
            data={f"feature_{i}": list(range(i, i + 100)) for i in range(5)},
        )

    def get_max_offset(self):
        return self.last_lag

    def get_window_data(self, cache_idx, start_idx):
        columns_number = 5
        window_means = [
            self._get_single_window_mean(
                start_idx, cache_idx, start_lag_window, end_lag_window, columns_number
            )
            for (start_lag_window, end_lag_window) in self.windows_limits
        ]

        # Stack into new DataFrame, reverse order (decreasing by window number)
        result = pd.DataFrame(
            window_means[::-1],
            columns=self.df.columns[:columns_number],
        )
        result.index = range(len(result))  # reset index
        return result

    def _get_single_window_mean(
        self,
        start_idx: int,
        cache_idx: int,
        start_lag_window: int,
        end_lag_window: int,
        columns_number: int,
    ) -> pd.Series:
        start_window_idx = start_idx - end_lag_window + 1
        end_window_idx = start_idx - start_lag_window + 1

        window_df = self.df.iloc[start_window_idx:end_window_idx, :columns_number]

        return window_df.mean()

In [15]:
df = CustomWindowedDataset(
    windows_limits=[(0, 1), (1, 3), (3, 7), (7, 15), (15, 31)],
)
df.df

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4
0,0,1,2,3,4
1,1,2,3,4,5
2,2,3,4,5,6
3,3,4,5,6,7
4,4,5,6,7,8
...,...,...,...,...,...
95,95,96,97,98,99
96,96,97,98,99,100
97,97,98,99,100,101
98,98,99,100,101,102


In [16]:
df.get_window_data(cache_idx=0, start_idx=50)

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4
0,27.5,28.5,29.5,30.5,31.5
1,39.5,40.5,41.5,42.5,43.5
2,45.5,46.5,47.5,48.5,49.5
3,48.5,49.5,50.5,51.5,52.5
4,50.0,51.0,52.0,53.0,54.0


In [20]:
# [(0, 1), (1, 3), (3, 7), (7, 15), (15, 31)]

df.df.iloc[20:36].mean()

feature_0    27.5
feature_1    28.5
feature_2    29.5
feature_3    30.5
feature_4    31.5
dtype: float64

# DF CONSTRUCTION FOR STHNN

In [1]:
import networkx as nx
from itertools import permutations, combinations
import torch
import torch.nn as nn
import numpy as np


def separating_cliques(G):
    clique_1 = []
    clique_2 = []
    clique_3 = []
    clique_4 = []
    for clique in nx.enumerate_all_cliques(G):
        clique = set(clique)
        if len(clique) == 1:
            clique_1.append(clique)
        elif len(clique) == 2:
            clique_2.append(clique)
        elif len(clique) == 3:
            clique_3.append(clique)
        elif len(clique) == 4:
            clique_4.append(clique)
    return clique_1, clique_2, clique_3, clique_4


def get_connection(clique_last, clique_next):
    connection_list = [[], []]
    component_mapping = {i: x for i, x in enumerate(clique_last)}
    for i, clique in enumerate(clique_next):
        component = [set(x) for x in combinations(clique, len(clique) - 1)]
        index_next = i
        index_last = [
            list(component_mapping.keys())[list(component_mapping.values()).index(x)]
            for x in component
        ]
        for j in index_last:
            connection_list[0].append(j)
            connection_list[1].append(i)

    return connection_list


import torch.nn.functional as F


from copy import deepcopy

import torch.nn as nn

from copy import deepcopy
from dataclasses import dataclass

import torch
import torch.nn as nn
import torch.nn.functional as F

import torch
import torch.nn as nn


class SparseLinear(nn.Module):
    """Applies a linear transformation to the incoming data: :math:`y = xA^T + b`

    Args:
        in_features: size of each input sample
        out_features: size of each output sample
        connectivity: user defined sparsity matrix
        bias: If set to ``False``, the layer will not learn an additive bias.
            Default: ``True``
        coalesce_device: device to coalesce the sparse matrix on
            Default: 'gpu'
        max_size (int): maximum number of entries allowed before chunking occurrs
            Default: 1e8

    Shape:
        - Input: :math:`(N, *, H_{in})` where :math:`*` means any number of
          additional dimensions and :math:`H_{in} = \text{in\_features}`
        - Output: :math:`(N, *, H_{out})` where all but the last dimension
          are the same shape as the input and :math:`H_{out} = \text{out\_features}`.

    Attributes:
        weight: the learnable weights of the module of shape
            :math:`(\text{out\_features}, \text{in\_features})`. The values are
            initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where
            :math:`k = \frac{1}{\text{in\_features}}`
        bias:   the learnable bias of the module of shape :math:`(\text{out\_features})`.
                If :attr:`bias` is ``True``, the values are initialized from
                :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
                :math:`k = \frac{1}{\text{in\_features}}`

    Examples:

        >>> m = nn.SparseLinear(20, 30)
        >>> input = torch.randn(128, 20)
        >>> output = m(input)
        >>> print(output.size())
        torch.Size([128, 30])
    """

    def __init__(
        self,
        in_features,
        out_features,
        connectivity,
        bias=True,
        coalesce_device="cuda",
        max_size=1e8,
    ):
        assert in_features < 2**31 and out_features < 2**31
        if connectivity is not None:
            assert isinstance(connectivity, torch.LongTensor) or isinstance(
                connectivity,
                torch.cuda.LongTensor,
            ), "Connectivity must be a Long Tensor"
            assert (
                connectivity.shape[0] == 2 and connectivity.shape[1] > 0
            ), "Input shape for connectivity should be (2,nnz)"
            assert (
                connectivity.shape[1] <= in_features * out_features
            ), "Nnz can't be bigger than the weight matrix"
        super(SparseLinear, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.connectivity = connectivity
        self.max_size = max_size

        nnz = connectivity.shape[1]
        connectivity = connectivity.to(device=coalesce_device)
        indices = connectivity

        values = torch.empty(nnz, device=coalesce_device)

        self.register_buffer("indices", indices.cpu())
        self.weights = nn.Parameter(values.cpu())

        if bias:
            self.bias = nn.Parameter(torch.Tensor(out_features))
        else:
            self.register_parameter("bias", None)

        self.reset_parameters()

    def reset_parameters(self):
        bound = 1 / self.in_features**0.5
        nn.init.uniform_(self.weights, -bound, bound)
        if self.bias is not None:
            nn.init.uniform_(self.bias, -bound, bound)

    @property
    def weight(self):
        """returns a torch.sparse_coo_tensor view of the underlying weight matrix
        This is only for inspection purposes and should not be modified or used in any autograd operations
        """
        weight = torch.sparse_coo_tensor(
            self.indices,
            self.weights,
            (self.out_features, self.in_features),
        )
        return weight.coalesce().detach()

    def forward(self, inputs):
        output_shape = list(inputs.shape)
        output_shape[-1] = self.out_features

        if len(output_shape) == 1:
            inputs = inputs.view(1, -1)
        inputs = inputs.flatten(end_dim=-2)

        target = torch.sparse_coo_tensor(
            self.indices,
            self.weights,
            torch.Size([self.out_features, self.in_features]),
        )
        output = torch.sparse.mm(target, inputs.t()).t()

        if self.bias is not None:
            output += self.bias

        return output.view(output_shape)

    def extra_repr(self):
        return "in_features={}, out_features={}, bias={}, connectivity={}".format(
            self.in_features,
            self.out_features,
            self.bias is not None,
            self.connectivity,
        )


@dataclass
class GraphHomologicalStructure:
    nodes_to_edges_connections: tuple
    edges_to_triangles_connections: tuple
    triangles_to_tetrahedra_connections: tuple

    @property
    def num_nodes(self) -> int:
        return max(self.nodes_to_edges_connections[0]) + 1

    @property
    def num_edges(self) -> int:
        return max(self.edges_to_triangles_connections[0]) + 1

    @property
    def num_triangles(self) -> int:
        return (
            max(self.triangles_to_tetrahedra_connections[0]) + 1
            if self.triangles_to_tetrahedra_connections
            else 0
        )

    @property
    def num_tetrahedra(self) -> int:
        return (
            max(self.triangles_to_tetrahedra_connections[1]) + 1
            if self.triangles_to_tetrahedra_connections
            else 0
        )

    def get_nodes_to_edges_connections_tensor(self) -> torch.Tensor:
        return torch.tensor(
            [
                self.nodes_to_edges_connections[1],
                self.nodes_to_edges_connections[0],
            ],
            dtype=torch.int64,
        )

    def get_edges_to_triangles_connections_tensor(self) -> torch.Tensor:
        return torch.tensor(
            [
                self.edges_to_triangles_connections[1],
                self.edges_to_triangles_connections[0],
            ],
            dtype=torch.int64,
        )

    def get_triangles_to_tetrahedra_connections_tensor(self) -> torch.Tensor:
        return (
            torch.tensor(
                [
                    self.triangles_to_tetrahedra_connections[1],
                    self.triangles_to_tetrahedra_connections[0],
                ],
                dtype=torch.int64,
            )
            if self.triangles_to_tetrahedra_connections
            else torch.empty((2, 0), dtype=torch.int64)
        )

    def __deepcopy__(self, memo):
        return GraphHomologicalStructure(
            nodes_to_edges_connections=deepcopy(self.nodes_to_edges_connections, memo),
            edges_to_triangles_connections=deepcopy(
                self.edges_to_triangles_connections, memo
            ),
            triangles_to_tetrahedra_connections=deepcopy(
                self.triangles_to_tetrahedra_connections, memo
            ),
        )


class HNN(nn.Module):
    def __init__(
        self,
        homological_structure: GraphHomologicalStructure,
    ):
        super(HNN, self).__init__()
        self.homological_structure = homological_structure

        self.sparse_layer_edges = SparseLinear(
            homological_structure.num_nodes,
            homological_structure.num_edges,
            connectivity=self.homological_structure.get_nodes_to_edges_connections_tensor(),
        )

        self.sparse_layer_triangles = SparseLinear(
            self.homological_structure.num_edges,
            self.homological_structure.num_triangles,
            connectivity=self.homological_structure.get_edges_to_triangles_connections_tensor(),
        )

        if len(self.homological_structure.triangles_to_tetrahedra_connections[0]) != 0:
            self.sparse_layer_tetrahedra = SparseLinear(
                self.homological_structure.num_triangles,
                self.homological_structure.num_tetrahedra,
                connectivity=self.homological_structure.get_triangles_to_tetrahedra_connections_tensor(),
            )

        else:
            self.sparse_layer_tetrahedra = None

    def forward(self, x):
        x_s1 = F.relu(self.sparse_layer_edges(x))

        x_s2 = F.relu(self.sparse_layer_triangles(x_s1))

        if len(self.homological_structure.triangles_to_tetrahedra_connections[0]) != 0:
            x_s3 = F.relu(self.sparse_layer_tetrahedra(x_s2))

            return torch.cat([x_s1, x_s2, x_s3], 1)

        else:

            return torch.cat([x_s1, x_s2], 1)

In [14]:
class ConvolutedMixingHNN(nn.Module):
    @staticmethod
    def get_connections_for_convoluted_mixing_hnn(
        nodes_to_edges_connections: tuple,
        num_convolutional_channels: int,
    ) -> tuple:
        """
        This function modifies the connections for the convoluted mixing HNN.
        It expands the nodes_to_edges_connections to account for the convolutional channels.
        """
        new_nodes_to_edges_connections = ([], [])
        for connection_index in range(len(nodes_to_edges_connections[0])):
            node_index = nodes_to_edges_connections[0][connection_index]
            edge_index = nodes_to_edges_connections[1][connection_index]

            for channel in range(num_convolutional_channels):
                new_nodes_to_edges_connections[0].append(
                    node_index * num_convolutional_channels + channel
                )
                new_nodes_to_edges_connections[1].append(edge_index)

        return new_nodes_to_edges_connections

    def __init__(
        self,
        homological_structure: GraphHomologicalStructure,
        num_convolutional_channels: int,
        lighten: bool = False,
    ):
        super(ConvolutedMixingHNN, self).__init__()
        self.name = "hcnn"
        if lighten:
            self.name += "-lighten"

        self.homological_structure = homological_structure

        self.conv_layer_price_vol = nn.Sequential(
            nn.Conv1d(
                in_channels=1,
                out_channels=num_convolutional_channels,
                kernel_size=2,
                stride=2,
            ),
            nn.ReLU(),
        )

        convoluted_nodes_to_edges_connections = (
            self.get_connections_for_convoluted_mixing_hnn(
                homological_structure.nodes_to_edges_connections,
                num_convolutional_channels,
            )
        )

        self.convoluted_homological_structure = deepcopy(homological_structure)
        self.convoluted_homological_structure.nodes_to_edges_connections = (
            convoluted_nodes_to_edges_connections
        )

        self.hnn = HNN(self.convoluted_homological_structure)

        self.readout_layer = nn.Linear(
            in_features=homological_structure.num_edges
            + homological_structure.num_triangles
            + homological_structure.num_tetrahedra,
            out_features=3,
        )

    def forward(self, x):
        # x.shape = (batch_size, 1, num_window_lags, num_spatial_features) num_spatial_features è della dimensione di tutti i nodi (nodi nel senso di spaziali quindi senza lag) * 2 perche c'è price and volume

        # After these -> x.shape = (batch_size, 1, num_features) num_features è della dimensione di tutti i nodi (nodi nel senso di spazio-temporali, quindi vol1ask_lag0, vol1ask_lag1, ...) * 2 perche c'è price and volume
        x = torch.flip(x, dims=[2])
        x = x.reshape(x.shape[0], 1, -1)

        # after conv_layer_price_vol -> x.shape = (batch_size, num_convolutional_channels, num_features // 2)
        x = self.conv_layer_price_vol(x)

        # after flatten -> # x.shape = (batch_size, num_convolutional_channels * num_features // 2)
        # Permute to have channels first, then flatten. so the columns will be feature_channel1, feature_channel2, ..., feature_channelN
        x = x.permute(0, 2, 1).flatten(start_dim=1)

        x = self.hnn(x)  # x.shape = (batch_size, num_classes)

        # after hnn -> x.shape = (batch_size, num_edges + num_triangles + num_tetrahedra)
        x = self.readout_layer(x)  # x.shape = (batch_size, num

        return x

In [3]:
import pandas as pd

from loaders.custom_dataset import CustomDataset
from torch.utils.data import Dataset, DataLoader


class CustomWindowedDataset(CustomDataset):
    def __init__(
        self,
        dataset,
        learning_stage,
        windows_limits: list[tuple[int, int]],
        shuffling_seed,
        cache_size,
        lighten,
        threshold,
        all_horizons,
        prediction_horizon,
        targets_type,
        balanced_dataloader=False,
        backtest=False,
        training_stocks=None,
        validation_stocks=None,
        target_stocks=None,
    ):
        """
        windows_limits is a list of tuples specifying the start and end indices for each window.
        It must be ordered. End index must be excluded. If I have [(0,2), (2,5), (5,10)], it means I have three windows:
        - Window 1: 0,1
        - Window 2: 2,3,4
        - Window 3: 5,6,7,8,9
        """
        self.windows_limits = windows_limits
        self.last_lag = windows_limits[-1][1]

        super().__init__(
            dataset=dataset,
            learning_stage=learning_stage,
            shuffling_seed=shuffling_seed,
            cache_size=cache_size,
            lighten=lighten,
            threshold=threshold,
            all_horizons=all_horizons,
            prediction_horizon=prediction_horizon,
            targets_type=targets_type,
            balanced_dataloader=balanced_dataloader,
            backtest=backtest,
            training_stocks=training_stocks,
            validation_stocks=validation_stocks,
            target_stocks=target_stocks,
        )

    def get_max_offset(self):
        return self.last_lag

    def get_window_data(self, cache_idx, start_idx):
        columns_number = 20 if self.lighten else 40
        window_means = [
            self._get_single_window_mean(
                start_idx, cache_idx, start_lag_window, end_lag_window, columns_number
            )
            for (start_lag_window, end_lag_window) in self.windows_limits
        ]

        # Stack into new DataFrame, reverse order (decreasing by window number)
        result = np.vstack(window_means[::-1])
        return result

    def _get_single_window_mean(
        self,
        start_idx: int,
        cache_idx: int,
        start_lag_window: int,
        end_lag_window: int,
        columns_number: int,
    ) -> pd.Series:
        # + 1 in necessary; otherwise the window will be shifted by one in the past
        start_window_idx = start_idx - end_lag_window + 1
        end_window_idx = start_idx - start_lag_window + 1

        window_df = self.cache_data[cache_idx][
            start_window_idx:end_window_idx, :columns_number
        ]

        return window_df.mean(axis=0)

In [23]:
import re
import networkx as nx
from itertools import permutations, combinations
import torch
import torch.nn as nn
import numpy as np
from fast_tmfg import TMFG
import pandas as pd

df = pd.read_csv(
    "/home/daxus/code/LOBFrame/data/nasdaq/unscaled_data/CSCO/CSCO_orderbooks_2019-06-11.csv"
)
df = df.drop(
    columns=[
        "seconds",
        "Raw_Target_10",
        "Raw_Target_50",
        "Raw_Target_100",
        "Smooth_Target_10",
        "Smooth_Target_50",
        "Smooth_Target_100",
    ]
)
df = df.iloc[:1000].copy()

for col in df.columns:
    df[f"{col}_lag1"] = df[col].shift(1)
    df[f"{col}_lag2"] = df[col].shift(2)
    df[f"{col}_lag3"] = df[col].shift(3)
    df[f"{col}_lag4"] = df[col].shift(4)

df = df.dropna()
df = df.reset_index(drop=True)


def parse_col(c):
    lag = re.search(r"_lag(\d+)", c)
    lag_num = int(lag.group(1)) if lag else 0
    c_clean = c.split("_lag")[0]
    match = re.match(r"(ASKs|ASKp|BIDs|BIDp)(\d+)", c_clean)
    if match:
        prefix, level = match.groups()
        return prefix, int(level), lag_num
    return c, 0, lag_num


def sort_key(c):
    if "Target" in c:
        return (99, 99, 99)  # Put target columns at the end

    order = ["ASKs", "ASKp", "BIDs", "BIDp"]
    prefix, level, lag_num = parse_col(c)
    return (
        lag_num,  # non-lagged first
        level,  # by level number
        order.index(prefix) if prefix in order else 99,  # by prefix order
    )


df = df[sorted(df.columns, key=sort_key)]
df

  df[f"{col}_lag4"] = df[col].shift(4)
  df[f"{col}_lag1"] = df[col].shift(1)
  df[f"{col}_lag2"] = df[col].shift(2)
  df[f"{col}_lag3"] = df[col].shift(3)
  df[f"{col}_lag4"] = df[col].shift(4)
  df[f"{col}_lag1"] = df[col].shift(1)
  df[f"{col}_lag2"] = df[col].shift(2)
  df[f"{col}_lag3"] = df[col].shift(3)
  df[f"{col}_lag4"] = df[col].shift(4)
  df[f"{col}_lag1"] = df[col].shift(1)
  df[f"{col}_lag2"] = df[col].shift(2)
  df[f"{col}_lag3"] = df[col].shift(3)
  df[f"{col}_lag4"] = df[col].shift(4)
  df[f"{col}_lag1"] = df[col].shift(1)
  df[f"{col}_lag2"] = df[col].shift(2)
  df[f"{col}_lag3"] = df[col].shift(3)
  df[f"{col}_lag4"] = df[col].shift(4)
  df[f"{col}_lag1"] = df[col].shift(1)
  df[f"{col}_lag2"] = df[col].shift(2)
  df[f"{col}_lag3"] = df[col].shift(3)
  df[f"{col}_lag4"] = df[col].shift(4)
  df[f"{col}_lag1"] = df[col].shift(1)
  df[f"{col}_lag2"] = df[col].shift(2)
  df[f"{col}_lag3"] = df[col].shift(3)
  df[f"{col}_lag4"] = df[col].shift(4)
  df[f"{col}_lag1"] = df[

Unnamed: 0,ASKs1,ASKp1,BIDs1,BIDp1,ASKs2,ASKp2,BIDs2,BIDp2,ASKs3,ASKp3,...,BIDs8_lag4,BIDp8_lag4,ASKs9_lag4,ASKp9_lag4,BIDs9_lag4,BIDp9_lag4,ASKs10_lag4,ASKp10_lag4,BIDs10_lag4,BIDp10_lag4
0,1600,571300,760,571200,2018,571400,2126,571100,1250,571500,...,1643.0,570500.0,801.0,572100.0,2758.0,570400.0,1501.0,572200.0,1362.0,570300.0
1,1600,571300,560,571200,2018,571400,2126,571100,1250,571500,...,1643.0,570500.0,801.0,572100.0,2758.0,570400.0,1501.0,572200.0,1362.0,570300.0
2,1600,571300,560,571200,2018,571400,2026,571100,1250,571500,...,1643.0,570500.0,801.0,572100.0,2758.0,570400.0,1501.0,572200.0,1362.0,570300.0
3,1600,571300,560,571200,2018,571400,2026,571100,1250,571500,...,1643.0,570500.0,801.0,572100.0,2758.0,570400.0,1501.0,572200.0,1362.0,570300.0
4,1600,571300,560,571200,2018,571400,1926,571100,1250,571500,...,1643.0,570500.0,801.0,572100.0,2758.0,570400.0,1501.0,572200.0,1362.0,570300.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
991,1215,571100,1801,571000,2400,571200,2800,570900,2000,571300,...,1462.0,570300.0,1575.0,571900.0,2020.0,570200.0,2355.0,572000.0,1100.0,570100.0
992,1215,571100,1801,571000,2400,571200,2800,570900,2100,571300,...,1462.0,570300.0,1575.0,571900.0,2020.0,570200.0,2355.0,572000.0,1100.0,570100.0
993,1215,571100,1801,571000,2400,571200,2800,570900,2100,571300,...,1462.0,570300.0,1575.0,571900.0,2020.0,570200.0,2355.0,572000.0,1100.0,570100.0
994,1215,571100,2581,571000,2400,571200,2800,570900,2100,571300,...,1462.0,570300.0,1575.0,571900.0,2020.0,570200.0,2355.0,572000.0,1100.0,570100.0


In [24]:
corr_matrix = df[[col for col in df.columns if "p" not in col]].corr()
model_all = TMFG()
cliques_all, seps_all, adj_matrix_all = model_all.fit_transform(
    corr_matrix, output="weighted_sparse_W_matrix"
)
adj_matrix_all_df = pd.DataFrame(
    adj_matrix_all, index=corr_matrix.index, columns=corr_matrix.columns
)
adj_matrix_all_df

Unnamed: 0,ASKs1,BIDs1,ASKs2,BIDs2,ASKs3,BIDs3,ASKs4,BIDs4,ASKs5,BIDs5,...,ASKs6_lag4,BIDs6_lag4,ASKs7_lag4,BIDs7_lag4,ASKs8_lag4,BIDs8_lag4,ASKs9_lag4,BIDs9_lag4,ASKs10_lag4,BIDs10_lag4
ASKs1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000
BIDs1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000
ASKs2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000
BIDs2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000
ASKs3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
BIDs8_lag4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.981974
ASKs9_lag4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000
BIDs9_lag4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.974709,0.0,0.000000,0.0,0.0,0.0,0.000000
ASKs10_lag4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.000000,0.0,0.000000,0.0,0.0,0.0,0.000000


In [25]:
G = nx.from_numpy_array(adj_matrix_all)
clique_1, clique_2, clique_3, clique_4 = separating_cliques(G)

connection_1 = get_connection(clique_1, clique_2)
connection_2 = get_connection(clique_2, clique_3)
connection_3 = get_connection(clique_3, clique_4)

In [29]:
# Now i try to build the HNN
# df = pd.read_csv(
#     "/home/daxus/code/LOBFrame/data/nasdaq/unscaled_data/CSCO/CSCO_orderbooks_2019-06-11.csv"
# )
# df = df.drop(
#     columns=[
#         "seconds",
#         "Raw_Target_10",
#         "Raw_Target_50",
#         "Raw_Target_100",
#         "Smooth_Target_10",
#         "Smooth_Target_100",
#     ]
# )

# df = df[
#     [
#         col
#         for col in df.columns
#         if "6" not in col
#         and "10" not in col
#         and "7" not in col
#         and "8" not in col
#         and "9" not in col
#     ]
# ]
# df = df.iloc[:1000].copy()
# df = df[sorted(df.columns, key=sort_key)]


hnn = ConvolutedMixingHNN(
    homological_structure=GraphHomologicalStructure(
        nodes_to_edges_connections=connection_1,
        edges_to_triangles_connections=connection_2,
        triangles_to_tetrahedra_connections=connection_3,
    ),
    num_convolutional_channels=4,
)

dataset = CustomWindowedDataset(
    dataset="nasdaq",
    learning_stage="training",
    windows_limits=[(0, 1), (1, 3), (3, 7), (7, 15), (15, 31)],
    shuffling_seed=42,
    cache_size=1,
    lighten=False,
    threshold=32,
    targets_type="raw",
    all_horizons=[5, 10, 30, 50, 100],
    prediction_horizon=100,
    balanced_dataloader=False,
    training_stocks=["CSCO"],
    validation_stocks=["CSCO"],
    target_stocks=["CSCO"],
)

dataloader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=False,
    num_workers=0,
    drop_last=True,
    sampler=dataset.glob_indices,
)

print(len(dataloader))

UNBALANCED dataset construction...


100%|██████████| 1/1 [00:08<00:00,  8.62s/it]

2520





In [31]:
complete_list = []
# Example usage of the DataLoader
batch_data, batch_labels = next(iter(dataloader))

out = hnn.forward(batch_data)
out.shape

torch.Size([32, 3])