In [14]:
# Jupyter Notebooks for the submodule. 
%run IsWrls_WrlsNWGenerater.ipynb
%run IsWrls_WrlsNW_wmmse.ipynb

import numpy as np  # import numpy
import time         # import time functions 
import torch        # Import the torch library

from torch_geometric.data import Data        # Import the Data class from the torch_geometric.data module
from torch_geometric.loader import DataLoader  # Import the DataLoader class from the torch_geometric.loader module
from torch.nn import Sequential as Seq, Linear as Lin, ReLU, Sigmoid, BatchNorm1d as BN

from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn.conv import MessagePassing

## Class init_parameters
* In wireless network simulations, it's crucial to define the parameters that govern the behaviour of the simulated environment. This class serves as a container for such parameters, making it easier to manage and access them throughout the simulation process.

>**1. Wireless Network Settings:**
>>Parameters like n_links, field_length, bandwidth, carrier_f, etc., define characteristics of the wireless network, such as the number of links, field dimensions, frequency band, and transmission characteristics.

>**2. Antenna Configuration:**
>>Parameters such as tx_height, rx_height, antenna_gain_decibel, etc., specify details about the antennas used in the simulation, including their heights, gains, and transmission powers.

>**3. Noise Parameters:**
>>Parameters like noise_density_milli_decibel, input_noise_power, output_noise_power, SNR_gap_dB, etc., represent the noise characteristics of the wireless channel, including noise density, signal-to-noise ratio (SNR) gaps, and noise powers.

>**4. 2D Occupancy Grid Settings:**
>>Parameters such as cell_length, N_antennas, maxrx, minrx, n_grids, etc., define the grid-based representation of the simulation environment, including cell dimensions, antenna configurations within cells, and grid resolution.


In [15]:
class init_parameters():
    """Plain container holding the settings of one simulated wireless network."""

    def __init__(self, train_K, Nt):
        """
        Populate every simulation setting as an instance attribute.

        Args:
        train_K (int): Value stored as both ``n_links`` and ``n_receiver``.
        Nt (int): Value stored as ``N_antennas``.
        """
        # --- Network size and field geometry ---------------------------------
        self.n_links = train_K
        self.n_receiver = train_K
        self.field_length = 1000             # field side; used twice in setting_str ("1000X1000")
        self.shortest_directLink_length = 2  # lower bound on direct-link length
        self.longest_directLink_length = 65  # upper bound on direct-link length
        self.shortest_crossLink_length = 1   # lower bound on cross-link length

        # --- Radio parameters ------------------------------------------------
        # NOTE(review): units are not established in this file; the names
        # suggest Hz, metres, dB and dBm respectively -- confirm with the generator.
        self.bandwidth = 5e6
        self.carrier_f = 2.4e9
        self.tx_height = 1.5
        self.rx_height = 1.5
        self.antenna_gain_decibel = 2.5
        self.tx_power_milli_decibel = 40
        self.tx_power = 1                    # fixed to 1 here rather than derived from the dB value

        # --- Noise model -----------------------------------------------------
        self.noise_density_milli_decibel = -169
        self.input_noise_power = 1           # fixed to 1 here rather than derived
        self.output_noise_power = 1
        self.SNR_gap_dB = 6
        self.SNR_gap = 1                     # fixed to 1 here rather than derived from SNR_gap_dB

        # Human-readable tag describing the layout family,
        # e.g. "30_links_1000X1000_2_65_length".
        side = self.field_length
        self.setting_str = (
            f"{self.n_links}_links_{side}X{side}"
            f"_{self.shortest_directLink_length}_{self.longest_directLink_length}_length"
        )

        # --- 2D occupancy grid -----------------------------------------------
        self.cell_length = 5
        self.N_antennas = Nt
        self.maxrx = 2
        self.minrx = 1
        self.n_grids = 1                     # fixed to 1 here rather than derived

# Testing for the class definition
# train_K = 30  # Number of links and receivers
# Nt = 10  # Number of antennas
#train_config = init_parameters(train_K, Nt)  # Initializing parameters
#print(f"Number of links for the network is {train_config.n_links} and the number of receivers is {train_config.n_receiver}")


The **MLP** function constructs a **multi-layer perceptron (MLP)** neural network using PyTorch's **Sequential module**.
Let **MLP(x)** denote the output of the MLP neural network for input *x*. The function consists of multiple linear transformation and activation layers: <br>

**1. Linear Transformation:** 
>For each layer $l$ in the MLP with input dimension $d_{l-1}$ and output dimension $d_{l}$, a linear transformation is applied: <br><br>
$ \boxed{\mathrm{Linear}(X(t)) = \mathrm W_{l}\cdot  \mathrm X(t) + \mathrm B_{l}} $. <br><br>
 where $ W_{l} $ is the weight matrix and $ B_{l} $ is the bias vector for layer $ l $. <br>

**2. Activation Function (ReLU):**
>The **Rectified Linear Unit (ReLU)** activation function is a simple yet widely used non-linear activation function in neural networks. It's defined mathematically as: <br><br>
$ \boxed{\mathrm{ReLU}(x)=\max(0,x)} $ <br><br>
>In other words, ReLU sets all negative values in the input tensor x to zero, while leaving positive values unchanged. This leads to sparse activation, which helps alleviate the vanishing gradient problem during training and encourages sparse representations in neural networks.
In neural network architectures, ReLU is typically used as the activation function in hidden layers, while other activation functions like softmax or sigmoid are used in output layers for specific tasks like classification or regression.

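**Batch Normalization (optional):**
If batch normalization is enabled, a Batch Normalization (BN) layer is applied after each linear transformation to normalize the activations. Note that in the `MLP` code cell below the `BN` layer is commented out, so the `batch_norm` argument currently has no effect:<br><br>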
>* **Normalization:** &nbsp; For each mini-batch during training, batch normalization normalizes the activations of each layer by subtracting the mini-batch mean and dividing by the mini-batch standard deviation. This is performed independently for each feature dimension. <br>
>* **Scaling and Shifting:** &nbsp; After normalization, the activations are scaled and shifted using learnable parameters (gamma and beta) to allow the network to learn the optimal scale and shift for each feature dimension. <br>

>Mathematically, batch normalization can be defined as follows: <br><br>
$ \boxed{\mathrm{BN}(t) = \gamma \frac{t-\mu}{\sigma} + \beta} $
>Where:
>* t => the input tensor.
>* μ => the mean of the mini-batch.
>* σ => the standard deviation of the mini-batch.
>* γ => the learnable scale parameter.
>* β => the learnable shift parameter.


In [16]:
def MLP(channels, batch_norm=True):
    """
    Build a stack of ``Linear -> ReLU`` stages as a nested ``Sequential``.

    Args:
        channels: Sequence of layer widths; consecutive pairs
            ``(channels[i-1], channels[i])`` define one ``Linear`` layer each.
        batch_norm: Accepted for interface compatibility only. The batch-norm
            layer is not part of the stage, so this flag has no effect.

    Returns:
        A ``Sequential`` holding ``len(channels) - 1`` inner ``Sequential``
        stages (empty when fewer than two widths are given).
    """
    stages = []
    for width_in, width_out in zip(channels[:-1], channels[1:]):
        # One stage = affine map followed by ReLU (no BN(width_out) appended).
        stages.append(Seq(Lin(width_in, width_out), ReLU()))
    return Seq(*stages)

*The **IGConv** class is a **Graph Neural Network (GNN)** layer implemented with the **PyTorch Geometric** library. Let's break down its components and functionality:*

> **Initialization:**
>> Inherits from MessagePassing, which is a base class provided by PyTorch Geometric for implementing graph neural network layers. Accepts two parameters mlp1 and mlp2, which are multi-layer perceptron (MLP) modules used for message passing and node feature update, respectively. Calls the superclass constructor with aggr='max' and any additional keyword arguments.

> **Reset Parameters:**
>> Defines a method reset_parameters to initialize the parameters of the MLP modules.

> **Update Function:**
>> Defines an update method that computes the new node features based on aggregated messages and the current node features. Concatenates the aggregated messages (aggr_out) with the current node features (x). Passes the concatenated tensor through mlp2 and performs some normalization (nor) on the output. Concatenates the normalized output with a subset of the original node features. Returns the updated node features.

> **Forward Function:**
>> Defines a forward method that takes node features (x), edge indices (edge_index), and edge attributes (edge_attr) as input. Reshapes the input tensors if they are one-dimensional. Calls the propagate method inherited from MessagePassing, which applies message passing to the graph.

> **Message Function:**
>> Defines a message method that computes messages sent from source nodes to target nodes. Concatenates the source (neighbouring) node features (x_j) with edge attributes (edge_attr). Passes the concatenated tensor through mlp1 to compute the messages.

> **Representation Function:**
>>Overrides the __repr__ method to provide a string representation of the class instance, showing the class name and the parameters mlp1 and mlp2.

*Overall, this class defines a graph convolutional layer (IGConv) that utilizes two MLP modules (mlp1 and mlp2) for message passing and node feature update in a graph neural network. It follows the message-passing paradigm commonly used in GNNs, where information is exchanged between neighboring nodes to update node representations.*


In [17]:
class IGConv(MessagePassing):
    """
    Message-passing layer driven by two MLPs.

    ``mlp1`` turns (neighbour features, edge attributes) into messages, the
    messages are aggregated (max by default), and ``mlp2`` maps
    (node features, aggregated messages) to the updated node output.

    Note: ``update`` reads ``Nt`` from the enclosing (notebook-global)
    namespace, so ``Nt`` must be defined before the layer is called.
    """

    def __init__(self, mlp1, mlp2, **kwargs):
        """
        Args:
            mlp1: Module applied to ``cat([x_j, edge_attr])`` in ``message``.
            mlp2: Module applied to ``cat([x, aggr_out])`` in ``update``.
            **kwargs: Forwarded to ``MessagePassing``; ``aggr`` defaults to
                ``'max'`` but may now be overridden by the caller.
        """
        # setdefault keeps 'max' as the default while avoiding a
        # "multiple values for keyword argument 'aggr'" TypeError.
        kwargs.setdefault('aggr', 'max')
        super(IGConv, self).__init__(**kwargs)

        self.mlp1 = mlp1
        self.mlp2 = mlp2
        #self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the learnable parameters of both MLPs."""
        # `reset` is not imported at the top of the notebook; import it here
        # so this method no longer raises NameError when called.
        from torch_geometric.nn.inits import reset

        reset(self.mlp1)
        reset(self.mlp2)

    def update(self, aggr_out, x):
        """
        Combine node features with aggregated messages.

        The ``mlp2`` output is scaled row-wise by ``1 / max(1, ||row||_2)``,
        so no row has L2 norm above 1, and is then concatenated with the
        first ``2*Nt`` columns of ``x``.
        """
        tmp = torch.cat([x, aggr_out], dim=1)
        comb = self.mlp2(tmp)
        nor = torch.sqrt(torch.sum(torch.mul(comb, comb), dim=1))
        nor = nor.unsqueeze(-1)
        # ones_like keeps the comparison tensor on comb's device/dtype instead
        # of relying on a notebook-global `device`.
        comp1 = torch.ones_like(comb)
        comb = torch.div(comb, torch.max(comp1, nor))
        return torch.cat([comb, x[:, :2*Nt]], dim=1)

    def forward(self, x, edge_index, edge_attr):
        """Run one round of message passing; 1-D inputs are promoted to column vectors."""
        x = x.unsqueeze(-1) if x.dim() == 1 else x
        edge_attr = edge_attr.unsqueeze(-1) if edge_attr.dim() == 1 else edge_attr
        return self.propagate(edge_index, x=x, edge_attr=edge_attr)

    def message(self, x_i, x_j, edge_attr):
        """
        Build the message for every edge from ``x_j`` and the edge attributes.

        ``x_i`` is unused but kept in the signature, which PyG inspects to
        decide what to pass in.
        """
        tmp = torch.cat([x_j, edge_attr], dim=1)
        agg = self.mlp1(tmp)
        return agg

    def __repr__(self):
        # The original format string had a single placeholder, so mlp2 was
        # silently dropped from the representation.
        return '{}(mlp1={}, mlp2={})'.format(self.__class__.__name__, self.mlp1, self.mlp2)

*This code defines a neural network model called IGCNet using PyTorch.*

**Initialization:** 
* The IGCNet class is defined, which is a subclass of torch.nn.Module, indicating that it's a neural network model.
* Inside the **\__init__** method, the initial setup of the model is defined.
* The **super(IGCNet, self).\__init__()** line initializes the parent class (torch.nn.Module) to ensure proper inheritance.

**MLP Modules Initialization:**
* Two multi-layer perceptron (MLP) modules (**self.mlp1** and **self.mlp2**) are initialized using the MLP function with specific layer configurations.
* **self.mlp1** has layers with input size 6*Nt, output size 64, and 64.
* **self.mlp2** has layers with input size 64+4*Nt and output size 32.

**Sequential Composition:**
* The self.mlp2 is modified by adding a linear layer (Lin) with output size 2*Nt at the end using the Seq function.

**IGConv Layer Initialization:**
* An instance of the IGConv class is created, which is presumably a graph convolutional layer.
* This layer is initialized with the previously defined MLP modules (self.mlp1 and self.mlp2).

**Forward Pass:**
* The forward method defines how data flows through the model during the forward pass.
* Input data (x0, edge_attr, edge_index) is passed through the graph convolutional layer (self.conv) three times (x1, x2, out).
* Each time, the output of the previous pass is used as input (x) along with edge attributes and indices.

*In summary, the IGCNet model consists of two MLP modules (mlp1 and mlp2), which are then used within a graph convolutional layer (IGConv). During the forward pass, input data is passed through the graph convolutional layer multiple times to generate the final output*

In [18]:
class IGCNet(torch.nn.Module):
    """
    Network that applies one shared ``IGConv`` layer three times.

    The layer sizes depend on ``Nt``, which is read from the notebook's
    global namespace when the model is instantiated.
    """

    def __init__(self):
        super().__init__()

        # Message network: 6*Nt -> 64 -> 64.
        self.mlp1 = MLP([6*Nt, 64, 64])
        # Update network: (64 + 4*Nt) -> 32 with ReLU, then a plain linear
        # head producing 2*Nt outputs per node.
        update_hidden = MLP([64+4*Nt, 32])
        update_head = Seq(Lin(32, 2*Nt))
        self.mlp2 = Seq(update_hidden, update_head)
        self.conv = IGConv(self.mlp1, self.mlp2)

    def forward(self, data):
        """Feed ``data.x`` through the shared convolution three times and return the result."""
        edges, edge_feats = data.edge_index, data.edge_attr
        h = data.x
        for _ in range(3):
            # The same layer (shared weights) is reused on every pass.
            h = self.conv(x=h, edge_index=edges, edge_attr=edge_feats)
        return h

### Normalization Process

*This equation represents the normalization process for both training and test data. Each element of the channel matrix is normalized separately using its respective mean and standard deviation. Finally, the normalized diagonal and off-diagonal elements are combined to obtain the final normalized channel matrix.*

Given a channel matrix $H$ of size $M \times M$, where $M$ is the number of antennas:

1. **Initialization**:
   - $H$ is the input channel matrix.
   - $N_t$ is the number of antennas.

2. **Mask Creation**:
   - Let mask $\text{M}_{ij}$ be the mask indicating whether element $H_{ij}$ is on the diagonal or off-diagonal. $\text{mask}_{ij} = 1$ if $i = j$, and $\text{mask}_{ij} = 0$ otherwise.

3. **Diagonal Normalization**:
   - Calculate the mean $\mu_{\text{diag}}$ and standard deviation $\sigma_{\text{diag}}$ of the diagonal elements:
     $\boxed{\mu_{\text{diag}} = \frac{1}{N_t} \sum_{i=1}^{N_t} H_{ii}}$ <br>
     $\boxed{\sigma_{\text{diag}} = \sqrt{\frac{1}{N_t} \sum_{i=1}^{N_t} (H_{ii} - \mu_{\text{diag}})^2}}$ 
   - Normalize the diagonal elements:
     $\boxed{\tilde{H}_{ij} = \frac{H_{ij} - \mu_{\text{diag}}}{\sigma_{\text{diag}}} \quad \text{if} \quad i = j}$

4. **Off-Diagonal Normalization**:
   - Calculate the mean $\mu_{\text{off-diag}}$ and standard deviation $\sigma_{\text{off-diag}}$ of the off-diagonal elements:
     $\mu_{\text{off-diag}} = \frac{1}{M - N_t} \sum_{i=1}^{M} \sum_{j=1}^{M} (1 - \text{mask}_{ij}) H_{ij}$ <br>
     $\sigma_{\text{off-diag}} = \sqrt{\frac{1}{M - N_t} \sum_{i=1}^{M} \sum_{j=1}^{M} (1 - \text{mask}_{ij}) (H_{ij} - \mu_{\text{off-diag}})^2}$
   - Normalize the off-diagonal elements:
     $\tilde{H}_{ij} = \frac{H_{ij} - \mu_{\text{off-diag}}}{\sigma_{\text{off-diag}}} \quad \text{if} \quad i \neq j$

5. **Combining Diagonal and Off-Diagonal**:
   - The final normalized channel matrix $\tilde{H}$ is obtained by combining the normalized diagonal and off-diagonal elements:
     $ \tilde{H}_{ij} = \begin{cases} \frac{H_{ij} - \mu_{\text{diag}}}{\sigma_{\text{diag}}} & \text{if } i = j \\ \frac{H_{ij} - \mu_{\text{off-diag}}}{\sigma_{\text{off-diag}}} & \text{if } i \neq j \end{cases} $

This Markdown representation provides a concise summary of the normalization process using mathematical equations with summation notation. Each step is explained in detail, making it easier to understand and implement.


In [19]:
def normalize_data(train_data, test_data, general_para):
    """
    Normalise direct-link (diagonal) and cross-link (off-diagonal) channel
    entries separately, using statistics computed from the training set only.

    The problem sizes are now taken from the arrays themselves instead of
    from notebook globals (``train_K``, ``train_layouts``, ``test_K``), so the
    function gives the same result regardless of which cells ran before it
    and works for test sets with a different number of links.

    Args:
        train_data: Real array indexed as (layout, link, link, antenna).
        test_data: Real array with the same axis layout; its number of links
            may differ from the training set.
        general_para: Object exposing ``N_antennas``.

    Returns:
        (norm_train, norm_test): Normalised copies; the inputs are not modified.

    Notes:
        * The scale factors are root-mean-square values of the raw entries
          (no mean subtraction), not standard deviations.
        * With a single link there are no cross links, so the off-diagonal
          statistics involve a division by zero.
    """
    Nt = general_para.N_antennas

    def _diag_mask(n_links):
        # (1, K, K, Nt) indicator of the direct-link entries, broadcast over layouts.
        return np.repeat(np.eye(n_links)[np.newaxis, :, :, np.newaxis], Nt, axis=-1)

    def _split(data, mask):
        # Separate an array into its diagonal and off-diagonal parts.
        diag_part = np.multiply(mask, data)
        return diag_part, data - diag_part

    def _normalise(diag_part, off_part, mask):
        scaled_diag = (diag_part - diag_mean) / diag_scale
        scaled_off = (off_part - off_diag_mean) / off_diag_scale
        # Masking after scaling puts each entry back in its own region, since
        # subtracting the mean also shifts the zeros outside that region.
        return np.multiply(scaled_diag, mask) + (scaled_off - np.multiply(scaled_off, mask))

    # --- statistics from the training set --------------------------------
    train_copy = np.copy(train_data)
    n_layouts, n_links = train_copy.shape[0], train_copy.shape[1]
    train_mask = _diag_mask(n_links)
    diag_H, off_diag = _split(train_copy, train_mask)

    diag_mean = np.sum(diag_H / Nt) / n_layouts / n_links
    diag_scale = np.sqrt(np.sum(np.square(diag_H)) / n_layouts / n_links / Nt)

    n_cross = n_links - 1  # cross links per receiver
    off_diag_mean = np.sum(off_diag / Nt) / n_layouts / n_links / n_cross
    off_diag_scale = np.sqrt(np.sum(np.square(off_diag)) / Nt / n_layouts / n_links / n_cross)

    norm_train = _normalise(diag_H, off_diag, train_mask)

    # --- apply the training statistics to the test set --------------------
    test_copy = np.copy(test_data)
    test_mask = _diag_mask(test_copy.shape[1])
    test_diag, test_off = _split(test_copy, test_mask)
    norm_test = _normalise(test_diag, test_off, test_mask)

    print(diag_mean, diag_scale, off_diag_mean, off_diag_scale)
    return norm_train, norm_test


### Building Graph from Channel State Information (CSI)

The `build_graph` function constructs a graph representation from CSI, distances, and other parameters.

#### Input Parameters:

- `CSI`: Channel State Information matrix.
- `dist`: Distance matrix.
- `K`: Number of users.
- `threshold`: Threshold value for distance filtering.

#### Derivation:

- `n`  : CSI matrix $[H]_{r \times c}$ size
- `N_t`: number of antennas.

##### Extract real and imaginary parts:
$\boxed{ X = (\text{[CSI]} \times \text{[I]})^{T} }               $
- $Q = \text{imag}(X)$, &nbsp; $I = \text{real}(X)                $<br> <br>

##### Create feature vector:
$\boxed{ X = [\frac{1}{\sqrt{N_t}}(\mathbf{1}_{n,2N_t}), Q, I] }  $

##### Filter distances:
$\boxed{ \text{D}         = \text{d} + (1000 \cdot (\text{[d]}\times \text{[I]}))^{T} } $ <br><br>

$\boxed{\text{D}_{ij}     = \begin{cases}
                            0 & \text{if } \text{D}_{ij} > \text{threshold} 
                            \\ 
                            \text{D}_{ij} & \text{otherwise} \end{cases} }              $ <br><br>
                            
$\boxed{\text{E}          = \begin{cases}
                            [\text{i}, \text{j}] & \text{if } \text{D}_{i,j} \neq 0
                            \\
                            0 & \text{otherwise} \end{cases}} $ <br><br>
                            
$\boxed{\text{A}          = [\text{E(1)}, \text{E(0)}]}              $ <br><br>
$\boxed{\text{HH} = \text{I} + \text{j} \text{Q}  }         $ <br><br>
$\boxed{\text{Data} = [\text{X}, \text{E}, \text{A}]}                 $
- `X`: Complex data from CSI
- `E`: Edge indices
- `A`: Attributes of the edge index
          
This provides a mathematical overview of the `build_graph` function, explaining each step with equations and operations performed.


In [20]:
def build_graph(CSI, dist, norm_csi_real, norm_csi_imag, K, threshold):
    """
    Turn one layout into a graph with one node per link.

    Args:
        CSI: Complex channel array indexed as (link, link, antenna).
        dist: Square distance matrix between the links.
        norm_csi_real, norm_csi_imag: Normalised real / imaginary channel
            arrays, indexed like ``CSI``; they supply the edge attributes.
        K: Number of links (nodes).
        threshold: Pairs whose distance exceeds this value get no edge.

    Returns:
        ``Data`` with
        * ``x``: per node ``[constant 1/sqrt(Nt) block (2*Nt), real direct channel, imag direct channel]``
        * ``edge_index``: (2, E) long tensor
        * ``edge_attr``: per edge ``[normalised real, normalised imag]``
        * ``y``: raw channel with real/imag stacked on a trailing axis and a leading axis of size 1
    """
    n_rows = CSI.shape[0]
    n_ant = CSI.shape[2]

    # ---- node features --------------------------------------------------
    link_ids = np.arange(K)
    direct = CSI[link_ids, link_ids, :]                  # (K, Nt) direct channels
    init_block = np.full((n_rows, 2 * n_ant), 1 / np.sqrt(n_ant))
    node_feats = np.concatenate((init_block, np.real(direct), np.imag(direct)), axis=1)
    x = torch.tensor(node_feats, dtype=torch.float)

    # ---- edges ------------------------------------------------------------
    # Inflate the diagonal by a factor of 1001 so that (for the distances used
    # here) it exceeds the threshold and self-loops are dropped along with
    # every pair that is further apart than `threshold`.
    inflated = dist + 1000 * np.multiply(np.eye(K), dist)
    kept = np.where(inflated > threshold, 0, inflated)
    rows, cols = np.nonzero(kept)

    edge_feats = np.concatenate(
        (norm_csi_real[rows, cols], norm_csi_imag[rows, cols]), axis=1
    )
    edge_attr = torch.tensor(edge_feats, dtype=torch.float)

    # Row 0 of edge_index holds the column indices, row 1 the row indices.
    edge_index = torch.tensor(np.stack((cols, rows)), dtype=torch.long)

    # ---- target: raw channel split into real / imaginary ------------------
    channel = np.stack((np.real(CSI), np.imag(CSI)), axis=-1)
    y = torch.tensor(channel[np.newaxis], dtype=torch.float)

    return Data(x=x, edge_index=edge_index.contiguous(), edge_attr=edge_attr, y=y)

In [21]:
def proc_data(HH, dists, norm_csi_real, norm_csi_imag, K, threshold=500):
    """
    Build one graph per layout.

    Args:
        HH: Channel array whose first axis enumerates the layouts.
        dists: Distance array, first axis enumerating the layouts.
        norm_csi_real, norm_csi_imag: Normalised channel parts, first axis
            enumerating the layouts.
        K: Number of links, forwarded to ``build_graph``.
        threshold: Distance cut-off forwarded to ``build_graph``. Defaults to
            500, the value that used to be hard-coded here.

    Returns:
        List with one ``build_graph`` result per layout, in layout order.
    """
    return [
        build_graph(HH[i], dists[i], norm_csi_real[i], norm_csi_imag[i], K, threshold)
        for i in range(HH.shape[0])
    ]

The `batch_wmmse` function iterates over each instance of the CSI matrix and, for each instance, computes Y using the function `np_WMMSE_vector`, which calculates the Weighted Minimum Mean Squared Error (WMMSE) solution for the given CSI and noise variance.
The output Y contains the result of the WMMSE algorithm applied to each instance of the CSI matrix.

In [22]:
def batch_wmmse(csis, var_noise):
    """
    Run ``np_WMMSE_vector`` on every layout of a batch.

    The problem sizes are read from ``csis`` itself rather than from the
    notebook-global ``test_config``, so the result no longer depends on which
    configuration object happens to be bound when the cell runs.

    Args:
        csis: Complex channel array indexed as (layout, link, link, antenna).
        var_noise: Noise variance forwarded to ``np_WMMSE_vector``.

    Returns:
        Complex array of shape (layouts, links, antennas) holding the value
        returned by ``np_WMMSE_vector`` for each layout.
    """
    n, K, Nt = csis.shape[0], csis.shape[1], csis.shape[3]
    Y = np.zeros((n, K, Nt), dtype=complex)
    # Uniform initial point shared by all layouts; each call gets its own copy
    # in case the solver modifies its argument in place.
    Pini = 1 / np.sqrt(Nt) * np.ones((K, Nt), dtype=complex)
    for ii in range(n):
        # Third argument is the constant 1 -- presumably the power budget;
        # confirm against np_WMMSE_vector's definition.
        Y[ii, :, :] = np_WMMSE_vector(np.copy(Pini), csis[ii, :, :, :], 1, var_noise)
    return Y

The `power_check` function checks the power constraint for a given power allocation matrix.
#### 1. Input Parameters:
   - `p`: Power allocation matrix.

#### 2. Compute Power Constraints:
   - Compute the squared norm of each row in the power allocation matrix: <br>
     $ \boxed{\text{p} = \sum_{i=1}^{n} p_i^{2}} $
   - Count the number of rows where the squared norm exceeds the threshold of `1.1`.

This function is used to verify whether the power allocation matrix satisfies the power constraints.


In [23]:
def power_check(p, threshold=1.1):
    """
    Count the rows of ``p`` whose sum of squared entries exceeds ``threshold``.

    Args:
        p: 2-D array-like; each row is checked independently.
        threshold: Limit on the per-row sum of squares. Defaults to 1.1, the
            value that used to be hard-coded.

    Returns:
        int: Number of violating rows. The count is also printed, as before.
    """
    row_power = np.sum(np.square(p), axis=1)
    n_violations = int(np.sum(row_power > threshold))
    print(n_violations)
    return n_violations

### Sum Rate Loss Function

The `sr_loss` function computes the sum rate loss based on the received power and interference of the channels.

#### Input Parameters:
- `data`: Batch whose `y` field stores the channel tensor. `H_1 = data.y[..., 0]` is the real part and `H_2 = data.y[..., 1]` is the imaginary part of the channel coefficients.
- `p`: Model output. `p_1 = p[:, :N]` is the real part and `p_2 = p[:, N:2N]` is the imaginary part of the beamforming vectors.
- `K`: Number of users.
- `N`: Number of antennas.

#### Procedure:

1. **Received Power Calculations**:
   - Compute the $H_1 \cdot p_1$ term, summed over the $N$ antennas:  <br><br>
     $ \boxed{\text{rx\_power1}_{i,j} = \sum_{n=1}^{N} H_{1,ijn} \cdot p_{1,in} } $ <br><br>
   - Compute the $H_2 \cdot p_2$ term: <br><br>
     $ \boxed{\text{rx\_power2}_{i,j} = \sum_{n=1}^{N} H_{2,ijn} \cdot p_{2,in} } $ <br><br>
   - Compute the two cross terms $H_1 \cdot p_2$ and $H_2 \cdot p_1$: <br><br>
     $ \boxed{\text{rx\_power3}_{i,j} = \sum_{n=1}^{N} H_{1,ijn} \cdot p_{2,in} } $ <br><br>
     $ \boxed{\text{rx\_power4}_{i,j} = \sum_{n=1}^{N} H_{2,ijn} \cdot p_{1,in} } $ <br><br>

2. **Received Power and Interference**:
   - Combine received power and interference: <br><br>
     $ \boxed{\text{signal\_power} = (\text{rx\_power1} - \text{rx\_power2})^2 + (\text{rx\_power3} + \text{rx\_power4})^2} $ <br><br>
   - Separate valid received power and interference using a mask: <br><br>
     $ \boxed{\text{abs\_power}_{j}   = \text{signal\_power}_{j,j}} $ <br><br>
     $ \boxed{\text{interference}_{j} = \sum_{i = 1,\, i \neq j}^{K} \text{signal\_power}_{i,j} + 1} $ <br><br>

3. **Rate Calculation**:
   - Compute the achievable rate for each user: <br>
     $ \boxed{\text{rate}_i = \log_2 \left(1 + \frac{\text{abs\_power}_i}{\text{interference}_i} \right)} $ <br><br>
   - Compute the sum rate (summed over the $K$ users, then averaged over the $B$ layouts in the batch): <br><br>
     $ \boxed{\text{sum\_rate} = \frac{1}{B} \sum_{b=1}^{B} \sum_{i=1}^{K} \text{rate}_{b,i}} $ <br><br>

4. **Loss Calculation**:
   - Compute the negative sum rate loss: <br><br>
     $ \boxed{\text{loss} = -\text{sum\_rate} } $ <br><br>

#### Output:

The function returns the sum rate loss as the final output.


In [24]:
def sr_loss(data, p, K, N):
    """
    Negative sum rate of a batch of layouts.

    The diagonal mask is created on the same device and with the same dtype
    as the computed powers, instead of on a notebook-global ``device``; the
    function therefore works before ``device`` is defined and cannot produce
    a device mismatch.

    Args:
        data: Object whose ``y`` has shape (B, K, K, N, 2); ``y[..., 0]`` and
            ``y[..., 1]`` are the real and imaginary channel parts.
        p: Tensor of shape (B*K, >= 2*N); columns ``[:N]`` and ``[N:2*N]`` are
            the real and imaginary beamformer parts.
        K: Number of links per layout.
        N: Number of antennas.

    Returns:
        Scalar tensor: minus the per-layout sum rate, averaged over the batch.
    """
    H_re = data.y[:, :, :, :, 0]
    H_im = data.y[:, :, :, :, 1]
    # (B, K, 1, N): link i's beamformer is broadcast over all receivers j.
    p_re = torch.reshape(p[:, :N], (-1, K, 1, N))
    p_im = torch.reshape(p[:, N:2*N], (-1, K, 1, N))

    # Real and imaginary parts of the complex product (H_re + j H_im)(p_re + j p_im),
    # summed over antennas.
    real_part = torch.sum(H_re * p_re, dim=-1) - torch.sum(H_im * p_im, dim=-1)
    imag_part = torch.sum(H_re * p_im, dim=-1) + torch.sum(H_im * p_re, dim=-1)
    rx_power = real_part * real_part + imag_part * imag_part   # (B, K, K)

    mask = torch.eye(K, device=rx_power.device, dtype=rx_power.dtype)
    valid_rx_power = torch.sum(rx_power * mask, dim=1)          # rx_power[b, j, j]
    interference = torch.sum(rx_power * (1 - mask), dim=1) + 1  # + unit noise power
    rate = torch.log2(1 + valid_rx_power / interference)
    sum_rate = torch.mean(torch.sum(rate, dim=1))
    return torch.neg(sum_rate)


### Training Function

The `train` function trains a neural network model using the specified optimizer and loss function.

1. **Model Preparation**:
   - Sets the model to training mode.

2. **Loss Calculation Loop**:
   - Iterates over the training data batches provided by `train_loader`.
   - Transfers the data to the specified device (e.g., GPU).
   - Resets the gradients of the optimizer.
   - Passes the data through the model to obtain predictions.
   - Calculates the loss using the `sr_loss` function, which computes the sum rate loss based on the received power and interference.
   - Backpropagates the loss to compute gradients.
   - Updates the total loss by adding the loss multiplied by the number of graphs (data instances) in the batch.
   - Performs optimization by calling `optimizer.step()` to update the model parameters based on the computed gradients.

3. **Loss Calculation**:
   - After processing all batches, computes the average loss per layout by dividing the total loss by the number of layouts.

4. **Return**:
   - Returns the average loss per layout as the output of the function.


In [25]:
def train():
    """
    Run one optimisation pass over ``train_loader``.

    Uses the notebook-level ``model``, ``optimizer``, ``device``,
    ``train_loader``, ``train_K``, ``Nt`` and ``train_layouts``.

    Returns:
        The batch losses weighted by their number of graphs, summed and
        divided by ``train_layouts``.
    """
    model.train()

    weighted_loss_sum = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        batch_loss = sr_loss(batch, model(batch), train_K, Nt)
        batch_loss.backward()
        # Weight by the number of graphs so that the final division yields a
        # per-layout average even when batches differ in size.
        weighted_loss_sum += batch_loss.item() * batch.num_graphs
        optimizer.step()

    return weighted_loss_sum / train_layouts

### Testing Function

The `test` function evaluates the neural network model on the test dataset.
1. **Model Evaluation**:
   - Sets the model to evaluation mode.

2. **Loss Calculation Loop**:
   - Iterates over the test data batches provided by `test_loader`.
   - Transfers the data to the specified device (e.g., GPU).
   - Disables gradient computation using `torch.no_grad()` to speed up computation and save memory.
   - Records the start time before model inference.
   - Passes the data through the model to obtain predictions.
   - Records the end time after model inference and prints the time taken.
   - Calculates the loss using the `sr_loss` function, which computes the sum rate loss based on the received power and interference.
   - Updates the total loss by adding the loss multiplied by the number of graphs (data instances) in the batch.

3. **Loss Calculation**:
   - After processing all batches, computes the average loss per layout by dividing the total loss by the number of layouts.

4. **Return**:
   - Returns the average loss per layout as the output of the function.


In [26]:
def test():
    model.eval()

    total_loss = 0
    for data in test_loader:
        data = data.to(device)
        with torch.no_grad():
            start = time.time()
            out = model(data)
            end = time.time()
            print('CGCNet time:', end-start)
            loss = sr_loss(data,out,test_K,Nt)
            total_loss += loss.item() * data.num_graphs
            #power = out[:,:2*Nt]
            #Y = power.numpy()
            #power_check(Y)
    
    return total_loss / test_layouts

In [27]:
# ---------------------------------------------------------------------------
# Experiment driver: generate data, run the WMMSE baseline, train the GNN,
# then evaluate on larger networks with the same link density.
#
# NOTE: the functions defined above read several of the names bound here
# (Nt, device, model, optimizer, train_loader, test_loader, train_K, test_K,
# train_layouts, test_layouts, test_config) from the global namespace, so the
# names must not be changed.
# ---------------------------------------------------------------------------

# Seed the global RNGs so that Restart & Run All gives repeatable numbers.
# (Assumes sample_generate draws from NumPy's global RNG -- TODO confirm.)
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

Nt  = 2   # antennas per transmitter
var = 1   # noise variance passed to the WMMSE baseline

test_K  = 30
train_K = 30
train_layouts = 20
test_layouts  = 10

train_config = init_parameters(train_K, Nt)
test_config  = init_parameters(test_K, Nt)

(train_dists, train_csis) = sample_generate(train_config, train_layouts)
(test_dists, test_csis)   = sample_generate(test_config, test_layouts)

(train_csi_real, train_csi_imag) = np.real(train_csis), np.imag(train_csis)
(test_csi_real, test_csi_imag)   = np.real(test_csis), np.imag(test_csis)

# Real and imaginary parts are normalised independently, always with
# statistics taken from the training set.
(norm_train_real, norm_test_real) = normalize_data(train_csi_real, test_csi_real, train_config)
(norm_train_imag, norm_test_imag) = normalize_data(train_csi_imag, test_csi_imag, train_config)

# --- WMMSE baseline on the test set ----------------------------------------
start = time.time()
Y = batch_wmmse(test_csis.transpose(0,2,1,3),var)
end = time.time()
print('WMMSE time:',end-start)
sr = IC_sum_rate( test_csis,Y,var)
print('WMMSE rate:',sr)

train_data_list = proc_data(train_csis, train_dists, norm_train_real, norm_train_imag, train_K)
test_data_list  = proc_data(test_csis, test_dists, norm_test_real, norm_test_imag,  test_K)

# --- Model, optimiser, scheduler -------------------------------------------
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') # Initialize device
model  = IGCNet().to(device) # Initialize model

optimizer = torch.optim.Adam(model.parameters(), lr=0.001) # Initialize optimizer
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.9) # Initialize scheduler

print(f"Device: {device}")
print(f"Model: {model.__class__.__name__}")
print(f"Optimizer: Adam")
print(f"Scheduler: StepLR(step_size=20, gamma=0.9)")

train_loader = DataLoader(train_data_list, batch_size=64, shuffle=True,num_workers=0)
test_loader  = DataLoader(test_data_list, batch_size=test_layouts, shuffle=False, num_workers=0)

print("Data Loading completed ....")
# Iterate over both train_loader and test_loader
for loader_name, loader in [("train_loader", train_loader), ("test_loader", test_loader)]:
    # Iterate over the DataLoader and print the size of each batch
    for i, batch in enumerate(loader, 1):
        batch_size = batch.num_graphs  # Get the number of graphs in the batch
        print(f"{loader_name}: Batch {i}: Size = {batch_size}")

# --- Training ---------------------------------------------------------------
# range(1, 20) runs 19 epochs (1..19).
for epoch in range(1, 20):
    loss1 = train()

    loss2 = test()
    print('Epoch {:03d}, Train Loss: {:.4f}, Val Loss: {:.4f}'.format(
        epoch, loss1, loss2))
    scheduler.step()

# --- Generalisation to larger networks at constant link density -------------
density   = test_config.field_length**2/test_K
gen_tests = [40, 80, 160]
# test_K, test_layouts, test_config and test_loader are deliberately rebound
# at global scope: test() and the other helpers read them as globals.
for test_K in gen_tests:
    test_layouts = 50
    test_config = init_parameters(test_K, Nt)
    field_length = int(np.sqrt(density*test_K))
    test_config.field_length = field_length
    test_dists, test_csis    = sample_generate(test_config, test_layouts)
    print('test size', test_csis.shape,field_length)

    start = time.time()
    Y = batch_wmmse(test_csis.transpose(0,2,1,3),var)
    end = time.time()
    print('WMMSE time:',end-start)
    sr = IC_sum_rate( test_csis,Y,var)
    print('WMMSE rate:',sr)

    test_csi_real, test_csi_imag = np.real(test_csis), np.imag(test_csis)
    # Both calls use train_config: only N_antennas is read from it, and the
    # statistics always come from the training data.
    _, norm_test_real = normalize_data(train_csi_real,test_csi_real, train_config)
    _, norm_test_imag = normalize_data(train_csi_imag,test_csi_imag, train_config)

    test_data_list = proc_data(test_csis, test_dists, norm_test_real, norm_test_imag,  test_K)
    # num_workers=0 like the other loaders: the data is already in memory.
    test_loader = DataLoader(test_data_list, batch_size=test_layouts, shuffle=False, num_workers=0)
    loss2 = test()
    # test() returns the loss, i.e. the NEGATIVE sum rate; flip the sign so the
    # printed value is comparable with 'WMMSE rate'.
    print('CGCNet rate:',-loss2)

<<<<<<<<<<<<<20 layouts: 30_links_1000X1000_2_65_length>>>>>>>>>>>>
<<<<<<<<<<<<<10 layouts: 30_links_1000X1000_2_65_length>>>>>>>>>>>>
0.8329697004688061 44.13261591703872 0.008220171157523819 3.459562246262484
0.7958741830422255 51.05758305063691 0.0015287609592706966 3.871541301572302
WMMSE time: 2.5465402603149414
WMMSE rate: 108.32834655515748
Device: cpu
Model: IGCNet
Optimizer: Adam
Scheduler: StepLR(step_size=20, gamma=0.9)
Data Loading completed ....
train_loader: Batch 1: Size = 20
test_loader: Batch 1: Size = 10
CGCNet time: 0.003065824508666992
Epoch 001, Train Loss: -50.1422, Val Loss: -63.8789
CGCNet time: 0.004076480865478516
Epoch 002, Train Loss: -52.0666, Val Loss: -65.1230
CGCNet time: 0.004507303237915039
Epoch 003, Train Loss: -53.1808, Val Loss: -66.0039
CGCNet time: 0.0036160945892333984
Epoch 004, Train Loss: -54.1842, Val Loss: -66.6687
CGCNet time: 0.004757881164550781
Epoch 005, Train Loss: -55.2323, Val Loss: -67.2583
CGCNet time: 0.006839275360107422
Epoch 