In [82]:
%run IsWrls_eMMB_WmmsePowerControl.ipynb

import numpy as np
import torch

from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
from torch_geometric.nn.conv import MessagePassing
from torch.nn import Sequential as Seq, Linear as Lin, ReLU, Sigmoid, BatchNorm1d as BN

1. **Class Definition:**
   - Define a class named `IGConv` which inherits from `MessagePassing`.

2. **Constructor (`__init__` method):**
   - Initialize the message passing aggregation method to 'max' and other keyword arguments.
   - Define two multi-layer perceptrons (MLPs) named `mlp1` and `mlp2`.
   - Call the constructor of the superclass (`MessagePassing`).

3. **Parameter Initialization (`reset_parameters` method):**
   - Reset the parameters of `mlp1` and `mlp2`.

4. **Update Function (`update` method):**
   - Concatenate the input features (`x`) and aggregated neighbor features (`aggr_out`).
   - Pass the concatenated features through `mlp2` to obtain combined features.
   - Concatenate the original features (`x[:,:2]`) with the combined features (`comb`) along the feature dimension.

5. **Forward Pass (`forward` method):**
   - Ensure that input features (`x`) and edge attributes (`edge_attr`) have the correct dimensions.
   - Use the `propagate` method to perform message passing based on the given edge indices (`edge_index`), input features (`x`), and edge attributes (`edge_attr`).

6. **Message Function (`message` method):**
   - Concatenate the features of the neighboring (source) node (`x_j`) with the edge attributes (`edge_attr`) and pass them through `mlp1` to obtain the messages, which are then max-aggregated at the receiving node.

7. **Representation Function (`__repr__` method):**
   - Return a string representation of the class name (`IGConv`) along with the parameters `mlp1` and `mlp2`.

8. **Mathematical Formulas:**
   - Let $x_i$ and $x_j$ be the features of the receiving (target) node and the sending (source) node, respectively.
   - Let $\text{edge\_attr}$ be the edge attributes.
   - $\text{mlp1}$ and $\text{mlp2}$ represent the multi-layer perceptron layers.
   - $\text{agg}$ represents the aggregated messages obtained after passing through $\text{mlp1}$.
   - $\text{comb}$ represents the combined features obtained after passing through $\text{mlp2}$.
   - $\text{aggr\_out}$ represents the aggregated neighbor features after message passing.

9. **Model Overview:**
   - The class implements a graph convolutional layer using message passing.
   - It utilizes two multi-layer perceptrons (`mlp1` and `mlp2`) to aggregate messages and update node features.
   - The `update` method combines the original node features with the aggregated features to produce updated node representations.


In [83]:
class IGConv(MessagePassing):
    """Message-passing layer with max aggregation built from two MLPs.

    For every edge, ``mlp1`` turns the neighbor's features concatenated with
    the edge attributes into a message. Messages arriving at a node are
    max-aggregated, concatenated with the node's own features and fed through
    ``mlp2``. The layer output keeps the first two input feature columns
    unchanged and appends the ``mlp2`` output.

    Args:
        mlp1: Module applied to ``cat([x_j, edge_attr])`` for each edge.
        mlp2: Module applied to ``cat([x, aggregated_messages])`` for each node.
        **kwargs: Forwarded to ``MessagePassing.__init__``.
    """

    def __init__(self, mlp1, mlp2, **kwargs):
        super(IGConv, self).__init__(aggr='max', **kwargs)

        self.mlp1 = mlp1
        self.mlp2 = mlp2
        #self.reset_parameters()

    def reset_parameters(self):
        """Re-initialize the parameters of both MLPs."""
        # `reset` is not imported at the top of the notebook, so bring it into
        # scope here; otherwise calling this method raises NameError.
        from torch_geometric.nn.inits import reset

        reset(self.mlp1)
        reset(self.mlp2)

    def update(self, aggr_out, x):
        """Combine each node's own features with its aggregated messages.

        Returns ``cat([x[:, :2], mlp2(cat([x, aggr_out]))], dim=1)``: the first
        two feature columns are passed through untouched and the remaining
        columns are replaced by the ``mlp2`` output.
        """
        tmp = torch.cat([x, aggr_out], dim=1)
        comb = self.mlp2(tmp)
        return torch.cat([x[:,:2], comb],dim=1)

    def forward(self, x, edge_index, edge_attr):
        """Run one round of message passing.

        1-D ``x`` / ``edge_attr`` tensors are promoted to column vectors so the
        concatenations in ``message`` and ``update`` work along ``dim=1``.
        """
        x = x.unsqueeze(-1) if x.dim() == 1 else x
        edge_attr = edge_attr.unsqueeze(-1) if edge_attr.dim() == 1 else edge_attr
        return self.propagate(edge_index, x=x, edge_attr=edge_attr)

    def message(self, x_i, x_j, edge_attr):
        """Build the per-edge message from the neighbor features and edge attributes.

        ``x_i`` is accepted but not used. Under PyG's default
        ``source_to_target`` flow, ``x_j`` holds the features of the
        neighboring (source) node of each edge.
        """
        tmp = torch.cat([x_j, edge_attr], dim=1)
        agg = self.mlp1(tmp)
        return agg

    def __repr__(self):
        # The previous format string had a single placeholder, so mlp2 was
        # silently dropped from the representation.
        return '{}(mlp1={}, mlp2={})'.format(self.__class__.__name__, self.mlp1, self.mlp2)

1. **Input Parameters:**
   - `channels`: List of integers representing the number of input, hidden, and output units in each layer of the MLP.
   - `batch_norm`: Boolean indicating whether batch normalization should be applied after each linear layer.

2. **Multi-Layer Perceptron (MLP) Construction:**
   - Define a function named `MLP` that takes the list of `channels` as input and returns a sequential composition of linear layers followed by activation functions (ReLU).
   - For each layer $i$ in the range from 1 to the length of `channels`, excluding the first element:
     - Create a linear transformation (`Lin`) from the previous layer's output size (`channels[i - 1]`) to the current layer's output size (`channels[i]`).
     - Apply the rectified linear unit (ReLU) activation function to introduce non-linearity.
     - Batch normalization (BN) after each linear layer is currently disabled: the `BN` layer is commented out in the code, so the `batch_norm` flag has no effect.

3. **Mathematical Formulas:**
   - Let `channels = [n_0, n_1, ..., n_L]`, where $n_0$ is the input size, $n_L$ is the output size, and $n_i$ represents the number of units in hidden layer $i$.
   - Define `Lin(n_{i-1}, n_i)` as a linear transformation from layer $i-1$ to layer $i$.
   - Apply the rectified linear unit (ReLU) activation function: $\text{ReLU}(x) = \max(0, x)$.
   - Optionally, apply batch normalization (BN) after each linear layer to normalize the activations.

4. **Model Overview:**
   - The function constructs a multi-layer perceptron (MLP) neural network architecture with fully connected layers and ReLU activation functions.
   - The number of layers and units in each layer are specified by the `channels` parameter.
   - Batch normalization can be optionally included after each linear layer if `batch_norm` is set to `True`.


In [84]:
def MLP(channels, batch_norm=True):
    """Stack ``Linear -> ReLU`` blocks whose widths follow ``channels``.

    Args:
        channels: Sequence of layer widths ``[n_0, n_1, ..., n_L]``; one block
            is created for every consecutive pair ``(n_{i-1}, n_i)``.
        batch_norm: Accepted for interface compatibility but currently
            ignored; no batch-normalization layer is added.

    Returns:
        A ``Sequential`` holding one inner ``Sequential(Linear, ReLU)`` per
        consecutive pair of widths (empty when fewer than two widths are given).
    """
    blocks = []
    for width_in, width_out in zip(channels[:-1], channels[1:]):
        blocks.append(Seq(Lin(width_in, width_out, bias=True), ReLU()))
    return Seq(*blocks)

1. **Class Definition:**
   - Define a class named `IGCNet` which inherits from `torch.nn.Module`.

2. **Constructor (`__init__` method):**
   - Initialize the module using `super(IGCNet, self).__init__()`.
   - Define layers:
     - `mlp1`: Multi-layer perceptron (MLP) with input size 5, hidden layer sizes [16, 32].
     - `mlp2`: Another MLP with input size 35 and a single output neuron.
     - `conv`: Instance of `IGConv` with `mlp1` and `mlp2` as parameters.

3. **Forward Pass (`forward` method):**
   - Accepts a `data` object containing features, edge attributes, and edge indices.
   - Extract features (`x0`), edge attributes (`edge_attr`), and edge indices (`edge_index`) from the `data` object.
   - Perform convolution operations using the `conv` layer:
     - Pass the input features, edge attributes, and edge indices through the convolutional layer (`self.conv`).
     - Apply the same convolution layer three times in sequence (`x1`, `x2`, then `out`), sharing weights across the iterations, for feature refinement.
   - Return the final output (`out`) obtained after multiple convolution operations.

4. **Mathematical Formulas:**
   - Let $x_0$ be the input feature matrix.
   - Let $\text{edge\_attr}$ be the edge attribute matrix.
   - Let $\text{edge\_index}$ be the edge index tensor.
   - $\text{mlp1}$ and $\text{mlp2}$ represent the multi-layer perceptron layers.
   - $\text{conv}$ represents the graph convolutional layer (`IGConv`).
   - $x_1$ and $x_2$ represent the output features after the first and second convolutional layers, respectively.
   - $\text{out}$ represents the final output after multiple convolution operations.

5. **Model Overview:**
   - The model utilizes a graph convolutional neural network (GCN) architecture to process graph-structured data.
   - It applies multiple graph convolution operations to refine the node features and capture complex relationships within the graph.


In [85]:
class IGCNet(torch.nn.Module):
    """Network that applies one shared ``IGConv`` layer three times in a row.

    ``mlp1`` maps 5 inputs to 32 features through a 16-unit layer; ``mlp2``
    maps 35 inputs to a single sigmoid-activated output through a 16-unit
    layer. Both are handed to the single ``IGConv`` instance, so every
    iteration in ``forward`` reuses the same weights.
    """

    def __init__(self):
        super().__init__()

        self.mlp1 = MLP([5, 16, 32])
        # Build the two stages of mlp2 separately, then chain them.
        hidden_stage = MLP([35, 16])
        output_stage = Seq(Lin(16, 1, bias=True), Sigmoid())
        self.mlp2 = Seq(hidden_stage, output_stage)
        self.conv = IGConv(self.mlp1, self.mlp2)

    def forward(self, data):
        """Refine ``data.x`` with three passes of the shared conv layer.

        Args:
            data: Object exposing ``x``, ``edge_attr`` and ``edge_index``.

        Returns:
            The node feature tensor produced by the third pass.
        """
        edge_attr = data.edge_attr
        edge_index = data.edge_index
        features = data.x
        for _ in range(3):
            features = self.conv(x=features, edge_index=edge_index, edge_attr=edge_attr)
        return features

1. **Input Parameter:**
   - $n$: Number of nodes in the graph.

2. **Generating Complete Graph:**
   - Initialize an empty list $\text{adj}$ to store the adjacency list representing the graph.
   - For each node $i$ from $0$ to $n-1$:
     - For each node $j$ from $0$ to $n-1$:
       - If $i \neq j$, indicating that the edge is not a self-loop:
         - Add the edge $(i, j)$ to the adjacency list $\text{adj}$.

3. **Output:**
   - $\text{adj}$: Adjacency list representing the complete graph, where each element is a pair of nodes representing an edge.


In [86]:
def get_cg(n):
    """Return the directed edge list of the complete graph on ``n`` nodes.

    Every ordered pair ``[i, j]`` with ``i != j`` and ``0 <= i, j < n`` is
    listed once, ordered by ``i`` and then by ``j``. Self-loops are excluded.
    """
    return [[src, dst] for src in range(n) for dst in range(n) if src != dst]

1. **Input Parameters:**
   - $H$: Channel gains matrix of shape $K \times K$, where $K$ is the number of users.
   - $A$: User-specific scaling factors matrix of shape $K \times 1$.
   - $\text{adj}$: Adjacency list representing the graph structure.

2. **Building Graph Representation:**
   - $n = H.shape[0]$ (number of nodes in the graph)
   - $x_1 = \text{np.expand\_dims}(\text{diag}(H), axis=1)$ (expand dimensions to represent diagonal elements of $H$)
   - $x_2 = \text{np.expand\_dims}(A, axis=1)$ (expand dimensions of $A$)
   - $x_3 = \text{np.ones}((K,1))$ (create a column vector of ones)
   - Concatenate $x_1$, $x_2$, and $x_3$ to form feature vectors $x$
   - Initialize an empty list $\text{edge\_attr}$ to store edge attributes
   - For each edge $e$ in the adjacency list $\text{adj}$:
     - Compute the edge attributes based on the corresponding channel gains in $H$ and add them to $\text{edge\_attr}$
   - Convert feature vectors $x$, edge indices, edge attributes, node labels $y$, and node positions $\text{pos}$ into PyTorch tensors
   - Create a `Data` object using the tensors to represent the graph data.

3. **Output:**
   - `data`: PyTorch `Data` object representing the graph, containing features, edge indices, edge attributes, node labels, and node positions.


In [87]:
def build_graph(H,A,adj):
    """Convert one channel realization into a graph ``Data`` object.

    Args:
        H: Square 2-D array of channel gains; its size sets the node count.
        A: 1-D array with one weight per node.
        adj: List of ``[i, j]`` node-index pairs, one per directed edge.

    Returns:
        ``Data`` with
        * ``x``: per-node features ``[H[k, k], A[k], 1]``,
        * ``edge_index``: ``adj`` transposed to shape ``(2, num_edges)``,
        * ``edge_attr``: per-edge pair ``[H[i, j], H[j, i]]``,
        * ``y``: ``H`` with a leading singleton axis,
        * ``pos``: ``A`` with a leading singleton axis.
    """
    # Size everything from H itself rather than from a notebook-level `K`,
    # so the function also works when that global is missing or differs.
    n = H.shape[0]
    direct_gain = np.expand_dims(np.diag(H), axis=1)
    weight = np.expand_dims(A, axis=1)
    ones = np.ones((n, 1))
    x = np.concatenate((direct_gain, weight, ones), axis=1)

    # Each edge carries the gain in both directions between its endpoints.
    edge_attr = [[H[src, dst], H[dst, src]] for src, dst in adj]

    x = torch.tensor(x, dtype=torch.float)
    edge_index = torch.tensor(adj, dtype=torch.long)
    edge_attr = torch.tensor(edge_attr, dtype=torch.float)
    # The leading axis lets batched graphs stack H and A along dim 0.
    y = torch.tensor(np.expand_dims(H,axis=0), dtype=torch.float)
    pos = torch.tensor(np.expand_dims(A,axis=0), dtype=torch.float)

    data = Data(x=x, edge_index=edge_index.t().contiguous(),edge_attr = edge_attr, y = y, pos = pos)
    return data

1. **Input Parameters:**
   - $HH$: Channel gains matrices of shape $n \times K \times K$, where $n$ is the number of samples, and $K$ is the number of users.
   - $AA$: User-specific scaling factors matrices of shape $n \times K$, where $n$ is the number of samples, and $K$ is the number of users.

2. **Processing Data:**
   - Initialize an empty list $\text{data\_list}$ to store processed data for each sample.
   - Obtain the channel gain graph ($cg$) using the function $\text{get\_cg}(K)$, where $K$ is the number of users.
   - For each sample $i$ from $1$ to $n$:
     - Build the graph representation of the channel gains and user-specific scaling factors using the function $\text{build\_graph}$.
     - Append the processed data (graph representation) to $\text{data\_list}$.

3. **Output:**
   - $\text{data\_list}$: List containing the processed data for each sample, where each element represents the graph representation of the channel gains and user-specific scaling factors.


In [88]:

def proc_data(HH,AA):
    """Build one graph per sample from stacked channel matrices and weights.

    Args:
        HH: Array indexed as ``HH[i]`` giving the channel matrix of sample
            ``i``; ``HH.shape[1]`` is taken as the number of users.
        AA: Array indexed as ``AA[i]`` giving the per-user weights of sample ``i``.

    Returns:
        List with the ``build_graph`` result for every sample, in order.
    """
    # Derive the user count from the data instead of the notebook-level `K`,
    # so the edge list always matches the matrices being processed.
    num_users = HH.shape[1]
    cg = get_cg(num_users)
    return [build_graph(HH[i], AA[i], cg) for i in range(HH.shape[0])]

1. **Input Parameters:**
   - $H$: Channel gains array of shape $n \times K \times K$, where $n$ is the number of samples and $K$ is the number of users.
   - $p$: Transmit power variables with one value per user and sample (reshaped internally to $n \times K \times 1 \times 1$).
   - $\alpha$: User-specific weights matrix of shape $N \times K$.
   - $\text{var\_noise}$: Variance of the noise.

2. **Expanding Dimensions:**
   - Expand the dimensions of $H$ to include an additional singleton dimension at the end: $H = \text{np.expand\_dims}(H, axis=-1)$.

3. **Calculating Received Power:**
   - $\text{rx\_power} = H \cdot p$ (element-wise multiplication)
   - $\text{rx\_power} = \sum(\text{rx\_power}, axis=-1)$ (sum along the last dimension)
   - $\text{rx\_power} = |\text{rx\_power}|^2$ (square of absolute values)

4. **Calculating Valid and Interference Powers:**
   - $\text{mask} = \text{np.eye}(K)$ (identity matrix of size $K \times K$)
   - $\text{valid\_rx\_power} = \sum(\text{rx\_power} \cdot \text{mask}, axis=1)$ (sum of diagonal elements)
   - $\text{interference} = \sum(\text{rx\_power} \cdot (1 - \text{mask}), axis=1) + \text{var\_noise}$ (sum of off-diagonal elements)

5. **Calculating Rates:**
   - $\text{rate} = \log\left(1 + \frac{\text{valid\_rx\_power}}{\text{interference}}\right)$

6. **Weighted Rates:**
   - $\text{w\_rate} = \alpha \cdot \text{rate}$

7. **Calculating Sum Rate:**
   - $\text{sum\_rate} = \frac{1}{N} \sum(\sum(\text{w\_rate}, axis=1))$ (mean of sum of weighted rates across all samples)

8. **Output:**
   - $\text{sum\_rate}$: Average sum rate over all samples.


In [89]:
def np_sum_rate(H,p,alpha,var_noise):
    """Average weighted sum rate of a batch of interference channels (NumPy).

    Args:
        H: Array of channel gains; a trailing singleton axis is appended and
            axis 1 is taken as the number of users.
        p: Per-user transmit variables, reshaped to broadcast against ``H``
            along its first user axis.
        alpha: Per-user rate weights, broadcast against the per-user rates.
        var_noise: Noise variance added to the interference term.

    Returns:
        Mean over samples of the weighted per-user rates summed over users,
        using the natural logarithm.
    """
    gains = np.expand_dims(H, axis=-1)
    num_users = gains.shape[1]
    depth = gains.shape[-1]
    tx = p.reshape((-1, num_users, 1, depth))

    # Received power for every (transmitter, receiver) pair.
    received = np.square(abs(np.sum(np.multiply(gains, tx), axis=-1)))

    # The diagonal holds the desired links; everything else is interference.
    diag = np.eye(num_users)
    signal = np.sum(np.multiply(received, diag), axis=1)
    interference = np.sum(np.multiply(received, 1 - diag), axis=1) + var_noise

    per_user_rate = np.log(1 + np.divide(signal, interference))
    weighted = np.multiply(alpha, per_user_rate)
    return np.mean(np.sum(weighted, axis=1))

1. **Input Parameters:**
   - $X$: Input data matrix of shape $n \times K \times K$, where $n$ is the number of samples, and $K$ is the number of users.
   - $AAA$: User-specific scaling factors matrix of shape $n \times K$.
   - $label$: Label vector indicating the selected users for each sample.

2. **Initialization:**
   - $n = X.shape[0]$ (number of samples)
   - $\text{thd} = \frac{\sum \text{label}}{n}$ (threshold calculated based on the label vector)

3. **Greedy User Selection:**
   - Initialize $Y$ as a zero matrix of shape $n \times K$.
   - For each sample $ii$ from $1$ to $n$:
     - Extract the user-specific scaling factors $\alpha$ for the $ii$-th sample.
     - Calculate $H_{\text{diag}} = \alpha \cdot (\text{diagonal of } X[ii,:,:])^2$.
     - Sort the indices of $H_{\text{diag}}$ in descending order and store them in $xx$.
     - Select the top $\text{thd}$ indices from $xx$ and set the corresponding elements in $Y[ii,:]$ to 1.

4. **Output:**
   - $Y$: Matrix indicating the selected users for each sample, where each row represents a sample, and each column represents a user. A value of 1 indicates that the corresponding user is selected for the respective sample, while 0 indicates non-selection.


In [90]:
def simple_greedy(X,AAA,label):
    """Greedy on/off baseline: activate the strongest weighted direct links.

    The number of active users per sample, ``thd``, is the total of ``label``
    divided by the number of samples, truncated to an integer. For each
    sample the users are ranked by ``AAA[ii] * diag(X[ii])**2`` and the top
    ``thd`` are switched on.

    Args:
        X: Array of shape ``(n, K, K)`` with the channel matrices.
        AAA: Array indexed as ``AAA[ii, :]`` with the per-user weights.
        label: Reference allocation used only to derive ``thd``.

    Returns:
        ``(n, K)`` array of zeros and ones marking the selected users.
    """
    n = X.shape[0]
    # Take the user count from X rather than the notebook-level `K`, so the
    # output always matches the input being processed.
    num_users = X.shape[1]
    thd = int(np.sum(label)/n)
    Y = np.zeros((n, num_users))
    for ii in range(n):
        alpha = AAA[ii,:]
        H_diag = alpha * np.square(np.diag(X[ii,:,:]))
        # Indices sorted by descending weighted direct-link gain.
        ranking = np.argsort(H_diag)[::-1]
        Y[ii, ranking[:thd]] = 1
    return Y

1. **Input Parameters:**
   - $K$: Number of users in the system.
   - $num_H$: Total number of training samples.
   - $var\_noise$: Variance of the noise.
   - $Pmin$: Minimum transmit power (default value is 0).
   - $seed$: Random seed for reproducibility (default value is 2017).

2. **Data Generation:**
   - Initialize: $P_{\text{max}} = 1$
   - Initialize: $P_{\text{ini}} = P_{\text{max}} \times \mathbf{1}_{\text{num\_H} \times K \times 1}$ (a matrix of shape $\text{num\_H} \times K \times 1$ with all elements equal to $P_{\text{max}}$)
   - Generate random channel gains:
     - Complex Gaussian Channel: $\mathbf{CH} = \frac{1}{\sqrt{2}} (\mathcal{N}(0, 1) + j \times \mathcal{N}(0, 1))$ (where $j$ is the imaginary unit)
     - Magnitude of Channel Gain: $\mathbf{H} = |\mathbf{CH}|$
   - Generate transmit powers using the WMMSE algorithm:
     - $\mathbf{Y} = \text{WMMSE}(P_{\text{ini}}, \alpha, \mathbf{H}, P_{\text{max}}, var\_noise)$
   - Additionally, generate a second set of transmit powers for comparison:
     - $\mathbf{Y2} = \text{WMMSE}(P_{\text{ini}}, \text{fake\_a}, \mathbf{H}, P_{\text{max}}, var\_noise)$

3. **Output:**
   - $\mathbf{H}$: Random channel gains matrix of shape $\text{num\_H} \times K \times K$.
   - $\mathbf{Y}$: Transmitted powers matrix obtained using WMMSE algorithm of shape $\text{num\_H} \times K$.
   - $\alpha$: Matrix of shape $\text{num\_H} \times K$ containing user-specific scaling factors (currently initialized to all ones).
   - $\mathbf{Y2}$: Transmitted powers matrix obtained using WMMSE algorithm with a fixed scaling factor ($\text{fake\_a}$) of shape $\text{num\_H} \times K$.

This function generates random channel gains and computes the transmit powers for the given number of training samples using the WMMSE algorithm.


In [91]:
def generate_wGaussian(K, num_H, var_noise=1, Pmin=0, seed=2017):
    """Draw random Gaussian interference channels and their WMMSE allocations.

    Seeds NumPy's global RNG, samples ``num_H`` complex Gaussian ``K x K``
    channel matrices, and runs ``batch_WMMSE2`` on their magnitudes twice:
    once with ``alpha`` and once with ``fake_a``. Both weight arrays are
    currently all ones.

    Args:
        K: Number of users (channel matrices are ``K x K``).
        num_H: Number of channel realizations to generate.
        var_noise: Noise variance passed to ``batch_WMMSE2``.
        Pmin: Unused; kept for interface compatibility.
        seed: Seed for ``np.random.seed``.

    Returns:
        Tuple ``(H, Y, alpha, Y2)``: channel magnitudes of shape
        ``(num_H, K, K)``, the ``batch_WMMSE2`` result for ``alpha``, the
        all-ones weights of shape ``(num_H, K)``, and the ``batch_WMMSE2``
        result for ``fake_a``.
    """
    print('Generate Data ... (seed = %d)' % seed)
    np.random.seed(seed)

    Pmax = 1
    Pini = Pmax * np.ones((num_H, K, 1))
    # Unit weights for every user; swap in np.random.rand(num_H, K) for
    # random weights.
    alpha = np.ones((num_H, K))
    fake_a = np.ones((num_H, K))

    # Real part is drawn before the imaginary part; keep this order so a
    # given seed reproduces the same channels.
    CH = 1/np.sqrt(2)*(np.random.randn(num_H,K,K)+1j*np.random.randn(num_H,K,K))
    H = abs(CH)

    # batch_WMMSE2 is not defined in this notebook's own cells; it is
    # presumably provided by the notebook loaded via %run - TODO confirm.
    Y = batch_WMMSE2(Pini, alpha, H, Pmax, var_noise)
    Y2 = batch_WMMSE2(Pini, fake_a, H, Pmax, var_noise)
    return H, Y, alpha, Y2

1. **Function Definition:**
   - Define a function named `sr_loss` responsible for computing the loss between the model output and the ground truth.

2. **Input Parameters:**
   - $data$: Input data containing the ground truth information.
   - $out$: Model output containing the predicted power allocations.
   - $K$: Number of users in the system.

3. **Power Allocation Extraction:**
   - Extract the power allocations from the model output $out$.
   - Reshape the power allocations to match the shape of the input data.

4. **Calculate Received Power:**
   - Compute the received power at each receiver by multiplying the squared absolute channel gains $abs\_H\_2$ with the corresponding power allocations.

5. **Masking Valid Receiver Power:**
   - Apply a masking matrix to extract the valid receiver power, considering only the diagonal elements of the received power matrix.

6. **Calculate Interference:**
   - Compute the interference by summing the received power over all interfering users and adding the noise variance $var$.

7. **Compute Rate:**
   - Calculate the achievable rate for each user using the Shannon capacity formula (the code uses the natural logarithm): $\text{rate} = \log\left(1 + \frac{\text{valid\_rx\_power}}{\text{interference}}\right)$.

8. **Weighted Rate Calculation:**
   - Multiply the achievable rate for each user by the corresponding per-user weight stored in `data.pos` (this field holds the weights $A$ passed to `build_graph`, not spatial positions).

9. **Compute Sum Rate:**
   - Calculate the sum rate by summing the weighted rates over the users of each sample and then averaging over the samples in the batch.

10. **Loss Calculation:**
    - Negate the sum rate to obtain the loss value.
    - Since the goal is to maximize the sum rate, negating it turns the optimization problem into a minimization problem.

11. **Mathematical Formulas:**
    - Let $ \mathcal{D} = \{ (\mathbf{X}_{i}, \mathbf{Y}_{i}) \}_{i=1}^{\text{num\_test}} $ represent the input data, where $ \mathbf{X}_i $ is the input data and $ \mathbf{Y}_i $ is the corresponding ground truth.
    - Let $ out $ represent the model output containing the predicted power allocations.
    - Let $ K $ be the number of users in the system.
    - Let $ power $ represent the extracted power allocations from the model output.
    - Let $\text{abs\_H}$ represent the absolute channel gains.
    - Let $\text{rx\_power}$ represent the received power at each receiver.
    - Let $\text{mask}$ represent the masking matrix to extract valid receiver power.
    - Let $\text{valid\_rx\_power}$ represent the valid receiver power after applying the masking matrix.
    - Let $\text{interference}$ represent the interference experienced by each receiver.
    - Let $\text{rate}$ represent the achievable rate for each user.
    - Let $\text{w\_rate}$ represent the weighted rate for each user, using the per-user weights stored in `data.pos`.
    - Let $\text{sum\_rate}$ represent the sum rate: the weighted rates summed over users and averaged over the samples in the batch.
    - The loss function is computed as the negative of the sum rate to be minimized during training.


In [92]:
def sr_loss(data, out, K, noise_var=None):
    """Negative average weighted sum rate of the predicted power allocation.

    Args:
        data: Batch exposing ``y`` (channel gains, broadcastable against a
            ``(-1, K, 1)`` power tensor and a ``K x K`` mask) and ``pos``
            (per-user rate weights).
        out: Model output; column 2 is read as the per-node power.
        K: Number of users per graph.
        noise_var: Noise variance added to the interference. When ``None``
            (the default) the notebook-level ``var`` is used, which matches
            the previous behavior.

    Returns:
        Scalar tensor: minus the batch mean of the per-sample weighted sum
        rate (natural logarithm), suitable for minimization.
    """
    if noise_var is None:
        noise_var = var  # fall back to the notebook-level noise variance

    power = torch.reshape(out[:, 2], (-1, K, 1))
    gain_sq = torch.pow(data.y, 2)
    rx_power = torch.mul(gain_sq, power)

    # Build the mask on the same device/dtype as the data instead of relying
    # on a notebook-level `device` variable.
    mask = torch.eye(K, device=rx_power.device, dtype=rx_power.dtype)
    valid_rx_power = torch.sum(torch.mul(rx_power, mask), 1)
    interference = torch.sum(torch.mul(rx_power, 1 - mask), 1) + noise_var

    rate = torch.log(1 + torch.div(valid_rx_power, interference))
    w_rate = torch.mul(data.pos, rate)
    sum_rate = torch.mean(torch.sum(w_rate, 1))
    return torch.neg(sum_rate)

1. **Function Definition:**
   - Define a function named `train` responsible for training the model.

2. **Training Loop:**
   - Set the model to training mode (`model.train()`).
   - Initialize the total loss variable (`total_loss`) to 0.
   - Iterate over the training data batches using the `train_loader`.
   - For each batch of data:
     - Move the data to the appropriate device (CPU or GPU).
     - Zero out the gradients of the optimizer (`optimizer.zero_grad()`).
     - Pass the data through the model (`model(data)`).
     - Calculate the loss between the model output and the ground truth using the `sr_loss` function.
     - Backpropagate the gradients (`loss.backward()`).
     - Update the total loss with the current batch loss multiplied by the number of graphs in the batch.
     - Update the model parameters using the optimizer (`optimizer.step()`).

3. **Loss Calculation:**
   - The loss function (`sr_loss`) computes the loss between the model output and the ground truth.
   - The total loss is the average loss across all batches, normalized by the total number of training samples (`num_H`).

4. **Mathematical Formulas:**
   - Let $\mathcal{D} = \{(\mathbf{X}_i, \mathbf{Y}_i)\}_{i=1}^{\text{num\_H}}$ represent the training dataset, where $\mathbf{X}_i$ is the input data and $\mathbf{Y}_i$ is the corresponding ground truth.
   - Let $\text{train\_loader}$ be the data loader that provides batches of data from $\mathcal{D}$.
   - Let $\text{model}$ represent the neural network model being trained.
   - Let $\text{optimizer}$ be the optimization algorithm used to update the model parameters.
   - Let $\text{loss}(\mathbf{Y}_{\text{true}}, \mathbf{Y}_{\text{pred}})$ be the loss function used to measure the discrepancy between the true and predicted values.
   - The total loss $\mathcal{L}$ is calculated as the average loss across all training samples:
     $$ \mathcal{L} = \frac{1}{\text{num\_H}} \sum_{i=1}^{\text{num\_H}} \text{loss}(\mathbf{Y}_i, \text{model}(\mathbf{X}_i)) $$

5. **Model Update:**
   - The optimizer updates the model parameters using the gradients obtained from backpropagation.


In [96]:
def train():
    """Run one training epoch and return the mean loss per graph.

    Uses the notebook-level ``model``, ``train_loader``, ``optimizer``,
    ``device`` and ``K``. Each batch loss is weighted by the number of graphs
    in the batch, and the total is divided by the number of graphs actually
    processed, so the result stays correct even if the loader's dataset size
    differs from the notebook-level ``num_H``.

    Returns:
        Average ``sr_loss`` per graph over the epoch (0.0 for an empty loader).
    """
    model.train()

    total_loss = 0
    graphs_seen = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        out = model(data)
        loss = sr_loss(data,out,K)
        loss.backward()
        optimizer.step()
        # The batch loss is a per-graph mean, so re-weight by batch size.
        total_loss += loss.item() * data.num_graphs
        graphs_seen += data.num_graphs
    return total_loss / graphs_seen if graphs_seen else 0.0

1. **Function Definition:**
   - Define a function named `test` responsible for evaluating the model's performance on the test dataset.

2. **Evaluation Loop:**
   - Set the model to evaluation mode (`model.eval()`).
   - Initialize the total loss variable (`total_loss`) to 0.
   - Iterate over the test data batches using the `test_loader`.
   - For each batch of data:
     - Move the data to the appropriate device (CPU or GPU).
     - Disable gradient calculation to save memory and computation (`torch.no_grad()`).
     - Pass the data through the model (`model(data)`).
     - Calculate the loss between the model output and the ground truth using the `sr_loss` function.
     - Update the total loss with the current batch loss multiplied by the number of graphs in the batch.

3. **Loss Calculation:**
   - The loss function (`sr_loss`) computes the loss between the model output and the ground truth.
   - The total loss is the average loss across all batches, normalized by the total number of testing samples (`num_test`).

4. **Mathematical Formulas:**
   - Let $\mathcal{D} = \{(\mathbf{X}_i, \mathbf{Y}_i)\}_{i=1}^{\text{num\_test}}$ represent the test dataset, where $\mathbf{X}_i$ is the input data and $\mathbf{Y}_i$ is the corresponding ground truth.
   - Let $\text{test\_loader}$ be the data loader that provides batches of data from $\mathcal{D}$.
   - Let $\text{model}$ represent the neural network model being evaluated.
   - The total loss $\mathcal{L}$ is calculated as the average loss across all testing samples:
     $$ \mathcal{L} = \frac{1}{\text{num\_test}} \sum_{i=1}^{\text{num\_test}} \text{loss}(\mathbf{Y}_i, \text{model}(\mathbf{X}_i)) $$

5. **Model Evaluation:**
   - The model's performance is evaluated by computing the loss on the test dataset, providing insight into its generalization ability.


In [94]:
def test():
    model.eval()

    total_loss = 0
    for data in test_loader:
        data = data.to(device)
        with torch.no_grad():
            out = model(data)
            loss = sr_loss(data,out,K)
            total_loss += loss.item() * data.num_graphs
    return total_loss / num_test

1. **Parameters:**
   - $K$: Number of users in the system.
   - $num_{H}$: Total number of training samples.
   - $num_{test}$: Total number of testing samples.
   - $training_epochs$: Number of training epochs.
   - $trainseed$: Random seed for generating the training dataset.
   - $testseed$: Random seed for generating the test dataset.
   - $var\_db$: Variance in decibels.
   - $var$: Actual variance.

2. **Data Generation:**
   - Training Data: $(X_{\text{train}}, Y_{\text{train}}, A_{\text{train}}, \text{wtime}) = \text{generate\_wGaussian}(K, num_H, trainseed, var\_noise)$
   - Test Data: $(X, Y, A, Y2) = \text{generate\_wGaussian}(K, num_test, testseed, var\_noise)$

3. **Algorithm Execution:**
   - Simple Greedy Algorithm: $\text{bl\_Y} = \text{simple\_greedy}(X, A, Y)$

4. **Data Processing:**
   - Training Data Processing: $\text{train\_data\_list} = \text{proc\_data}(X_{\text{train}}, A_{\text{train}})$
   - Test Data Processing: $\text{test\_data\_list} = \text{proc\_data}(X, A)$

5. **Model Definition:**
   - $\text{device} = \text{cuda if available else cpu}$
   - Neural Network Model: $\text{model} = \text{IGCNet}().\text{to(device)}$

6. **Training:**
   - Optimizer: $\text{optimizer} = \text{Adam}(\text{model.parameters()}, lr=0.001)$
   - Learning Rate Scheduler: $\text{scheduler} = \text{StepLR}(\text{optimizer}, step\_size=20, gamma=0.9)$
   - Training Data Loader: $\text{train\_loader} = \text{DataLoader}(\text{train\_data\_list}, batch\_size=64, shuffle=True, num\_workers=1)$
   - Test Data Loader: $\text{test\_loader} = \text{DataLoader}(\text{test\_data\_list}, batch\_size=2000, shuffle=False, num\_workers=1)$

7. **Training Loop:**
   - For each epoch in range(1, 200):
     - Training: $\text{loss1} = \text{train}()$
     - If epoch is a multiple of 8:
       - Testing: $\text{loss2} = \text{test}()$
       - Print epoch number, training loss, and validation loss.
     - Learning Rate Scheduler Step: $\text{scheduler.step()}$


In [95]:
# ---- Experiment configuration ----
K = 10                                # number of users
num_H = 10000                         # number of training samples
num_test = 2000                       # number of testing  samples
training_epochs = 50                  # number of training epochs
trainseed = 0                         # set the random seed for the training set
testseed = 7                          # set random seed for test set
print('Gaussian IC Case: K=%d, Total Samples: %d, Total Iterations: %d\n'%(K, num_H, training_epochs))
var_db = 10
var = 1/10**(var_db/10)               # noise variance: 1 / 10^(var_db / 10)

# ---- Data generation and baselines ----
# NOTE: the fourth return value of generate_wGaussian is the second WMMSE
# allocation, not a timing; the name `wtime` is kept so other cells that may
# reference it keep working.
Xtrain, Ytrain, Atrain, wtime = generate_wGaussian(K, num_H, seed=trainseed, var_noise = var)
X, Y, A, Y2 = generate_wGaussian(K, num_test, seed=testseed, var_noise = var)
bl_Y = simple_greedy(X,A,Y)
print('greedy:',np_sum_rate(X,bl_Y,A,var))
print('wmmse:',np_sum_rate(X.transpose(0,2,1),Y,A,var))
print('wmmse unweighted:',np_sum_rate(X.transpose(0,2,1),Y2,A,var))

# ---- Graph datasets, model and optimizer ----
train_data_list = proc_data(Xtrain,Atrain)
test_data_list = proc_data(X,A)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(torch.cuda.is_available())
model = IGCNet().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.9)
train_loader = DataLoader(train_data_list, batch_size=64, shuffle=True,num_workers=1)
test_loader  = DataLoader(test_data_list, batch_size=2000, shuffle=False, num_workers=1)

# ---- Training loop ----
# The loop previously started with a stray leading space, which makes the
# whole cell fail with "IndentationError: unexpected indent".
# NOTE(review): the loop runs 199 epochs although `training_epochs` (50) is
# what the banner above prints - confirm which value is intended.
for epoch in range(1, 200):
    loss1 = train()
    if(epoch % 8 == 0):
        loss2 = test()
        print('Epoch {:03d}, Train Loss: {:.4f}, Val Loss: {:.4f}'.format(
            epoch, loss1, loss2))
    scheduler.step()

Gaussian IC Case: K=10, Total Samples: 10000, Total Iterations: 50

Generate Data ... (seed = 0)
Generate Data ... (seed = 7)
greedy: 2.8668466257428618
wmmse: 3.841383699946745
wmmse unweighted: 3.841383699946745
False
