# Import Packages
* fake_data for generating data and labels
* torch for build networks
* pickle for loading data

The network parameter types of pytorch in the code should be consistent:
```python
torch.set_default_tensor_type(torch.FloatTensor)
```
and sometimes the kernel of jupterbook appears to have died, In order to prevent it from happening, so set:
```python
os.environ['KMP_DUPLICATE_LIB_OK']='True'
```

In [92]:
import os
import math
import torch
import random
import pickle
import calendar
import numpy as np
import training 
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader
os.environ['KMP_DUPLICATE_LIB_OK']='True'
torch.set_default_tensor_type(torch.FloatTensor)

# Loading data
Input data\(X)'s size is \[16,10992], 10992 examples, 16 features. y is the corresponding labels, which has 10 classes\(0~9).
The data is divided into train, valid and test, which ratio is 6:2:2. Then the value of data is transformed to \[0,1].

In [93]:
with open('Dataset_Pendigits.p', 'rb') as f:
    dataset = pickle.load(f)
X = dataset['X'].float().T
y = dataset['y'].unsqueeze(1).T
#E, N_features, N_class = X.shape[0], X.shape[1], torch.max(
#    torch.unique(y)).item()+1
x_train, y_train, x_valid, y_valid,x_test,y_test=training.SplitData(X,y,0.6,0.2,seed=0)
x_train= x_train - torch.min(x_train, axis=0)[0]
x_train= x_train/ (torch.max(x_train, axis=0)[0])
x_valid= x_valid - torch.min(x_valid, axis=0)[0]
x_valid= x_valid/ (torch.max(x_valid, axis=0)[0])
x_test= x_test - torch.min(x_test, axis=0)[0]
x_test= x_test/ (torch.max(x_test, axis=0)[0])
x_train,y_train,x_valid, y_valid,x_test, y_test =x_train.T ,y_train.flatten().long(),x_valid.T ,y_valid.flatten().long(),x_test.T ,y_test.flatten().long()
train_ds = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_ds, batch_size=y_train.numel())
valid_ds = TensorDataset(x_valid, y_valid)
valid_loader = DataLoader(valid_ds, batch_size=y_valid.numel())
test_ds = TensorDataset(x_test, y_test)
test_loader = DataLoader(test_ds, batch_size=y_test.numel())

# Define neural network
this is a network which has 16 input units, 5 hidden units and 10 output units. 

In [94]:
class DynamicLayer(torch.nn.Module):
    # initialize the class
    def __init__(self,in_d,out_d):
        super().__init__()
        self.g=torch.nn.Parameter(torch.randn(in_d+2,out_d))
        
    def Tanh(self,x):
        return (torch.exp(x) - torch.exp(-x) )/ (torch.exp(x) + torch.exp(-x))
    #def Tanh(self,x,a,b,c,d):
    #    return a+b*(torch.exp((x-c)*d) - torch.exp(-(x-c)*d)) / (torch.exp((x-c)*d) + torch.exp(-(x-c)*d))
    @property
    def w(self):
        return self.g.abs()/(self.g.abs().sum(dim=0))
    
    def linear(self,data,weight): 
        vb=torch.ones((data.shape[0],1))
        vd=torch.zeros((data.shape[0],1))
        data=torch.cat((data,vb,vd),1)     
        output=torch.zeros(data.shape[0],weight.shape[1])
        for i in range(weight.shape[1]):
            fakedata=data.clone()
            index=torch.where(weight[:,i])
            for number in index:
                data[:,number]=-fakedata[:,number]
            output_i=torch.mm(data,weight[:,i].unsqueeze(1)) 
            output[:,i]=output_i.squeeze()
        #output=torch.mm(data,weight)
        return output
       
    def forward(self, x):         
        x=self.Tanh(self.linear(x,self.w))
        return x       

In [95]:
class DynamicNet(torch.nn.Module):
    def __init__(self):
        super().__init__()
        #self.g1=torch.nn.Parameter(torch.randn(7,10))
        self.layer1=DynamicLayer(in_d=16,out_d=5)
        self.layer2=DynamicLayer(in_d=5,out_d=10)
    
    
    def forward(self, x):        
        x=self.layer1(x)
        x=self.layer2(x)
        
        #x=self.linear(x,self.w(self.g1))
        return x       

# Initial Parameters of Weight

In [78]:
model=DynamicNet()
params = list(model.parameters())
print(params)

[Parameter containing:
tensor([[ 5.0536e-01, -1.2335e-01, -9.8330e-02,  5.7941e-01, -9.2380e-02],
        [-2.2507e-01, -4.0093e-01, -4.7184e-02, -1.3110e+00, -1.9283e+00],
        [ 1.1235e+00, -7.1757e-01, -2.0265e+00, -1.1439e+00, -6.6647e-02],
        [-6.0107e-01,  4.5145e-01, -3.6709e-01, -9.5386e-01, -8.4613e-01],
        [ 6.9578e-01,  1.0945e+00, -5.0804e-01, -4.7111e-01, -8.7869e-01],
        [ 1.1407e-01,  1.2807e-01,  1.0591e+00, -2.9482e-01,  1.1040e+00],
        [-2.4654e+00, -1.4785e+00,  1.2659e+00,  1.0890e-01, -7.9559e-01],
        [-2.9031e+00,  5.5079e-01,  1.1495e+00, -1.7039e-01, -1.9767e+00],
        [-2.2044e+00,  1.0707e+00,  1.8059e-01,  8.8479e-02,  1.1786e-01],
        [-2.9599e-01, -6.3283e-01,  7.7413e-04,  4.7265e-02,  1.0699e+00],
        [-1.5355e+00,  1.0960e+00, -7.6134e-01,  2.7015e-01, -9.0298e-01],
        [ 1.1121e+00, -2.2617e+00,  1.2802e+00, -4.6817e-01, -1.1164e+00],
        [-4.5858e-01,  3.6300e-01, -1.2239e+00, -3.5887e-02,  8.7331e-01],
  

# Training

In [96]:
lossfunction = torch.nn.CrossEntropyLoss()
epoch_times=[]
test_acc=[]
for seed in range(10):   
    torch.manual_seed(seed)
    model =  DynamicNet()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, betas=(0.9, 0.99))
    #scheduler = LambdaLR(optimizer, lr_lambda=lambda epoch: 500/(epoch+500))
    NN, train_loss_NN, valid_loss_NN, train_acc_NN, valid_acc_NN, epoch_times= \
    training.train_nn_with_scheduler(epoch_times,model,train_loader, valid_loader, optimizer,lossfunction, Epoch=10**10)

    with torch.no_grad():
        for x_test, y_test in test_loader:
            prediction_test = NN(x_test)              
            yhat_test = torch.argmax(prediction_test.data, 1)
            test_correct = torch.sum(yhat_test == y_test.data)
            acc_test = test_correct / y_test.numel()
            test_acc.append(acc_test)
print(test_acc)
print(epoch_times)

The ID for this training is 1653669653.


RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.FloatTensor [6595, 7]], which is output 0 of struct torch::autograd::CopySlices, is at version 10; expected version 9 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).

In [None]:
print(sum(test_acc)/len(test_acc))
test_acc=np.array(test_acc)
print(np.std(test_acc))
#print(max(test_acc)-sum(test_acc)/len(test_acc))
#print(min(test_acc)-sum(test_acc)/len(test_acc))

# Result
This training lasts 10000 epochs, every 500 epochs print the mean of cross-entropy loss.