# Import Packages
* fake_data for generating data and labels
* torch for building networks
* pickle for loading data

The network parameter types of pytorch in the code should be consistent:
```python
torch.set_default_tensor_type(torch.FloatTensor)
```
Sometimes the Jupyter notebook kernel dies unexpectedly. To prevent this from happening, set:
```python
os.environ['KMP_DUPLICATE_LIB_OK']='True'
```

In [1]:
import os
import math
import torch
import random
import pickle
import calendar
import numpy as np
import training 
import matplotlib.pyplot as plt
from torch.utils.data import TensorDataset, DataLoader
os.environ['KMP_DUPLICATE_LIB_OK']='True'
torch.set_default_tensor_type(torch.FloatTensor)

# Loading data
The input data \(X) has size \[16,10992]: 10992 examples with 16 features each. y holds the corresponding labels, which span 10 classes \(0~9).
The data is split into training, validation and test sets in a 6:2:2 ratio. The values of each set are then rescaled to \[0,1].

In [2]:
# Load the pickled Pendigits dataset.
# NOTE: pickle.load executes arbitrary code; only open files from a trusted source.
with open('Dataset_Pendigits.p', 'rb') as f:
    dataset = pickle.load(f)

# Transpose features and labels into the layout handed to training.SplitData.
X = dataset['X'].float().T
y = dataset['y'].unsqueeze(1).T
#E, N_features, N_class = X.shape[0], X.shape[1], torch.max(
#    torch.unique(y)).item()+1

# Split with fractions 0.6 / 0.2 (the notebook text describes a 6:2:2
# train/valid/test split -- SplitData itself is defined in `training`).
x_train, y_train, x_valid, y_valid, x_test, y_test = training.SplitData(
    X, y, 0.6, 0.2, seed=0)

# Min-max rescale every split independently along axis 0:
# subtract the minimum first, then divide by the maximum of the shifted data.
x_train = x_train - x_train.min(dim=0).values
x_train = x_train / x_train.max(dim=0).values

x_valid = x_valid - x_valid.min(dim=0).values
x_valid = x_valid / x_valid.max(dim=0).values

x_test = x_test - x_test.min(dim=0).values
x_test = x_test / x_test.max(dim=0).values

# Transpose the inputs back and flatten the labels into 1-D integer class indices.
x_train = x_train.T
y_train = y_train.flatten().long()
x_valid = x_valid.T
y_valid = y_valid.flatten().long()
x_test = x_test.T
y_test = y_test.flatten().long()

# Full-batch loaders: each split is served as one single batch.
train_ds = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_ds, batch_size=y_train.numel())

valid_ds = TensorDataset(x_valid, y_valid)
valid_loader = DataLoader(valid_ds, batch_size=y_valid.numel())

test_ds = TensorDataset(x_test, y_test)
test_loader = DataLoader(test_ds, batch_size=y_test.numel())

# Define neural network
This is a network with 16 input units, 5 hidden units and 10 output units. 

In [3]:
class DynamicLayer(torch.nn.Module):
    """Dense layer with column-normalised weights followed by a tanh activation.

    The learnable tensor ``g`` has shape ``(in_d + 2, out_d)``. The two extra
    rows pair with a constant-one column and a constant-zero column that
    ``linear`` appends to the input. Before use, every column of ``g`` is
    divided by the sum of its absolute values (see ``w``).
    """

    def __init__(self,in_d,out_d):
        """Create the layer.

        Args:
            in_d: number of input features.
            out_d: number of output units.
        """
        super().__init__()
        # in_d rows for the inputs + 1 row for the ones column + 1 row for the zeros column.
        self.g=torch.nn.Parameter(torch.randn(in_d+2,out_d))

    def Tanh(self,x):
        """Element-wise hyperbolic tangent.

        Uses ``torch.tanh`` rather than the explicit exp-based quotient, which
        evaluates to inf/inf = NaN once ``exp(|x|)`` overflows.
        """
        return torch.tanh(x)

    def w(self,g):
        """Scale each column of ``g`` so that its absolute values sum to one."""
        return g/(g.abs().sum(dim=0))

    def linear(self,data,weight):
        """Append a ones column and a zeros column to ``data``, then multiply by ``weight``.

        Args:
            data: 2-D tensor of shape ``(batch, in_d)``.
            weight: 2-D tensor of shape ``(in_d + 2, out_d)``.

        Returns:
            Tensor of shape ``(batch, out_d)``.
        """
        n_rows=data.shape[0]
        # Build the constant columns on the input's device (with the weight's
        # dtype) so the layer keeps working after being moved to another device.
        vb=torch.ones((n_rows,1),dtype=weight.dtype,device=data.device)
        vd=torch.zeros((n_rows,1),dtype=weight.dtype,device=data.device)
        augmented=torch.cat((data,vb,vd),1)
        return torch.mm(augmented,weight)

    def forward(self, x):
        """Apply the normalised linear map to ``x`` and pass the result through tanh."""
        return self.Tanh(self.linear(x,self.w(self.g)))

In [11]:
class DynamicNet(torch.nn.Module):
    """Two stacked ``DynamicLayer`` blocks followed by a learnable square output matrix.

    With the default arguments the network has 16 inputs, 5 hidden units and
    10 outputs, exactly as before; the sizes can now be overridden.

    Args:
        in_d: number of input features (default 16).
        hidden_d: number of hidden units (default 5).
        out_d: number of output units / classes (default 10).
    """
    def __init__(self, in_d=16, hidden_d=5, out_d=10):
        super().__init__()
        # g1 is created first, then layer1, then layer2: keeping this order
        # means a fixed seed yields the same initial parameters as before.
        self.g1=torch.nn.Parameter(torch.randn(out_d,out_d))
        self.layer1=DynamicLayer(in_d=in_d,out_d=hidden_d)
        self.layer2=DynamicLayer(in_d=hidden_d,out_d=out_d)

    def forward(self, x):
        """Run ``x`` through both layers and multiply the result by ``g1``.

        Args:
            x: 2-D tensor of shape ``(batch, in_d)``.

        Returns:
            Tensor of shape ``(batch, out_d)``.
        """
        hidden=self.layer1(x)
        activated=self.layer2(hidden)
        return torch.mm(activated,self.g1)

# Initial Parameters of Weight

In [16]:
# Build a fresh network and dump its initial (randomly drawn) parameter tensors.
model = DynamicNet()
params = [param for param in model.parameters()]
print(params)

[Parameter containing:
tensor([[-1.0700,  0.3825,  0.4559, -0.5843, -2.8685],
        [-0.6814, -1.0809,  0.1738, -0.5099, -0.6779],
        [ 1.0894,  0.4191,  0.3440,  0.0781, -0.7547],
        [-0.1203, -0.4169, -1.4198, -0.1352, -0.6433],
        [-0.0549, -1.2162, -1.0078,  1.3204, -0.8347],
        [ 0.4197,  0.4105, -0.2025,  0.5457, -0.6574],
        [ 0.8635,  2.6863,  1.0971,  1.4041,  0.7662],
        [ 1.0596,  0.0217, -0.7074,  0.0664, -1.8974],
        [-0.5456,  0.6247,  0.6791,  0.5482, -0.0892],
        [-0.5464,  0.0162,  2.7874, -1.9327,  0.7942],
        [-0.0669,  1.2865,  0.2882,  1.1416,  1.1156],
        [ 0.5661, -0.6580, -0.8577, -0.3073,  0.7638],
        [-3.4053, -2.2229,  0.5699,  0.7617, -1.1900],
        [ 0.4383, -0.1733, -1.4250, -1.3035, -1.1419],
        [-0.6002, -0.5101, -1.5076, -1.6444,  0.1163],
        [-0.9219, -2.0487, -1.0731, -0.3445, -0.6022],
        [-0.1326, -0.9399, -0.6903,  0.4113,  1.3609],
        [-0.2817,  0.0892,  0.3759,  0.204

# Training

In [12]:
# Train one network per seed and record its accuracy on the test set.
lossfunction = torch.nn.CrossEntropyLoss()
epoch_times=[]
test_acc=[]   # one Python float per evaluated test batch
for seed in range(10):
    # Seed before building the model so that each run's initial weights are reproducible.
    torch.manual_seed(seed)
    model =  DynamicNet()
    optimizer = torch.optim.Adam(model.parameters(), lr=1, betas=(0.9, 0.99))
    #scheduler = LambdaLR(optimizer, lr_lambda=lambda epoch: 500/(epoch+500))
    # NOTE(review): train_nn_with_scheduler lives in the project-local `training`
    # module; presumably it returns the trained network, the loss/accuracy
    # histories and the updated epoch_times list -- confirm against that module.
    NN, train_loss_NN, valid_loss_NN, train_acc_NN, valid_acc_NN, epoch_times= \
    training.train_nn_with_scheduler(epoch_times,model,train_loader, valid_loader, optimizer,lossfunction, Epoch=10**10)

    with torch.no_grad():
        # Distinct loop names: iterating as `x_test, y_test` would overwrite the
        # notebook-level test tensors created in the data-loading cell.
        for x_batch, y_batch in test_loader:
            prediction_test = NN(x_batch)
            yhat_test = torch.argmax(prediction_test, 1)
            # .item() turns the count into a Python int, so the ratio below is a
            # plain float (true division) instead of a 0-d tensor.
            test_correct = torch.sum(yhat_test == y_batch).item()
            acc_test = test_correct / y_batch.numel()
            test_acc.append(acc_test)
print(test_acc)
print(epoch_times)

The ID for this training is 1653578451.
| Epoch:        0 | Valid accuracy: 0.09418 | Valid loss: 2.618660450 |
| Epoch:      500 | Valid accuracy: 0.92220 | Valid loss: 0.245437056 |
| Epoch:     1000 | Valid accuracy: 0.92630 | Valid loss: 0.226458117 |


KeyboardInterrupt: 

In [None]:
# Mean test accuracy over all recorded runs.
mean_test_acc = sum(test_acc) / len(test_acc)
print(mean_test_acc)
# Convert to an array (rebinding test_acc) and report the standard deviation.
test_acc = np.array(test_acc)
print(np.std(test_acc))
#print(max(test_acc)-sum(test_acc)/len(test_acc))
#print(min(test_acc)-sum(test_acc)/len(test_acc))

# Result
This training lasts 10000 epochs; the mean cross-entropy loss is printed every 500 epochs.