In [1]:
import numpy as np
import logging

# Configure the root logger and lower its threshold to INFO so that the
# logger.info(...) calls used throughout this notebook are emitted.
logging.basicConfig()
logger = logging.getLogger()
logger.setLevel(logging.INFO)

In [2]:
# Dataset dimensions handed to load_data(), which uses them to pre-allocate
# the feature and label arrays and to validate each parsed row.
_num_tuples = 1296    # number of rows (samples) to allocate
_num_features = 45    # values expected per row of the data file
_label_dims = 11      # values expected per row of the label file

In [3]:
def _split_csv_line(line):
    """Split one CSV text line into its fields.

    The line ending (``\\n`` or ``\\r\\n``) and any trailing commas are removed
    first. Trailing commas are padding (empty cells) left by spreadsheet
    exports; an empty field can never be parsed as a number, so dropping them
    cannot discard data.
    """
    return line.rstrip('\r\n').rstrip(',').split(',')


def load_data(files, num_tuples, num_features, label_dims):
    """Load a feature matrix and a label matrix from two comma-separated files.

    Args:
        files: pair ``(data_file, label_file)`` of paths to the CSV files.
        num_tuples: number of rows to allocate in both returned arrays.
        num_features: number of values expected on each line of ``data_file``.
        label_dims: number of values expected on each line of ``label_file``.

    Returns:
        ``(data, labels)``: float arrays of shape ``(num_tuples, num_features)``
        and ``(num_tuples, label_dims)``. Line ``i`` of a file fills row ``i``.
        A line whose field count does not match is reported through ``logger``
        and its row is left as zeros.

    Raises:
        ValueError: if a field of a correctly sized line is not a number.
        IndexError: if a correctly sized line appears beyond ``num_tuples`` rows.
    """
    data_file, label_file = files
    print(data_file, label_file)
    data = np.zeros((num_tuples, num_features))
    labels = np.zeros((num_tuples, label_dims))

    # Text mode is required: in binary mode Python 3 yields bytes, and
    # bytes.rstrip('\n') / bytes.split(',') raise TypeError.
    with open(data_file, 'r') as f:
        for i, line in enumerate(f):
            fields = _split_csv_line(line)
            # Explicit check instead of `assert`, which is stripped under -O.
            if len(fields) != num_features:
                logger.info("Wrong Feature Number claimed !, {}, {}".format(num_features, len(fields)))
                continue
            data[i, :] = [float(token) for token in fields]

    with open(label_file, 'r') as f:
        for i, line in enumerate(f):
            fields = _split_csv_line(line)
            if len(fields) != label_dims:
                logger.info("Wrong Label Dimensions claimed !")
                continue
            labels[i, :] = [float(token) for token in fields]

    return data, labels

In [4]:
# Read the feature matrix and the label matrix from the two CSV files,
# using the dimensions declared in the configuration cell.
data, labels = load_data(('spectrum.csv', 'temperature.csv'), _num_tuples, _num_features, _label_dims)

('spectrum.csv', 'temperature.csv')


In [5]:
class Dataset(object):
    """In-memory pairing of a 2-D feature array with a 2-D label array.

    Supports ``len()`` and integer indexing, returning ``(features, labels)``
    for one sample.

    Args:
        data: indexable whose element 0 is the feature array of shape
            ``(m, n)`` and whose element 1 is the label array with one row
            per sample.
    """

    def __init__(self, data):
        # m, n denote number of tuples and features respectively
        self._m = data[0].shape[0]
        self._n = data[0].shape[1]
        self._training_data = data[0]
        self._training_labels = data[1]

    def __len__(self):
        """Return the number of samples."""
        return self._m

    def __getitem__(self, idx):
        """Return ``(feature_row, label_row)`` for sample ``idx``."""
        return self._training_data[idx, :], self._training_labels[idx, :]

    def fetch_col(self, col_index):
        """Return feature column ``col_index`` across all samples."""
        return self._training_data[:, col_index]

    def shuffle(self, seed=None):
        """Apply one random row permutation to both features and labels.

        Args:
            seed: optional seed for NumPy's global random state. Any value
                other than ``None`` is honoured, including ``0``.
        """
        # `if seed:` would treat seed=0 as "no seed"; compare against None.
        if seed is not None:
            np.random.seed(seed=seed)
        shuffled_indices = np.arange(self._m)
        np.random.shuffle(shuffled_indices)
        self._training_data = np.take(self._training_data, shuffled_indices, axis=0)
        # axis=0 is essential: without it np.take indexes the flattened label
        # matrix and returns a 1-D array, breaking the sample/label pairing.
        self._training_labels = np.take(self._training_labels, shuffled_indices, axis=0)

    @property
    def num_tuples(self):
        """Number of samples (rows)."""
        return self._m

    @property
    def num_features(self):
        """Number of feature columns."""
        return self._n

    @property
    def labels(self):
        """The label array, in the current row order."""
        return self._training_labels

    @property
    def data_table(self):
        """The feature array, in the current row order."""
        return self._training_data

In [6]:
# Wrap the loaded arrays so they can be indexed one sample at a time.
dataset = Dataset((data, labels))

In [7]:
import torch
from torch import nn

# model definition
# notes from Yuzhe: Here are the parameters for the neural network,total of 3 layers of network
class NN(nn.Module):
    """Feed-forward network with two ReLU hidden layers and a linear output.

    Args:
        in_features: size of each input vector (default 45).
        hidden_units: width of both hidden layers (default 1000).
        out_features: size of each output vector (default 11).

    The defaults reproduce the originally hard-coded 45 -> 1000 -> 1000 -> 11
    architecture, so ``NN()`` builds the same model as before.
    """

    def __init__(self, in_features=45, hidden_units=1000, out_features=11):
        super(NN, self).__init__()
        self.fc1 = nn.Linear(in_features, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, out_features)
        self.relu = nn.ReLU()
        # Not used by forward(); kept so code reading `model.sigmoid` still works.
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a batch ``x`` of input vectors to a batch of output vectors."""
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.relu(x)
        # No activation on the last layer: the raw linear output is returned.
        x = self.fc3(x)
        return x

    def name(self):
        """Return the short identifier of this model."""
        return 'nn'

In [8]:
from torch.utils.data import DataLoader
import torch.nn.functional as F

## Notes from Yuzhe: the following parameters can be changed to tune the training run.
args = {'lr':0.001, ## learning rate handed to the optimizer
        'max_steps':30,   ## maximum number of iterations; NOTE(review): not read by any code visible in this notebook
        'batch_size':100,   ## number of samples per mini-batch produced by the DataLoader
        'epoch':30, ## number of passes over the training data
        'enable_gpu':None} ## GPU switch; NOTE(review): not consulted by the visible code, the device is created as "cpu"
# Mini-batch iterator over the dataset; sample order is reshuffled by the loader.
train_loader = DataLoader(dataset, batch_size=args['batch_size'], shuffle=True)

In [9]:
import torch.optim as optim

# Everything runs on the CPU. NOTE(review): args['enable_gpu'] is not consulted here.
device = torch.device("cpu")
model = NN().to(device)
# Stochastic gradient descent with momentum, using the learning rate from args.
optimizer = optim.SGD(model.parameters(), lr=args['lr'], momentum=0.9)

In [10]:
def train(args, model, device, train_loader, optimizer, epoch):
    """Run one pass over ``train_loader``, updating ``model`` with an MSE objective.

    Args:
        args: accepted for call-site compatibility; not read by this function.
        model: the network to optimise; switched to training mode.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable of ``(inputs, targets)`` batches that also
            exposes ``len()`` and a ``dataset`` attribute (used for progress).
        optimizer: optimiser stepping ``model``'s parameters.
        epoch: current epoch number, used in the progress message and to
            enable the prediction dump from epoch 499 onwards.
    """
    model.train()
    for step, (inputs, targets) in enumerate(train_loader):
        # Cast both tensors to float32 before moving them to the device.
        inputs = inputs.float().to(device)
        targets = targets.float().to(device)

        optimizer.zero_grad()
        prediction = model(inputs)
        loss = F.mse_loss(prediction, targets)

        # Very late epochs additionally print the raw predictions and targets.
        if epoch >= 499:
            print(prediction, targets)

        loss.backward()
        optimizer.step()

        # Progress is estimated from the batch index and the current batch size.
        samples_seen = step * len(inputs)
        samples_total = len(train_loader.dataset)
        percent_done = 100. * step / len(train_loader)
        message = 'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
            epoch, samples_seen, samples_total, percent_done, loss.item())
        logger.info(message)

def test(args, model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.mse_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    logger.info('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [11]:
# Run the configured number of training epochs; epoch numbers start at 1.
for epoch in range(1, args['epoch'] + 1):
    train(args, model, device, train_loader, optimizer, epoch)
    # Evaluation is disabled; no test_loader is created in the visible cells.
    #test(args, model, device, test_loader)



In [12]:
# Print every parameter tensor of the model for manual inspection.
for p in model.parameters():
    print(p)

Parameter containing:
tensor([[-0.1476, -0.0560, -0.0207,  ..., -0.0674,  0.0529, -0.1030],
        [ 0.0413, -0.0575,  0.1053,  ...,  0.0275, -0.1328,  0.1284],
        [ 0.0140,  0.0808, -0.1295,  ...,  0.1422, -0.1231, -0.0791],
        ...,
        [ 0.0741,  0.1353,  0.0581,  ...,  0.0571,  0.0771,  0.1452],
        [ 0.1198, -0.0315, -0.0971,  ..., -0.1323, -0.0671,  0.0049],
        [-0.1429, -0.0325, -0.0471,  ..., -0.1018, -0.0338, -0.0587]],
       requires_grad=True)
Parameter containing:
tensor([-5.3841e-03, -5.0519e-02, -1.0950e+03, -9.3156e-02, -1.2654e-01,
        -1.0156e+03, -5.8675e+02, -3.7298e-02, -6.9028e-02, -2.5147e-02,
        -6.4043e+02, -3.5268e-03, -4.3736e+02, -1.3637e-01, -1.8476e+02,
        -5.4530e-02, -4.7857e+01, -3.8251e-02, -8.8616e+01, -4.9167e+02,
        -1.0506e-01, -9.7962e+02, -9.2888e-02, -9.2696e+02, -1.3983e-01,
        -1.2593e-01, -5.4261e-02, -1.2491e-01, -2.3559e-03, -1.0906e-01,
        -2.8878e+02, -8.8779e+02, -7.7823e-02, -3.0977e-0