In [None]:
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from torch.autograd import Variable

torch.manual_seed(2019)

In [None]:
def read_data(datasets):
    """
    Load several (features, labels) CSV pairs and merge them into one data set.

    :param datasets: sequence of (X.csv, y.csv) path pairs; every file is read as comma-delimited text
    :return: pair (X, y) where X stacks the feature matrices of all pairs row-wise and y is the
        flattened 1D array of all labels, both in the order given by datasets
    """
    def load_column(position):
        # Read the file found at `position` of every pair (0 -> features, 1 -> labels).
        return [np.loadtxt(pair[position], delimiter=',') for pair in datasets]

    feature_blocks = load_column(0)
    label_blocks = load_column(1)
    stacked_features = np.vstack(feature_blocks)
    # axis=None flattens every label array before joining them.
    flat_labels = np.concatenate(label_blocks, axis=None)
    return stacked_features, flat_labels


def sphere(X_train, X_test):
    """
    Standardize every feature (column) to zero mean and unit standard deviation.

    The mean and the (population) standard deviation are estimated on X_train only and the
    same shift and scale are then applied to X_test, so no test-set statistics are used.

    A feature that is constant in X_train has zero standard deviation; such a feature is only
    centered (divided by 1) instead of producing NaN/inf values.

    :param X_train: 2D sample-feature matrix (rows are samples) to sphere
    :param X_test: 2D sample-feature matrix to sphere according to mean and stdev of X_train;
        must have the same number of columns as X_train
    :return: Tuple containing (X_train_sphered, X_test_sphered), with the shapes of the inputs
    :raises AssertionError: if X_train and X_test have a different number of features
    """
    X_train = np.asarray(X_train, dtype=float)
    X_test = np.asarray(X_test, dtype=float)
    assert X_test.shape[1] == X_train.shape[1], \
        "X_train and X_test must have the same number of features"

    # Per-feature statistics, computed from the training samples only.
    sample_means = X_train.mean(axis=0)
    sample_stds = X_train.std(axis=0)

    # Treat a feature as constant when its spread is at rounding-error level relative to its
    # mean (this includes an exact 0); dividing by it would yield NaN/inf or amplified noise.
    tolerance = 10 * np.finfo(float).eps * np.abs(sample_means)
    scales = np.where(sample_stds <= tolerance, 1.0, sample_stds)

    # Broadcasting applies the shift/scale row by row without building an
    # (n_samples x n_samples) centering matrix.
    X_train_sphered = (X_train - sample_means) / scales
    X_test_sphered = (X_test - sample_means) / scales
    return X_train_sphered, X_test_sphered


def process_data(X, y, test_size=0.2, random_state=42):
    """
    Split the data into train/test subsets, rescale the labels and sphere the features.

    The labels are divided by their maximum, taken over ALL samples before the split.
    The features are sphered with statistics estimated on the training subset only.

    :param X: 2D numpy array containing all samples (one row per sample)
    :param y: 1D numpy array containing the labels corresponding to the rows of X
    :param test_size: fraction of samples held out for testing (passed to train_test_split)
    :param random_state: seed for sklearn.model_selection.train_test_split
    :return: (X_train, X_test, y_train, y_test) tuple
    :raises ValueError: if the largest label is 0, because the labels cannot be rescaled by it
    """
    y = np.asarray(y)
    label_scale = y.max()
    if label_scale == 0:
        # y / 0 would silently turn every label into NaN/inf and poison the training.
        raise ValueError("cannot normalise labels: y.max() is 0")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y / label_scale, test_size=test_size, random_state=random_state)
    X_train, X_test = sphere(X_train, X_test)

    # Sanity check: sphering must not change the number of samples.
    assert X_train.shape[0] == y_train.shape[0]
    assert X_test.shape[0] == y_test.shape[0]
    return X_train, X_test, y_train, y_test

In [None]:
# Every entry pairs a feature CSV with the CSV that holds its labels.
set1, set2, set3, set4 = datasets = (
    ('/ArOpt.csv', '/ArOptLabel.csv'),
    ('/ArOpt15.csv', '/ArOpt15Label.csv'),
    ('/Ar15.csv', '/Ar15Label.csv'),
    ('/Ar25.csv', '/Ar25Label.csv'),
)

# Merge all files, then split, rescale and sphere them.
X, y = read_data(datasets)
X_train, X_test, y_train, y_test = process_data(X, y, test_size=0.2, random_state=42)

# Turn the label arrays into single-column 2D arrays.
y_train, y_test = y_train.reshape(-1, 1), y_test.reshape(-1, 1)

In [None]:
# Convert the NumPy splits into float32 tensors.
# NOTE: `y` is rebound here from the full NumPy label array to the training-label tensor.
x = torch.from_numpy(X_train).float()
y = torch.from_numpy(y_train).float()
xPredicted = torch.from_numpy(X_test).float()
yTestReal = torch.from_numpy(y_test).float()
# No torch.autograd.Variable wrapper: it is deprecated and plain tensors (like xPredicted
# above) are what the network is fed with anyway.

In [None]:
class Net(torch.nn.Module):
    """Regression network with a single hidden layer: Linear -> ReLU -> Linear."""

    def __init__(self, n_feature, n_hidden, n_output):
        """
        :param n_feature: number of input features
        :param n_hidden: number of units in the hidden layer
        :param n_output: number of output values
        """
        super().__init__()
        # hidden layer
        self.hidden = torch.nn.Linear(in_features=n_feature, out_features=n_hidden)
        # output layer
        self.predict = torch.nn.Linear(in_features=n_hidden, out_features=n_output)

    def forward(self, x):
        """
        :param x: input tensor whose last dimension has n_feature entries
        :return: output of the linear output layer (no activation is applied to it)
        """
        hidden_activation = torch.relu(self.hidden(x))
        return self.predict(hidden_activation)
# Single-hidden-layer model: 15 inputs -> 13 hidden units -> 1 output.
# NOTE: the next cell rebinds net, optimizer and loss_func to a different model.
net = Net(n_feature=15, n_hidden=13, n_output=1)
print(net)

# Mean squared error for regression (alternative tried: torch.nn.L1Loss()).
loss_func = torch.nn.MSELoss()
# Plain SGD (alternative tried: torch.optim.Adam(net.parameters(), lr=0.01)).
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.005)

In [None]:
input_size = 15
hidden_sizes = [8, 5]
output_size = 1

# Build a feed-forward network: one Linear layer per consecutive pair of sizes, each followed
# by a LeakyReLU; the trailing activation is dropped so that the output stays linear.
net = nn.Sequential(*[
    module
    for fan_in, fan_out in zip([input_size] + hidden_sizes, hidden_sizes + [output_size])
    for module in (nn.Linear(fan_in, fan_out), nn.LeakyReLU())
][:-1])
print(net)

# Mean squared error for regression (alternative tried: torch.nn.L1Loss()).
loss_func = torch.nn.MSELoss()
# Plain SGD (alternative tried: torch.optim.Adam(net.parameters(), lr=0.01)).
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.005)





In [None]:
total_epoch = 40000
lost_hist_train = np.zeros([total_epoch])   # training loss recorded at every epoch
lost_hist_eval = np.zeros([total_epoch])    # held-out (test) loss recorded at every epoch

for epoch in range(total_epoch):
    # ---- one full-batch gradient step on the training data ----
    net.train()
    prediction = net(x.float())         # input x and predict based on x
    loss = loss_func(prediction, y)     # must be (1. nn output, 2. target)

    optimizer.zero_grad()   # clear the gradients left over from the previous step
    loss.backward()         # backpropagation, compute gradients
    optimizer.step()        # apply gradients
    # This is the loss measured BEFORE the update that was just applied.
    lost_hist_train[epoch] = loss.item()

    # ---- monitor the held-out loss (measured AFTER the update) ----
    # Evaluation needs no gradients: skip autograd bookkeeping and use eval mode.
    net.eval()
    with torch.no_grad():
        y_predict = net(xPredicted.float())
        loss_eval = loss_func(y_predict, yTestReal)  # must be (1. nn output, 2. target)
    lost_hist_eval[epoch] = loss_eval.item()

# Integer epoch indices 0 .. total_epoch-1, aligned with the history arrays.
t = np.arange(total_epoch)

# Learning curves with a logarithmic y axis.
plt.semilogy(t, lost_hist_train, color='b', label='train')
plt.semilogy(t, lost_hist_eval, color='r', label='test')
plt.xlabel('iteration')
plt.ylabel('mse')
plt.legend()
plt.savefig('process.svg')

# Final predictions on the training set, kept for the results plot further down.
net.eval()
with torch.no_grad():
    prediction = net(x.float())


In [None]:
# Final predictions on the held-out set: eval mode, and no autograd graph is needed.
net.eval()
with torch.no_grad():
    y_predict = net(xPredicted.float())

In [None]:
print(y_predict)
# Mean squared error on the held-out set. mean_squared_error is imported explicitly so that
# this cell does not depend on the sklearn.metrics submodule having been loaded as a side
# effect of another import.
mse = mean_squared_error(yTestReal.detach().numpy(), y_predict.detach().numpy())
print(mse)

In [None]:
# Start from an empty figure.
plt.clf()

# Reference diagonal y = x on [0, 1]: points on it are predicted perfectly.
xx = np.linspace(0, 1, 100)
yy = xx.copy()
plt.plot(xx, yy, label='y=x')

# Predicted versus real labels for both subsets.
plt.plot(y.numpy(), prediction.detach().numpy(), '.', label='train')
plt.plot(yTestReal.detach().numpy(), y_predict.detach().numpy(), 'o', label='test')

plt.xlabel('y_real')
plt.ylabel('y_predict')
plt.legend()
plt.savefig('results.svg')
