# Introduction

# Preliminaries

In [138]:
%matplotlib inline
import pprint

import numpy as np
from numpy.random import normal, randint
import matplotlib.pyplot as plt
from sklearn import linear_model
try:
    # scikit-learn >= 0.18 ships train_test_split here.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older releases only provide the (since removed) cross_validation module.
    from sklearn.cross_validation import train_test_split

# NOTE(review): `logistic` is never referenced again in the cells visible
# here — confirm it is still needed.
logistic = linear_model.LogisticRegression()

# --- Problem dimensions ---------------------------------------------------
# Wavefunction basis: number of basis states and their 1-based indices.
NBW = 40
nbws = 1 + np.arange(NBW)

# Number of potentials.
NV = 10**4

# Potential basis: number of basis states and their 1-based indices.
NB = 10
ns = 1 + np.arange(NB)

# --- Coefficient variances --------------------------------------------------
# lambda: variance of the Legendre coefficients.
lam = 0.75
# Variance of the n=0 Legendre coefficient V_0.
V20 = 10

# Input files: the eigenvalue table and its companion "SD" array share the
# same parameter-dependent suffix, so build that suffix once.
file_suffix = "_NV" + str(NV) \
    + "_NB" + str(NB) + "_lam" \
    + str(lam) + "_V20" + str(V20) + ".npy"
filepath = "../Data/eigenvalues" + file_suffix
filepathSD = "../Data/eigenvaluesSD" + file_suffix
data = np.load(filepath)
dataSD = np.load(filepathSD)

# Column layout of `data`: NB sine coefficients, then NB cosine coefficients,
# then the eigenvalues. Slice with NB (not a literal 10) so the split stays
# correct when a file generated with a different NB is loaded.
VSns = data[:, :NB]
VCns = data[:, NB:2 * NB]
eigs = data[:, 2 * NB:]

In [139]:
# Report the shape of the loaded table and of each column group, then of dataSD.
print(*(arr.shape for arr in (data, VSns, VCns, eigs)))
print(dataSD.shape)

(10000, 60) (10000, 10) (10000, 10) (10000, 40)
(40,)


# Preprocessing
We know that the spectrum is symmetric under $x \to -x$, and we can build this symmetry into our dataset: duplicate the entire dataset, but negate all the sine coefficients in the copy. Since sine is odd and cosine is even, this reflects each potential about the $y$-axis while leaving its eigenvalues unchanged.

In [140]:
def VS(ns, xs):
    """Evaluate the sine modes sin(pi * n * x) for every (n, x) pair.

    Parameters
    ----------
    ns : array_like
        Mode numbers (flattened by ``np.outer``).
    xs : array_like
        Sample positions (flattened by ``np.outer``).

    Returns
    -------
    ndarray
        2-D array whose entry ``[i, j]`` is ``sin(pi * ns[i] * xs[j])``.
    """
    phases = np.pi * np.outer(ns, xs)
    return np.sin(phases)
def VC(ns, xs):
    """Evaluate the cosine modes cos(pi * n * x) for every (n, x) pair.

    Parameters
    ----------
    ns : array_like
        Mode numbers (flattened by ``np.outer``).
    xs : array_like
        Sample positions (flattened by ``np.outer``).

    Returns
    -------
    ndarray
        2-D array whose entry ``[i, j]`` is ``cos(pi * ns[i] * xs[j])``.
    """
    phases = np.pi * np.outer(ns, xs)
    return np.cos(phases)
# Sample the interval [-1, 1] on a uniform grid of Nx points.
Nx = 100
xs = np.linspace(-1, 1, num=Nx)
#Vgrid = legval(xs, np.transpose(Vns))

# Basis functions tabulated on the grid, one row per mode number in `ns`.
VSs, VCs = VS(ns, xs), VC(ns, xs)

# Each row of Vgrid is one potential on the grid: sine part plus cosine part.
Vgrid = VSns.dot(VSs) + VCns.dot(VCs)
# The same potentials with the grid axis reversed.
VgridFlipped = Vgrid[:, ::-1]
print(Vgrid.shape)

(10000, 100)


In [141]:
numeigs = 10

# Feature rows are [sine coefficients | cosine coefficients]; the second copy
# has the sine coefficients negated.
coeffs_orig = np.concatenate((VSns, VCns), axis=1)
coeffs_mirrored = np.concatenate((-VSns, VCns), axis=1)
X = np.concatenate((coeffs_orig, coeffs_mirrored))

# Grid representation of the same doubled dataset (original, then reversed grid).
Xgrid = np.concatenate((Vgrid, Vgrid[:, ::-1]))

# Both copies get the same targets: eigenvalue columns 1..numeigs.
# NOTE(review): column 0 of `eigs` is skipped here — confirm that is intended.
eigs_doubled = np.concatenate((eigs, eigs))
y = eigs_doubled[:, 1:numeigs + 1]

In [142]:
# Hold out `test_frac` of the samples for testing; random_state is fixed so the
# split is the same on every run.
test_frac = 0.4
split = train_test_split(X, y, test_size=test_frac, random_state=7)
X_train, X_test, y_train, y_test = split

In [143]:
# Shapes of the full dataset, the training split and the test split.
for features, targets in ((X, y), (X_train, y_train), (X_test, y_test)):
    print(features.shape, targets.shape)

(20000, 20) (20000, 10)
(12000, 20) (12000, 10)
(8000, 20) (8000, 10)


# Training the model and predicting on the test set

In [144]:
from sklearn import linear_model
from sklearn.ensemble import RandomForestRegressor
# Baseline model: ridge regression with default settings, fitted on the
# training split.
lm = linear_model.Ridge()
specFit = lm.fit(X_train, y_train)

# NOTE(review): `rf` is instantiated but never fitted or used in the cells
# visible here.
rf = RandomForestRegressor()

In [145]:
# Predict on the held-out test set:
y_test_pred = lm.predict(X_test)

In [146]:
# Peek at the first five rows of the test targets.
y_test[:5]

array([[-0.01106067, -0.38588974,  0.18099106,  0.10960405, -0.08684193,
         0.08342672,  0.03164771,  0.04529693,  0.00191228,  0.00850912],
       [-0.02947834,  0.09098351,  0.19304551,  0.11089545,  0.03844527,
         0.04317426,  0.10302539,  0.04361396,  0.00960283,  0.01905374],
       [-0.10329386,  0.44973147, -0.1431563 , -0.08362555,  0.05966746,
         0.0079617 ,  0.07294833,  0.0263731 ,  0.00443459,  0.00709695],
       [ 0.19885799,  0.12739127,  0.00340198,  0.14662022,  0.13056632,
         0.02542335, -0.01728318,  0.05876616,  0.03818774,  0.00654564],
       [-0.29377236,  0.05516022, -0.12140063,  0.0590401 , -0.17562988,
         0.06735603, -0.05646346,  0.02113549,  0.02599634,  0.00995357]])

In [147]:
#yerr = np.abs((y_test_pred - y_test) / dataSD[0:10])
# Element-wise relative error of the ridge predictions.
# NOTE(review): this divides by y_test itself, so targets close to zero
# dominate the result — confirm this is the intended metric.
residual = y_test_pred - y_test
yerr = np.absolute(residual / y_test)

In [148]:
# Mean relative error per output column, averaged over the test samples.
muerr = yerr.mean(axis=0)
print(muerr)

[ 3.30561028  5.23053714  6.15690467  1.85309155  2.44344774  2.2230578
  1.53408985  2.27419336  1.74101695  0.76691475]


# Neural network

In [149]:
from fann2 import libfann

# Model parameters
connection_rate = 1               # passed to ann.create_sparse_array
learning_rate = 0.7               # passed to ann.set_learning_rate
num_hidden = 100                  # size of the single hidden layer
desired_error = 0.0001            # passed to ann.train_on_data
max_iterations = 10000            # passed to ann.train_on_data
iterations_between_reports = 100  # passed to ann.train_on_data

# Data parameters
# Read both sizes from the array shape. Indexing a sample row
# (X_train[1].size) would raise IndexError on a one-row training set.
num_train = X_train.shape[0]
num_input = X_train.shape[1]
num_output = numeigs

# Build the network: input layer, one hidden layer, output layer.
layer_sizes = (num_input, num_hidden, num_output)
ann = libfann.neural_net()
ann.create_sparse_array(connection_rate, layer_sizes)
ann.set_learning_rate(learning_rate)
ann.set_activation_function_output(libfann.SIGMOID_SYMMETRIC_STEPWISE)

In [None]:
# Hand the training split to FANN; the arrays are converted to nested lists first.
train_inputs = X_train.tolist()
train_targets = y_train.tolist()
fann_train = libfann.training_data()
fann_train.set_train_data(train_inputs, train_targets)

In [None]:
# Train the network on fann_train with the settings defined in the
# model-parameter cell.
ann.train_on_data(fann_train, max_iterations, iterations_between_reports, desired_error)

In [None]:
# Relative error of the network's prediction for a single test sample.
case = 14
predicted = ann.run(X_test[case].tolist())
print((predicted - y_test[case]) / y_test[case])