In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
import warnings
from density_db import density_database
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import explained_variance_score
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestRegressor
from sklearn import svm
from sklearn.svm import LinearSVR
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel
from tensorflow import keras
import warnings

In [2]:
#get data
def get_data():
    rawData = density_database('density.db').selectAllErrorsWithParams('Redfield').fetchall()
    return pd.DataFrame(data=rawData, columns=['delta_e', 'J', 'lambda', 'gamma', 'T', 'error'])

In [3]:
#plot
def plot_error(data):
    data.loc[
        (data['delta_e'] == 100.)
        & (data['J'] == 100.)
        & (data['lambda'] == 100.)
        & (data['gamma'] == 100.)
        & (data['T'] == 300.)
    ].plot(kind="scatter", x="delta_e", y="error", logy=True)
    plt.ylim(10**-4,10**-2)

In [4]:
#explore combinations
def correletions():
    tryData = data.copy()
    tryData['gamma-lambda'] = tryData['gamma']*tryData['lambda']
    tryData['gamma-delta_e'] = tryData['gamma']*tryData['delta_e']
    tryData['gamma-J'] = tryData['gamma']*tryData['J']
    tryData['lambda-delta_e'] = tryData['lambda']*tryData['delta_e']
    tryData['lambda-J'] = tryData['lambda']*tryData['J']
    tryData['delta_e-J'] = tryData['delta_e']*tryData['J']
    corr_matrix = tryData.corr()
    corr_matrix

In [5]:
def prepare_train_and_test_sets(data):
    modifiedData = data.copy()
    modifiedData['gamma-lambda'] = data['gamma']*data['lambda']
    modifiedData['gamma-delta_e'] = data['gamma']*data['delta_e']
    modifiedData['gamma-J'] = data['gamma']*data['J']
    modifiedData['lambda-delta_e'] = data['lambda']*data['delta_e']
    modifiedData['lambda-J'] = data['lambda']*data['J']
    modifiedData['delta_e-J'] = data['delta_e']*data['J']
    #return train_test_split(modifiedData, test_size=0.2, random_state=42)
    return modifiedData

In [6]:
train_set = prepare_train_and_test_sets(get_data())

In [7]:
def mspe(y, y_predicted):
    return 100 * np.sum(np.square((y - y_predicted)/y)) / len(y)

def mape(y, y_predicted):
    return 100 * np.sum(np.abs((y - y_predicted)/y)) / len(y)
 
def trainMethod(method, train_set, pipeline = None):
    data_labels = np.array(train_set["error"].copy())
    data_prepared = np.array(train_set.drop("error", axis=1))
    if pipeline:
        data_prepared = pipeline.fit_transform(data_prepared)
    kfold = KFold(n_splits=10, shuffle=True, random_state=47)
    errors = [];
    for train, test in kfold.split(data_prepared, data_labels):
        method.fit(data_prepared[train], data_labels[train])
        labels_predicted = method.predict(data_prepared[test])
        errors.append(mape(data_labels[test], labels_predicted))
    print('Error: %f' % (sum(errors)/len(errors)))
    return errors

In [8]:
def trainNueralNetwork(train_set):
    train_set_nn = train_set.copy()
    pipeline = Pipeline([
        ('std_scaler', StandardScaler()),
    ])
    data_labels = np.array(train_set_nn["error"].copy())
    data_prepared = np.array(pipeline.fit_transform(train_set_nn.drop("error", axis=1)))
    kfold = KFold(n_splits=10, shuffle=True, random_state=47)
    errors = [];
    for train, test in kfold.split(data_prepared, data_labels):
        model = keras.Sequential([
            keras.layers.Dense(11, activation='softmax', input_shape=(11,)),
            keras.layers.Dense(11, activation='softmax'),
            keras.layers.Dense(1)
        ])
        model.compile(optimizer='Adam', loss='mean_absolute_percentage_error')
        model.fit(data_prepared[train], data_labels[train], epochs=200, validation_split=0, verbose=0)
        labels_predicted = model.predict(data_prepared[test])
        errors.append(mape(data_labels[test], labels_predicted[:,0]))
        #print(errors)
        #break
    print('Error: %f' % (sum(errors)/len(errors)))
    return errors

In [9]:
#linear su validation
def linear():
    lin_reg = LinearRegression()
    trainMethod(lin_reg, train_set)
    
#forest su validation
def randomForestRegressor():
    for_reg = RandomForestRegressor(n_estimators=100)
    trainMethod(for_reg, train_set)
    
#support vector machines
def support_vector_machines():
    warnings.filterwarnings('ignore')
    svr = LinearSVR()
    pipeline = Pipeline([
        ('std_scaler', StandardScaler()),
    ])
    trainMethod(svr, train_set, pipeline)
    warnings.filterwarnings('always')
    
#k nearest
def k_nearest():
    eigh = KNeighborsRegressor(n_neighbors=5, weights='distance')
    trainMethod(eigh, train_set)
    
#linear lasso
def linear_lasso():
    lasso = Lasso()
    trainMethod(lasso, train_set)

#linear ridge
def linear_ridge():
    ridge = Ridge()
    trainMethod(ridge, train_set)

#decision tree
def decision_tree():
    decision = DecisionTreeRegressor()
    trainMethod(decision, train_set)
    
#gaussian process
def gaussian_process():
    kernel = DotProduct() + WhiteKernel()
    gpr = GaussianProcessRegressor(kernel=kernel,random_state=0)
    trainMethod(gpr, train_set)

In [10]:
print('linear')
linear()
print()
print('linear_lasso')
linear_lasso()
print()
print('linear_ridge')
linear_ridge()
print()
print('randomForestRegressor')
randomForestRegressor()
print()
print('decision_tree')
decision_tree()
print()
print('k_nearest')
k_nearest()
print()
print('svm')
support_vector_machines()
print()
print('gaussian_process')
gaussian_process()
print()
print('neural network')
trainNueralNetwork(train_set)

linear
Error: 79.366896

linear_lasso
Error: 80.752652

linear_ridge
Error: 79.366874

randomForestRegressor
Error: 22.981270

decision_tree
Error: 26.896598

k_nearest
Error: 20.915795

svm
Error: 79.793290

gaussian_process
Error: 78.169086

neural network


  if isinstance(inputs, collections.Sequence):


Error: 21.093743


[14.927877234494682,
 34.045571475213556,
 65.57720465886582,
 11.318383518321436,
 11.188185300287293,
 11.066417363493642,
 8.570994930615852,
 19.806670096303147,
 12.282645459132,
 22.153481464546584]