<a href="https://colab.research.google.com/github/alenacode/SG_MMM2020/blob/master/SG_betha.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTING LIBRARIES
# multivariate multi-step encoder-decoder lstm 
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from numpy import array, hstack, vstack
import keras
from keras.models import Sequential
from keras.layers import RepeatVector, LSTM, Dense, TimeDistributed
from sklearn.model_selection import train_test_split

# PREPARING THE INPUT DATA
# get configurations from dir
def get_data(data_dir="data/1"):
    """Load lattice configurations from the per-energy ``.dat`` files.

    One file, ``<data_dir>/<E>.dat``, is read for every value E in
    52, 48, ..., -52 (step -4).  Every non-blank line of a file is parsed as
    one row of whitespace-separated integers.

    Args:
        data_dir: Directory holding the ``<E>.dat`` files.  Defaults to
            ``"data/1"``, the location that used to be hard-coded.

    Returns:
        A 2-D array with one row per parsed line, ordered by file
        (descending E) and then by line.

    Raises:
        OSError: If one of the expected files cannot be opened.
        ValueError: If a token is not an integer, if the rows do not all
            have the same length, or if no rows were read at all.
    """
    configs = []
    for E in np.arange(52, -53, -4):
        with open(data_dir + "/" + str(E) + ".dat", "r") as sample:
            for line in sample:
                row = [int(x) for x in line.split()]
                # A blank (e.g. trailing) line parses to [], which would make
                # vstack fail on mismatched row lengths -- skip it.
                if row:
                    configs.append(row)
    return vstack(configs)

# split a multivariate sequence into samples
def split_sequences(dataset, num_of_timestamps, num_of_predictions):
    """Cut a sequence into sliding (input, target) window pairs.

    Window ``k`` takes ``dataset[k : k + num_of_timestamps]`` as the input
    and the ``num_of_predictions`` items that immediately follow it as the
    target.  Only windows whose target fits entirely inside ``dataset`` are
    produced.

    Returns:
        A pair ``(X, Y)`` of arrays built from the input windows and the
        target windows respectively.  Both are empty when the dataset is too
        short to hold a single complete window.
    """
    total = len(dataset)
    span = num_of_timestamps + num_of_predictions

    # The last valid start is total - span; never produce more windows than
    # there are rows in the dataset.
    starts = range(min(total, total - span + 1))

    inputs = [dataset[s:s + num_of_timestamps] for s in starts]
    targets = [dataset[s + num_of_timestamps:s + span] for s in starts]
    return array(inputs), array(targets)

# load the lattice configurations (get_data stacks them into one 2-D array)
data = get_data()
print("Received data")

# choose the window sizes:
#   num_of_timestamps  - how many consecutive configurations are fed to the
#                        LSTM for each sample
#   num_of_predictions - how many steps ahead we want to predict
num_of_timestamps, num_of_predictions = 3, 2

# convert into input/output windows and hold out 30% of them for testing
X, Y = split_sequences(data, num_of_timestamps, num_of_predictions)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)
# last axis of the windows = number of values per configuration
features = X_train.shape[2]
print("Splitted sequences to X_train, X_test, Y_train, Y_test")

# DEFINING & TRAINING THE MODEL
# encoder-decoder: the first LSTM encodes the input window into one vector,
# RepeatVector copies it once per predicted step, the second LSTM decodes it
# and the time-distributed Dense layer emits one configuration per step
model = Sequential()
model.add(LSTM(32, activation='tanh', input_shape=(num_of_timestamps, features)))
model.add(RepeatVector(num_of_predictions))
model.add(LSTM(32, activation='tanh', return_sequences=True))
model.add(TimeDistributed(Dense(features)))
model.compile(optimizer='adam', loss='mse')

# fit model and get loss
model.fit(X_train, Y_train, epochs=270, verbose=0)
print("Trained the model")
loss = model.evaluate(X_test, Y_test, verbose=0)
print("Test loss:", loss)

# PREDICTION
# predict the held-out windows and snap every value to -1 or +1
prediction = model.predict(X_test)
# NOTE(review): the cut-off is -0.5, not 0, so values in (-0.5, 0) become +1;
# presumably the targets are +/-1 spins -- confirm the threshold is intended.
prediction = np.where(prediction.ravel() <= -0.5, -1, 1)

# visualize the first 216 flattened values (6 configs, per the original note)
plot_len = 216
plt.style.use('ggplot')
plt.figure(figsize=(20, 7))
plt.plot(Y_test.ravel()[:plot_len], label="Real value")
plt.plot(prediction[:plot_len], label="Predicted value")
plt.legend()

In [None]:
# IMPORTING LIBRARIES
# multivariate multi-step encoder-decoder lstm 
%matplotlib inline
import matplotlib.pyplot as plt

import keras
import numpy as np
from numpy import array, hstack, vstack
from keras.models import Sequential
from keras.layers import RepeatVector, LSTM, Dense, TimeDistributed
from sklearn.model_selection import train_test_split

from datetime import datetime
import time
# PREPARING THE INPUT DATA

# get configurations from dir
def get_data(data_dir="data"):
    """Load lattice configurations from five runs and split them train/test.

    For every run ``j`` in 1..5 and every value E in -4, -8, ..., -40 the
    file ``<data_dir>/<j>/<E>.dat`` is read; each non-blank line is parsed as
    one row of whitespace-separated integers.  Rows from runs 1-4 form the
    training set.  The first 200 rows of run 5 are appended to the training
    set and the remaining rows of run 5 form the test set.

    Args:
        data_dir: Directory holding the per-run sub-directories.  Defaults
            to ``"data"``, the location that used to be hard-coded.

    Returns:
        A pair ``(testdata, data)`` of 2-D arrays -- note that the test set
        comes first.

    Raises:
        OSError: If one of the expected files cannot be opened.
        ValueError: If a token is not an integer, if the rows do not all
            have the same length, or if run 5 has 200 rows or fewer (the
            test set would then be empty).
    """
    held_out_run = 5   # run whose rows are (mostly) reserved for testing
    n_borrowed = 200   # leading rows of that run that still go to training

    configs = []
    testdata = []
    for j in np.arange(1, 6, +1):
        target = testdata if j == held_out_run else configs
        for E in np.arange(-4, -41, -4):
            with open(data_dir + "/" + str(j) + "/" + str(E) + ".dat", "r") as sample:
                for line in sample:
                    row = [int(x) for x in line.split()]
                    # A blank (e.g. trailing) line parses to [], which would
                    # make vstack fail on mismatched row lengths -- skip it.
                    if row:
                        target.append(row)
    # extend (not append) so configs stays a flat list of rows
    configs.extend(testdata[:n_borrowed])
    return vstack(testdata[n_borrowed:]), vstack(configs)

# split a multivariate sequence into samples
def split_sequences(dataset, num_of_timestamps, num_of_predictions):
    """Cut a sequence into sliding (input, target) window pairs.

    Window ``k`` takes ``dataset[k : k + num_of_timestamps]`` as the input
    and the ``num_of_predictions`` items that immediately follow it as the
    target.  Only windows whose target fits entirely inside ``dataset`` are
    produced.

    Returns:
        A pair ``(X, Y)`` of arrays built from the input windows and the
        target windows respectively.  Both are empty when the dataset is too
        short to hold a single complete window.
    """
    total = len(dataset)
    span = num_of_timestamps + num_of_predictions

    # The last valid start is total - span; never produce more windows than
    # there are rows in the dataset.
    starts = range(min(total, total - span + 1))

    inputs = [dataset[s:s + num_of_timestamps] for s in starts]
    targets = [dataset[s + num_of_timestamps:s + span] for s in starts]
    return array(inputs), array(targets)

# load the configurations; get_data returns the test set first
testdata, data = get_data()
print(len(data))
print(len(testdata))
print("Received data")

# window sizes: configurations per input sample / steps to predict ahead
num_of_timestamps = 3
num_of_predictions = 2

# build the windows separately for the training and the test set
# (no random train_test_split here: the two sets come from get_data)
X_train, Y_train = split_sequences(data, num_of_timestamps, num_of_predictions)
X_test, Y_test = split_sequences(testdata, num_of_timestamps, num_of_predictions)
# last axis of the windows = number of values per configuration
features = X_train.shape[2]
print("Splitted sequences to X_train, X_test, Y_train, Y_test")

# DEFINING & TRAINING THE MODEL
# encoder LSTM -> repeat the encoding once per predicted step ->
# decoder LSTM -> one Dense output of size `features` per step
model = Sequential([
    LSTM(32, activation='tanh', input_shape=(num_of_timestamps, features)),
    RepeatVector(num_of_predictions),
    LSTM(32, activation='tanh', return_sequences=True),
    TimeDistributed(Dense(features)),
])
model.compile(optimizer='adam', loss='mse')

# train silently, then report the loss on the test windows
model.fit(X_train, Y_train, epochs=80, verbose=0)
print("Trained the model")
loss = model.evaluate(X_test, Y_test, verbose=0)
print("Test loss:", loss)

# PREDICTION
# raw network output is plotted as-is (no rounding or thresholding)
prediction = model.predict(X_test)

# visualize the first 216 flattened values (6 configs, per the original note)
plt.style.use('ggplot')
plt.figure(figsize=(20, 7))
real_values = Y_test.ravel()[:216]
predicted_values = prediction.ravel()[:216]
plt.plot(real_values, label="Real value")
plt.plot(predicted_values, label="Predicted value")
plt.legend()