This program combines the files with the generated spin configurations (generated with generate_data.ipynb) such that three files are produced. <br>
One training set file, <br>
one validation set file, <br>
one test set file. <br>
These files are stored in a subfolder of the parent path called "spin_configurations".

In [1]:
import numpy as np
import os
import math
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
import timeit

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
# --- constants ---
# T_c = 2 / ln(1 + sqrt(2)) (about 2.269). It is used further down as the
# threshold that separates the two phase labels.
T_c = 2 / math.log(1 + math.sqrt(2))

# Directory the notebook is executed from, and the directory one level above
# it; the spin configuration folders are looked up relative to the latter.
cpath = os.getcwd()
ppath = os.path.abspath(
    os.path.join(cpath, os.pardir)
)



#### Read Data + add temperature and magnetization information

In [1]:
# --- user settings (adjust these to match the generated data) ---

# Lattice size of the Ising model (an n x n lattice is flattened to n*n values).
n = 40

# Number of spin configurations contained in one file.
samples_per_file = 1000

# Number of generated sets to combine. With the default settings of
# generate_data.ipynb one set includes 16000 samples.
no_sets = 1

In [3]:

# Number of values per flattened n x n configuration.
n_total = n * n

# Temperatures (k_B * T) for which one configuration file exists per set.
kbT_sequence = [1.5, 1.7, 1.8, 1.9, 2.0, 2.1, 2.2, T_c, 2.3, 2.4, 2.5, 2.6, 2.7, 2.8, 3.0, 3.5]
no_classes = len(kbT_sequence)

# One row per configuration: n_total spin columns followed by three label
# columns (temperature index, magnetization, phase flag).
dataset = np.zeros((no_classes * samples_per_file * no_sets, n_total + 3), dtype='float')
print(dataset.shape)

# Running index of the file being read; it determines the row range to fill.
filecounter = 0
for j in range(no_sets):
    foldername = 'spin_configurations/set{}'.format(j + 1)
    for i, kbT in enumerate(kbT_sequence):
        filename = 'spin_configurations_{}_{}.txt'.format(n, kbT)
        filepath = os.path.join(ppath, foldername, filename)

        # The first two lines of every file are skipped as header.
        configurations = np.genfromtxt(filepath, dtype='float', delimiter=',', skip_header=2)

        # Rows of `dataset` that belong to this file.
        rows = slice(samples_per_file * filecounter, samples_per_file * (filecounter + 1))

        # spin values
        dataset[rows, :-3] = configurations

        # temperature label: index into kbT_sequence
        dataset[rows, -3] = i

        # magnetization: mean over 2*x - 1
        # (presumably the files store spins as 0/1, which this maps to -1/+1 -- confirm)
        configurations_2 = (2 * configurations) - 1
        dataset[rows, -2] = np.mean(configurations_2, axis=1, dtype='float64')

        # phase label: 1 for temperatures up to and including T_c, 0 above
        dataset[rows, -1] = 1 if kbT <= T_c else 0

        filecounter += 1

(16000, 1603)


#### Save Data to files

In [4]:
no_samples = len(dataset)


# the data split is performed with those ratios: 70% training, 15% validation, 15% test.
trainsize = 0.7
validsize = 0.15
testsize = 0.15

# Split boundaries inside each block of `samples_per_file` rows.
# round() on the cumulative fractions is used instead of int() on the float
# products: int() truncates, so a product that lands just below a whole number
# (e.g. 90 * 0.7) would give a boundary that is one too small. The last
# boundary is clamped so that a slice can never reach into the next file's rows.
train_idx = round(samples_per_file * trainsize)
valid_idx = round(samples_per_file * (trainsize + validsize))
test_idx = min(samples_per_file, round(samples_per_file * (trainsize + validsize + testsize)))

# First row of every file block inside `dataset`.
file_starts = range(0, no_classes * no_sets * samples_per_file, samples_per_file)

# Take the same relative slice out of every file block, so each temperature
# contributes equally to every split. A single concatenate per split avoids
# re-copying the accumulated array on every iteration and always yields a new
# array, so the in-place shuffles below never touch `dataset`.
training_data = np.concatenate(
    [dataset[start:start + train_idx] for start in file_starts])
validation_data = np.concatenate(
    [dataset[start + train_idx:start + valid_idx] for start in file_starts])
test_data = np.concatenate(
    [dataset[start + valid_idx:start + test_idx] for start in file_starts])

# Shuffle the rows of each split (features and labels stay together because
# they live in the same row).
np.random.shuffle(training_data)
np.random.shuffle(validation_data)
np.random.shuffle(test_data)

print(training_data.shape, validation_data.shape, test_data.shape)


training_features_filename = "training_features_{}.txt".format(n)
validation_features_filename = "validation_features_{}.txt".format(n)
test_features_filename = "test_features_{}.txt".format(n)

training_temperatures_filename = "training_temperatures_{}.txt".format(n)
validation_temperatures_filename = "validation_temperatures_{}.txt".format(n)
test_temperatures_filename = "test_temperatures_{}.txt".format(n)

training_magnetizations_filename = "training_magnetizations_{}.txt".format(n)
validation_magnetizations_filename = "validation_magnetizations_{}.txt".format(n)
test_magnetizations_filename = "test_magnetizations_{}.txt".format(n)

training_phases_filename = "training_phases_{}.txt".format(n)
validation_phases_filename = "validation_phases_{}.txt".format(n)
test_phases_filename = "test_phases_{}.txt".format(n)


output_dir = os.path.join(ppath, 'spin_configurations')

# (file name, values, number format) for every output file. The values are
# column views of the split arrays: all but the last three columns are the
# spin features, then temperature index, magnetization and phase flag.
outputs = [
    # spin configurations
    (training_features_filename, training_data[:, :-3], '%i'),
    (validation_features_filename, validation_data[:, :-3], '%i'),
    (test_features_filename, test_data[:, :-3], '%i'),
    # temperature indices, saved in a separate file
    (training_temperatures_filename, training_data[:, -3], '%i'),
    (validation_temperatures_filename, validation_data[:, -3], '%i'),
    (test_temperatures_filename, test_data[:, -3], '%i'),
    # magnetizations, saved in a separate file
    (training_magnetizations_filename, training_data[:, -2], '%.18f'),
    (validation_magnetizations_filename, validation_data[:, -2], '%.18f'),
    (test_magnetizations_filename, test_data[:, -2], '%.18f'),
    # phase states, saved in a separate file
    (training_phases_filename, training_data[:, -1], '%i'),
    (validation_phases_filename, validation_data[:, -1], '%i'),
    (test_phases_filename, test_data[:, -1], '%i'),
]

for out_name, values, number_format in outputs:
    if number_format == '%i':
        # Integer-valued columns are stored as floats in `dataset`; convert
        # right before writing so only one integer copy exists at a time.
        values = values.astype('int')
    np.savetxt(os.path.join(output_dir, out_name), values, fmt=number_format, delimiter=',')




(11200, 1603) (2400, 1603) (2400, 1603)
