In [None]:
import matplotlib.pyplot as plt
import tensorflow as tf
import os
import cv2
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split


In [None]:
# Image edge length (pixels) and generator batch size used throughout the notebook.
target_size = 128
batch_size = 8

# Root folder that holds both the original and the augmented images.
main_data_path = 'Data_Preprocessing/data_all/Fe_Fi_NM'

# Original (untransformed) images, one folder per class.
DataFeFiNM_classFe = f'{main_data_path}/class_fe'
DataFeFiNM_classFi = f'{main_data_path}/class_fi'
DataFeFiNM_classNM = f'{main_data_path}/class_nm'

# Destination folders for the randomly transformed images.
save_dir_DataFeFiNM_classFe = f'{main_data_path}/Ferromagnetic'
save_dir_DataFeFiNM_classFi = f'{main_data_path}/Ferrimagnetic'
save_dir_DataFeFiNM_classNM = f'{main_data_path}/Non_Magnetic'

In [None]:
def transform_data(DataLocation,TransformedDataLocation,batch_size,target_size,augment_factor=10):
    """Write randomly transformed copies of the images under ``DataLocation`` to disk.

    A Keras ``ImageDataGenerator`` applies small random shifts, shear, zoom and
    flips. Every batch drawn from the resulting iterator is saved to
    ``TransformedDataLocation`` as a side effect (``save_to_dir``).

    Parameters
    ----------
    DataLocation : str
        Directory passed to ``flow_from_directory`` as the image source.
    TransformedDataLocation : str
        Directory the transformed images are written to; created if missing.
    batch_size : int
        Number of images per generated batch.
    target_size : int
        Images are resized to ``(target_size, target_size)``.
    augment_factor : int, optional
        Number of passes over the source images, i.e. roughly how many
        transformed images are produced per original. Defaults to 10.

    Returns
    -------
    The directory iterator returned by ``flow_from_directory``.
    """
    # The generator writes straight into save_to_dir, so make sure the folder
    # exists before the first batch is produced.
    os.makedirs(TransformedDataLocation, exist_ok=True)

    datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale = 1/255.,
                                                            width_shift_range=0.01,
                                                            height_shift_range=0.01,
                                                            shear_range=0.1,
                                                            zoom_range=0.1,
                                                            horizontal_flip=True,
                                                            vertical_flip=True
                                                                    )
    data_all = datagen.flow_from_directory(DataLocation,
                                            target_size=(target_size,target_size),
                                            batch_size=batch_size,
                                            color_mode='grayscale',
                                            seed=None,
                                            shuffle=True,
                                            class_mode='binary',
                                            save_to_dir=TransformedDataLocation)

    # Transformed images are generated (and saved) each time a batch is drawn.
    # len() gives the number of batches per pass; draw exactly augment_factor
    # passes. zip() stops on the exhausted range before pulling another batch,
    # so no extra batch is written and an empty source folder draws nothing.
    total_batches = len(data_all) * augment_factor
    for _ in zip(range(total_batches), data_all):
        pass
    print(f'About {data_all.samples*augment_factor} randomly transformed data located in {TransformedDataLocation}')
    return data_all


*Transform Ferromagnetic DOS Image*

In [None]:
# Augment the original ferromagnetic images into the 'Ferromagnetic' folder.
data_all = transform_data(
    DataLocation=DataFeFiNM_classFe,
    TransformedDataLocation=save_dir_DataFeFiNM_classFe,
    batch_size=batch_size,
    target_size=target_size,
)

*Transform Ferrimagnetic DOS Image*

In [None]:
# Augment the original ferrimagnetic images into the 'Ferrimagnetic' folder.
data_all = transform_data(
    DataLocation=DataFeFiNM_classFi,
    TransformedDataLocation=save_dir_DataFeFiNM_classFi,
    batch_size=batch_size,
    target_size=target_size,
)

*Transform Non-Magnetic DOS Image*

In [None]:
# Augment the original non-magnetic images into the 'Non_Magnetic' folder.
data_all = transform_data(
    DataLocation=DataFeFiNM_classNM,
    TransformedDataLocation=save_dir_DataFeFiNM_classNM,
    batch_size=batch_size,
    target_size=target_size,
)

In [137]:
# Load every transformed image together with its class label (the folder name).
LABELS=['Ferrimagnetic','Ferromagnetic','Non_Magnetic']
# os.listdir returns entries in arbitrary order; sorting keeps the sample order
# (and therefore the seeded train/test split built from it) reproducible.
dir_list = sorted(os.listdir(main_data_path))
data=[]
labels=[]
for label in dir_list:
    if label not in LABELS:
        continue  # only the transformed-image folders listed in LABELS are loaded
    all_image_path=main_data_path+'/'+label
    for imagefile in sorted(os.listdir(all_image_path)):
        imagepath=all_image_path+'/'+imagefile
        image = cv2.imread(imagepath,cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread returns None for files it cannot decode (hidden files,
            # truncated images, ...); appending it would break np.array(data) later.
            print(f'Skipping unreadable file: {imagepath}')
            continue
        data.append(image)
        labels.append(label)

In [138]:
# Scale the 8-bit grayscale pixels read by cv2 to [0, 1].
# The guard keeps this cell idempotent: `data` is rebound here, so without it a
# second execution would divide the already-normalised array by 255 again.
if isinstance(data, list):
    data = np.array(data, dtype="float") / 255.0

# Encode the class names as integers, then as one-hot vectors.
encoder = LabelEncoder()
labels_ = encoder.fit_transform(labels)
labels__=tf.keras.utils.to_categorical(labels_)

In [None]:
# Stratified 75/25 train/test split, saved as .npy files for the training notebook.
train_test_split_dir='Data_Preprocessing/data_train_test_split/Fe_Fi_NM'
# np.save does not create missing folders; make sure the output directory exists.
os.makedirs(train_test_split_dir, exist_ok=True)

(trainX, testX, trainY, testY) = train_test_split(data, labels__,
    test_size=0.25, stratify=labels, random_state=42)

# Append a trailing channel axis of size 1 (single-channel grayscale images).
trainX=trainX.reshape(trainX.shape[0],target_size,target_size,1)
testX=testX.reshape(testX.shape[0],target_size,target_size,1)

np.save(train_test_split_dir+'/trainX',trainX)
np.save(train_test_split_dir+'/trainY',trainY)
np.save(train_test_split_dir+'/testX',testX)
np.save(train_test_split_dir+'/testY',testY)