# RCT Text Classification using CNN   

This notebook builds a pipeline that classifies PubMed texts as RCT (Randomized Controlled Trial) or non-RCT using a CNN (Convolutional Neural Network) deep learning model.  
  
    
Author: Jenna Kim  
Created: 2020/10/31  
Last Modified: 2022/10/14


## Reference  
* A simple Convolutional Neural Network summary for binary image classification with Keras  
https://medium.com/swlh/a-simple-convolutional-neural-network-summary-for-binary-image-classification-with-keras-d58caf2f84a4  
* Can you run Keras models on GPU?  
https://www.run.ai/guides/gpu-deep-learning/keras-gpu  
* How can you use GPUs with TensorFlow?  
https://www.run.ai/guides/gpu-deep-learning/tensorflow-gpu  



# 1. Import library

## 1-1. Load libraries

In [None]:
import timeit
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow
import keras
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')

from nltk.stem import PorterStemmer, WordNetLemmatizer
from keras.models import Sequential
from keras import layers
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, classification_report

## 1-2. Check GPU settings

Make sure to use the "conda_amazonei_tensorflow2_p36" kernel to run this code in AWS SageMaker. If it is not selected, go to Kernel -> Change kernel to choose it.

In [None]:
#!pip list

#!lspci | grep -i nvidia

In [None]:
# Print the installed TensorFlow and Keras versions.
# NOTE(review): Tensorflow (ver 2.3.4); Keras (ver 2.4.3) are presumably the
# versions this notebook was developed with -- confirm before upgrading.

print("Tensorflow version: ", tensorflow.__version__)
print("Keras version: ", keras.__version__)

In [None]:
# Check whether a GPU is available: print how many GPU devices
# TensorFlow can see, followed by the list of those devices.

import tensorflow as tf

print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print("GPU device: ", tf.config.list_physical_devices('GPU'))

TensorFlow supports running computations on a variety of device types, including CPU and GPU. They are represented with string identifiers. For example:  

"/device:CPU:0" : CPU of your machine  
"/physical_device:GPU:0": GPU visible to TensorFlow.  

TensorFlow code, with Keras included, can run on a GPU by default without requiring explicit code configuration. If both CPU and GPU are available, TensorFlow will run the GPU-capable code unless otherwise specified.

In [None]:
# Turn on device-placement logging to see which devices your operations
# and tensors are assigned to
tf.debugging.set_log_device_placement(True)

# Optional check: uncomment the lines below to run a small matrix
# multiplication and print its result.
#a = tf.constant([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
#b = tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
#c = tf.matmul(a, b)
#print(c)

In [None]:
# check GPU memory usage & utilization
#!nvidia-smi

# To check the GPU memory usage while the process is running
# open a terminal in the directory (Go to New-> Terminal) and type the above code

# 2. Functions

In [None]:
def load_data_txt(filename, colname, record):
    """
    Read the tab-separated input file, clean it, and split it into X and y.

    filename: tab-separated text file without a header row; its columns are
              read as pmid, pubtype, year, title, abstract
    colname: which text becomes the "sentence" column:
             "title", "abs" (abstract) or "mix" (title + " " + abstract)
    record: open text file; every summary printed to stdout is also written here

    Returns (X, y): X is a dataframe with the columns ['pmid', 'sentence'] and
    y is the 'label' series (the renamed 'pubtype' column).

    Raises ValueError if colname is not "title", "abs" or "mix".
    """

    # Maps the colname option to the dataframe column holding the selected text
    text_columns = {"title": "title", "abs": "abstract", "mix": "mix"}
    if colname not in text_columns:
        raise ValueError(
            "colname must be one of {}; got {!r}".format(sorted(text_columns), colname))

    def report(*args):
        # Write the same message to the record file and to stdout
        print(*args, file=record)
        print(*args)

    ## 1. Read in data from input file
    df = pd.read_csv(filename, sep="\t", encoding='utf-8', header=None,
                     names=['pmid', 'pubtype', 'year', 'title', 'abstract'])

    # No of rows and columns
    report("No of Rows: {}".format(df.shape[0]))
    report("No of Columns: {}".format(df.shape[1]))

    ## 2. Select data needed for processing
    df = df[['pmid', 'title', 'abstract', 'pubtype']]

    ## 3. Cleaning data
    # Remove null values first: a missing title/abstract is a float NaN,
    # which has no .strip() and would crash the trimming step below.
    df = df.dropna().copy()

    # Trim unnecessary spaces (astype(str) also covers texts parsed as numbers)
    df["title"] = df["title"].astype(str).str.strip()
    df["abstract"] = df["abstract"].astype(str).str.strip()

    report("No of rows (After dropping null): {}".format(df.shape[0]))
    report("No of columns: {}".format(df.shape[1]))

    # Remove duplicates and keep first occurrence
    df = df.drop_duplicates(subset=['pmid'], keep='first')

    report("No of rows (After removing duplicates): {}".format(df.shape[0]))

    ## 4. Select text column
    if colname == "mix":
        df["mix"] = df["title"] + " " + df["abstract"]

    source = text_columns[colname]
    df = df[['pmid', source, 'pubtype']].rename(
        columns={source: "sentence", "pubtype": "label"})

    # Check the first few instances
    report("\n<Data View: First Few Instances>")
    report("\n", df.head())

    # No of labels and rows
    report('\nClass Counts(label, row): Total')
    report(df["label"].value_counts())

    ## 5. Split into X (pmid, sentence) and y (label)
    X, y = df.iloc[:, :-1], df.iloc[:, -1]

    return X, y

In [None]:
def preprocess_data(X_data_raw):
    """
       Preprocess text: lowercase conversion, punctuation removal, tokenization,
       stopword removal, and stemming

       X_data_raw: X data in dataframe; the text is read from its last column

       Returns a series of cleaned strings (stemmed tokens joined by single spaces).
    """

    X_data = X_data_raw.iloc[:, -1].astype(str)

    # 1. convert all characters to lowercase
    X_data = X_data.str.lower()

    # 2. remove punctuation
    # regex=True is passed explicitly: newer pandas versions treat the pattern
    # as a literal string by default, which would leave the punctuation in place.
    X_data = X_data.str.replace(r'[^\w\s]', '', regex=True)

    # 3. tokenize sentence
    X_data = X_data.apply(nltk.word_tokenize)

    # 4. remove stopwords (a set makes each membership test O(1))
    stopword_set = set(stopwords.words("english"))
    X_data = X_data.apply(lambda tokens: [word for word in tokens if word not in stopword_set])

    # 5. stemming
    stemmer = PorterStemmer()
    X_data = X_data.apply(lambda tokens: [stemmer.stem(token) for token in tokens])

    # 6. join the tokens back into one string separated by single spaces
    X_data = X_data.apply(" ".join)

    print("\n<After preprocessing training data>")
    print(X_data)

    return X_data

In [None]:
# plot loss and accuracy of training & validation
def plot_history(history):
    """
    Draw two side-by-side panels from a training history:
    training vs. validation accuracy (left) and training vs. validation loss (right)

    history: object whose .history dict holds the 'binary_accuracy',
             'val_binary_accuracy', 'loss' and 'val_loss' value lists
    """

    plt.style.use('ggplot')

    logged = history.history
    train_acc, valid_acc = logged['binary_accuracy'], logged['val_binary_accuracy']
    train_loss, valid_loss = logged['loss'], logged['val_loss']

    # x axis: 1..number of recorded values
    epoch_axis = range(1, len(train_acc) + 1)

    plt.figure(figsize=(12,5))

    # (training values, training label, validation values, validation label, title)
    panels = (
        (train_acc, 'Training acc', valid_acc, 'Validation acc',
         'Training and validation accuracy'),
        (train_loss, 'Training loss', valid_loss, 'Validation loss',
         'Training and validation loss'),
    )

    for position, (train_values, train_label, valid_values, valid_label, title) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(epoch_axis, train_values, 'b', label=train_label)
        plt.plot(epoch_axis, valid_values, 'r', label=valid_label)
        plt.title(title)
        plt.legend()

In [None]:
def create_cnn_model(maxlen, vocab_size, record):
    """
    Build and compile the CNN for binary text classification

    maxlen: length of the padded input sequences (input_length of the embedding layer)
    vocab_size: vocabulary size (input_dim of the embedding layer)
    record: open text file that receives the model summary

    Returns the compiled Sequential model.
    """

    embedding_dim = 100

    # define the model
    model = Sequential()

    # embedding layer (trained together with the rest of the network)
    model.add(layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=maxlen))

    # first convolutional layer: 512 filters, kernel size 2
    model.add(layers.Conv1D(512, 2, activation='relu'))

    # pooling layer
    model.add(layers.MaxPooling1D())

    # second convolutional layer: 512 filters, kernel size 3
    model.add(layers.Conv1D(512, 3, activation='relu'))

    # second pooling layer
    model.add(layers.MaxPooling1D())

    # flattening
    model.add(layers.Flatten())

    # add dropout to prevent overfitting
    model.add(layers.Dropout(0.5))

    # full connection: a single sigmoid unit for binary classification
    model.add(layers.Dense(units=1, activation='sigmoid'))

    # compile the model
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['binary_accuracy',
                           keras.metrics.Precision(name='precision'),
                           keras.metrics.Recall(name='recall')])

    def write_summary_line(line, **_ignored):
        # Extra keyword arguments that some Keras versions pass to print_fn are ignored
        print(line, file=record)

    # summarize the model
    # model.summary() prints by itself and returns None, so it must be given a
    # print_fn to reach the record file; print(model.summary(), file=record)
    # would only write "None" there.
    print("\n************* Model Summary *************", file=record)
    model.summary(print_fn=write_summary_line)

    print("\n************* Model Summary *************")
    model.summary()

    return model

# 3. Word Embedding (optional): using a precomputed embedding space (GloVe)  

File ('glove.6B.zip') is downloaded from https://nlp.stanford.edu/projects/glove/  

In [None]:
def create_embedding_matrix(filepath, word_index, embedding_dim):
    """
    Build an embedding matrix from a pretrained word-vector text file

    filepath: text file with one entry per line: a word followed by its
              whitespace-separated vector values
    word_index: dict mapping each vocabulary word to its integer index
    embedding_dim: number of leading vector values to keep per word

    Returns an array of shape (len(word_index) + 1, embedding_dim); rows of
    words that do not appear in the file stay all zeros.
    """
    vocab_size = len(word_index) + 1  # adding 1 because of reserved 0 index
    embedding_matrix = np.zeros((vocab_size, embedding_dim))

    with open(filepath, encoding='utf-8') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # blank line (e.g. at the end of the file): nothing to unpack
                continue

            word, vector = fields[0], fields[1:]
            idx = word_index.get(word)
            if idx is None:
                continue

            # keep only the first embedding_dim values of the stored vector
            embedding_matrix[idx] = np.asarray(vector[:embedding_dim], dtype=np.float32)

    return embedding_matrix

In [None]:
# after downloading embedding dict, put the file in the same directory as this notebook
#filename = 'glove.6B/glove.6B.300d.txt'

def create_cnn_model_with_dic(filename, tokenizer, maxlen, vocab_size, record):
    """
    Build and compile the CNN with a frozen, pretrained embedding layer

    filename: pretrained word-vector text file passed to create_embedding_matrix
    tokenizer: fitted tokenizer; its word_index maps words to integer indices
    maxlen: length of the padded input sequences (input_length of the embedding layer)
    vocab_size: vocabulary size (input_dim of the embedding layer)
    record: open text file that receives the model summary

    Returns the compiled Sequential model.
    """

    embedding_dim = 100

    # To use embedding matrix to model
    embedding_matrix = create_embedding_matrix(filename, tokenizer.word_index, embedding_dim)

    # Share of the vocabulary that has a pretrained (non-zero) vector.
    # The inner count must run per row (axis=1); without the axis it collapses
    # to one scalar and the outer count can only ever be 0 or 1.
    nonzero_elements = np.count_nonzero(np.count_nonzero(embedding_matrix, axis=1))
    print(nonzero_elements / vocab_size)

    # define the model
    model = Sequential()

    # embedding layer initialised with the pretrained matrix and kept frozen
    model.add(layers.Embedding(input_dim=vocab_size,
                               output_dim=embedding_dim,
                               weights=[embedding_matrix],
                               input_length=maxlen,
                               trainable=False))

    ## convolutional layers
    # first convolutional layer: 512 filters, kernel size 2
    model.add(layers.Conv1D(512, 2, activation='relu'))

    # pooling layer
    model.add(layers.MaxPooling1D())

    # second convolutional layer: 512 filters, kernel size 3
    model.add(layers.Conv1D(512, 3, activation='relu'))

    # second pooling layer
    model.add(layers.MaxPooling1D())

    # flattening
    model.add(layers.Flatten())

    # add dropout to prevent overfitting
    model.add(layers.Dropout(0.5))

    # full connection: a single sigmoid unit for binary classification
    model.add(layers.Dense(units=1, activation='sigmoid'))

    ## compile the model
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['binary_accuracy',
                           keras.metrics.Precision(name='precision'),
                           keras.metrics.Recall(name='recall')])

    def write_summary_line(line, **_ignored):
        # Extra keyword arguments that some Keras versions pass to print_fn are ignored
        print(line, file=record)

    ## summarize the model
    # model.summary() prints by itself and returns None, so it must be given a
    # print_fn to reach the record file; print(model.summary(), file=record)
    # would only write "None" there.
    print("\n************* Model Summary *************", file=record)
    model.summary(print_fn=write_summary_line)

    print("\n************* Model Summary *************")
    model.summary()

    return model

# 4. Main Code

In [None]:
def main(input_file,
         colname,
         max_len,
         batch_size,
         epochs,
         eval_on,
         result_file,
         datasize_change,
         ratio):

    """
       Main function for processing data, model training, and evaluation

       input_file: input file
       colname: column name for selection between title, abstract, or both
       max_len: max length of tokens (sequences are padded/truncated to it)
       batch_size: batch size for training the model
       epochs: number of training and validation loops
       eval_on: when true, the test-set loss/accuracy/precision/recall/F1 are reported;
                the confusion matrix and classification report are always produced
       result_file: name of the output file; records are appended to it
       datasize_change: when true, the training set is subsampled to `ratio`
       ratio: proportion of the training data to keep (used only if datasize_change)

    """

    #### 0. open result file for records
    # The with-block guarantees the file is closed even if a later step raises.
    # utf-8 matches the input encoding, so non-ASCII text can always be written.
    with open(result_file, "a", encoding="utf-8") as f:

        def log(*args):
            # Write the same message to the result file and to stdout
            print(*args, file=f)
            print(*args)

        def log_split_summary(splits, first_prefix="", blank_after_counts=False):
            # splits: list of (name, features dataframe, labels series)
            for position, (name, features, _) in enumerate(splits):
                prefix = first_prefix if position == 0 else ""
                log("{}{} Data: {}".format(prefix, name, features.shape))

            for name, _, labels in splits:
                log('\nClass Counts(label, row): {}'.format(name))
                log(labels.value_counts())
            if blank_after_counts:
                log("\n")

            for name, features, _ in splits:
                log("\n<X_{} Data>".format(name.lower()))
                log(features.head())

        # Check the version of Tensorflow and Keras used
        log("\n************** Version **************")
        log("Tensorflow version: ", tensorflow.__version__)
        log("Keras version: ", keras.__version__)

        # Check processing time
        proc_start_time = timeit.default_timer()

        #### 1. Load data
        log("\n************** Loading Data ************\n")
        X, y = load_data_txt(input_file, colname, record=f)

        # Positional access: the row labelled 0 may have been dropped during cleaning
        log("\n<First Sentence>\n{}".format(X.sentence.iloc[0]))

        #### 2. Train and test split (80% train, 10% validation, 10% test)
        log("\n************** Spliting Data **************\n")

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
        X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5, random_state=42, stratify=y_test)

        log_split_summary([("Train", X_train, y_train),
                           ("Val", X_val, y_val),
                           ("Test", X_test, y_test)])

        #### 3. Data size change
        if datasize_change:
            resized_count = int(X_train.shape[0]*ratio)
            log("\n************** Data Size Change *************\n")
            # The record file and stdout use different labels for this line
            print("Data Ratio (size): {} ({})".format(ratio, resized_count), file=f)
            print("Data Size: {} ({})".format(ratio, resized_count))

            X_train, _, y_train, _ = train_test_split(X_train, y_train, train_size=ratio, random_state=42, stratify=y_train)

        # Reset index so that position 0 is also label 0 in every split
        X_train = X_train.reset_index(drop=True)
        X_val = X_val.reset_index(drop=True)
        X_test = X_test.reset_index(drop=True)
        y_train = y_train.reset_index(drop=True)
        y_val = y_val.reset_index(drop=True)
        y_test = y_test.reset_index(drop=True)

        log("\n************** Processing Data **************")
        log_split_summary([("Train", X_train, y_train),
                           ("Val", X_val, y_val),
                           ("Test", X_test, y_test)],
                          first_prefix="\n",
                          blank_after_counts=True)

        ## 4. Transformation
        log("\n************** Transforming Text into Vectors **************")
        sentences_train = X_train.iloc[:, -1]
        sentences_val = X_val.iloc[:, -1]
        sentences_test = X_test.iloc[:, -1]

        # stdout only
        for name, sentences in (("train", sentences_train),
                                ("val", sentences_val),
                                ("test", sentences_test)):
            print("\nsentences_{}: ".format(name), sentences.shape)
            print(sentences.head())

        # prepare tokenizer: the vocabulary is built from the training texts only
        tokenizer = Tokenizer()
        tokenizer.fit_on_texts(sentences_train)

        vocab_size = len(tokenizer.word_index) + 1

        log("\nvocab size: ", vocab_size)

        # integer encode the texts
        X_train = tokenizer.texts_to_sequences(sentences_train)
        X_val = tokenizer.texts_to_sequences(sentences_val)
        X_test = tokenizer.texts_to_sequences(sentences_test)

        for name, sentences, encoded in (("Train", sentences_train, X_train),
                                         ("Val", sentences_val, X_val),
                                         ("Test", sentences_test, X_test)):
            log("\nFirst Instance: {}\n".format(name), sentences.iloc[0])
            log("\n", encoded[0])

        # pad texts to a pre-defined max length
        X_train = pad_sequences(X_train, padding='post', maxlen=max_len)
        X_val = pad_sequences(X_val, padding='post', maxlen=max_len)
        X_test = pad_sequences(X_test, padding='post', maxlen=max_len)

        log("<X_train vector>\n", X_train[0, :])

        #### 5. Model Fitting
        log("\n************** Training Model: CNN **************")

        cnn_model = create_cnn_model(max_len, vocab_size, record=f)

        history = cnn_model.fit(X_train,
                                y_train,
                                epochs=epochs,
                                verbose=True,
                                validation_data=(X_val, y_val),
                                batch_size=batch_size)

        # plot loss & accuracy
        print("\n")
        plot_history(history)

        ## 6. Evaluating model performance
        log('\n************** Model Evaluation **************')

        if eval_on:
            loss, acc, pre, rec = cnn_model.evaluate(X_test, y_test, verbose=False)
            # F1 is reported as 0 when precision and recall are both 0
            # (instead of dividing by zero)
            f1 = 2 * ((pre*rec)/(pre+rec)) if (pre + rec) > 0 else 0.0

            log("\nTest evaluation: loss({:.4f}), acc({:.4f}), pre({:.4f}), rec({:.4f}))".format(loss, acc, pre, rec))
            log("\nLoss: {:.4f}".format(loss))
            log("\nAccuracy: {:.4f}".format(acc))
            log("\nPrecision Recall F1")
            log("{:.4f}\t{:.4f}\t{:.4f}".format(pre, rec, f1))

        else:
            log("No Evaluation Conducted")

        # Create a confusion matrix and a classification report
        # (accuracy, precision, recall, f1); sigmoid outputs above 0.5 count as class 1
        predictions = cnn_model.predict(X_test)
        y_pred = (predictions > 0.5).astype("int32")

        log('\nConfusion Matrix:')
        log(confusion_matrix(y_test, y_pred))

        log("\n******** Classification Report ********")
        log(classification_report(y_test, y_pred, digits=4))

        log("\nOutput file:'" + result_file + "' Created")

        # check processing time
        proc_elapsed = timeit.default_timer() - proc_start_time
        log("\nTotal Processing Time: {}min\n".format(round(proc_elapsed/60)))

# 5. Run code 

In [None]:
#%%time

# "%%time" is commented out so that this code can also be run from a Linux terminal
# If using a Jupyter notebook, uncomment the line above to check the running time

if __name__ == "__main__":

    ######## 1. Set Parameter Values ########

    #### 1-1. Input file name & which column
    # column_name: 'title' for title text; 'abs' for abstract; 'mix' for title + abstract
    input_filename = "output_rct.txt"
    column_name = "mix"

    #### 1-2. Data size change?
    # datachange_on: 0 for no change; 1 to run once per ratio in ratio_list
    datachange_on = 0
    ratio_list = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

    #### 1-3. Evaluating model performance?
    # eval_on: 0 for no; 1 for yes
    eval_on = 1

    #### 1-4. Hyperparameters for CNN
    MAX_LEN = 512       # 150 for title; 512 for abs
    BATCH_SIZE = 16     # Batch size: 16 or 32
    EPOCHS = 4          # Number of epochs: 2,3,4

    ######## 2. Run Main Function ########
    # Each run is a (ratio, result file name) pair:
    # one run per ratio when the data size changes, otherwise a single full-size run
    if datachange_on:
        runs = [(r, "eval_cnn_" + str(r) + "_" + column_name + ".txt") for r in ratio_list]
    else:
        runs = [(1, "eval_cnn_all_" + column_name + ".txt")]

    for ratio, eval_file in runs:
        main(input_file=input_filename,
             colname=column_name,
             max_len=MAX_LEN,
             batch_size=BATCH_SIZE,
             epochs=EPOCHS,
             eval_on=eval_on,
             result_file=eval_file,
             datasize_change=datachange_on,
             ratio=ratio)

    print("\n************** Processing Completed **************\n")