In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import tensorflow as tf


# HYPER-PARAMETERS

In [3]:
TEST_SIZE = 0.2


# Optimization

In [4]:
def reduce_mem_usage(df : pd.DataFrame):
    start_mem = df.memory_usage().sum() / 1024 ** 2
    for col in df.columns:
        col_type = df[col].dtypes
        if str(col_type)[:5] == 'float':
            c_min = df[col].min()
            c_max = df[col].max()
            if c_min > np.finfo('f2').min and c_max < np.finfo('f2').max:
                df[col] = df[col].astype(np.float16)
            elif c_min > np.finfo('f4').min and c_max < np.finfo('f4').max:
                df[col] = df[col].astype(np.float32)
            else:
                df[col] = df[col].astype(np.float64)
        elif str(col_type)[:3] == 'int':
            c_min = df[col].min()
            c_max = df[col].max()
            if c_min > np.iinfo('i1').min and c_max < np.iinfo('i1').max:
                df[col] = df[col].astype(np.int8)
            elif c_min > np.iinfo('i2').min and c_max < np.iinfo('i2').max:
                df[col] = df[col].astype(np.int16)
            elif c_min > np.iinfo('i4').min and c_max < np.iinfo('i4').max:
                df[col] = df[col].astype(np.int32)
            elif c_min > np.iinfo('i8').min and c_max < np.iinfo('i8').max:
                df[col] = df[col].astype(np.int64)
        elif str(col_type)[:8] == 'datetime':
            df[col] = df[col].astype('category')
    end_mem = df.memory_usage().sum() / 1024 ** 2
    print('Потребление памяти меньше на',
         round(start_mem - end_mem, 2),
         'Мб (минус',
         round(100 * (start_mem - end_mem) / start_mem, 1),
         '%)')
    return df

# Data extract

In [38]:
methods = {
    "SHG Intensity" : [
        "SHG Intensity Mean",
        "SHG Intensity MAD",
        "SHG Intensity Contrast",
        "SHG Intensity Correlation",
        "SHG Intensity Entropy",
        "SHG Intensity ASM",
        "SHG Intensity IDM"
    ],
    "R-Ratio" : [
        "R-Ratio Mean",
        "R-Ratio MAD",
        "R-Ratio Contrast",
        "R-Ratio Correlation",
        "R-Ratio Entropy",
        "R-Ratio ASM"
    ],
    "Degree of Circular Polarization" : [
        "Degree of Circular Polarization Mean",
        "Degree of Circular Polarization MAD",
        "Degree of Circular Polarization Contrast",
        "Degree of Circular Polarization Correlation",
        "Degree of Circular Polarization Entropy",
        "Degree of Circular Polarization ASM",
        "Degree of Circular Polarization IDM"
    ],
    "SHG-CD" : [ 
        "SHG-CD MAD",
        "SHG-CD Contrast",
        "SHG-CD Correlation",
        "SHG-CD Entropy",
        "SHG-CD ASM",
        "SHG-CD IDM"
    ],
    "SHG-LD" : [
        "SHG-LD MAD",
        "SHG-LD Contrast",
        "SHG-LD Correlation",
        "SHG-LD Entropy",
        "SHG-LD ASM",
        "SHG-LD IDM"
    ],
    "Params" : [
        "2-Group Tag",
        "Pixel Density",
    ]
}

x_axis = sum([methods[key] for key in methods.keys() if key != "Params"], [])
# x_axis += [methods['Params'][1]]
y_axis = methods["Params"][0]
def getData(table_number):
    data = pd.read_excel(io="../../Data/41598_2022_13623_MOESM3_ESM.xlsx", 
    sheet_name=f"{1 << 2 * (table_number - 1)} Subimage Training")
    data = reduce_mem_usage(data)
    data = (data - data.min()) / (data.max() - data.min())
    #data["2-Group Tag"] = data[y_axis] == 2
    return data[x_axis].to_numpy(), data[y_axis].to_numpy()


# Init data

In [119]:
X_data, y_data = getData(5)
n_samples, n_features, batch_size, num_steps = *X_data.shape, 100, 20000

tf.compat.v1.disable_eager_execution()

X = tf.compat.v1.placeholder(tf.float32, shape=(batch_size, n_feauters))
y = tf.compat.v1.placeholder(tf.float32, shape=(batch_size,))

Потребление памяти меньше на 0.69 Мб (минус 75.4 %)


In [120]:
with tf.compat.v1.variable_scope('linear-regression'):
    k = tf.compat.v1.Variable(X_data[0].astype(np.float32).reshape((n_features, 1)), name='coef')
    b = tf.compat.v1.Variable(tf.zeros((1,)), name='bias')

In [124]:
y_pred = tf.matmul(X, k) + b
loss = tf.reduce_sum( tf.keras.losses.BinaryCrossentropy(from_logits=True)(y, y_pred) )
accuracy = tf.reduce_mean( tf.cast( tf.equal( tf.math.round(y_pred), y ), tf.float32) )
optimizer = tf.compat.v1.train.GradientDescentOptimizer(learning_rate=0.1).minimize(loss)

In [125]:
display_step = 1000
with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.initialize_all_variables())
    for i in range(num_steps):
        #print(sess.run([b]))
        indices = np.random.choice(n_samples, batch_size)
        X_batch, y_batch = X_data[indices], y_data[indices]
#         print(X_batch[0])
#         break
        _, loss_val, k_val, b_val, accuracy_ = sess.run([optimizer, loss, k, b, accuracy], feed_dict = {X : X_batch, y: y_batch})
        if (i + 1) % display_step == 0:
            print(f'Эпоха acc:{accuracy_*100}%,  {i + 1}: {loss_val}, b = {b_val}')

Эпоха acc:12.780000269412994%,  1000: 0.3707754909992218, b = [0.19009428]
Эпоха acc:10.559999942779541%,  2000: 0.27445918321609497, b = [0.25989044]
Эпоха acc:10.360000282526016%,  3000: 0.20695272088050842, b = [0.17563729]
Эпоха acc:7.14000016450882%,  4000: 0.25611811876296997, b = [0.06005829]
Эпоха acc:6.840000301599503%,  5000: 0.2621059715747833, b = [-0.08655767]
Эпоха acc:8.299999684095383%,  6000: 0.16319236159324646, b = [-0.23842528]
Эпоха acc:10.14000028371811%,  7000: 0.2120329886674881, b = [-0.3781189]
Эпоха acc:9.920000284910202%,  8000: 0.232200026512146, b = [-0.53261805]
Эпоха acc:7.999999821186066%,  9000: 0.23521655797958374, b = [-0.68587625]
Эпоха acc:8.550000190734863%,  10000: 0.27598345279693604, b = [-0.81811714]
Эпоха acc:8.51999968290329%,  11000: 0.2520977854728699, b = [-0.9481614]
Эпоха acc:3.9000000804662704%,  12000: 0.26019415259361267, b = [-1.076103]
Эпоха acc:7.360000163316727%,  13000: 0.263037770986557, b = [-1.1859249]
Эпоха acc:7.03999996185