# Imports

In [233]:
import tensorflow as tf
import pandas as pd
import random
import numpy as np

In [234]:
np.random.seed(42)
random.seed(42)

# Functions

In [342]:
def perfMeasure(y_actual, y_hat):
    TP = 0
    FP = 0
    TN = 0
    FN = 0

    for i in range(len(y_hat)): 
        if y_actual[i]==y_hat[i]==1:
           TP += 1
        if y_hat[i]==1 and y_actual[i]!=y_hat[i]:
           FP += 1
        if y_actual[i]==y_hat[i]==0:
           TN += 1
        if y_hat[i]==0 and y_actual[i]!=y_hat[i]:
           FN += 1

    return(TP, FP, TN, FN)

In [343]:
def convergenceChange(y_actual, y_hat, omega):
    TP, FP, TN, FN = perfMeasure(y_actual, y_hat)
    
    if TP + TN + FP + FN == 0:
      return 0, 0, 0, False

    if FP + TP == 0:
      return 0, 0, 0, False

    if TP + FN == 0:
      return 0, 0, 0, False

    Acc = (TP + TN)/(TP + TN + FP + FN)
    PPV = TP/(FP + TP)
    TPR = TP/(TP + FN)
    
    if (Acc > omega and PPV > omega and TPR > omega):
      return Acc, PPV, TPR, True
    return 0, 0, 0, False

# Data and preprocessing

In [237]:
df = pd.read_csv("data_NIS_3.csv")

In [238]:
df.drop(['Serial', 'Histo', 'G', 'pT', 'pN', 'RS_3', 'RStailor_3', 'Bcl_cod', 'ER%Li', 'PGR%Li', 'PGR%LiCod', 'PGRdxCod', 'Ki67%LiCod', 'Her2Score', 'Bio-profiles IHC', 'Diam', 'Her2dx'], axis = 1, inplace = True)

In [239]:
tumor_size = []
for i in range(156):
  tumor_size.append(random.uniform(0.1, 4.2))
df['tumor_size'] = pd.DataFrame(tumor_size)

In [240]:
ganglionic = []
for i in range(156):
  num = random.uniform(0, 1)
  if num >= 0.4:
    num = 1
  else: 
    num = 0
  ganglionic.append(num)
df['ganglionic'] = pd.DataFrame(ganglionic)

In [241]:
nuclei = []
for i in range(156):
  num = random.uniform(0, 3)
  if num < 0.3:
    num = 1
  elif num >= 0.3 and num < 1.6: 
    num = 2
  elif num >= 1.6:
    num = 3
  nuclei.append(num)
df['nuclei'] = pd.DataFrame(nuclei)

In [242]:
glande = []
for i in range(156):
  num = random.uniform(0, 3)
  if num < 0.3:
    num = 1
  elif num >= 0.3 and num < 1.7: 
    num = 2
  elif num >= 1.7:
    num = 3
  glande.append(num)
df['glande'] = pd.DataFrame(glande)

In [243]:
mitosies = []
for i in range(156):
  num = random.uniform(0, 3)
  if num < 0.7:
    num = 1
  elif num >= 0.7 and num < 2.3: 
    num = 2
  elif num >= 2.3:
    num = 3
  mitosies.append(num)
df['mitosies'] = pd.DataFrame(mitosies)

In [244]:
sbr = []
for i in range(156):
  num = random.uniform(0, 3)
  if num < 0.5:
    num = 1
  elif num >= 0.5 and num < 2: 
    num = 2
  elif num >= 2:
    num = 3
  sbr.append(num)
df['SBR'] = pd.DataFrame(sbr)

In [245]:
print("Nuclei grade")
print(df['nuclei'].value_counts())
print()
print("SBR grade")
print(df['SBR'].value_counts())
print()
print("Glande grade")
print(df['glande'].value_counts())
print()
print("Mitosies grade")
print(df['mitosies'].value_counts())
print()
print("ganglonic status")
print(df['ganglionic'].value_counts())

Nuclei grade
3    76
2    69
1    11
Name: nuclei, dtype: int64

SBR grade
2    84
3    56
1    16
Name: SBR, dtype: int64

Glande grade
2    72
3    68
1    16
Name: glande, dtype: int64

Mitosies grade
2    88
1    37
3    31
Name: mitosies, dtype: int64

ganglonic status
1    95
0    61
Name: ganglionic, dtype: int64


In [246]:
print("Tumor size (cm)")
print("< 1  - ", len(df['tumor_size'][df['tumor_size'] < 1]))
print("1.1 - 2.0  - ", len(df['tumor_size'][(df['tumor_size'] > 1) & (df['tumor_size'] < 2)]))
print("2.1 - 4.0  - ", len(df['tumor_size'][(df['tumor_size'] > 2) & (df['tumor_size'] < 4)]))
print("> 4.0  - ", len(df['tumor_size'][df['tumor_size'] > 4]))
print()
print("Estrogen Receptor (RE)")
print("< 10  - ", len(df['ERdx'][df['ERdx'] < 10]))
print("10 - 20  - ", len(df['ERdx'][(df['ERdx'] > 10) & (df['ERdx'] < 20)]))
print("> 20  - ", len(df['ERdx'][df['ERdx'] > 20]))
print()
print("Progestrone Receptor (PR)")
print("< 5  - ", len(df['PGRdx'][df['PGRdx'] < 5]))
print("5 - 10  - ", len(df['PGRdx'][(df['PGRdx'] > 5) & (df['PGRdx'] < 10)]))
print("> 10  - ", len(df['PGRdx'][df['PGRdx'] > 10]))
print()
print("Proliferation Rate (Ki-67)")
print("< 10  - ", len(df['Ki67%Li'][df['Ki67%Li'] < 10]))
print("10 - 20  - ", len(df['Ki67%Li'][(df['Ki67%Li'] > 10) & (df['Ki67%Li'] < 20)]))
print("> 20  - ", len(df['Ki67%Li'][df['Ki67%Li'] > 20]))
print()
print("Age")
print("< 40 years  - ", len(df['Age'][df['Age'] < 40]))
print("40 - 49 years  - ", len(df['Age'][(df['Age'] > 40) & (df['Age'] < 49)]))
print("50 - 59 years  - ", len(df['Age'][(df['Age'] > 50) & (df['Age'] < 59)]))
print("> 59 years  - ", len(df['Age'][df['Age'] > 59]))

Tumor size (cm)
< 1  -  36
1.1 - 2.0  -  39
2.1 - 4.0  -  73
> 4.0  -  8

Estrogen Receptor (RE)
< 10  -  59
10 - 20  -  96
> 20  -  0

Progestrone Receptor (PR)
< 5  -  15
5 - 10  -  132
> 10  -  1

Proliferation Rate (Ki-67)
< 10  -  26
10 - 20  -  58
> 20  -  65

Age
< 40 years  -  5
40 - 49 years  -  21
50 - 59 years  -  31
> 59 years  -  84


In [247]:
print("RS")
print("< 15  - ", len(df['RS'][df['RS'] < 15]))
print("15 - 25  - ", len(df['RS'][(df['RS'] > 15) & (df['RS'] < 25)]))
print("> 25  - ", len(df['RS'][df['RS'] > 25]))

RS
< 15  -  88
15 - 25  -  41
> 25  -  14


In [248]:
df['target_1'] = np.where(df['RS'] < 15, 1, 0)
df['target_2'] = np.where((df['RS'] > 15) & (df['RS'] < 25), 1, 0)
df['target_3'] = np.where(df['RS'] > 25, 1, 0)

In [249]:
df

Unnamed: 0,Age,RS,ERdx,PGRdx,Ki67%Li,tumor_size,ganglionic,nuclei,glande,mitosies,SBR,target_1,target_2,target_3
0,71,44,12.3,3.6,27,2.721650,0,2,3,2,3,0,0,1
1,74,9,10.8,7.8,17,0.202544,1,2,1,2,2,1,0,0
2,74,10,9.8,8.1,17,1.227620,0,3,2,1,3,1,0,0
3,65,5,12.5,7.7,19,1.015164,0,3,2,1,2,1,0,0
4,75,25,10.3,4.5,15,3.119532,0,2,2,2,2,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151,79,4,10.8,9.1,10,2.764500,1,2,2,1,3,1,0,0
152,63,14,10.3,8.1,16,1.896210,1,3,2,3,2,1,0,0
153,41,17,8.3,8.0,26,2.222061,1,2,2,3,1,0,1,0
154,74,21,9.9,5.0,12,0.596117,1,2,2,2,2,0,1,0


In [255]:
df_test = df.iloc[:78,:]
df_train = df.iloc[78:,:]

In [294]:
df_test_y_1 = df_test['target_1']
df_test_1 = df_test.drop(['target_3', 'target_2', 'target_1', 'RS'], axis=1)
df_train_y_1 = df_train['target_1']
df_train_1 = df_train.drop(['target_3', 'target_2', 'target_1', 'RS'], axis=1)

df_test_y_2 = df_test['target_2']
df_test_2 = df_test.drop(['target_3', 'target_2', 'target_1', 'RS'], axis=1)
df_train_y_2 = df_train['target_2']
df_train_2 = df_train.drop(['target_3', 'target_2', 'target_1', 'RS'], axis=1)

df_test_y_3 = df_test['target_3']
df_test_3 = df_test.drop(['target_3', 'target_2', 'target_1', 'RS'], axis=1)
df_train_y_3 = df_train['target_3']
df_train_3 = df_train.drop(['target_3', 'target_2', 'target_1', 'RS'], axis=1)

# Model and Training

In [267]:
t = 0
l = 0
ConvCond = False
Max_n = 128
Max_HL = 16

In [351]:
def constructNN(Max_n, Max_HL, ConvCond, l, t, test_X, train_X, test_y, train_y):
    Acc_history = []
    PPV_history = []
    TPR_history = []

    omega_range = [0.1, 0.2, 0.3, 0.4, 0.5, 0.7, 0.8, 0.9]

    for omega in omega_range:
      model = tf.keras.models.Sequential()
      while (ConvCond == False):
          model.add(tf.keras.layers.Dense(1, kernel_initializer = 'glorot_normal', bias_initializer = 'glorot_normal', activation='sigmoid'))
          n_l = 1
    
          if (n_l <= Max_n and ConvCond == False):
              model = tf.keras.models.Sequential(model.layers[:-1])
              model.add(tf.keras.layers.Dense(n_l + 1, kernel_initializer = 'glorot_normal', bias_initializer = 'glorot_normal', activation='sigmoid'))
    
          if (l <= Max_HL and ConvCond == False):
              model.add(tf.keras.layers.Dense(1, kernel_initializer = 'glorot_normal', bias_initializer = 'glorot_normal', activation='sigmoid'))

          if (l > Max_HL and ConvCond == False):
              return 0, 0, 0, 0

          model_res = model
          model_res.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
              loss=tf.keras.losses.BinaryCrossentropy(), metrics=['accuracy'])
          model_res.fit(test_X, test_y)
          model = tf.keras.models.Sequential(model.layers[:-1])
          model.add(model_res.layers[-1])
          pred_y = model_res.predict(train_X)
          Acc, PPV, TPR, ConvCond = convergenceChange(np.array(train_y), np.array(pred_y), omega)
          Acc_history.append(Acc)
          PPV_history.append(PPV)
          TPR_history.append(TPR)
          l += 1
          n_l += 1
          t += 1
    return Acc_history, PPV_history, TPR_history

In [352]:
Acc_1, PPV_1, TPR_1 = constructNN(Max_n, Max_HL, ConvCond, l, t, df_test_1.to_numpy(), df_train_1.to_numpy(), df_test_y_1.to_numpy(), df_train_y_1.to_numpy())



ValueError: ignored