<a href="https://colab.research.google.com/github/jkordonis/TropicalML/blob/main/ReLUcompression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [242]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import math
from scipy.linalg import block_diag
import os
import io
import cvxpy as cp





```
# This is formatted as code
```

# MNIST Train from Keras

In [243]:
num_classes = 10 # Initially 10 classes
input_shape = (28, 28, 1)

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data()

# Scale images to the [0, 1] range
x_train = x_train.astype("float32") / 255
x_test = x_test.astype("float32") / 255
# Make sure images have shape (28, 28, 1)
x_train = np.expand_dims(x_train, -1)
x_test = np.expand_dims(x_test, -1)
print("x_train shape:", x_train.shape)
print(x_train.shape[0], "train samples")
print(x_test.shape[0], "test samples")


# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

x_train shape: (60000, 28, 28, 1)
60000 train samples
10000 test samples


In [244]:
Ner=100
model = keras.Sequential(
    [
        keras.Input(shape=input_shape),
        layers.Flatten(),
        layers.Dense(Ner, activation="relu"),     
        layers.Dense(10, activation="softmax"),
    ]
)

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_1 (Flatten)         (None, 784)               0         
                                                                 
 dense_2 (Dense)             (None, 100)               78500     
                                                                 
 dense_3 (Dense)             (None, 10)                1010      
                                                                 
Total params: 79,510
Trainable params: 79,510
Non-trainable params: 0
_________________________________________________________________


In [245]:
batch_size = 128
epochs = 8

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=0.1)

Epoch 1/8
Epoch 2/8
Epoch 3/8
Epoch 4/8
Epoch 5/8
Epoch 6/8
Epoch 7/8
Epoch 8/8


<keras.callbacks.History at 0x7f93f033b2d0>

In [246]:
W1=model.layers[1].weights[0].numpy()
b1=model.layers[1].weights[1].numpy()
W2=model.layers[2].weights[0].numpy()
b2=model.layers[2].weights[1].numpy()



```
# This is formatted as code
```

# Reshaping Data for 3-5 distinction and Tropical Polynomials

In [247]:
x_test_vec =np.zeros([10000, 784])
for i in range(10000):
  b = np.reshape(x_test[i],(784))
  x_test_vec[i]=b

err=0
y_pred=np.zeros((10000,10))
for i in range(10000):
  z =  np.matmul(W1.T,x_test_vec[i]) +b1
  z=np.maximum(z, np.zeros(Ner))
  y=np.matmul(W2.T,z)+b2
  y_pred[i]=y
  err_i=0
  if np.inner(y_test[i],[0,0,0,1,0,1,0,0,0,0])==1:
    if np.argmax(y)!=np.argmax(y_test[i]):
      err_i=1
  err=err+err_i
err

54

In [248]:
x_test_3_5=[]
y_test_3_5=[]


for i in range(10000):
  if np.inner(y_test[i],[0,0,0,1,0,1,0,0,0,0])==1:
    x_test_3_5.append(x_test_vec[i])
    y_test_3_5.append(-1)
    if np.inner(y_test[i],[0,0,0,1,0,0,0,0,0,0])==1:
      y_test_3_5[-1]=1
    if np.inner(y_test[i],[0,0,0,0,0,1,0,0,0,0])==1:
      y_test_3_5[-1]=0

x_test_3_5=np.array(x_test_3_5)
y_test_3_5=np.array(y_test_3_5)


x_train_3_5=[]
y_train_3_5=[]
x_train_vec =np.zeros([60000, 784])
for i in range(60000):
  b = np.reshape(x_train[i],(784))
  x_train_vec[i]=b



for i in range(60000):
  if np.inner(y_train[i],[0,0,0,1,0,1,0,0,0,0])==1:
    x_train_3_5.append(x_train_vec[i])
    y_train_3_5.append(-1)
    if np.inner(y_train[i],[0,0,0,1,0,0,0,0,0,0])==1:
      y_train_3_5[-1]=1
    if np.inner(y_train[i],[0,0,0,0,0,1,0,0,0,0])==1:
      y_train_3_5[-1]=0

x_train_3_5=np.array(x_train_3_5)
y_train_3_5=np.array(y_train_3_5)


In [249]:
# 3-5 IniOriginal  Error

err=0
for i in range(x_test_3_5.shape[0]):
  z =  np.matmul(W1.T,x_test_3_5[i]) +b1
  z=np.maximum(z, np.zeros(Ner))
  y=np.matmul(W2.T,z)+b2
  err_i=0
  if y[3]>y[5] and y_test_3_5[i]==0:#if np.argmax(y)!=5 and y_test_3_5[i]==0:#
    err_i=1
  if y[3]<y[5] and y_test_3_5[i]==1:#if np.argmax(y)!=3 and y_test_3_5[i]==1:#
    err_i=1
  err=err+err_i
err/x_test_3_5.shape[0]

0.00946372239747634

In [250]:
# Dimension Reduction
W2_=W2[:,3]-W2[:,5]
b2_=b2[3]-b2[5]

err=0
for i in range(x_test_3_5.shape[0]):
  z =  np.matmul(W1.T,x_test_3_5[i]) +b1
  z=np.maximum(z, np.zeros(Ner))
  y=np.matmul(W2_.T,z)+b2_
  err_i=0
  if y>0 and y_test_3_5[i]==0:#if y[3]>y[5] and y_test_3_5[i]==0:
    err_i=1
  if y<0 and y_test_3_5[i]==1:#if y[3]<y[5] and y_test_3_5[i]==1:
    err_i=1
  err=err+err_i
err/x_test_3_5.shape[0]

0.00946372239747634

In [251]:
W2_pl=np.maximum(W2_,np.zeros(W2_.shape))
W2_min=np.maximum(-W2_,np.zeros(W2_.shape))
ap_1 = np.zeros((W1.shape[1],W1.shape[0]))
bp_1 = np.zeros(W1.shape[1])
ap_2 = np.zeros((W1.shape[1],W1.shape[0]))
bp_2 = np.zeros(W1.shape[1])


for i in range(W1.shape[1]):
  ap_1[i]=W2_pl[i]*W1.T[i]
  bp_1[i]=W2_pl[i]*b1[i]
  ap_2[i]=W2_min[i]*W1.T[i]
  bp_2[i]=W2_min[i]*b1[i]

# QR Dimensionality Reduction 

```
# This is formatted as code
```



In [252]:
a1_vec=ap_1.T
a2_vec=ap_2.T

True_Val_1=(np.array([np.linalg.norm(a1_vec[:,i]) for i in range(a1_vec.shape[1])])!=0)
True_Val_2=(np.array([np.linalg.norm(a2_vec[:,i]) for i in range(a2_vec.shape[1])])!=0)

a1_nonz_vec  = a1_vec[:,True_Val_1]
a2_nonz_vec  = a2_vec[:,True_Val_2]

print(a1_nonz_vec.shape)
print(a2_nonz_vec.shape)

Q1,R1=np.linalg.qr(a1_nonz_vec,mode='reduced')
Q2,R2=np.linalg.qr(a2_nonz_vec,mode='reduced')
 
a1_red_vec = np.matmul(Q1.T,a1_nonz_vec)
a2_red_vec = np.matmul(Q2.T,a2_nonz_vec)
bp_1_red=bp_1[True_Val_1]
bp_2_red=bp_2[True_Val_2]

print(a1_red_vec.shape)
print(a2_red_vec.shape)

(784, 54)
(784, 46)
(54, 54)
(46, 46)


In [382]:

X_sample1 = np.matmul(x_train_3_5,Q1) 
X_sample2 = np.matmul(x_train_3_5,Q2) 


X_sample1_test = np.matmul(x_test_3_5,Q1) 
X_sample2_test = np.matmul(x_test_3_5,Q2) 

m_q=5

print(X_sample1_test.shape)
print(a1_red_vec.shape)






(1902, 54)
(54, 54)


In [383]:
err=0

for i in range(X_sample1_test.shape[0]):
  p1_val = np.sum( np.maximum(np.matmul(a1_red_vec.T,X_sample1_test[i]) +bp_1_red,np.zeros(a1_red_vec.shape[0])))
  p2_val = np.sum( np.maximum(np.matmul(a2_red_vec.T,X_sample2_test[i]) +bp_2_red,np.zeros(a2_red_vec.shape[0])))
  p_val =p1_val-p2_val+b2_
  y=p_val
  err_i=0
  if y>0 and y_test_3_5[i]==0:#if y[3]>y[5] and y_test_3_5[i]==0:
    err_i=1
  if y<0 and y_test_3_5[i]==1:#if y[3]<y[5] and y_test_3_5[i]==1:
    err_i=1
  err=err+err_i
err/X_sample1_test.shape[0]




0.00946372239747634

In [384]:
err=0
for i in range(X_sample1.shape[0]):
  p1_val = np.sum( np.maximum(np.matmul(a1_red_vec.T,X_sample1[i]) +bp_1_red,np.zeros(a1_red_vec.shape[0])))
  p2_val = np.sum( np.maximum(np.matmul(a2_red_vec.T,X_sample2[i]) +bp_2_red,np.zeros(a2_red_vec.shape[0])))
  p_val =p1_val-p2_val+b2_
  y=p_val
  err_i=0
  if y>0 and y_train_3_5[i]==0:
    err_i=1
  if y<0 and y_train_3_5[i]==1:
    err_i=1
  err=err+err_i
err/x_train_3_5.shape[0]#Training error(<Test error)

0.00735803324099723

#  Tropical Division 1

In [385]:
# Martix Definitions

A=a1_red_vec
d=A.shape[1]
m_p=A.shape[0]
B=bp_1_red

p_pol=(A.T,B)


A_inverse = np.linalg.inv(A)
B_transp_A_inverse = np.matmul(B.T,A_inverse)
B_transp_A_inverse_mat=B_transp_A_inverse 
A_inverse_hor_conc=A_inverse



a_hat, b_hat=np.array(np.random.randn(m_q,d)), np.array(np.random.randn(m_q))
q_pol=(a_hat, b_hat)
X_sample=X_sample1[0:200]
N_sample=X_sample1.shape[0]

A_inv_mat = A_inverse

for i in range(m_q-1):
  A_inv_mat=block_diag(A_inv_mat,A_inverse) 
  B_transp_A_inverse_mat=block_diag(B_transp_A_inverse_mat,B_transp_A_inverse)
  A_inverse_hor_conc=np.concatenate((A_inverse_hor_conc,A_inverse),axis=1)


print(B_transp_A_inverse_mat.shape)


(5, 270)


In [386]:
def tropical_sum_pol_function(x,pol):
  a_=pol[0]
  b_=pol[1]
  s= 0
  for i in range(np.shape(a_)[0]):
      s=s+max(np.inner(a_[i],x)+b_[i],0)
  return(s)

In [387]:
f_x_i=np.zeros(np.shape(X_sample)[0])
for i in range(np.shape(X_sample)[0]):
  f_x_i[i]=tropical_sum_pol_function(X_sample[i],p_pol)


In [388]:
# Phase 1
def Phase_1_function_comp_quotient(X_sample,q_pol):
  a_hat,b_hat=q_pol
  m_q=a_hat.shape[0]
  # Initialize sets I_i
  c1 = np.zeros(X_sample.shape[1]*m_q)
  y=np.zeros((m_q,X_sample.shape[0]))
  for i in range(y.shape[0]):
    for j in range(y.shape[1]):
      y[i,j] = 1 if np.inner(a_hat[i],X_sample[j])+b_hat[i]>=0 else 0
  c2=np.sum(y,1)
  c2=np.sum(y,1)
  c11=np.matmul(y,X_sample)
  c1=c11.reshape(c11.shape[0]*c11.shape[1])
  c1.shape
  return c1,c2

In [389]:
c1,c2=Phase_1_function_comp_quotient(X_sample,q_pol)
c2

array([135.,   2., 165.,  69., 149.])

In [390]:
x_a = cp.Variable(d*m_q)
x_b = cp.Variable(m_q)

Iterations=25
progress_mat=np.zeros(Iterations+1)
progress_mat[0]=np.sum(f_x_i)-np.sum([tropical_sum_pol_function(X_sample[i],q_pol) for i in range(X_sample.shape[0])])
# Main Iteration
for cnt in range(Iterations):
  #print(cnt+1)
  q_pol = (a_hat,b_hat)
  c1,c_2=Phase_1_function_comp_quotient(X_sample,q_pol)
  
  prob = cp.Problem(cp.Maximize(c1@x_a+c2@x_b),
                  [ A_inv_mat@x_a >=np.zeros(m_q*d),
                     A_inverse_hor_conc@x_a<=np.ones(d),
                   B_transp_A_inverse_mat@x_a>=x_b ])
  prob.solve(warm_start=True)
  for i in range(a_hat.shape[0]):
    a_hat[i]=a_hat[i]*0.1+0.9*x_a.value[d*i:d*(i+1)] 
    b_hat[i]=b_hat[i]*0.1+0.9*x_b.value[i]
  progress_mat[cnt+1]=np.sum(f_x_i)-np.sum([tropical_sum_pol_function(X_sample[i],q_pol) for i in range(X_sample.shape[0])])

In [391]:
progress_mat

array([1315.36252189,  895.14756368,  669.86438366,  556.3074511 ,
        523.10871651,  507.29349818,  505.04573844,  504.81711155,
        504.7938478 ,  504.79152143,  504.79128879,  504.79126553,
        504.7912632 ,  504.79126297,  504.79126294,  504.79126294,
        504.79126294,  504.79126294,  504.79126294,  504.79126294,
        504.79126294,  504.79126294,  504.79126294,  504.79126294,
        504.79126294,  504.79126294])

In [392]:
progress_mat1=progress_mat
a_hat_1=a_hat
b_hat_1=b_hat

# Tropical Division 2

In [393]:
# Martix Definitions

A=a2_red_vec
d=A.shape[1]
m_p=A.shape[0]
p_pol=(A.T,B)


B=bp_2_red
A_inverse = np.linalg.inv(A)
B_transp_A_inverse = np.matmul(B.T,A_inverse)
B_transp_A_inverse_mat=B_transp_A_inverse 
A_inverse_hor_conc=A_inverse



a_hat, b_hat=np.array(np.random.randn(m_q,d)), np.array(np.random.randn(m_q))
q_pol=(a_hat, b_hat)
X_sample=X_sample2[0:200]
N_sample=X_sample2.shape[0]

A_inv_mat = A_inverse

for i in range(m_q-1):
  A_inv_mat=block_diag(A_inv_mat,A_inverse) 
  B_transp_A_inverse_mat=block_diag(B_transp_A_inverse_mat,B_transp_A_inverse)
  A_inverse_hor_conc=np.concatenate((A_inverse_hor_conc,A_inverse),axis=1)


In [394]:
f_x_i=np.zeros(np.shape(X_sample)[0])
for i in range(np.shape(X_sample)[0]):
  f_x_i[i]=tropical_sum_pol_function(X_sample[i],p_pol)


In [395]:
x_a = cp.Variable(d*m_q)
x_b = cp.Variable(m_q)

Iterations=25
progress_mat=np.zeros(Iterations+1)
progress_mat[0]=np.sum(f_x_i)-np.sum([tropical_sum_pol_function(X_sample[i],q_pol) for i in range(X_sample.shape[0])])
# Main Iteration
for cnt in range(Iterations):
  #print(cnt+1)
  q_pol = (a_hat,b_hat)
  c1,c_2=Phase_1_function_comp_quotient(X_sample,q_pol)
  
  prob = cp.Problem(cp.Maximize(c1@x_a+c2@x_b),
                  [ A_inv_mat@x_a >=np.zeros(m_q*d),
                     A_inverse_hor_conc@x_a<=np.ones(d),
                   B_transp_A_inverse_mat@x_a>=x_b ])
  prob.solve(warm_start=True)
  for i in range(a_hat.shape[0]):
    a_hat[i]=a_hat[i]*0.1+0.9*x_a.value[d*i:d*(i+1)] 
    b_hat[i]=b_hat[i]*0.1+0.9*x_b.value[i]
  progress_mat[cnt+1]=np.sum(f_x_i)-np.sum([tropical_sum_pol_function(X_sample[i],q_pol) for i in range(X_sample.shape[0])])

In [396]:
progress_mat2=progress_mat
a_hat_2=a_hat
b_hat_2=b_hat

# Check1

In [397]:
err=0

for i in range(X_sample.shape[0]):
  p1_val = np.sum( np.maximum(np.matmul(a_hat_1,X_sample1[i]) +b_hat_1,np.zeros(a_hat_1.shape[0])))
  p2_val = np.sum( np.maximum(np.matmul(a_hat_2,X_sample2[i]) +b_hat_2,np.zeros(a_hat_2.shape[0])))
  p_val =p1_val-p2_val+b2_
  y=p_val
  err_i=0
  if y>0 and y_train_3_5[i]==0:#if y[3]>y[5] and y_test_3_5[i]==0:
    err_i=1
  if y<0 and y_train_3_5[i]==1:#if y[3]<y[5] and y_test_3_5[i]==1:
    err_i=1
  err=err+err_i
err/X_sample.shape[0]


0.03

In [398]:
X_sample1 = np.matmul(x_train_3_5,Q1) 
X_sample2 = np.matmul(x_train_3_5,Q2) 


X_sample1_test = np.matmul(x_test_3_5,Q1) 
X_sample2_test = np.matmul(x_test_3_5,Q2) 

In [399]:
err=0

for i in range(X_sample1_test.shape[0]):
  p1_val = np.sum( np.maximum(np.matmul(a_hat_1,X_sample1_test[i]) +b_hat_1,np.zeros(a_hat_1.shape[0])))
  p2_val = np.sum( np.maximum(np.matmul(a_hat_2,X_sample2_test[i]) +b_hat_2,np.zeros(a_hat_2.shape[0])))
  p_val =p1_val-p2_val+b2_
  y=p_val
  err_i=0
  if y>0 and y_test_3_5[i]==0:#if y[3]>y[5] and y_test_3_5[i]==0:
    err_i=1
  if y<0 and y_test_3_5[i]==1:#if y[3]<y[5] and y_test_3_5[i]==1:
    err_i=1
  err=err+err_i
err/X_sample1_test.shape[0]

0.01892744479495268

In [400]:
m_q

5