# Introduction

This notebook contains the code towards MSML604 Project for **Group 10**

Members:
- Aditya Patkar
- Suraj T.C.
- Carl Ostrenga
- Nantanit Somboon

Note:
- Please upload **weight_diff.txt** into the environment or change the path below
- One of the approach is done in MATLAB. Please check the attached MATLAB file.

In [3]:
#necessary imports
import numpy as np
import time
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
import xgboost as xg

#configuration
filename= 'weight_diff.txt'

# Submission 1: Logistic Regression with Stochastic Gradient Descent

In [4]:
def puf_query(c, w):
    n = c.shape[1]
    phi = np.ones(n+1)
    phi[n] = 1
    for i in range(n-1, -1, -1):
        phi[i] = (2*c[0,i]-1)*phi[i+1]
    r = (np.dot(phi, w) > 0)
    return r

# Problem Setup
target = 0.99  # The desired prediction rate
n = 64  # number of stages in the PUF

# Initialize the PUF
np.random.seed(int(time.time()))
data = np.loadtxt(filename)
w = np.zeros((n+1, 1))
for i in range(1, n+2):
    randi_offset = np.random.randint(1, 45481)
    w[i-1] = data[randi_offset-1]

# You can use the puf_query function to generate your training dataset
# ADD YOUR DATASET GENERATION CODE HERE
np.random.seed(10)
print("Generating training set...")
training_size = 8350
X = np.random.randint(0, 2, size=(training_size, n) ) 
y = np.zeros((training_size, 1))
for i in range(training_size):
    y[i] = puf_query(X[i:i+1, :], w)

print("Training set generated")

# ADD YOUR TRAINING CODE HERE
w0 = np.random.randn(n+1, 1) # The estimated value of w.
t0 = time.process_time()
learning_rate = 0.1
for i in range(training_size):
    c = X[i:i+1,:]
    phi = np.ones(n+1)
    phi[n] = 1
    for j in range(n-1, -1, -1):
        phi[j] = (2*c[0,j]-1)*phi[j+1]
    z = np.dot(phi, w0)
    y_pred = 1/(1+np.exp(-z))
    error = y[i][0] - y_pred
    delta = learning_rate * error * y_pred * (1-y_pred) * phi.reshape((n+1,1))
    w0 = w0 + delta

t1 = time.process_time()
training_time = t1 - t0  # time taken to get w0
print("Training time:", training_time)
print("Training size:", training_size)

# Evaluate your result
n_test = 10000
correct = 0
for i in range(1, n_test+1):
    c_test = np.random.randint(0, 2, size=(1, n))  # a random challenge vector
    #convert 0 to -1
    r = puf_query(c_test, w)
    r0 = puf_query(c_test, w0)
    correct += (r==r0)

success_rate = correct/n_test
print("Success rate:", success_rate)

# If the success rate is less than 99%, a penalty time will be added
# One second is add for each 0.01% below 99%.
effective_training_time = training_time
if success_rate < 0.99:
    effective_training_time = training_time + 10000*(0.99-success_rate)
print("Effective training time:", effective_training_time)

Generating training set...
Training set generated
Training time: 2.50936793
Training size: 8350
Success rate: [0.9701]
Effective training time: [201.50936793]


# Submission 2: Logistic Regression with LBFGS (Best one yet)

In [41]:
def transform_X(X):
    '''
      used to transform x to phi
    '''
    n = X.shape[1]
    phi_X = np.ones((X.shape[0], n+1))
    phi_X[:, n] = 1
    for i in range(n-1, -1, -1):
        phi_X[:, i] = (2*X[:, i]-1)*phi_X[:, i+1]
    return phi_X


def puf_query(c, w):
    n = c.shape[1]
    phi = np.ones(n+1)
    phi[n] = 1
    for i in range(n-1, -1, -1):
        phi[i] = (2*c[0,i]-1)*phi[i+1]

    r = (np.dot(phi, w) > 0)
    return r
    

# Problem Setup
target = 0.99  # The desired prediction rate
n = 64  # number of stages in the PUF

# Initialize the PUF
np.random.seed(int(time.time()))
data = np.loadtxt(filename)
w = np.zeros((n+1, 1))
for i in range(1, n+2):
    randi_offset = np.random.randint(1, 45481)
    w[i-1] = data[randi_offset-1]

# Syntax to query the PUF:
c = np.random.randint(0, 2, size=(1, n))  # a random challenge vector
r = puf_query(c, w)
# you may remove these two lines

# You can use the puf_query function to generate your training dataset
# ADD YOUR TRAINING CODE HERE

# Generate the training dataset
print("Generating training set...")
training_size = 5500
np.random.seed(42)
X = np.random.randint(0, 2, size=(training_size, n)) 
y = np.zeros((training_size, 1))
for i in range(training_size):
    y[i] = puf_query(X[i:i+1, :], w)

print("Training set generated")

# Train the decision tree
t0 = time.process_time()
dt = LogisticRegression()
X = transform_X(X)
dt.fit(X, y.ravel())
t1 = time.process_time()
training_time = t1 - t0
print("Training time:", training_time)

# Evaluate the decision tree
n_test = 10000
correct = 0
for i in range(1, n_test+1):
    c_test = np.random.randint(0, 2, size=(1, n))  # a random challenge vector
    r = puf_query(c_test, w)
    c_test = transform_X(c_test)
    r_dt = dt.predict(c_test)
    correct += (r==r_dt)

success_rate = correct/n_test
print("Success rate:", success_rate)

# If the success rate is less than 99%, a penalty time will be added
# One second is add for each 0.01% below 99%.
effective_training_time = training_time
if success_rate < 0.99:
    effective_training_time = training_time + 10000*(0.99-success_rate)
print("Effective training time:", effective_training_time)

Generating training set...
Training set generated
Training time: 0.062490379999985635
Success rate: [0.9923]
Effective training time: 0.062490379999985635


# Submission 3: SVM with SMO

In [6]:
def puf_query(c, w):
    n = c.shape[1]
    phi = np.ones(n+1)
    phi[n] = 1
    for i in range(n-1, -1, -1):
        phi[i] = (2*c[0,i]-1)*phi[i+1]

    r = (np.dot(phi, w) > 0)
    return r

def transform_X(X):
    n = X.shape[1]
    phi_X = np.ones((X.shape[0], n+1))
    phi_X[:, n] = 1
    for i in range(n-1, -1, -1):
        phi_X[:, i] = (2*X[:, i]-1)*phi_X[:, i+1]
    return phi_X

# Problem Setup
target = 0.99  # The desired prediction rate
n = 64  # number of stages in the PUF

# Initialize the PUF
np.random.seed(int(time.time()))
data = np.loadtxt('weight_diff.txt')
w = np.zeros((n+1, 1))
for i in range(1, n+2):
    randi_offset = np.random.randint(1, 45481)
    w[i-1] = data[randi_offset-1]

# Syntax to query the PUF:
c = np.random.randint(0, 2, size=(1, n))  # a random challenge vector
r = puf_query(c, w)
# you may remove these two lines

# You can use the puf_query function to generate your training dataset
# ADD YOUR DATASET GENERATION CODE HERE
print("Generating training set...")
training_size = 6500
X = np.random.randint(0, 2, size=(training_size, n)) 
y = np.zeros((training_size, 1))
for i in range(training_size):
    y[i] = puf_query(X[i:i+1, :], w)

print("Training set generated")

# Train SVM using Newton's method
print("Training SVM...")
clf = SVC(kernel='linear', C=1.0)
t0 = time.process_time()
clf.fit(transform_X(X), y.ravel())
t1 = time.process_time()
training_time = t1 - t0  # time taken to train SVM
print("Training time:", training_time)
print("Training size:", training_size)

# Evaluate your result
n_test = 10000
correct = 0
for i in range(1, n_test+1):
    c_test = np.random.randint(0, 2, size=(1, n))  # a random challenge vector
    r = puf_query(c_test, w)
    r0 = clf.predict(transform_X(c_test))
    correct += (r==r0)

success_rate = correct/n_test
print("Success rate:", success_rate)

# If the success rate is less than 99%, a penalty time will be added
# One second is add for each 0.01% below 99%.
effective_training_time = training_time
if success_rate < 0.99:
    effective_training_time = training_time + 10000*(0.99-success_rate)
print("Effective training time:", effective_training_time)


Generating training set...
Training set generated
Training SVM...
Training time: 0.7570978469999972
Training size: 6500
Success rate: [0.991]
Effective training time: 0.7570978469999972


# Submission 4: AdaBoost

In [7]:
def puf_query(c, w):
    n = c.shape[1]
    phi = np.ones(n+1)
    phi[n] = 1
    for i in range(n-1, -1, -1):
        phi[i] = (2*c[0,i]-1)*phi[i+1]

    r = (np.dot(phi, w) > 0)
    return r

# Problem Setup
target = 0.99  # The desired prediction rate
n = 64  # number of stages in the PUF

# Initialize the PUF
np.random.seed(int(time.time()))
data = np.loadtxt('weight_diff.txt')
w = np.zeros((n+1, 1))
for i in range(1, n+2):
    randi_offset = np.random.randint(1, len(data)+1)
    w[i-1] = data[randi_offset-1]

# Syntax to query the PUF:
c = np.random.randint(0, 2, size=(1, n))  # a random challenge vector
r = puf_query(c, w)
# you may remove these two lines

# You can use the puf_query function to generate your training dataset
# ADD YOUR DATASET GENERATION CODE HERE
training_size = 10000
training_c = []
training_r = []

for i in range(0,training_size):
  c = np.random.randint(0, 2, size=(1, n))  # a random challenge vector
  r = puf_query(c, w)

  n = c.shape[1]
  phi = np.ones(n+1)
  phi[n] = 1
  for j in range(n-1, -1, -1):
        phi[j] = (2*c[0,j]-1)*phi[j+1]
  training_c.append(phi)
  training_r.append(r)


training_c = np.array(training_c).reshape(training_size,-1)
training_r = np.array(training_r).reshape(training_size,-1)


w0 = np.zeros((n+1, 1))  # The estimated value of w.
# Try to estimate the value of w here. This section will be timed. You are
# allowed to use the puf_query function here too, but it will count towards
# the training time.
t0 = time.process_time()
# ADD YOUR TRAINING CODE HERE



model = AdaBoostClassifier()
model.fit(training_c, training_r)

# for i in range(1, n+2):
#     randi_offset = np.random.randint(1, len(data)+1)
#     w0[i-1] = data[randi_offset-1]



t1 = time.process_time()
training_time = t1 - t0  # time taken to get w0
print("Training time:", training_time)
print("Training size:", training_size)

# Evaluate your result
n_test = 10000
correct = 0
for i in range(1, n_test+1):
    c_test = np.random.randint(0, 2, size=(1, n))  # a random challenge vector
    r = puf_query(c_test, w)
    # r0 = puf_query(c_test, w0)

    n = c_test.shape[1]
    phi_test = np.ones(n+1)
    phi_test[n] = 1
    for j in range(n-1, -1, -1):
          phi_test[j] = (2*c_test[0,j]-1)*phi_test[j+1]
    r0 =  model.predict(phi_test.reshape(1, -1))
    correct += (r==r0)

success_rate = correct/n_test
print("Success rate:", success_rate)

# If the success rate is less than 99%, a penalty time will be added
# One second is add for each 0.01% below 99%.
effective_training_time = training_time
if success_rate < 0.99:
    effective_training_time = training_time + 10000*(0.99-success_rate)
print("Effective training time:", effective_training_time)

  y = column_or_1d(y, warn=True)


Training time: 0.8493677120000029
Training size: 10000
Success rate: [0.9223]
Effective training time: [677.84936771]


# Submission 5: GradientBoost

In [8]:
t0 = time.process_time()
model = GradientBoostingClassifier()
model.fit(training_c, training_r)

t1 = time.process_time()
training_time = t1 - t0  # time taken to get w0
print("Training time:", training_time)
print("Training size:", training_size)

# Evaluate your result
n_test = 10000
correct = 0
for i in range(1, n_test+1):
    c_test = np.random.randint(0, 2, size=(1, n))  # a random challenge vector
    r = puf_query(c_test, w)
    # r0 = puf_query(c_test, w0)

    n = c_test.shape[1]
    phi_test = np.ones(n+1)
    phi_test[n] = 1
    for j in range(n-1, -1, -1):
          phi_test[j] = (2*c_test[0,j]-1)*phi_test[j+1]
    r0 =  model.predict(phi_test.reshape(1, -1))
    correct += (r==r0)

success_rate = correct/n_test
print("Success rate:", success_rate)

# If the success rate is less than 99%, a penalty time will be added
# One second is add for each 0.01% below 99%.
effective_training_time = training_time
if success_rate < 0.99:
    effective_training_time = training_time + 10000*(0.99-success_rate)
print("Effective training time:", effective_training_time)

  y = column_or_1d(y, warn=True)


Training time: 2.980349426999993
Training size: 10000
Success rate: [0.8783]
Effective training time: [1119.98034943]


# Submission 6: Gradient Descent (Please check the matlab code)

# Submission 7: Stochastic Gradient Descent with Absolute Error

In [42]:
class PUF():

  def __init__(self, n):
    np.random.seed(int(time.time()))
    data = np.loadtxt('weight_diff.txt')
    
    self.N = n
    self.TARGET = 0.99
    self.w = np.zeros((n+1, 1))
    self.size = -1
    self.train_time = -1
    self.eval_time = -1

    for i in range(1, n+2):
      randi_offset = np.random.randint(1, 45481)
      self.w[i-1] = data[randi_offset-1]

  def phi(self, c):
    n = c.shape[-1]
    p = np.ones(n+1)
    p[n] = 1
    for i in range(n-1, -1, -1):
      p[i] = (2 * c[0,i] - 1) * p[i+1]

    return p

  def query(self, c, w=None, debug=False):
    phi = self.phi(c)
    if w is not None:
      r = (np.dot(phi, w) > 0)
    else:
      r = (np.dot(phi, self.w) > 0)

    if debug:
      print("Phi: ", phi, "W: ", self.w, sep="\n")

    return r

  def make_samples(self, size):
    assert size > 0
    self.size = size
    c = np.random.randint(0, 2, size=(size, 1, self.w.shape[0] - 1))
    phi = np.array([self.phi(i) for i in c])
    r = np.array([self.query(i) for i in c])

    return c, phi, r

  def train(self, train_fn):
    t0 = time.process_time()

    model = train_fn()

    t1 = time.process_time()
    self.train_time = t1 - t0                             
    print("Training time:", self.train_time)
    print("Training size:", self.size)

    return model

  def eval(self, pred=None, weights=None, eval_weights=False):
    n_test = 10000
    correct = 0
    for i in range(1, n_test + 1):
      c_test = np.random.randint(0, 2, size=(1, self.N))
      r_test = self.query(c_test)

      if eval_weights:
        assert len(weights)
        r_pred = self.query(c_test, weights)
      else:
        assert pred
        c_test = self.phi(c_test).reshape(1, -1)
        r_pred = pred(c_test)
      
      correct += (r_test == r_pred)

    success_rate = correct/n_test
    print("Success rate:", success_rate)

    # If the success rate is less than 99%, a penalty time will be added
    # One second is add for each 0.01% below 99%.
    effective_training_time = self.train_time
    if success_rate < 0.99:
        effective_training_time = self.train_time + 10000*(0.99-success_rate)
    print("Effective training time:", effective_training_time)

  def info(self):
    return {'n': self.N, 'target': self.TARGET}

In [43]:
puf = PUF(64)

X, X_phi, y = puf.make_samples(10000)
     

In [44]:
def sgd_train():
  n = 64
  w0 = np.zeros((n+1, 1))  # The estimated value of w.
  for i in range(n+1):
      w0[i] = np.random.rand()*0.1 - 0.05

  eta = 0.0001  # learning rate
  for t in range(10):
      for i in range(len(y)):
          c = X[i].flatten()
          r = y[i]
          phi = X_phi[i]

          h = (np.dot(phi, w0) > 0)
          e = float(r) - h
          delta_w = eta * e * phi.reshape(n+1,1)
          w0 = w0 + delta_w

  return w0

sgd_weights = puf.train(sgd_train)

puf.eval(weights=sgd_weights, eval_weights=True)

Training time: 1.6915694369999983
Training size: 10000
Success rate: [0.9937]
Effective training time: 1.6915694369999983


#Submission 8: XGBoost

In [None]:
def xgb_train():
  xgb = xg.XGBClassifier()
  xgb.fit(X_phi, y.ravel())
  return xgb

xgb = puf.train(xgb_train)

puf.eval(pred=xgb.predict)