In [1]:
!pip install cvxpy



In [2]:
# !pip install gurobipy

Collecting gurobipy
  Downloading gurobipy-11.0.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (13.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.4/13.4 MB[0m [31m48.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: gurobipy
Successfully installed gurobipy-11.0.1


In [3]:
import cvxpy as cp
# import gurobipy as gp
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

In [4]:
np.random.seed(1)
sc=StandardScaler()

In [5]:
dat=load_breast_cancer(return_X_y=True, as_frame=False)

In [6]:
X,y=dat[0],dat[1]

In [7]:
x_train,x_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=13)

In [8]:
x_train=sc.fit_transform(x_train)
x_test=sc.transform(x_test)

In [9]:
clf = LogisticRegression(random_state=0).fit(x_train,y_train)

In [10]:
def create_obj_var(x,n_ft,ft_type):
  vars=[]
  obj_var=[0]*n_ft
  for i in range(n_ft):
    # vars[i]=cp.Variable()
    # print(ft_type[i])
    if ft_type[i]=='ct':
      # print(vars[i])
      v=cp.Variable()
      obj_var[i]=(cp.norm(v-x[i],1)) #should add denominator
    else:
      v=cp.Variable(boolean=True)
      obj_var[i]=(cp.maximum(1,(v-x[i])/1000))
    vars.append(v)
  vars=cp.Variable(n_ft)
  return vars,obj_var



In [46]:
# vars,_=create_obj_var(x_test[0],30,['ct']*30)
# vars

In [47]:
# p=vars@clf.coef_[0].T
# cp.exp(p)/1+cp.exp(p)

In [49]:
# np.logaddexp(-1000,x_test[0]@clf.coef_[0]+clf.intercept_[0])

-0.11867402693217703

In [51]:
# p=x_test[0]@clf.coef_[0]+clf.intercept_[0]
# (np.exp(p)/(1+np.exp(p)))
# p

-0.11867402693217703

In [11]:
def check_pred(cf,clf,target):
  cf=cp.hstack(cf)
  # p=cp.log_sum_exp(cp.hstack([-1000,cf@clf.coef_[0].T+clf.intercept_[0]]))
  p=cp.matmul(cf,clf.coef_[0])+clf.intercept_[0]
  # p=cp.exp(p)/(1+cp.exp(p))
  p=cp.sum(cp.exp(p - cp.log(target)))
  return p



In [None]:
# import cvxpy as cp
# import numpy as np
# from sklearn.linear_model import LogisticRegression

# # Generate some example data
# np.random.seed(0)  # For reproducibility
# X = np.random.rand(100, 2)  # Example input features
# y = np.random.randint(0, 2, 100)  # Example binary labels (0 or 1)

# # Fit a logistic regression model
# logreg_model = LogisticRegression()
# logreg_model.fit(X, y)

# # Define cvxpy variable for the input vector
# x = cp.Variable(2)

# # Compute the log odds (linear part of logistic regression)
# log_odds = cp.matmul(x, logreg_model.coef_[0]) + logreg_model.intercept_

# # Define the target probability (e.g., close to zero)
# target_probability = 0.3  # Adjust as needed

# # Define the log-sum-exp constraint indirectly
# constraint_lhs = cp.sum(cp.exp(log_odds - cp.log(target_probability)))
# constraint_rhs = len(X)  # Number of data points
# constraint = constraint_lhs <= constraint_rhs

# # Create the cvxpy problem with the constraint
# problem = cp.Problem(cp.Minimize(0), [constraint])

# # Solve the problem
# problem.solve()

# # Access the optimized input vector
# optimal_vector = x.value

# # Evaluate the log odds for the optimal vector
# optimal_log_odds = np.dot(optimal_vector, logreg_model.coef_[0]) + logreg_model.intercept_

# print("Optimal Input Vector:", optimal_vector)
# print("Log Odds for Optimal Vector:", optimal_log_odds)


In [25]:
# Create constraint.
def optimize(vars,obj_var,clf,x,target):
  # vars.append(cp.Variable())
  # x=x.reshape(1,-1)

  # p=cp.sum(vars*np.transpose(np.array(clf.coef_))) +clf.intercept_[0]
  # print(p)
  # p=1/1+cp.exp(-p)


  # cf=[]
  # for j in vars:
  #   j.value=np.random.randn(1)[0]

  # v=cp.Variable(30)
  constraints=[check_pred(vars,clf,target)<=1]

  # Form objective.
  objective = cp.Minimize(cp.sqrt(cp.sum_squares(cp.vstack(obj_var)))) #
  # objective = cp.Minimize((cp.sum(cp.vstack(obj_var))))
  # objective = cp.Minimize(cp.norm(v-x,1))

  # vars.value = numpy.random.randn(30)


  # Form and solve problem.
  prob = cp.Problem(objective,constraints)
  prob.solve(solver=cp.ECOS_BB,qcp=True,verbose=True) #cp.SCS, cp.ECOS_BB
  # prob.solve(warm_start=True)
  print("status: {}".format(prob.status))
  return vars

In [None]:
cf_all=[]
ft_type=['ct']*30
n_ft=30
for k in range(len(x_test)):
  i=x_test[k]
  target=1-y_test[k]
  vars,obj_var=create_obj_var(i,n_ft,ft_type)
  cf=[]
  target=0.001 if target==0 else 0.999
  vals=optimize(vars,obj_var,clf,i,target)
  for j in vals:
    cf.append(j.value)
  cf_all.append(cf)

In [27]:
np.array(cf_all).shape, y_test.shape

((114, 30), (114,))

In [28]:
validity=0
for i in range(len(cf_all)):
  if clf.predict(np.array(cf_all[i]).reshape(1,-1))==1-y_test[i]:
    validity+=1

print(validity)

78


In [8]:
def get_stats(x_train,ft_types):
  '''
  should receive the raw data
  '''
  n_ft= x_train.shape[1]
  stats=[]
  for i in range(n_ft):
    s=[]
    if ft_types[i]=='ct':
      s[0]=np.mean(x_train[:,i])
      s[1]=np.var(x_train[:,i])**0.5
    else: #consider only cont and cat currently. Need discrete data also.
      s[0]=np.min(x_train[:,i])
      s[1]=np.max(x_train[:,i])
    stats.append(s)

  return stats


In [None]:
def gen_random_point(stats,ft_types):
  randx=[]
  for i in range(len(stats)):
    val=0
    if ft_types[i]=='continuous':
      val=np.randn(stats[i][0],stats[i][1],size=None)
    else:
      val=np.random.randint(stats[i][0],stats[i][1])
    randx[i]=val

  return randx

