In [3]:
import numpy as np 
import itertools 
np.random.seed(0)

class Parameters:

    @staticmethod
    def generate_true_dist_parameters(len_x, lb, ub):
        elements = [0,1]
        dict_parameters = {"x1": {(): np.random.uniform(lb,ub)}}
        for k in range(2,len_x+1):
            permutations = list(itertools.product(elements, repeat=k-1))
            dict_parameters["x"+str(k)] = {perm: np.random.uniform(lb,ub) for perm in permutations}
        return dict_parameters
    
    @staticmethod
    def generate_logsitc_parameters(len_x, lb, ub):
        elements = [0,1]
        dict_parameters = {"x1":{"bias": np.random.uniform(lb, ub)}}

        for k in range(2, len_x+1):
            dict_parameters["x"+str(k)] = {"bias": np.random.uniform(lb,ub)}
            for j in range(1,k):
                dict_parameters["x"+str(k)]["x"+str(j)]= np.random.uniform(lb,ub)
        return dict_parameters
    
        

#Number of paramerters of the True distribution 

$$
p(x_1, x_2, x_3, \dots, x_5) = p(x_1) \cdot p(x_2 \mid x_1) \cdot p(x_3 \mid x_1, x_2) \cdots p(x_n \mid x_1, x_2, \dots, x_{4})
$$

#Total number of parameters x can take [0,1]

The total number of parameters is:

$$
1 + 2 + 2^2 + 2^3 + 2^4 = 31
$$


In [4]:
Parameters().generate_true_dist_parameters(len_x=5, lb = 0, ub = 1)


{'x1': {(): 0.5488135039273248},
 'x2': {(0,): 0.7151893663724195, (1,): 0.6027633760716439},
 'x3': {(0, 0): 0.5448831829968969,
  (0, 1): 0.4236547993389047,
  (1, 0): 0.6458941130666561,
  (1, 1): 0.4375872112626925},
 'x4': {(0, 0, 0): 0.8917730007820798,
  (0, 0, 1): 0.9636627605010293,
  (0, 1, 0): 0.3834415188257777,
  (0, 1, 1): 0.7917250380826646,
  (1, 0, 0): 0.5288949197529045,
  (1, 0, 1): 0.5680445610939323,
  (1, 1, 0): 0.925596638292661,
  (1, 1, 1): 0.07103605819788694},
 'x5': {(0, 0, 0, 0): 0.08712929970154071,
  (0, 0, 0, 1): 0.02021839744032572,
  (0, 0, 1, 0): 0.832619845547938,
  (0, 0, 1, 1): 0.7781567509498505,
  (0, 1, 0, 0): 0.8700121482468192,
  (0, 1, 0, 1): 0.978618342232764,
  (0, 1, 1, 0): 0.7991585642167236,
  (0, 1, 1, 1): 0.46147936225293185,
  (1, 0, 0, 0): 0.7805291762864555,
  (1, 0, 0, 1): 0.11827442586893322,
  (1, 0, 1, 0): 0.6399210213275238,
  (1, 0, 1, 1): 0.1433532874090464,
  (1, 1, 0, 0): 0.9446689170495839,
  (1, 1, 0, 1): 0.52184832175007

#Number of paramerters of the logistic approximation of the conditional probability 

$$
p(x_1, x_2, x_3, \dots, x_5) = p(x_1) \cdot p(x_2 \mid x_1) \cdot p(x_3 \mid x_1, x_2) \cdots p(x_n \mid x_1, x_2, \dots, x_{4})
$$

#Total number of parameters x can take [0,1]
$$
p(x_1, x_2, x_3, \dots, x_5) \approx \sigma(w_1^T x_1 + b_1) \cdot \sigma(w_2^T x_2 + b_2) \cdot \sigma(w_3^T x_3 + b_3) \cdots \sigma(w_5^T x_5 + b_5)
$$

Where:
$$
\sigma(z) = \frac{1}{1 + e^{-z}}
$$

The total number of parameters is:

$$
1 + 2 + 3 + 4 + 5 = 15
$$


In [5]:
#generating logistic parameters
parameter = Parameters().generate_logsitc_parameters(len_x=5, lb = 0, ub = 1)
#lets assume that this is the distribution we are planning acheieve and this is our curret approx, true distribtion which redcues our parameters 
parameter

{'x1': {'bias': 0.7742336894342167},
 'x2': {'bias': 0.45615033221654855, 'x1': 0.5684339488686485},
 'x3': {'bias': 0.018789800436355142,
  'x1': 0.6176354970758771,
  'x2': 0.6120957227224214},
 'x4': {'bias': 0.6169339968747569,
  'x1': 0.9437480785146242,
  'x2': 0.6818202991034834,
  'x3': 0.359507900573786},
 'x5': {'bias': 0.43703195379934145,
  'x1': 0.6976311959272649,
  'x2': 0.06022547162926983,
  'x3': 0.6667667154456677,
  'x4': 0.6706378696181594}}

True Distribution provides an exact representation but becomes computationally infeasible as \( n \) grows due to exponential parameter growth.

Logistic Approximation offers a more scalable approach, reducing the parameter count significantly while maintaining sufficient flexibility.

These methods illustrate the trade-off between accuracy and efficiency in probabilistic modeling.


# Generating 1 sample

In [6]:
#Geneating a sample from the distribution 

def sigmoid(z):
    return 1/(1+np.exp(-z))

def get_a_sample(parameters):

    x_sample = []
    p_chain = []
    prob = 1.0

    for i in range(len(parameters)):
        z = parameters["x"+str(i+1)]["bias"]
        for k in range(1,i+1):
            z += x_sample[k-1]*parameters["x"+str(i+1)]["x"+str(k)]
        p_est = sigmoid(z)

        s_i = np.random.binomial(1,p_est)
        x_sample += [s_i]
        prob *= p_est
        p_chain += [float(p_est)]
    return x_sample, prob, p_chain
    

sample, probability , chain = get_a_sample(parameters=parameter)

print(f'Sample : {sample}, Sample probability : {probability}, ')
print(f'Chain probability : {chain}')

Sample : [1, 1, 1, 1, 1], Sample probability : 0.33752692635670706, 
Chain probability : [0.6844360133921935, 0.7358645994418772, 0.7770437377025466, 0.9309908455336089, 0.9263749132521876]


# Generating n Samples


In [7]:
#generating n samples 

def generate_n_samples(num_samples, parameters):
    n_samples = []
    for num in range(num_samples):
        sample, _, _ = get_a_sample(parameters)
        n_samples += [sample]
    return n_samples
    

generate_n_samples(num_samples=10, parameters = parameter)



[[1, 0, 1, 1, 1],
 [1, 1, 1, 1, 1],
 [1, 1, 1, 1, 1],
 [0, 1, 0, 1, 0],
 [1, 0, 1, 1, 1],
 [1, 1, 1, 1, 1],
 [1, 1, 1, 1, 1],
 [1, 1, 1, 1, 1],
 [1, 1, 1, 1, 1],
 [1, 1, 0, 1, 1]]

# Training the Model

In [8]:
def initialize_derivative(len_samples):
    dict_parameters = {}
    dict_parameters = {"dl1":{"db": 0}}
    for k in range(2, len_samples+1):
            dict_parameters["dl"+str(k)] = {"db": 0}
            for j in range(1,k):
                dict_parameters["dl"+str(k)]["da"+str(j)]= 0

    return dict_parameters  

init_derivative_value  = initialize_derivative(len_samples=5)
init_derivative_value

{'dl1': {'db': 0},
 'dl2': {'db': 0, 'da1': 0},
 'dl3': {'db': 0, 'da1': 0, 'da2': 0},
 'dl4': {'db': 0, 'da1': 0, 'da2': 0, 'da3': 0},
 'dl5': {'db': 0, 'da1': 0, 'da2': 0, 'da3': 0, 'da4': 0}}

### 1. Log Probability Expression
$$\log p(X) = \log p(x_1) + \log p(x_2 \mid x_1) + \log p(x_3 \mid x_2) + \cdots + \log p(x_5 \mid x_{4})$$
### 2. Gradient of Log Probability

$$
\nabla \log p(X) = (1 - \sigma_1) z_1' + (1 - \sigma_2) z_2' + \cdots + (1 - \sigma_n) z_n'
$$

### 3. Partial Derivatives

  $$
  \frac{\partial \log p(X)}{\partial b_1} = (1 - \sigma_1) \cdot 1
  $$

  $$
  \frac{\partial \log p(X)}{\partial b_2} = (1 - \sigma_2) \cdot 1
  $$

  $$
  \frac{\partial \log p(X)}{\partial a_2} = (1 - \sigma_2) x_1
  $$

  $$
  \frac{\partial \log p(X)}{\partial a_3} = (1 - \sigma_3) x_2
  $$

### 4. General Partial Derivatives

- **For bias term $$ b_i $$:**
  $$
  \frac{\partial \log p(X)}{\partial b_i} = (1 - \sigma_i) \cdot 1
  $$

- **For weight term $$ a_i $$:**
  $$
  \frac{\partial \log p(X)}{\partial a_i} = (1 - \sigma_i) x_{i-1}
  $$



# Sub-sample Probaility

In [9]:
def get_comp_prob(parameters, sub_sample = []):
    if len(sub_sample) ==0:
        z = parameters["x1"]["bias"]
        p = sigmoid(z)
        return p 
    
    #[1,0,1] - subsample 
    len_x = len(sub_sample)+1  #4
    z = parameters["x"+str(len_x)]["bias"] #exp(b+a1x1+a2x2+a3x3)
    for i in range(len(sub_sample)):
        z += (sub_sample[i]*parameters["x"+str(len_x)]["x"+str(i+1)])
    return sigmoid(z)



get_comp_prob(parameters=parameter, sub_sample=[1,0,1])  

np.float64(0.8721596169702072)

# Sample Derivative 

In [10]:
def derivative_parameters(sample, parameters):
    dict_parameters = {}
    for k in range(len(sample)):
        p = get_comp_prob(parameters=parameters,sub_sample=sample[:k])
        dict_parameters["dl"+str(k+1)]={"db":1-p}
        for j in range(k):
            dict_parameters["dl"+str(k+1)]["da"+str(j+1)]= sample[j]*(1-p)

    return dict_parameters

derivative_parameters(sample=[1,0,1,0,1], parameters=parameter)

{'dl1': {'db': np.float64(0.3155639866078065)},
 'dl2': {'db': np.float64(0.2641354005581228),
  'da1': np.float64(0.2641354005581228)},
 'dl3': {'db': np.float64(0.3460550521819764),
  'da1': np.float64(0.3460550521819764),
  'da2': np.float64(0.0)},
 'dl4': {'db': np.float64(0.12784038302979284),
  'da1': np.float64(0.12784038302979284),
  'da2': np.float64(0.0),
  'da3': np.float64(0.12784038302979284)},
 'dl5': {'db': np.float64(0.14167709747554136),
  'da1': np.float64(0.14167709747554136),
  'da2': np.float64(0.0),
  'da3': np.float64(0.14167709747554136),
  'da4': np.float64(0.0)}}

# Batch Gradient Update 

In [11]:
def derivative_n_samples(samples, learning_parameters):
    len_sample = len(samples[0])
    n = len(samples)

    grad_dict = initialize_derivative(len_samples=len_sample)
    
    #add all the derivatives of a parameters for n samples 
    for sample in samples:
        for k in range(len(sample)):
            p = get_comp_prob(parameters=learning_parameters, sub_sample=sample[:k])
            grad_dict["dl"+str(k+1)]["db"] += (1-p)
            for j in range(k):
                grad_dict["dl"+str(k+1)]["da"+str(j+1)] += sample[j]*(1-p)

    for k in range(len_sample):
        grad_dict["dl"+str(k+1)]["db"] = (grad_dict["dl"+str(k+1)]["db"])/n
        for j in range(k):
            grad_dict["dl"+str(k+1)]["da"+str(j+1)] = (grad_dict["dl"+str(k+1)]["da"+str(j+1)])/n
    
    return grad_dict





In [12]:
nsamples = generate_n_samples(num_samples=10, parameters = parameter)
nsamples

[[1, 1, 0, 1, 1],
 [1, 1, 1, 0, 1],
 [0, 0, 1, 0, 1],
 [0, 1, 0, 1, 1],
 [1, 0, 1, 1, 1],
 [1, 1, 1, 1, 1],
 [1, 1, 1, 1, 1],
 [1, 1, 1, 1, 1],
 [1, 1, 0, 1, 1],
 [1, 0, 0, 0, 1]]

In [13]:
derivative_n_samples(samples=nsamples, learning_parameters=parameter)

{'dl1': {'db': np.float64(0.31556398660780655)},
 'dl2': {'db': np.float64(0.288888214133002),
  'da1': np.float64(0.2113083204464982)},
 'dl3': {'db': np.float64(0.2872460141482388),
  'da1': np.float64(0.2029847678148673),
  'da2': np.float64(0.1685047349026923)},
 'dl4': {'db': np.float64(0.12573952164972188),
  'da1': np.float64(0.07694221157457168),
  'da2': np.float64(0.06824076788420362),
  'da3': np.float64(0.06774753600938017)},
 'dl5': {'db': np.float64(0.14309115612288187),
  'da1': np.float64(0.09446471159118655),
  'da2': np.float64(0.08607415039046343),
  'da3': np.float64(0.06822539830414508),
  'da4': np.float64(0.080407096041443)}}

# Gradient Update 

In [14]:
def gradient_descent(samples, learning_params, lr):
    updated_parameters = {}
    dp = derivative_n_samples(samples, learning_parameters=learning_params)
    print(dp)
    len_sample = len(samples[0])
    for k in range(len_sample):
        updated_parameters["x"+str(k+1)] = {"bias": learning_params["x"+str(k+1)]["bias"] + lr * dp["dl"+str(k+1)]["db"]}
        for j in range(1,k+1):
            updated_parameters["x"+str(k+1)]["x"+str(j)] = learning_params["x"+str(k+1)]["x"+str(j)] + lr*dp["dl"+str(k+1)]["da"+str(j)]
    return updated_parameters
            
    

# Log Probability of a Sample

In [21]:

#logP(x) = log(p(x1))+log(p(x2/x1))+.....
#[1,1,1,1,1]
#logp(x) = log(b1)+ log(sig(a1x1+b1))+log(sig(a2x1+b2+a3x2))+log(sig(a1x1+b1))

def log_prob(parameters, sample):
    #length of sample is 5
    logp = 0
    z = 0
    
    for i in range(1,len(sample)+1):
        b = parameter["x" +str(i)]["bias"]
        if i ==1:
            p1 =  b * sample[0] + (1-sample[0])*(1-b)
            logp += np.log(p1)
    
        if i >= 2:
            z = 0
            z += parameter["x" +str(i)]["bias"] 
            for j in range(1,i):
                z += sample[j]*parameter["x" +str(i)]["x"+str(j)]
            
            
            z = np.log(sigmoid(z))*sample[i-1] +(1-sample[i-1])*(1-np.log(sigmoid(z)))
        logp +=  z
        
    return logp

log_prob(parameters=parameter,sample=[1,1,1,1,1])

np.float64(-0.9628313987205959)

In [None]:
#generating logistic parameters
parameter1 = Parameters().generate_logsitc_parameters(len_x=5, lb = 0, ub = 1)
#lets assume that this is the distribution we are planning acheieve and this is our curret approx, true distribtion which redcues our parameters 
parameter1

gradient_descent(samples=nsamples,learning_params= parameter1, lr = 0.01 )

{'dl1': {'db': np.float64(0.38734097425057606)}, 'dl2': {'db': np.float64(0.3371144397223991), 'da1': np.float64(0.2199199032612774)}, 'dl3': {'db': np.float64(0.09541920462127129), 'da1': np.float64(0.039466514966793355), 'da2': np.float64(0.06611547713511418)}, 'dl4': {'db': np.float64(0.10773296101757553), 'da1': np.float64(0.03973317959872176), 'da2': np.float64(0.06704165065146524), 'da3': np.float64(0.06704165065146524)}, 'dl5': {'db': np.float64(0.16910767924880013), 'da1': np.float64(0.0994376678057229), 'da2': np.float64(0.1322648675700583), 'da3': np.float64(0.1322648675700583), 'da4': np.float64(0.1516012357634046)}}


{'x1': {'bias': np.float64(0.4623765768089508)},
 'x2': {'bias': np.float64(0.4479584322085247),
  'x1': np.float64(0.33830146543250017)},
 'x3': {'bias': np.float64(0.8816323150932923),
  'x1': np.float64(0.9454214420900593),
  'x2': np.float64(0.9925514839259805)},
 'x4': {'bias': np.float64(0.3778185965711625),
  'x1': np.float64(0.9665447774231586),
  'x2': np.float64(0.792549986137416),
  'x3': np.float64(0.6763595641507815)},
 'x5': {'bias': np.float64(0.24658055621258743),
  'x1': np.float64(0.21745163762226705),
  'x2': np.float64(0.16737047319694626),
  'x3': np.float64(0.9240792589010659),
  'x4': np.float64(0.29559267474080014)}}

# Gradient Desenct Training

In [None]:
parameters = Parameters().generate_logsitc_parameters(len_x=5, lb=0.0, ub =2.0)
rand_parameters = Parameters().generate_logsitc_parameters(len_x=5, lb=0.0, ub =2.0)



In [None]:
sample_n = generate_n_samples(parameters=parameters, num_samples=100000)
sample_n

batch_size = 100
num_batches = len(sample_n)//batch_size
num_epochs = 10

In [None]:
def print_parameters(p1,p2):
    for  k in range(len(p1)):
        print(p1["x"+str(k+1)]["bias"], p2["x"+str(k+1)]["bias"])
        for j in range(k):
            print(p1["x" + str(k+1)]["x" + str(j+1)], p2["x" + str(k+1)]["x" + str(j+1)])

print("Parameters before training............................")

print_parameters(p1=parameters, p2=rand_parameters)

Parameters before training............................
1.1948857109641529 2.9150352679583027
0.0732031394916588 2.1623293516352184
1.5127900048834473 1.8122852187162264
0.9765355370413993 2.4289055113797833
0.9378401401748304 1.2689209030386286
0.23612346280755325 0.749789478895605
1.4342467497452893 2.143573321240976
1.8767890491668482 0.9286201262083716
1.9163570792445408 0.8770591808935464
0.5406255529226245 0.8133598462881264
0.08665484664180112 1.1017435671217009
1.7087079139905714 1.024788493058243
0.5202387183363384 1.989063119819028
0.334070328782889 0.611659696757089
1.7773564637249333 1.1448893058480216


In [None]:
for epoch in range(1,num_epochs+1):
    print(f"\nEpoch {epoch}/{num_epochs}")
    for i in range(num_batches):
        sample_data = sample_n[i*batch_size:batch_size*(i+1)]
        deriv_sums = derivative_n_samples(samples=sample_data,learning_parameters=rand_parameters)
        rand_parameters=gradient_descent(samples=sample_data, learning_params=rand_parameters, lr=0.01)



Epoch 1/10
{'dl1': {'db': np.float64(0.05141530111129512)}, 'dl2': {'db': np.float64(0.03962626112992108), 'da1': np.float64(0.013830086259594143)}, 'dl3': {'db': np.float64(0.02242596190104955), 'da1': np.float64(0.009560749444083678), 'da2': np.float64(0.015063841465272248)}, 'dl4': {'db': np.float64(0.01840133420881649), 'da1': np.float64(0.008453680835537826), 'da2': np.float64(0.011626859584483768), 'da3': np.float64(0.011060067133370972)}, 'dl5': {'db': np.float64(0.01042196419749156), 'da1': np.float64(0.0037217346628691528), 'da2': np.float64(0.00380661758889157), 'da3': np.float64(0.00609997767797017), 'da4': np.float64(0.01042196419749156)}}
{'dl1': {'db': np.float64(0.05139023074277196)}, 'dl2': {'db': np.float64(0.0379154885257757), 'da1': np.float64(0.014191440234507136)}, 'dl3': {'db': np.float64(0.024247103798763315), 'da1': np.float64(0.01054418087050204), 'da2': np.float64(0.01219730933877532)}, 'dl4': {'db': np.float64(0.020409388676365926), 'da1': np.float64(0.00890

In [None]:
print("After Training -----, ")
print_parameters(p1=parameters, p2=rand_parameters)

After Training -----, 
1.1948857109641529 2.9150352679583027
0.0732031394916588 2.1623293516352184
1.5127900048834473 1.8122852187162264
0.9765355370413993 2.4289055113797833
0.9378401401748304 1.2689209030386286
0.23612346280755325 0.749789478895605
1.4342467497452893 2.143573321240976
1.8767890491668482 0.9286201262083716
1.9163570792445408 0.8770591808935464
0.5406255529226245 0.8133598462881264
0.08665484664180112 1.1017435671217009
1.7087079139905714 1.024788493058243
0.5202387183363384 1.989063119819028
0.334070328782889 0.611659696757089
1.7773564637249333 1.1448893058480216
