### Rudimentary notebook demonstrating mixture models on graphs

In [66]:
import numpy as np 
#import graspy
from graspy.simulations import er_nm, er_np

from scipy.stats import norm

#### Let's fit Zero-inflated models to simulated data
In ZI models we know that we sample a Bernoulli for edge existence and any other distribution after edge exists

### Fit ZI ER with edge existence probability is not Bernoulli or Binomial

In [67]:


n = 50

p = 0.3 ## prob of edge existing 
np.random.seed(1)
G1 = er_np(n=n, p=p)
G1.shape

(50, 50)

In [68]:
### Replace the edges that exist with a normal distribution

#G1[G1 ==1] = np.random.normal(100,5) ## replace the 1's with a normal distribution value
## HERE I AM USING NORMAL DIST WITH HIGH MEAN AND VARIANCE SO IT DOESN'T OVERLAP WITH THE BERNOULLI (I.E I DON'T GET 
#ZEROS FROM THE NORMAL)
##This doesn't do the correct thing look below

In [69]:
## Try again with a loop to replace the normal values 
for index, x in np.ndenumerate(G1):
    if x == 1:
        G1[index] = np.random.normal(100,5)
        
print(G1)




[[  0.          95.28279892   0.         ...   0.           0.
    0.        ]
 [101.2298065    0.           0.         ... 106.4942715    0.
    0.        ]
 [  0.           0.           0.         ...   0.           0.
    0.        ]
 ...
 [  0.         105.63637833   0.         ...   0.           0.
  101.35779554]
 [  0.           0.           0.         ...   0.           0.
    0.        ]
 [  0.           0.           0.         ... 102.36770396   0.
    0.        ]]


In [70]:
### Now to find the estimated parameters:

## Let's find the probability of not being an edge, i.e the inflation of zeros

prob = ((G1 == 0).sum())/(G1.shape[0]**2) ##1st sum how many zeros and then to find prob, divide total number of edges
prob ## makes sense because the prob of edge existence was 0.3



0.712

In [71]:
### Now need to fit the Normal distribution

normalValues = G1[G1 != 0] ## extract non-zero values, because assuming they come from normal dist
mu, std = norm.fit(normalValues)
print(mu)
print(std)
## I recovered the parameters!!

100.13283753854752
4.971278285693728


### Let's try to fit ZI on an SBM where the block sizes are known

In [74]:
## so give data of known size and prob dist
n = [50, 50]
p = [[0.5, 0.2],
     [0.2, 0.05]]

np.random.seed(1)
G = graspy.simulations.sbm(n=n, p=p)


In [77]:
## Let's start with giving all the blocks the same normal density 
for index, x in np.ndenumerate(G):
    if x == 1:
        G[index] = np.random.normal(100,5)
        
print(G)


[[  0.           0.          97.5366121  ...  88.63224399   0.
    0.        ]
 [  0.           0.         108.90064678 ...   0.           0.
  104.78785225]
 [101.12302295  97.61427891   0.         ...   0.           0.
   93.30550284]
 ...
 [ 92.41598148   0.           0.         ...   0.           0.
    0.        ]
 [  0.           0.           0.         ...   0.           0.
    0.        ]
 [  0.          95.53369205 100.43122627 ...   0.           0.
    0.        ]]


In [79]:
### Let's try recovering the parameters for one of the blocks:

##Block1 is 50x50
prob1 = ((G[:50,:50] == 0).sum())/(n[0]**2) ##so find the probablility of edge not existing
prob1


0.5328

In [84]:
block1 = G[:50,:50]
normalValues1 = block1[block1 != 0] ## extract non-zero values, because assuming they come from normal dist
mu1, std1 = norm.fit(normalValues1)
print(mu1)
print(std1) 
## recovered the normal dist

99.87596684758769
4.949528819242979


In [85]:
block2 = G[:50,50:]
normalValues2 = block2[block2 != 0] ## extract non-zero values, because assuming they come from normal dist
mu2, std2 = norm.fit(normalValues2)
print(mu2)
print(std2)


99.83996042280363
5.090899702561903
