In [1]:
import numpy as np
import scipy as scipy
from scipy.optimize import minimize
import timeit
%run HA_script.py

# Try Solving Many Ancestries problem, multiple SNPs

In this notebook we attempt to solve the following constrained, quadratic optimization problem:

$$\min_{\pi \in \mathbb{R}^k} f(\pi)=\sum_{i=1}^{N}\left(\sum_{j=1}^k a_{j,i}\pi_j-\tilde{a}_i\right)^2$$

$$\text{subject to:} \quad \sum_{j=1}^k \pi_j=1, \quad \pi_j \geq 0, \; j=1,\ldots,k,$$

where $a_{j,i} \in \mathbb{R}$, $j=1,\ldots,k$; $i=1,\ldots,N$ and $\tilde{a}_i \in \mathbb{R}$, $i=1,\ldots,N$ are quantities obtained from a genetics simulation. The $a_{j,i}$'s correspond to the observed allele frequency in ancestry $j$ at SNP $i$, and the $\tilde{a}_i$'s correspond to the total allele frequency at SNP $i$ (called `taf` in the code below). There are $k$ ancestries and $N$ SNPs.

In [2]:
N=1000000 # number of SNPs
k=15 # number of ancestries
SEED=0 # fixed seed so the simulated frequencies are identical on every run

# Draw all N*k simulated allele frequencies in a single call instead of growing
# the array column by column (each hstack copied the whole array again).
# Entry A[i, j] is the frequency for ancestry j at SNP i, uniform on [0, 1).
# A private RandomState is used so the global NumPy RNG state is left untouched.
rng=np.random.RandomState(SEED)
A=rng.uniform(low=0, high=1, size=(N,k))

# First, we choose an answer! This vector must be kx1 (one proportion per
# ancestry), with non-negative entries that sum to 1.

ans=[[0.1], [0.1], [0.1], [0.25], [0.05], [0.1], [0.05], [0.05], [0.01], [0.01], [0.01], [0.01], [0.01], [0.05], [0.1]]

# Guard against typos in the hand-written proportions.
assert np.shape(ans)==(k,1), "ans must hold exactly one proportion per ancestry"
assert np.isclose(np.sum(ans),1.0) and np.min(ans)>=0, "ans must be non-negative and sum to 1"

taf=A@ans # Total allele frequency

# Prints k, then the shapes of taf, ans and taf (again).
print(np.shape(A)[1],np.shape(taf), np.shape(ans), np.shape(taf))

15 (1000000, 1) (15, 1) (1000000, 1)


In [3]:
# Start from the uniform mixture: every entry equals 1/k, so all entries are
# positive and they sum to 1, which makes this a feasible initial point.

x_t=np.full((k,1), 1/k)
print('check shape of x_t:', x_t.shape, x_t.sum(axis=0))


check shape of x_t: (15, 1) [1.]


In [4]:
# Run HA on the simulated frequencies A, the total allele frequencies taf, and
# the feasible starting point x_t.
# NOTE(review): HA is not defined in this notebook; presumably it comes from
# HA_script.py, which the first cell loads with %run — confirm.
# NOTE(review): the recorded output shows a 3-tuple (array, int, float); the
# array looks like the estimated proportions, but the meaning of the other two
# values is not visible here — check HA_script.py.
what_we_want = HA(A,taf,x_t)

In [5]:
# Show the raw return value of HA. In the recorded output, the first element
# agrees with the chosen `ans` vector to within about 1e-6 in every component.
print(what_we_want)

(array([0.09999991, 0.0999998 , 0.09999979, 0.25000007, 0.05      ,
       0.10000017, 0.04999963, 0.05000021, 0.00999959, 0.01000016,
       0.00999988, 0.0100004 , 0.01000009, 0.05000014, 0.10000018]), 35, 19.10940058482811)
