In [None]:
#Set up environment
import malariagen_data
import numpy as np

# Pickle is used to save variables as files for future use
import pickle
# NLopt is the optimization library dadi uses
import nlopt

# Matplotlib is a library dadi uses for plotting frequency spectra
#import matplotlib.pyplot as plt
import dadi
import allel

In [None]:
# Notebook configuration: output directory, data API handle and sample sets.

# NOTE(review): machine-specific absolute path; consider making it configurable.
results_dir = '/Users/dennistpw/Projects/funestus_tz/feems/'

# Handle on the MalariaGEN Af1 data resource.
# pre=True presumably enables pre-release data -- confirm against the
# malariagen_data documentation.
af1 = malariagen_data.Af1(pre=True)

# Sample sets passed to the metadata and SNP queries in the cells below.
sample_sets = [
    # '1231-VO-MULTI-WONDJI-VMF00043',  (currently excluded)
    '1236-VO-TZ-OKUMU-VMF00090',
    '1236-VO-TZ-OKUMU-VMF00261',
    '1236-VO-TZ-OKUMU-VMF00248',
    '1236-VO-TZ-OKUMU-VMF00252',
    '1236-VO-TZ-OKUMU-OKFR-TZ-2008',
    'AG1000G-TZ',
]




In [None]:
# Fetch SNP calls for funestus samples in a 3 Mb window of 2RL (6-9 Mb).
snp_region = '2RL:6000000-9000000'
snps = af1.snp_calls(
    region=snp_region,
    sample_query='taxon == "funestus"',
    sample_sets=sample_sets,
)

                                     

In [None]:
# admin1_name values used to assign samples to either side of the rift.
rift_north = [
    'Mwanza',
    'Kagera',
    'Katavi',
    'Kigoma',
]
rift_south = [
    'Pwani',
    'Morogoro',
    'Tanga',
    'Ruvuma',
    'Dodoma',
    'Lindi',
    'Mtwara',
]


In [None]:
# Select sample metadata and draw a reproducible subsample from each side of the rift.
df_samples = af1.sample_metadata(sample_query="taxon == 'funestus'", sample_sets=sample_sets)

# Subsampling parameters. A fixed seed makes Restart & Run All reproduce the same draw.
N_PER_SIDE = 50
SUBSAMPLE_SEED = 42
rng = np.random.default_rng(SUBSAMPLE_SEED)

# Positional row numbers of the samples on each side of the rift.
# NOTE(review): these are later used as indices into the genotype sample axis,
# which assumes df_samples rows are in the same order as the samples in the SNP
# calls (same sample_query and sample_sets) -- confirm.
north_inds = np.flatnonzero(df_samples['admin1_name'].isin(rift_north).to_numpy())
south_inds = np.flatnonzero(df_samples['admin1_name'].isin(rift_south).to_numpy())

# Fail early with a clear message instead of silently re-drawing the same individuals.
for side, inds in (('north', north_inds), ('south', south_inds)):
    if len(inds) < N_PER_SIDE:
        raise ValueError(
            f'only {len(inds)} {side} samples available, need {N_PER_SIDE}'
        )

# Draw WITHOUT replacement: np.random.choice defaults to replace=True, which can
# pick the same individual more than once and distort the allele counts.
northsub = rng.choice(north_inds, size=N_PER_SIDE, replace=False)
southsub = rng.choice(south_inds, size=N_PER_SIDE, replace=False)
subsampled_inds = np.concatenate((northsub, southsub))

# Sample IDs of the subsampled individuals (positional lookup, matching the indices above).
sample_list = df_samples.iloc[subsampled_inds].sample_id.tolist()

# Keys are kept as 'west'/'east'/'all' because later cells look them up by these
# names; 'west' holds the rift_north draw and 'east' the rift_south draw.
subpops = {
    'west': northsub,
    'east': southsub,
    'all': subsampled_inds
}

In [None]:
# Wrap the SNP call genotypes in a scikit-allel genotype array.
gt = allel.GenotypeArray(snps['call_genotype'])

# Allele counts for every entry of subpops ('west', 'east', 'all').
ac_subpops = gt.count_alleles_subpops(subpops)

# Keep only the variants that segregate within the 'all' subpopulation,
# i.e. the union of the two subsamples.
ac_union = ac_subpops['all']
is_seg = ac_union.is_segregating()[:]
genotypes_seg = gt.compress(is_seg, axis=0)

# Recount alleles on the retained variants.
seg_ac = genotypes_seg.count_alleles_subpops(subpops)

all 172072
north 151169
south 107915


In [None]:
# Generate and plot the folded joint (2D) site frequency spectrum.
#
# Uses seg_ac (allele counts recounted on segregating sites) rather than the
# unfiltered ac_subpops, so the segregating-site filter is actually applied.
# Consequence: monomorphic sites no longer contribute to the [0, 0] cell.
#
# allel.joint_sfs_folded is documented to take biallelic allele counts of shape
# (n_variants, 2), whereas the SNP calls can carry more than two alleles per site.
# So restrict to sites with exactly two observed alleles in the combined sample
# and keep only those two alleles' columns.
ac_union_seg = np.asarray(seg_ac['all'])
is_biallelic = np.count_nonzero(ac_union_seg, axis=1) == 2

# A stable argsort on "count == 0" puts the columns of the observed alleles
# first (in allele order); the first two are the alleles present at each site.
allele_cols = np.argsort(ac_union_seg[is_biallelic] == 0, axis=1, kind='stable')[:, :2]

# Pull the same two allele columns out of each population's counts.
ac_west = np.take_along_axis(np.asarray(seg_ac['west'])[is_biallelic], allele_cols, axis=1)
ac_east = np.take_along_axis(np.asarray(seg_ac['east'])[is_biallelic], allele_cols, axis=1)

joint_sfs = allel.joint_sfs_folded(ac_west, ac_east)
allel.plot_joint_sfs_folded(joint_sfs)

In [None]:
# Write the joint SFS array to a .npy file (path is relative to the working directory).
sfs_path = 'rift_joint.sfs.npy'
np.save(sfs_path, joint_sfs)