In [43]:
import networkx as nx
import seaborn
import custom_funcs as cf
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from joblib import Parallel, delayed
from time import time

%load_ext autoreload
%autoreload 2

%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [44]:
# Set Seaborn styles & context.
# These are global plotting settings: they apply to every figure drawn by the
# cells that follow. "paper" is the seaborn context name and 'white' the
# seaborn style name passed through unchanged.
seaborn.set_context("paper")
seaborn.set_style('white')

In [None]:
# Read in data and do data pre-processing.
# The pickled graph is loaded once and then passed through each custom_funcs
# step in order; every step receives the previous step's output and its return
# value becomes the new `G`.
# NOTE(review): what each step does is defined in custom_funcs, which is not
# visible here -- the names suggest attribute imputation followed by removal
# of zero-weight edges; confirm there.
G = nx.read_gpickle('20150902_all_ird Final Graph.pkl')
for preprocess in (
    cf.impute_reassortant_status,
    cf.impute_host_group_name,
    cf.impute_weights,
    cf.remove_zero_weighted_edges,
):
    G = preprocess(G)

In [None]:
# Compute proportion reassortant on the observed (unshuffled) graph.
# Edges are grouped by the 'host_group' node attribute and 'Unknown' is passed
# as an exclusion.
# NOTE(review): the exact grouping and return structure are defined in
# custom_funcs.edge_proportion_reassortant -- confirm there.
data_props = cf.edge_proportion_reassortant(G, attr='host_group', exclusions=['Unknown'])
# Bare expression so the notebook displays the result.
data_props

In [None]:
def null_distribution_proportion_reassortant(G):
    """
    Produce one sample of the null distribution of proportion reassortant.

    The 'host_group' node attribute of `G` is shuffled with
    cf.shuffle_node_attribute_label, and the shuffled graph is summarised with
    cf.edge_proportion_reassortant using the same 'host_group' attribute and
    'Unknown' exclusion as the observed-data cell.

    Returns whatever cf.edge_proportion_reassortant returns for the shuffled
    graph.
    """
    shuffled_graph = cf.shuffle_node_attribute_label(G, 'host_group')
    return cf.edge_proportion_reassortant(shuffled_graph, 'host_group', exclusions=['Unknown'])


# Draw 100 independent shuffles in parallel on all available cores
# (n_jobs=-1) and report the wall-clock time in seconds.
start = time()
props_null = Parallel(n_jobs=-1)(
    delayed(null_distribution_proportion_reassortant)(G) for _ in range(100)
)
end = time()
print(end - start)

In [None]:
# Collect the shuffled-graph results into a DataFrame, one row per shuffle.
# presumably each element of props_null is dict-like, so its keys become the
# columns -- verify against cf.edge_proportion_reassortant.
null = pd.DataFrame(props_null)
# Column-wise mean across the shuffles; displayed as the cell output.
null.mean()

In [None]:
# Wrap the single observed result in a one-row DataFrame so it can be
# summarised and plotted with the same calls as `null`.
# NOTE: the name `data` is rebound to a plain list by a later cell.
data = pd.DataFrame([data_props])
# With one row the mean is just the observed value of each column.
data.mean()

In [None]:
# Grouped bar chart: null (shuffled) mean next to the observed value for each
# host-group pairing.
fig, ax = plt.subplots(figsize=(3,2))

# Two groups of bars, each bar `width` wide.
ind = np.arange(2)
width = 0.35

# Null bars are semi-transparent and carry error bars of 5 standard deviations
# of the shuffled values; observed bars sit immediately to their right.
ax.bar(ind, null.mean(), width, color='blue', label='Null', alpha=0.5, yerr=null.std()*5)
ax.bar(ind+width, data.mean(), width, color='blue', label='Data')

# Axis decoration.
ax.set_xticks(ind+width)
ax.set_xticklabels(['Different', 'Same'])
ax.set_ylabel('Proportion Reassortant')
ax.set_xlabel('Host Group Pair')
ax.legend()
ax.set_ylim(0,1)

In [None]:
# Between which host groups are they over-represented or under-represented?

def host_group_domain_graph(G):
    """
    Computes the proportion of reassortant representation when crossing between host groups.

    Collapses the graph `G` into a directed graph whose nodes are host groups.
    Every edge (source -> sink) of `G` adds its 'weight' to the host-group edge
    (source's host group -> sink's host group):

    - 'total' is the summed weight of all such edges;
    - 'reassortant' is the summed weight of those edges whose sink node has a
      truthy 'reassortant' attribute;
    - 'p_reassortant' is 'reassortant' / 'total'.

    Parameters
    ----------
    G : directed networkx graph (it is queried with `in_edges`)
        Every node must carry 'host_group' and 'reassortant' attributes and
        every edge a 'weight' attribute.

    Returns
    -------
    networkx.DiGraph
        One node per host group found in `G` (including groups whose nodes have
        no incoming edges) and one edge per observed host-group transition,
        carrying 'total', 'reassortant' and 'p_reassortant'.

    Raises
    ------
    KeyError
        If a required node or edge attribute is missing.
    ZeroDivisionError
        If the weights of a host-group transition sum to zero.
    """
    hg_graph = nx.DiGraph()

    # Resolve every node's host group once, so that source nodes can be looked
    # up by name inside the edge loop.
    host_group_of = {n: node_d['host_group'] for n, node_d in G.nodes(data=True)}

    for n, node_d in G.nodes(data=True):
        is_reassortant = node_d['reassortant']

        # Register the sink's host group even when the node has no in-edges.
        sk_hg = host_group_of[n]
        hg_graph.add_node(sk_hg)

        for sc, _, edge_d in G.in_edges(n, data=True):
            sc_hg = host_group_of[sc]
            weight = edge_d['weight']

            # Start both accumulators at zero so that the first edge of a
            # transition is counted exactly once, like every later edge.
            if not hg_graph.has_edge(sc_hg, sk_hg):
                hg_graph.add_edge(sc_hg, sk_hg, total=0, reassortant=0)

            stats = hg_graph[sc_hg][sk_hg]
            stats['total'] += weight
            if is_reassortant:
                stats['reassortant'] += weight

    for sc_hg, sk_hg, stats in hg_graph.edges(data=True):
        hg_graph[sc_hg][sk_hg]['p_reassortant'] = stats['reassortant'] / stats['total']

    return hg_graph


In [None]:
# Host-group transition graph for the observed (unshuffled) data.
hg_graph = host_group_domain_graph(G)
# Display every host-group edge with its attribute dict.
hg_graph.edges(data=True)

In [None]:
def null_proportion_hg_reassortant(G, equally=False):
    """
    Build one null-model host-group graph.

    The 'host_group' node attribute of `G` is shuffled with
    cf.shuffle_node_attribute_label (`equally` is forwarded as its third
    positional argument) and the shuffled graph is summarised with
    host_group_domain_graph.

    NOTE(review): the effect of `equally` is defined in custom_funcs and is not
    visible here -- confirm there.
    """
    return host_group_domain_graph(
        cf.shuffle_node_attribute_label(G, 'host_group', equally)
    )

In [None]:
# Build 100 null-model host-group graphs in parallel on all available cores
# (n_jobs=-1), shuffling with equally=True, and report the wall-clock time in
# seconds. Only the parallel run itself is timed.
start = time()
results = Parallel(n_jobs=-1)(
    delayed(null_proportion_hg_reassortant)(G, equally=True) for _ in range(100)
)
end = time()
print(end - start)

In [None]:
def distr_null_p_reassortant(list_of_hg_graphs):
    """
    Pool the 'p_reassortant' values of several host-group graphs.

    Parameters
    ----------
    list_of_hg_graphs : iterable of networkx.DiGraph
        Graphs whose edges each carry a 'p_reassortant' attribute.

    Returns
    -------
    networkx.DiGraph
        Contains every node and edge found in any input graph. Each edge's
        'p_reassortant' attribute is a list holding one value per input graph
        in which that edge occurs, in input order.
    """
    hg_graph = nx.DiGraph()
    for g in list_of_hg_graphs:
        hg_graph.add_nodes_from(g.nodes())
        for sc, sk, d in g.edges(data=True):
            # has_edge / adjacency access avoid scanning the whole edge list
            # for every edge and do not depend on the `edge` attribute.
            if hg_graph.has_edge(sc, sk):
                hg_graph[sc][sk]['p_reassortant'].append(d['p_reassortant'])
            else:
                hg_graph.add_edge(sc, sk, p_reassortant=[d['p_reassortant']])
    return hg_graph

summaryG = distr_null_p_reassortant(results)

In [None]:
# Display each host-group edge with its list of null 'p_reassortant' values.
summaryG.edges(data=True)

In [None]:
# Short labels for the host groups, used to build tick labels such as 'W:D'
# (source:sink).
name_map = {'Wild': 'W', 'Domestic': 'D', 'Human': 'H'}

# Parallel lists: one entry per host-group transition, in summaryG edge order.
names, means, stds = [], [], []
for sc, sk, d in summaryG.edges(data=True):
    # Transitions touching the 'Unknown' host group are left out.
    if 'Unknown' in (sc, sk):
        continue

    null_values = d['p_reassortant']
    names.append('{0}:{1}'.format(name_map[sc], name_map[sk]))
    means.append(np.mean(null_values))
    stds.append(np.std(null_values))

In [None]:
# Observed 'p_reassortant' for every host-group transition.
# The values are collected by walking the edges of `summaryG` -- the same
# graph, and therefore the same edge order, that the null labels and means are
# built from -- and looking each edge up in `hg_graph`. Iterating `hg_graph`
# directly does not guarantee the same order, which would pair observed bars
# with the wrong labels.
data = []

for sc, sk in summaryG.edges():
    # Transitions touching the 'Unknown' host group are left out.
    if sc == 'Unknown' or sk == 'Unknown':
        continue
    if hg_graph.has_edge(sc, sk):
        data.append(hg_graph[sc][sk]['p_reassortant'])
    else:
        # Transition present only in shuffled graphs: there is no observed
        # proportion, so keep the slot (and the alignment) with NaN.
        data.append(np.nan)
data

In [None]:
# Grouped bar chart: null mean next to the observed value for each host-group
# transition.
fig, ax = plt.subplots(figsize=(4,2))

# One group of bars per transition, each bar `width` wide.
ind = np.arange(len(means))
width = 0.35

# Null bars are semi-transparent and carry error bars of 5 standard
# deviations; observed bars sit immediately to their right.
ax.bar(ind, means, width=width, color='blue', label='Null', yerr=np.array(stds)*5, alpha=0.5)
ax.bar(ind+width, data, width=width, color='blue', label='Data')

# Axis decoration.
ax.set_xticks(ind+width)
ax.set_xticklabels(names)
ax.set_xlabel('Host Group Transition')
ax.set_ylabel('Weighted Proportion\nReassortant Per Event')
ax.legend(loc='upper center')

# Adjust the margins, write the figure to disk, then render it inline.
plt.subplots_adjust(left=0.15, right=0.95)
plt.savefig('figures/Proportion Reassortant Ecological.pdf')
plt.show()

In [None]:
# Element-wise difference (null mean minus observed value) per transition;
# positive entries mean the observed proportion is below the null mean.
np.array(means) - np.array(data)