In [1]:
from scipy import stats 

In [2]:
my_values_expected = [48,48] 
my_values_actual = [74,48]

In [3]:
stats.chisquare(my_values_actual, my_values_expected)

Power_divergenceResult(statistic=14.083333333333334, pvalue=0.0001748865925420932)

In [4]:
#http://scikit-bio.org/docs/0.1.1/generated/skbio.math.stats.ordination.PCoA.html?highlight=pcoa
from skbio.stats.ordination import pcoa

In [5]:
import pickle
import numpy as np

In [6]:
with open('../data/cambridge_analytica/matrix_collection_1_1', 'rb') as f:
    matrix = pickle.load(f)


In [7]:
def _pcoa(pair_dists, n_components=2):
    """Principal Coordinate Analysis,
    aka Classical Multidimensional Scaling
    """
    # code referenced from skbio.stats.ordination.pcoa
    # https://github.com/biocore/scikit-bio/blob/0.5.0/skbio/stats/ordination/_principal_coordinate_analysis.py

    # pairwise distance matrix is assumed symmetric
    pair_dists = np.asarray(pair_dists, np.float64)

    # perform SVD on double centred distance matrix
    n = pair_dists.shape[0]
    H = np.eye(n) - np.ones((n, n)) / n
    B = - H.dot(pair_dists ** 2).dot(H) / 2
    eigvals, eigvecs = np.linalg.eig(B)

    # Take first n_components of eigenvalues and eigenvectors
    # sorted in decreasing order
    ix = eigvals.argsort()[::-1][:n_components]
    eigvals = eigvals[ix]
    eigvecs = eigvecs[:, ix]

    # replace any remaining negative eigenvalues and associated eigenvectors with zeroes
    # at least 1 eigenvalue must be zero
    eigvals[np.isclose(eigvals, 0)] = 0
    if np.any(eigvals < 0):
        ix_neg = eigvals < 0
        eigvals[ix_neg] = np.zeros(eigvals[ix_neg].shape)
        eigvecs[:, ix_neg] = np.zeros(eigvecs[:, ix_neg].shape)

    return np.sqrt(eigvals) * eigvecs

In [8]:
import matplotlib.pyplot as plt


In [9]:
import matplotlib.cm as cm


In [10]:

from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

In [11]:
def f(x):
    n = [1, 4, 8, 15, 14, 5, 13, 10, 12, 7, 6, 2, 3, 9, 11]
    matrix_cosine_distance = 1-matrix[x]
    np.fill_diagonal(matrix_cosine_distance,0)
    colormap = np.array(['azure', 'black', 'darkgreen','green', 'indigo', 'lavender','orange', 'magenta', 'yellow','yellowgreen', 'g', 'b','r', 'g', 'teal'])
    fig, ax = plt.subplots(figsize=(10,10))
    ax.scatter(_pcoa(matrix_cosine_distance)[:,0],_pcoa(matrix_cosine_distance)[:,1], c=colormap)
    ax.set_xlim([-0.25,0.25]) 
    ax.set_ylim([-0.25,0.25]) 
    
    for i, txt in enumerate(n):
        ax.annotate(txt, (_pcoa(matrix_cosine_distance)[:,0][i], _pcoa(matrix_cosine_distance)[:,1][i]))
    
    #ax.show()

In [12]:
interact(f, x=(0.0, 1.0, 0.01));

interactive(children=(FloatSlider(value=0.5, description='x', max=1.0, step=0.01), Output()), _dom_classes=('w…