In [12]:
__copyright__ = "Copyright 2017, HH-HZI Project"
__author__ = "Ehsaneddin Asgari"
__license__ = "GPL"
__version__ = "1.0.0"
__maintainer__ = "Ehsaneddin Asgari"
__email__ = "asgari@berkeley.edu ehsaneddin.asgari@helmholtz-hzi.de"

import sys
sys.path.append('../')
import numpy as np
from data_access.data_create_utility import ABRDataCreate
from utility.math_utility import get_kl_rows
from utility.visualization_utility import create_mat_plot
import pandas as pd

class PhenotypeTable(object):
    '''
    Phenotype table loaded from a delimited text file into a pandas DataFrame.

    The first column of the table is used as the isolate identifier and every
    remaining column as one drug (phenotype). The analysis methods work on
    ``drug_vectors``, a numeric matrix with one row per drug and one column per
    isolate, which is built by make_drug_vector.
    '''

    # Default translation of raw table cells (as strings) to numeric values.
    # The '' key is used for missing cells.
    _DEFAULT_MAPPING = {'0': 0, '0.0': 0, '1': 1, '1.0': 1, '': 0.5}

    def __init__(self, path):
        '''
        :param path: location of the phenotype table, handed to pandas.read_table
        '''
        self.phenotype_table = pd.read_table(path)
        # placeholders; filled by make_drug_vector, which the analysis
        # methods run on demand the first time they are needed
        self.drug_vectors = []
        self.drugs = []

    @staticmethod
    def _cell_key(value):
        '''
        :param value: a raw cell of the phenotype table
        :return: the string key used to look the cell up in a mapping
                 ('' for missing cells)
        '''
        # pandas reads empty cells as NaN; the mappings address them as ''
        if pd.isna(value):
            return ''
        # integer columns yield '0'/'1', float columns '0.0'/'1.0'
        return str(value)

    def _ensure_drug_vectors(self):
        '''
        Build the drug vectors with the default mapping if they were not built yet
        '''
        if not self.drugs:
            self.make_drug_vector()

    def get_isolate_list(self):
        '''
        Get the isolate list
        :return: list of the values in the first column of the table
        '''
        return self.phenotype_table.iloc[:, 0].tolist()

    def make_drug_vector(self, mapping=None):
        '''
        Fill self.drug_vectors (drugs x isolates) and self.drugs from the table
        :param mapping: resistance value mapping from the string form of a cell
                        to a number; missing cells are looked up under ''.
                        Defaults to {'0': 0, '0.0': 0, '1': 1, '1.0': 1, '': 0.5}
        :return: drug vectors
        :raises KeyError: if a cell has no entry in the mapping
        '''
        if mapping is None:
            mapping = self._DEFAULT_MAPPING
        drug_columns = self.phenotype_table.iloc[:, 1:]
        drug_names = list(drug_columns.columns)
        vectors = np.zeros((len(drug_names), len(self.phenotype_table)))
        # walk the table column by column so every cell keeps its own dtype
        for row, (_, column) in enumerate(drug_columns.items()):
            vectors[row, :] = [mapping[self._cell_key(cell)] for cell in column]
        # only publish the result once the whole table was mapped successfully
        self.drug_vectors = vectors
        self.drugs = drug_names
        return self.drug_vectors

    def get_correlation_coefficient(self):
        '''
        :return: Return Pearson product-moment correlation coefficients
        '''
        self._ensure_drug_vectors()
        return np.corrcoef(self.drug_vectors)

    def get_kl_divergence(self):
        '''
        :return: kl-div between drugs
        '''
        self._ensure_drug_vectors()
        return get_kl_rows(self.drug_vectors)

    def get_isolate_profile_kldiv(self):
        '''
        :return: kl_div matrix, list of isolates on col,row
        '''
        self._ensure_drug_vectors()
        return get_kl_rows(self.drug_vectors.T), self.get_isolate_list()

    def get_isolate_profile_correlation_coefficient(self):
        '''
        :return: corr matrix, list of isolates on col,row
        '''
        self._ensure_drug_vectors()
        return np.corrcoef(self.drug_vectors.T), self.get_isolate_list()

    def create_kl_divergence(self, filename):
        '''
        Plot the drug-vs-drug KL divergence matrix
        :param filename: name of the plot, appended to 'results/drug_analysis/'
        to play with colormaps https://matplotlib.org/users/colormaps.html
        '''
        create_mat_plot(self.get_kl_divergence(), self.drugs, 'Drug performance Kullback–Leibler divergence',
                        'results/drug_analysis/' + filename, cmap='Purples')

    def create_correlation_coefficient(self, filename):
        '''
        Plot the drug-vs-drug Pearson correlation matrix
        :param filename: name of the plot, appended to 'results/drug_analysis/'
        to play with colormaps https://matplotlib.org/users/colormaps.html
        '''
        create_mat_plot(self.get_correlation_coefficient(), self.drugs,
                        'Drug performance Pearson correlation coefficients', 'results/drug_analysis/' + filename,
                        cmap='Purples')


In [13]:
# Load the phenotype metadata table of the Pseudogenomics project.
phenotype_path = '/mounts/data/proj/asgari/Pseudogenomics/metadata/phenotypes.txt'
PT = PhenotypeTable(phenotype_path)

In [19]:
# The first column of the phenotype table is taken as the isolate column.
isolate_column = PT.phenotype_table.columns[0]
isolates = PT.phenotype_table[isolate_column]

In [29]:
# Every column after the first one is a phenotype.
phenotypes = list(PT.phenotype_table.columns[1:])

In [26]:
# Sub-table holding only the phenotype columns (first column dropped).
a = PT.phenotype_table[list(PT.phenotype_table.columns[1:])]

In [34]:
# Keep only the isolates whose every phenotype cell maps to a 0/1 label;
# print the label vector of each kept isolate.
mapping = {'0': 0, '0.0': 0, '1': 1, '1.0': 1}
isolates_selected = []
for index, row in a.iterrows():
    labels = [str(value) for value in row]
    # cells without a mapping entry (e.g. missing values) are dropped here ...
    row = [mapping[label] for label in labels if label in mapping]
    # ... so a shorter vector means the isolate is not fully labeled
    if len(row) != len(phenotypes):
        continue
    print(row)
    isolates_selected.append(isolates[index])

[0, 0, 1, 0, 0]
[0, 0, 0, 0, 0]
[1, 0, 1, 1, 0]
[1, 1, 1, 0, 0]
[1, 1, 1, 1, 0]
[0, 0, 0, 1, 0]
[1, 1, 1, 1, 1]
[1, 1, 1, 1, 0]
[1, 0, 1, 1, 0]
[0, 1, 0, 1, 0]
[1, 1, 1, 1, 0]
[0, 0, 1, 1, 0]
[0, 0, 1, 1, 0]
[0, 0, 1, 0, 1]
[0, 0, 0, 1, 0]
[0, 0, 1, 1, 0]
[0, 0, 0, 0, 0]
[1, 0, 1, 1, 0]
[1, 1, 1, 1, 1]
[1, 1, 1, 1, 0]
[1, 1, 0, 1, 0]
[0, 0, 0, 1, 0]
[0, 0, 0, 1, 0]
[0, 0, 1, 1, 0]
[0, 0, 0, 1, 0]
[0, 0, 0, 0, 1]
[0, 1, 0, 0, 0]
[0, 0, 0, 1, 0]
[0, 0, 0, 0, 0]
[0, 1, 1, 1, 0]
[0, 0, 0, 0, 0]
[0, 0, 0, 0, 0]
[1, 1, 1, 0, 0]
[0, 0, 0, 1, 0]
[0, 1, 0, 1, 0]
[0, 0, 0, 1, 0]
[1, 1, 1, 0, 1]
[1, 1, 1, 1, 0]
[1, 1, 1, 0, 1]
[0, 0, 0, 0, 0]
[1, 1, 1, 0, 0]
[1, 1, 1, 0, 0]
[1, 1, 1, 0, 0]
[0, 0, 0, 1, 0]
[0, 0, 0, 0, 1]
[1, 1, 1, 0, 1]
[1, 1, 1, 1, 0]
[0, 0, 0, 0, 0]
[0, 0, 1, 1, 0]
[0, 1, 0, 0, 0]
[0, 1, 0, 1, 0]
[0, 0, 0, 0, 0]
[1, 1, 1, 1, 0]
[0, 0, 1, 1, 0]
[0, 1, 0, 1, 0]
[0, 1, 1, 1, 1]
[0, 1, 0, 1, 0]
[0, 0, 1, 1, 0]
[1, 0, 1, 1, 0]
[1, 1, 1, 1, 0]
[0, 0, 0, 0, 0]
[0, 0, 0, 0, 1]
[0, 1, 0