In [13]:
import pandas as pd
import numpy as np
import pickle as pkl
import os

import seaborn as sns
import matplotlib.pyplot as plt

In [14]:
# np.array(os.listdir())

### 绘制热力图

* $\text{concentration} = \dfrac{\text{count} \times V_f \times D_f}{TE \times V_i \times m}$

> $TE$: 传输效率   
> $V_i$: 进样体积(ml)   
> $V_f$: 定容体积(ml)  
> $D_f$: 稀释倍数  
> $m$: 称样质量(mg)

In [22]:
class HeatMapDrawer:
    '''
    Build and draw a log-scaled heat map from a "<sample>-features.csv" file.

    Rows of the heat map are component combinations (decoded from the
    ``uniq_iso_emb`` bit mask of every particle); cell values are

        log_base(count * Vf * Df / (TE * Vi * m))

    where ``count`` is the number of non-null entries per column within
    one component combination.
    '''

    # Default location of the pickled {name: bit mask} isotope dictionary.
    ISO_DIC_PATH = './existing_isotopes_dict.pk'

    def __init__(self, file, base, TE, Vi, Vf, Df, m):
        '''
        file: path of the features CSV file to read
        base: base of the logarithm applied to the scaled counts
        TE:   transport efficiency
        Vi:   injected sample volume (ml)
        Vf:   final (made-up) volume (ml)
        Df:   dilution factor
        m:    weighed sample mass (mg)
        '''
        self.file = file
        self.df = pd.read_csv(file)
        self.base = base
        self.TE = TE
        self.Vi = Vi
        self.Vf = Vf
        self.Df = Df
        self.m = m
        # Filled lazily by get_iso_dic() the first time decode() needs it.
        self.iso_dic = dict()

    def get_iso_dic(self):
        '''
        Load the pickled isotope dictionary from ISO_DIC_PATH into
        ``self.iso_dic``.
        '''
        # SECURITY: pickle.load can execute arbitrary code. Only use this
        # with a dictionary file that comes from a trusted source.
        with open(self.ISO_DIC_PATH, 'rb') as f:
            self.iso_dic = pkl.load(f)

    def decode(self, emb):
        '''
        Convert the embedding of a particle's frequent item set into a
        composition string.

        emb: integer bit mask; every dictionary entry whose mask ``v``
             is fully contained in ``emb`` (``v == emb & v``) contributes
             its key.

        Returns the matching keys joined by '-', in dictionary order, or
        an empty string when nothing matches.
        '''
        # Load the dictionary only once; reloading the pickle for every
        # row (as .apply() would otherwise do) is pure overhead.
        if not self.iso_dic:
            self.get_iso_dic()
        return '-'.join(k for k, v in self.iso_dic.items() if v == emb & v)

    def heatmap_data(self):
        '''
        Build the table that is plotted by draw().

        Returns a DataFrame with one row per distinct component
        combination (in order of first appearance), the log-scaled
        concentration columns, and a trailing ``components`` column.
        Writing the table to CSV is currently disabled.
        '''
        coef = self.Vf * self.Df / (self.TE * self.Vi * self.m)

        # The column window is positional: it is derived from the total
        # column count of the features file.
        iso_n = int((self.df.shape[1] - 7) / 2)
        iso_df = self.df.iloc[:, 1:iso_n + 6]
        iso_df = iso_df.drop(['uniq_iso', 'uniq_iso_n', 'number', 'concentration'], axis=1)
        iso_df['uniq_iso_emb'] = iso_df['uniq_iso_emb'].astype('int64')
        iso_df['components'] = iso_df['uniq_iso_emb'].apply(self.decode)
        # Drop the first remaining column before counting.
        iso_df = iso_df.iloc[:, 1:]

        # One grouped count replaces the per-component groupby + concat
        # loop; sort=False keeps the first-appearance order of unique().
        counts = iso_df.groupby('components', sort=False).count()

        # Zero counts become 1 so that their logarithm is 0 instead of -inf.
        res = (coef * counts).replace(0, 1)
        res = np.log(res) / np.log(self.base)

        res = res.reset_index(drop=True)
        res['components'] = counts.index.to_numpy()
        # res.to_csv(''.join(['heatmap_', str(self.base), '.csv']), index=None)
        return res

    def draw(self):
        '''
        Draw the heat map and save it next to the input file as
        "<file without extension>-<base>.png".
        '''
        data = self.heatmap_data().set_index('components')
        sns.set_style('whitegrid')
        ax = sns.heatmap(data, cmap='RdBu_r')
        fig = ax.get_figure()
        # splitext handles extensions of any length, unlike file[:-4].
        stem = os.path.splitext(self.file)[0]
        figname = '{}-{}.png'.format(stem, self.base)
        fig.savefig(figname, bbox_inches='tight')

In [23]:
# Sample identifiers, in processing order; every sample has a matching
# '<id>-features.csv' file.
_sample_ids = (
    'A1', 'A11', 'A13', 'A15', 'A17', 'A19',
    'A21', 'A23', 'A25', 'A27', 'A29',
    'A3', 'A31', 'A33', 'A35', 'A37', 'A39',
    'A41', 'A43', 'A45', 'A47', 'A49',
    'A5', 'A51', 'A53', 'A55', 'A57', 'A59',
    'A61', 'A63', 'A65', 'A67', 'A69',
    'A7', 'A71', 'A73', 'A75', 'A77', 'A79',
    'A81', 'A9',
    'S23', 'S9',
)
file_li = [sample_id + '-features.csv' for sample_id in _sample_ids]

# Parameters of the concentration formula
#   concentration = count * Vf * Df / (TE * Vi * m)
TE = 0.1    # transport efficiency
Vi = 0.05   # injected sample volume (ml)
Vf = 50     # final (made-up) volume (ml)
Df = 10     # dilution factor
m = 20      # weighed sample mass (mg)

In [25]:
# Default figure size (30 x 30) used for every heat map drawn below.
plt.rcParams.update({"figure.figsize": (30, 30)})

# Draw and save one heat map per features file.
for file in file_li:
    base = 10  # base of the logarithm applied to the scaled counts
    drawer = HeatMapDrawer(file=file, base=base, TE=TE, Vi=Vi, Vf=Vf, Df=Df, m=m)
    drawer.draw()
    # Clear the current figure so the next heat map starts from an empty canvas.
    plt.clf()

<Figure size 2160x2160 with 0 Axes>