## Seaborn 画图

In [None]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides pandas/seaborn deprecation and
# chained-assignment warnings raised by later cells - consider narrowing it.
warnings.filterwarnings("ignore")
# Optional theme, currently disabled: sns.set_style("whitegrid")
# Load seaborn's "tips" example dataset, used by the box/point plot cells below.
tips = sns.load_dataset("tips")

In [None]:
# Preview the first rows of the tips table.
print(tips.head())

In [None]:
# Box plot of tip by party size, then a point plot of the same variables
# (error bars hidden via errwidth=0).
ax = sns.boxplot(data=tips, x="size", y="tip")
ax = sns.pointplot(
    data=tips,
    x="size",
    y="tip",
    palette="Set2",
    errwidth=0,
)

In [None]:
# Plotting helpers

# (value stored in the 'pool' column, prefix used in the legend label)
_POOLS = (('POOL=1', 'pool1'), ('POOL=2', 'pool2'))


def _plot_pool_depths(frame, suffix, ax):
    """Overlay one depth distribution per pool on *ax*, skipping empty pools."""
    drawn = False
    for pool_value, pool_label in _POOLS:
        depths = frame.loc[frame['pool'] == pool_value, 'depth']
        if depths.empty:
            # Nothing to draw for this pool in this subset.
            continue
        sns.distplot(depths, kde=True,
                     label='{}-{}'.format(pool_label, suffix), ax=ax)
        drawn = True
    if drawn:
        ax.legend()


def compare_plot(*data):
    """Draw per-pool depth distributions, one grid column per dataset.

    Row 0 of each column shows the 'POOL=1' and 'POOL=2' depth distributions
    over the whole dataset; every following row shows the same two
    distributions restricted to one chromosome.

    The grid is sized from the arguments themselves (number of datasets and
    the union of their chromosomes), so the function no longer depends on a
    global DataFrame and works for any number of datasets. Chromosomes are
    sorted so that a given row shows the same chromosome in every column.

    Args:
        *data: tables with 'pool', 'chr' and 'depth' columns.
    """
    if not data:
        return

    # Missing chromosome values are dropped; key=str keeps the sort safe if
    # the labels are not all of one type.
    chromosomes = sorted(
        {chrom for da in data for chrom in da['chr'].dropna().unique()},
        key=str,
    )

    # squeeze=False keeps `axes` two-dimensional even for a single dataset.
    f, axes = plt.subplots(len(chromosomes) + 1, len(data), figsize=(7, 7),
                           sharex=True, squeeze=False)
    sns.despine(left=True)

    for col, da in enumerate(data):
        _plot_pool_depths(da, 'all', axes[0, col])
        for row, chrom in enumerate(chromosomes, start=1):
            _plot_pool_depths(da[da['chr'] == chrom],
                              'chr{}'.format(chrom), axes[row, col])

In [None]:
def _read_pool_table(csv_path, column_names):
    """Read a CSV, rename its columns and derive 'chr' from the 'loc' column."""
    table = pd.read_csv(csv_path)
    table.columns = column_names
    # Split 'loc' on ':' and keep both the pieces and the first piece
    # (assumes 'loc' values look like "<chr>:<rest>" - TODO confirm).
    table['chr-temp'] = table['loc'].str.split(':')
    table['chr'] = table['chr-temp'].str.get(0)
    return table


merge_csv1 = _read_pool_table('./demo_data/twopoolmerge.csv',
                              ['tag', 'loc', 'pool', 'depth'])
merge_csv2 = _read_pool_table('./demo_data/onePool.csv',
                              ['tag', 'loc', 'depth', 'pool'])

# Printed message (Chinese): "the first column is normalised per pool,
# the second column is not split by pool".
print('第一列是分pool进行标准化，第二列为不分pool')
compare_plot(merge_csv1, merge_csv2)

In [None]:
# Point plot of tip by party size with hand-drawn guide lines and a text label.
ax = sns.pointplot(data=tips, x="size", y="tip", palette="Set2")
# Red dashed vertical segments at x = 1 and x = 2, spanning y from -5 to 5.
for x_pos in (1, 2):
    ax.plot([x_pos, x_pos], [-5, 5], '--', color='red')
# Yellow dashed horizontal segment between them at y = 4.
ax.plot([1, 2], [4, 4], '--', color='yellow')
ax.text(x=1.5, y=5, s='LDLR', color='red')

## Draw a PDF (probability density) plot

In [None]:
def draw_density(data, title, value, out_path='./demo_data/hist.png'):
    """Plot the distribution of one column and save the figure to disk.

    Args:
        data: table indexable by column name (e.g. a pandas DataFrame).
        title: title set on the plot's axes.
        value: name of the column in ``data`` whose values are plotted.
        out_path: file the figure is written to. Defaults to the path that
            used to be hard-coded, so existing calls behave as before.
    """
    ax = sns.distplot(data[value], kde=True)
    ax.set_title(title)
    ax.get_figure().savefig(out_path)

In [None]:
import numpy as np
# Synthetic demo table: 1000 rows of standard-normal noise in three columns.
data = pd.DataFrame(
    np.random.normal(loc=0, scale=1, size=(1000, 3)),
    columns=['chr', 'loc', 'depth'],
)
print(data.head())
draw_density(data, 'depth', 'depth')

In [None]:
import statsmodels.api as sm
# Empirical CDFs of two synthetic columns, evaluated on one shared grid.
data = pd.DataFrame(
    np.random.normal(loc=0, scale=1, size=(10000, 4)),
    columns=['C', 'CHH', 'CG', 'CHG'],
)
# The grid spans the observed range of column 'C' (np.linspace default length).
x = np.linspace(data['C'].min(), data['C'].max())
ecdf = sm.distributions.ECDF(data['C'])
y1 = ecdf(x)
# 'CHH' is evaluated on the same grid that was derived from 'C'.
ecdf = sm.distributions.ECDF(data['CHH'])
y2 = ecdf(x)
print(y1, y2)

In [None]:
sns.set(style="whitegrid")

# Four seeded random walks on a daily index of 365 dates starting 2016-01-01,
# smoothed with a 7-row rolling mean.
rs = np.random.RandomState(365)
values = rs.randn(365, 4).cumsum(axis=0)
dates = pd.date_range("1 1 2016", periods=365, freq="D")
data = pd.DataFrame(values, index=dates, columns=list("ABCD")).rolling(7).mean()

sns.lineplot(data=data, palette="tab10", linewidth=2.5)

## 多种数据类型

看如何合适的表示

In [None]:
# Four random demo columns of length 20:
# integer codes 1-3, integer depths 2-199, floats in [0, 1), integer codes 1-2.
mytype = np.random.randint(low=1, high=4, size=20)
mydepth = np.random.randint(low=2, high=200, size=20)
mypercentage = np.random.rand(20)
mycontext = np.random.randint(low=1, high=3, size=20)
print(mytype, mydepth, mypercentage, mycontext)

In [None]:
# Assemble the four random arrays into one table, one column per array.
pd_data = pd.DataFrame({
    'type': mytype,
    'depth': mydepth,
    'percentage': mypercentage,
    'context': mycontext,
})
print(pd_data)

In [None]:
# Recode the integer category codes as readable labels.
# Whole-column replace() is used instead of chained indexing
# (pd_data[col][mask] = ...): the chained form writes through a temporary
# object, which pandas does not guarantee to propagate to pd_data and which
# is a no-op under Copy-on-Write. Values not listed in a mapping are kept.
pd_data['context'] = pd_data['context'].replace({1: 'CpG', 2: 'CHH'})
pd_data['type'] = pd_data['type'].replace({1: 'cgi', 2: 'promoter', 3: 'nothing'})
print(pd_data)

In [None]:
# Pair plot of pd_data, coloured by the 'context' column.
sns.pairplot(pd_data, hue="context")

In [None]:
# Pair plot of pd_data, coloured by the 'type' column.
sns.pairplot(pd_data, hue="type")

In [None]:
# Pair plot of pd_data, coloured by the 'depth' column.
# NOTE(review): 'depth' holds random integers from a wide range, so most rows
# probably get their own hue level - confirm this is the intended demo.
sns.pairplot(pd_data,hue='depth')

In [None]:
# Pair plot of pd_data, coloured by the 'percentage' column.
# NOTE(review): 'percentage' holds random floats, so each row is likely a
# separate hue level - confirm this is the intended demo.
sns.pairplot(pd_data,hue='percentage')

In [None]:
# Pair plot of pd_data without any hue grouping.
sns.pairplot(pd_data)

## 甲基化plot

不同样本甲基化不同程度差异

In [None]:
# Load the percentage statistics and drop the rows whose contig is 'all'.
my_data = pd.read_csv('./demo_data/statOfPercentage.csv').loc[
    lambda frame: frame['contig'] != 'all'
]

# count_percent per sample, coloured by the 'percentage' column.
g = sns.catplot(data=my_data, x="sample", y="count_percent", hue="percentage")
# Rotate the x tick labels by 45 degrees.
g.set_xticklabels(rotation=45)

In [None]:
# Descriptive statistics of my_data, computed separately for each contig.
my_data.groupby('contig').describe()

In [None]:
my_data2 = pd.read_csv('./demo_data/statOfPercentage_dif_region_gene.csv')
# Relabel the '.' gene value as 'unknown'. A single .loc assignment writes
# into my_data2 itself; the chained form my_data2[mask]['gene'] = ... assigned
# into a temporary copy and left my_data2 unchanged.
my_data2.loc[my_data2['gene'] == '.', 'gene'] = 'unknown'

# Distinct region names.
regions = my_data2['region'].unique()



In [None]:
import os

# One boxen plot of 'level' per sample for every region, saved as
# region_<name>.png under ./demo_data/rrbs_pics.
region_pics_dir = './demo_data/rrbs_pics'
# savefig does not create missing directories, so make sure the target exists.
os.makedirs(region_pics_dir, exist_ok=True)

# NOTE(review): every iteration opens a new figure that is never closed
# explicitly - fine for a handful of regions, worth revisiting for many.
for region in regions:
    g = sns.catplot(x="sample", y="level", kind='boxen',
                    data=my_data2[my_data2['region'] == region], hue='region')
    g.set_xticklabels(rotation=45)
    g.savefig('{}/region_{}.png'.format(region_pics_dir, region))

In [None]:
import os

# One violin-plot grid per gene (one row per region), saved as
# gene_<name>.png under ./demo_data/rrbs_pics.
gene_pics_dir = './demo_data/rrbs_pics'
# savefig does not create missing directories, so make sure the target exists.
os.makedirs(gene_pics_dir, exist_ok=True)

genenames = my_data2['gene'].unique()
# NOTE(review): gene names go into the file name unchanged - a name containing
# a path separator would break the path; confirm the names are file-safe.
# NOTE(review): one figure per gene stays open until the cell finishes.
for gene in genenames:
    g = sns.catplot(x="sample", y="level", row='region', hue='region',
                    kind="violin", data=my_data2[my_data2['gene'] == gene])
    g.set_xticklabels(rotation=45)
    g.savefig('{}/gene_{}.png'.format(gene_pics_dir, gene))