Fig1

In [None]:
#fig1a
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
from matplotlib.patches import Ellipse
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# --- Fig. 1a data preparation: expression matrix -> top-variance genes -> 2-component PCA ---
# Rows are genes (first column is the index), columns are samples.
expression_matrix = pd.read_csv('/xiongxi2/Shrimpbase/Vannamei/PLOT/tmm', sep='\t', index_col=0)
# Keep genes whose value exceeds 5 in at least two samples.
expression_matrix = expression_matrix[(expression_matrix > 5).sum(axis=1) >= 2]
tissue_info = pd.read_csv('/xiongxi2/Shrimpbase/Vannamei/PLOT/tissueinfo3.txt', sep='\t', header=None, names=['sample', 'group'])

# Samples present in both tables, kept in expression-matrix column order.
# A bare set intersection iterates in hash order, which differs between
# interpreter sessions for strings; that made the sample order, and therefore
# the group order and colour assignment in the plot, non-reproducible.
common_samples = expression_matrix.columns[expression_matrix.columns.isin(tissue_info['sample'])].tolist()
expression_matrix = expression_matrix[common_samples]
tissue_info = tissue_info[tissue_info['sample'].isin(common_samples)]
# Reorder the labels so row i of tissue_info describes column i of expression_matrix.
tissue_info = tissue_info.set_index('sample').loc[expression_matrix.columns].reset_index()
# The group labels are attached to the PCA rows by position further down, so a
# sample listed more than once in tissue_info would silently shift every label.
if len(tissue_info) != len(common_samples):
    raise ValueError("tissue_info must contain exactly one row per sample in the expression matrix")

# log2(x + 1) transform, then keep the 5000 genes with the largest standard deviation across samples.
expression_matrix_log2 = np.log2(expression_matrix + 1)
gene_std = expression_matrix_log2.std(axis=1)
top_5000_genes = gene_std.sort_values(ascending=False).head(5000).index
expression_matrix_log2 = expression_matrix_log2.loc[top_5000_genes]
# StandardScaler works column-wise, so transpose to samples x genes to standardise
# each gene, then transpose back to genes x samples.
scaler = StandardScaler()
expression_matrix_scaled = scaler.fit_transform(expression_matrix_log2.T).T
pca = PCA(n_components=2)
pca_result = pca.fit_transform(expression_matrix_scaled.T)
explained_var = pca.explained_variance_ratio_ * 100  # percent of variance per component

pca_df = pd.DataFrame(pca_result, columns=['PC1', 'PC2'])
# Positional assignment: rows of pca_result follow expression_matrix.columns,
# which tissue_info was reordered to match above.
pca_df['group'] = tissue_info['group'].to_numpy()

# Global matplotlib styling used by the figure cells.
mpl.rcParams['font.family'] = 'Arial'
mpl.rcParams['axes.linewidth'] = 0.8
mpl.rcParams['xtick.direction'] = 'out'
mpl.rcParams['ytick.direction'] = 'out'

def confidence_ellipse(x, y, ax, n_std=2.0, edgecolor='black', linestyle='--', lw=1.2, **kwargs):
    """Add an unfilled covariance ellipse for the points ``(x, y)`` to ``ax``.

    The ellipse is centred on the sample mean. Its axes follow the eigenvectors
    of the 2x2 sample covariance matrix, and each full axis length is
    ``2 * n_std * sqrt(eigenvalue)``.

    Parameters
    ----------
    x, y : array-like
        Coordinates of the points; must contain the same number of values.
    ax : matplotlib Axes
        Axes the ellipse patch is added to.
    n_std : float
        Number of standard deviations spanned by each semi-axis.
    edgecolor, linestyle, lw
        Outline styling forwarded to ``Ellipse``.
    **kwargs
        Any further keyword arguments forwarded to ``Ellipse``.

    Returns
    -------
    The ``Ellipse`` patch that was added to ``ax``.

    Raises
    ------
    ValueError
        If ``x`` and ``y`` differ in size, or hold fewer than two points
        (the sample covariance is undefined in that case).
    """
    x = np.asarray(x)
    y = np.asarray(y)
    if x.size != y.size:
        raise ValueError("x and y must have the same size")
    if x.size < 2:
        # np.cov would return NaNs here and the ellipse geometry would be meaningless.
        raise ValueError("at least two points are required to draw a confidence ellipse")

    cov = np.cov(x, y)
    mean_x, mean_y = np.mean(x), np.mean(y)

    # Eigen-decomposition, sorted so that column 0 is the major axis.
    vals, vecs = np.linalg.eigh(cov)
    order = vals.argsort()[::-1]
    vals, vecs = vals[order], vecs[:, order]

    # Rotation of the major axis relative to the x-axis, in degrees.
    major_dx, major_dy = vecs[:, 0]
    theta = np.degrees(np.arctan2(major_dy, major_dx))

    # eigh can return tiny negative eigenvalues for (near-)collinear points;
    # clamp at zero so the square root stays finite.
    width, height = 2 * n_std * np.sqrt(np.clip(vals, 0.0, None))
    ellipse = Ellipse(
        (mean_x, mean_y), width, height, angle=theta,
        edgecolor=edgecolor, facecolor='none',
        lw=lw, linestyle=linestyle, **kwargs
    )
    ax.add_patch(ellipse)
    return ellipse

# Fig. 1a: PCA scatter, one colour per group, with 2-SD ellipses around selected tissues.
plt.figure(figsize=(4, 2))
ax = plt.gca()

groups = pca_df['group'].unique()
# One tab20 colour per group, sampled evenly across the colormap.
colors = plt.cm.tab20(np.linspace(0, 1, len(groups)))
# Only these groups get a dashed ellipse drawn around their points.
ellipse_groups = ('Hepatopancreas', 'Intestine', 'Gill', 'Blood', 'Muscle')

for group, color in zip(groups, colors):
    subset = pca_df[pca_df['group'] == group]
    ax.scatter(subset['PC1'], subset['PC2'], s=3, alpha=0.8, color=color, label=group)
    if group not in ellipse_groups:
        continue
    confidence_ellipse(
        subset['PC1'].values,
        subset['PC2'].values,
        ax,
        n_std=2,
        edgecolor='black',
        linestyle='--',
        lw=1,
        alpha=0.9,
    )

# Axis labels carry the percentage of variance explained by each component.
ax.set_xlabel(f'PC1 ({explained_var[0]:.2f}%)', fontsize=7)
ax.set_ylabel(f'PC2 ({explained_var[1]:.2f}%)', fontsize=7)
ax.grid(False)
for side in ('top', 'right'):
    ax.spines[side].set_visible(False)
plt.xticks(fontsize=6)
plt.yticks(fontsize=6)

# Two-column legend anchored just outside the right edge of the axes.
ax.legend(
    title='',
    loc='upper left',
    bbox_to_anchor=(1.02, 0.9),
    ncol=2,
    fontsize=6,
    frameon=False,
    columnspacing=0.5,
    labelspacing=0.4,
    handletextpad=0.02,
)

plt.tight_layout()
plt.show()


In [None]:
#fig1b
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl

# Fig. 1b: number of transcripts per (class code, RNA type) combination.
df = pd.read_csv('/xiongxi2/Shrimpbase/Vannamei/08_Trinotate/tmap2.txt', sep='\t')
# .copy() so the column assignments below write to an independent frame
# instead of a slice of the full table (avoids SettingWithCopyWarning).
df = df[['class_code', 'type']].copy()
# gffcompare class codes: i = fully contained in a reference intron,
# u = unknown / intergenic, x = exonic overlap on the opposite strand (antisense).
# The 'u' and 'x' labels were previously swapped.
# NOTE(review): assumes tmap2.txt derives from gffcompare .tmap output - confirm.
class_code_map = {'i': 'Intronic', 'u': 'Intergenic', 'x': 'Antisense'}
type_map = {'LNC': 'lncRNA', 'COD': 'mRNA'}


# Values missing from either map become NaN; the concatenated label is then
# NaN as well and value_counts() leaves those rows out of the chart.
df['class_code'] = df['class_code'].map(class_code_map)
df['type'] = df['type'].map(type_map)
df['RNA_type'] = df['class_code'] + ' ' + df['type']
rna_counts = df['RNA_type'].value_counts()

colors = ['#ff6666', '#66b2ff', '#66b366', '#ff8c00', '#8c8ccf', '#ff4d94']

labels = [f'{rna} ({count})' for rna, count in zip(rna_counts.index, rna_counts)]

mpl.rcParams['font.family'] = 'Arial'

x_labels = rna_counts.index
counts = rna_counts.values

plt.figure(figsize=(4, 3))
ax = plt.gca()

bars = ax.bar(range(len(counts)), counts, color=colors[:len(counts)])

# Annotate every bar with its count just above the bar top.
for bar in bars:
    height = bar.get_height()
    # int() keeps the annotation an integer even if the height is stored as a float.
    ax.text(bar.get_x() + bar.get_width() / 2, height + 1, str(int(height)),
            ha='center', va='bottom', fontsize=10)

# Categories are identified through the legend rather than x tick labels.
ax.set_xticks([])
legend_labels = [f'{rna}' for rna in x_labels]
ax.legend(bars, legend_labels, title='', bbox_to_anchor=(0.45, 1), loc='upper left', frameon=False)

ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)

plt.tight_layout()
plt.show()



In [None]:
#fig1c - UMAP embedding coloured by `major_celltype`
# NOTE(review): `ov` is not imported in any cell visible here - presumably
# `import omicverse as ov`; confirm it is imported before this cell runs.
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors

adata = ov.read('umap3.h5ad')

fig, ax = plt.subplots(figsize=(5, 5))
ov.utils.embedding(
    adata,
    basis='X_umap',
    color=['major_celltype'],
    title=['Major Cell Types'],
    frameon='small',
    size=5,
    # legend_loc='on data',
    legend_fontsize=10,
    wspace=0.4,
    show=False,
    ax=ax,
)

# Re-apply the title with explicit font settings, then place the axis labels
# at explicit label coordinates near the axes origin.
ax.set_title('Major Cell Types', fontsize=14, fontweight='normal')
ax.set_xlabel('X_umap1', loc='left', fontsize=9)
ax.xaxis.set_label_coords(0.05, 0.03)
ax.set_ylabel('X_umap2', loc='bottom', fontsize=9)
ax.yaxis.set_label_coords(0.03, 0.05)

In [None]:
#fig1d - proportion of each `major_celltype` within every `tissue`
# Uses the `adata` object loaded in the fig1c cell.
fig, ax = plt.subplots(figsize=(4, 5))
ov.pl.cellproportion(
    adata=adata,
    celltype_clusters='major_celltype',
    groupby='tissue',
    legend=False,
    ax=ax,
)

# The x label is a single space, leaving only the tick labels to name the groups.
ax.set_xlabel(' ', fontsize=12)
ax.set_ylabel('Cell Proportion', fontsize=14)
# Re-set the existing tick labels so they are drawn vertically.
ax.set_xticklabels(
    ax.get_xticklabels(),
    rotation=90,
    ha="right",
    fontsize=10,
)

In [None]:
#fig1e - dot plot of selected genes across `major_celltype` groups
# NOTE(review): `ov` and `sc` are not imported in any cell visible here -
# presumably `import omicverse as ov` and `import scanpy as sc`; confirm.
# NOTE(review): this rebinds `adata`, replacing the object loaded for fig1c/fig1d;
# re-running those cells afterwards would use this dataset instead.
adata = ov.read('hepannotationmt20.h5ad')
# Drop the 'CTL Hemocytes' cells. Subsetting an AnnData returns a view, so take
# an explicit copy before var_names is reassigned below instead of relying on
# the implicit view-to-copy conversion.
adata = adata[~adata.obs['major_celltype'].isin(['CTL Hemocytes'])].copy()

# Gene identifier -> display name shown on the plot.
mapping = {
    "LOC113817261": "CHIA",
    "LOC113817260": "CHIT1",
    "LOC113810057": "Peritrophin-1",
    "LOC113809896": "ZCCHC24",
    "LOC113806454": "HEBP",
    "LOC113809598": "HPGDS",
    "LOC113822815": "SORLA",
    "CODG63013": "LNCG63013",
}
# Identifiers without an entry in `mapping` keep their original name.
adata.var_names = adata.var_names.to_series().replace(mapping).values
mpl.rcParams['font.family'] = 'Arial'

# Genes to plot, in display order (names are those after the renaming above).
genes_of_interest = [
    'LNCG63013',
    'LNCG62990',
    'LNCG16491',
    'LNCG35807',
    'CODG62976',
    'CODG18169',
    'LNCG22837',
    'CHIA',
    'CHIT1',
    'Peritrophin-1',
    'ZCCHC24',
    'LOC113820201',
    'HEBP',
    'LOC113823098',
    'LOC113819117',
    'LOC113823089',
    'HPGDS',
    'SORLA',
    'LOC113810775',
]
sc.pl.dotplot(
    adata,
    var_names=genes_of_interest,
    groupby='major_celltype',
    cmap='YlGnBu',
    swap_axes=True,
    standard_scale='var',
    var_group_rotation=0,  # rotation in degrees; numeric, not the string '0'
    # size_title='',
    show=False,
)