In [1]:
import pandas as pd

# Energy cutoff for the miranda table: only rows strictly below this value are kept.
ENERGY_CUTOFF = -20

# Columns that together identify one sRNA -> gene pair in both tables.
PAIR_COLUMNS = ['sRNAs', 'Gene']

# Read both result tables.
hybrid_result_df = pd.read_csv('./hybrid_result.csv')
miranda_result_df = pd.read_csv('./miranda_TidyResult.csv')

# Keep only the miranda rows whose Energy is below the cutoff.
miranda_result_df = miranda_result_df[miranda_result_df['Energy'] < ENERGY_CUTOFF]

# Distinct (sRNAs, Gene) pairs per table. Rows with a missing key are dropped
# so that NaN never takes part in the match.
hybrid_pairs = hybrid_result_df[PAIR_COLUMNS].dropna().drop_duplicates()
miranda_pairs = miranda_result_df[PAIR_COLUMNS].dropna().drop_duplicates()

# An inner merge on both key columns yields, for every sRNA, exactly the genes
# present in both tables. Unlike iterating over Python sets, the row order is
# reproducible: rows follow the order in which the pairs appear in the hybrid
# table. It also avoids re-scanning both frames once per sRNA.
result_df = (
    hybrid_pairs
    .merge(miranda_pairs, on=PAIR_COLUMNS, how='inner')
    .rename(columns={'sRNAs': 'sRNA'})
    .reset_index(drop=True)
)

result_df.to_csv('./sRNA_gene_intersection.csv', index=False)

# Show the result (or use for debugging).
print(result_df)

              sRNA             Gene
0       28S.rRNA.1  ENSG00000118162
1       28S.rRNA.1  ENSG00000130643
2       28S.rRNA.1  ENSG00000292214
3       28S.rRNA.1  ENSG00000237703
4       28S.rRNA.1  ENSG00000161013
...            ...              ...
1275  5.8S.rRNA.10  ENSG00000140968
1276  5.8S.rRNA.10  ENSG00000197283
1277  5.8S.rRNA.10  ENSG00000107099
1278  5.8S.rRNA.10  ENSG00000258366
1279  5.8S.rRNA.10  ENSG00000185024

[1280 rows x 2 columns]


In [2]:
import os

# Folder whose .txt files are processed.
folder_path = "./"

# Prefix given to the files written by this cell.
output_prefix = "modified_"

# sorted() gives a stable processing order (os.listdir order is arbitrary).
for filename in sorted(os.listdir(folder_path)):
    # Only .txt files are inputs. Files that already carry the output prefix
    # are skipped: they also end in .txt, so without this check a re-run would
    # process them again and create modified_modified_* files.
    if not filename.endswith(".txt") or filename.startswith(output_prefix):
        continue

    # Build the full input and output paths.
    input_file = os.path.join(folder_path, filename)
    output_file = os.path.join(folder_path, f"{output_prefix}{filename}")

    # Copy the file, putting a tab in front of every line.
    with open(input_file, 'r', encoding='utf-8') as f_in, open(output_file, 'w', encoding='utf-8') as f_out:
        f_out.writelines('\t' + line for line in f_in)

    print(f"Processed: {input_file} -> {output_file}")


Processed: ./bd_hc_opposite_gene.txt -> ./modified_bd_hc_opposite_gene.txt


In [2]:
import os

# Folder whose .txt files are processed.
folder_path = "./all"

# Prefix given to the files written by this cell.
output_prefix = "modified_"

# sorted() gives a stable processing order (os.listdir order is arbitrary).
for filename in sorted(os.listdir(folder_path)):
    # Only .txt files are inputs. Files that already carry the output prefix
    # are skipped: they also end in .txt, so without this check a re-run would
    # process them again and create modified_modified_* files.
    if not filename.endswith(".txt") or filename.startswith(output_prefix):
        continue

    # Build the full input and output paths.
    input_file = os.path.join(folder_path, filename)
    output_file = os.path.join(folder_path, f"{output_prefix}{filename}")

    # Copy the file, putting a tab in front of every line.
    with open(input_file, 'r', encoding='utf-8') as f_in, open(output_file, 'w', encoding='utf-8') as f_out:
        f_out.writelines('\t' + line for line in f_in)

    print(f"Processed: {input_file} -> {output_file}")

Processed: ./all\all_gene.txt -> ./all\modified_all_gene.txt


In [9]:
import pandas as pd

# Read the CSV file holding the "bd" and "hc" gene columns.
df = pd.read_csv("./bd_specific_mirna/bd_specific_gene.csv")

# Gene columns with missing values removed; kept as sets for membership tests.
bd_genes = set(df["bd"].dropna())
hc_genes = set(df["hc"].dropna())

# Genes found in "bd" but not in "hc". Filtering the column itself (instead of
# listing a set difference) keeps the genes in their first-appearance order,
# so the output file is identical from run to run.
bd_column = df["bd"].dropna()
bd_unique_genes = bd_column[~bd_column.isin(hc_genes)].drop_duplicates()

# Put the result into a new single-column DataFrame.
result_df = pd.DataFrame({"BD特有基因": bd_unique_genes.tolist()})

# Write the result as a tab-separated text file.
result_df.to_csv("./bd_specific_mirna/bd_unique_genes.txt", index=False, sep="\t")

# The message names the file that is actually written (.txt, not .csv).
print("BD特有基因已保存到 bd_unique_genes.txt")

BD特有基因已保存到 bd_unique_genes.csv


In [10]:
import os

# Folder whose .txt files are processed.
folder_path = "./bd_specific_mirna/"

# Prefix given to the files written by this cell.
output_prefix = "modified_"

# sorted() gives a stable processing order (os.listdir order is arbitrary).
for filename in sorted(os.listdir(folder_path)):
    # Only .txt files are inputs. Files that already carry the output prefix
    # are skipped: they also end in .txt, so without this check a re-run would
    # process them again and create modified_modified_* files.
    if not filename.endswith(".txt") or filename.startswith(output_prefix):
        continue

    # Build the full input and output paths.
    input_file = os.path.join(folder_path, filename)
    output_file = os.path.join(folder_path, f"{output_prefix}{filename}")

    # Copy the file, putting a tab in front of every line.
    with open(input_file, 'r', encoding='utf-8') as f_in, open(output_file, 'w', encoding='utf-8') as f_out:
        f_out.writelines('\t' + line for line in f_in)

    print(f"Processed: {input_file} -> {output_file}")

Processed: ./bd_specific_mirna/bd_unique_genes.txt -> ./bd_specific_mirna/modified_bd_unique_genes.txt


## Enrich GO

```bash
# Reference files passed to enrichgo.py (GO annotation, ontology, proteome FASTA).
ref=/mnt/d/adult_dep/DE2/MDD_hc_mirna/ning/up

# Run enrichgo.py on every modified_*.txt file in each target directory.
for dir in /mnt/e/adolescent_dep/target/mirna /mnt/e/adolescent_dep/target/all; do
  (
    # The subshell keeps the caller's working directory untouched; a failed
    # cd ends only this subshell instead of running the loop in the wrong place.
    cd "${dir}" || exit 1

    for i in modified_*.txt; do
      # Skip the literal pattern when nothing matches the glob.
      [ -e "${i}" ] || continue

      # Outputs are named modified_*_go.txt and therefore match the input
      # glob on a re-run; never feed them back in as inputs.
      case "${i}" in
        *_go.txt) continue ;;
      esac

      python ../enrichgo.py \
        "${ref}/goa_human.gaf" \
        "${ref}/go.obo" \
        "${ref}/uniprot-proteome_UP000005640_reviewed_yes.fasta" \
        "${i}" "${i%%.*}_go.txt"
    done
  )
done
```

# ALL_GO分析

In [21]:
import os
import pandas as pd

# Folder containing the *_summary.csv files.
folder_path = 'D:/adult_dep/penal_target/all_go/'  # replace with your folder path

# All summary CSV files in the folder, in a stable (sorted) order.
csv_files = sorted(f for f in os.listdir(folder_path) if f.endswith('_summary.csv'))

# Write one gene list per summary file.
for csv_file in csv_files:
    file_path = os.path.join(folder_path, csv_file)

    # Read the CSV file (comma is the default separator).
    df = pd.read_csv(file_path)

    # Files without a 'target_symbol' column are reported and skipped.
    if 'target_symbol' not in df.columns:
        print(f"警告: 文件 {csv_file} 中没有 'target_symbol' 列")
        continue

    # Drop missing values and de-duplicate. astype(str) guarantees that the
    # .replace call below works even if the column was not parsed as text.
    genes = df['target_symbol'].dropna().astype(str).unique()

    # Replace commas inside an entry with tabs.
    genes = [gene.replace(',', '\t') for gene in genes]

    # Output path: same folder, "<csv name without extension>_genes.txt".
    output_file = os.path.join(folder_path, f"{os.path.splitext(csv_file)[0]}_genes.txt")

    # Write the sorted entries one per line. The encoding is explicit so the
    # file does not depend on the platform default and matches the UTF-8 used
    # by the other cells of this notebook.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(f"{gene}\n" for gene in sorted(genes))

    print(f"{csv_file} 中的基因已保存到 {output_file}")

delong_summary.csv 中的基因已保存到 D:/adult_dep/penal_target/all_go/delong_summary_genes.txt
