# 性特異的な遺伝子モジュールを探索する

# セットアップ

In [1]:
# Move up to the top directory: the nearest ancestor (or the current directory
# itself) that contains a LICENSE file.
import os
from pathlib import Path

print(os.getcwd())

# Walk the ancestor chain instead of looping on os.chdir('../'): at the
# filesystem root '../' resolves to the root itself, so an unbounded loop would
# never terminate when no LICENSE file exists above the starting directory.
start_dir = Path.cwd()
for candidate in (start_dir, *start_dir.parents):
    if (candidate / "LICENSE").exists():
        os.chdir(candidate)
        break
else:
    # No directory on the way up contains LICENSE: fail loudly rather than hang.
    raise FileNotFoundError(
        f"No LICENSE file found in {start_dir} or any of its parent directories"
    )

print(os.getcwd())

/mnt/e/Research/TSUMUGI-dev-main/notebooks/notebooks-experiments
/mnt/e/Research/TSUMUGI-dev-main


In [4]:
from pathlib import Path
from pprint import pprint
from collections import defaultdict, Counter
from itertools import combinations
import csv
import pandas as pd
import polars as pl
from matplotlib import pyplot as plt
import seaborn as sns

# Short interactive aliases: P -> print, PP -> pprint, C -> Counter
P, PP, C = print, pprint, Counter


# 実験

In [None]:
# Read the gzipped CSV of gene pairs (path is relative to the current working directory).
raw_data_path = "data/TSUMUGI_raw_data.csv.gz"
df_raw = pl.read_csv(raw_data_path)

# Timing note kept from the original cell: 3.3 s
df_raw

Gene1,Gene2,Jaccard Similarity,Number of shared phenotype,List of shared phenotypes
str,str,f64,i64,str
"""0610010K14Rik""","""0610040J01Rik""",0.0,0,"""[]"""
"""0610010K14Rik""","""1110059G10Rik""",0.0,0,"""[]"""
"""0610010K14Rik""","""1500009L16Rik""",0.0,0,"""[]"""
"""0610010K14Rik""","""1600014C10Rik""",0.0,0,"""[]"""
"""0610010K14Rik""","""1600029I14Rik""",0.0,0,"""[]"""
…,…,…,…,…
"""Zyg11b""","""Zzz3""",0.0,0,"""[]"""
"""Zyg11b""","""a""",0.0,0,"""[]"""
"""Zzef1""","""Zzz3""",0.0,0,"""[]"""
"""Zzef1""","""a""",0.05,1,"""[""abnormal kidney morphology (…"


In [9]:
# Keep only gene pairs whose Jaccard similarity exceeds 0.1 AND that share
# more than one phenotype.
is_similar = pl.col("Jaccard Similarity") > 0.1
shares_multiple = pl.col("Number of shared phenotype") > 1
df_filter = df_raw.filter(is_similar & shares_multiple)

df_filter

Gene1,Gene2,Jaccard Similarity,Number of shared phenotype,List of shared phenotypes
str,str,f64,i64,str
"""0610010K14Rik""","""4933427D14Rik""",0.5,3,"""[""embryonic lethality prior to…"
"""0610010K14Rik""","""Aamp""",0.5,2,"""[""embryonic lethality prior to…"
"""0610010K14Rik""","""Aasdhppt""",0.286,2,"""[""embryonic lethality prior to…"
"""0610010K14Rik""","""Aatf""",0.4,2,"""[""embryonic lethality prior to…"
"""0610010K14Rik""","""Abce1""",0.182,2,"""[""embryonic lethality prior to…"
…,…,…,…,…
"""Znhit1""","""Znhit6""",0.154,2,"""[""embryonic lethality prior to…"
"""Znhit1""","""Zwint""",0.25,2,"""[""embryonic lethality prior to…"
"""Znhit2""","""Znhit6""",0.154,2,"""[""embryonic lethality prior to…"
"""Znhit2""","""Zwint""",0.25,2,"""[""embryonic lethality prior to…"


In [13]:
# Decode the JSON-encoded "List of shared phenotypes" strings into a list[str] column.
#
# This cell reassigns df_filter from itself, so it is guarded on the column's
# current dtype: running it a second time would otherwise apply a string-only
# operation to the already-decoded list column and fail.
phenotype_col = "List of shared phenotypes"
if df_filter.schema[phenotype_col] == pl.Utf8:
    df_filter = df_filter.with_columns(
        # Explicit target dtype so the result does not depend on schema inference.
        pl.col(phenotype_col).str.json_decode(pl.List(pl.Utf8))
    )

df_filter

Gene1,Gene2,Jaccard Similarity,Number of shared phenotype,List of shared phenotypes
str,str,f64,i64,list[str]
"""0610010K14Rik""","""4933427D14Rik""",0.5,3,"[""embryonic lethality prior to organogenesis (Homo, Embryo)"", ""embryonic lethality prior to tooth bud stage (Homo, Embryo)"", ""preweaning lethality, complete penetrance (Homo, Early)""]"
"""0610010K14Rik""","""Aamp""",0.5,2,"[""embryonic lethality prior to organogenesis (Homo, Embryo)"", ""preweaning lethality, complete penetrance (Homo, Early)""]"
"""0610010K14Rik""","""Aasdhppt""",0.286,2,"[""embryonic lethality prior to organogenesis (Homo, Embryo)"", ""preweaning lethality, complete penetrance (Homo, Early)""]"
"""0610010K14Rik""","""Aatf""",0.4,2,"[""embryonic lethality prior to organogenesis (Homo, Embryo)"", ""preweaning lethality, complete penetrance (Homo, Early)""]"
"""0610010K14Rik""","""Abce1""",0.182,2,"[""embryonic lethality prior to organogenesis (Homo, Embryo)"", ""preweaning lethality, complete penetrance (Homo, Early)""]"
…,…,…,…,…
"""Znhit1""","""Znhit6""",0.154,2,"[""embryonic lethality prior to organogenesis (Homo, Embryo)"", ""preweaning lethality, complete penetrance (Homo, Early)""]"
"""Znhit1""","""Zwint""",0.25,2,"[""embryonic lethality prior to organogenesis (Homo, Embryo)"", ""preweaning lethality, complete penetrance (Homo, Early)""]"
"""Znhit2""","""Znhit6""",0.154,2,"[""embryonic lethality prior to organogenesis (Homo, Embryo)"", ""preweaning lethality, complete penetrance (Homo, Early)""]"
"""Znhit2""","""Zwint""",0.25,2,"[""embryonic lethality prior to organogenesis (Homo, Embryo)"", ""preweaning lethality, complete penetrance (Homo, Early)""]"


In [12]:
# Percentage of rows in df_raw that remain in df_filter
n_kept, n_total = len(df_filter), len(df_raw)
n_kept / n_total * 100

1.3436685787304037

In [17]:
# Gather every phenotype listed in the 'Overlapped phenotype' column into one
# flat list, so that the entries mentioning male/female can be extracted from it.
# NOTE(review): df_similarity_with_sexual_dimorphism is not defined in any cell
# visible above this one; it must already exist in the kernel — confirm where
# it is created.
overlapped_phenotype = df_similarity_with_sexual_dimorphism['Overlapped phenotype'].tolist()

import ast

# Parse each string back into the Python list it spells out
parsed_lists = list(map(ast.literal_eval, overlapped_phenotype))

# Concatenate all of the per-row lists
flattened_list = [phenotype for phenotypes in parsed_lists for phenotype in phenotypes]

P(flattened_list[:5])

['decreased bone mineral content (Homo, Female)', 'decreased bone mineral density (Homo)', 'decreased bone mineral content (Homo, Female)', 'decreased bone mineral content (Homo, Female)', 'decreased bone mineral content (Homo, Female)']


In [18]:

# Keep the entries whose text contains "Male" or "Female" (case-sensitive substring match).
sex_tags = ("Male", "Female")
phenotype_with_sexual_dimorphism = [
    phenotype
    for phenotype in flattened_list
    if any(tag in phenotype for tag in sex_tags)
]

P(phenotype_with_sexual_dimorphism[:5])

['decreased bone mineral content (Homo, Female)', 'decreased bone mineral content (Homo, Female)', 'decreased bone mineral content (Homo, Female)', 'decreased bone mineral content (Homo, Female)', 'decreased bone mineral content (Homo, Female)']


In [20]:
# Tally each sex-annotated phenotype string and show the 100 most frequent
sex_phenotype_counts = Counter(phenotype_with_sexual_dimorphism)
sex_phenotype_counts.most_common(100)

[('hyperactivity (Homo, Female)', 13781),
 ('increased circulating alkaline phosphatase level (Homo, Female)', 3981),
 ('decreased bone mineral content (Homo, Female)', 3132),
 ('decreased grip strength (Homo, Female)', 2839),
 ('increased grip strength (Homo, Female)', 1835),
 ('decreased locomotor activity (Homo, Female)', 1821),
 ('hyperactivity (Hetero, Female)', 1246),
 ('decreased bone mineral density (Homo, Female)', 1208),
 ('increased total body fat amount (Homo, Female)', 917),
 ('abnormal bone structure (Homo, Female)', 809),
 ('increased fasting circulating glucose level (Homo, Female)', 682),
 ('increased circulating alkaline phosphatase level (Hetero, Female)', 612),
 ('increased bone mineral content (Homo, Female)', 572),
 ('decreased lean body mass (Homo, Female)', 525),
 ('increased grip strength (Hetero, Female)', 509),
 ('decreased grip strength (Hetero, Female)', 434),
 ('decreased exploration in new environment (Homo, Female)', 292),
 ('decreased body length (Homo,