In [1]:
import io

import pandas as pd
from Bio import Entrez, SeqIO
from mysql.connector import connection
from sshtunnel import SSHTunnelForwarder

# Укажите ваш email (обязательно для использования Entrez)
Entrez.email = "your.email@example.com"

In [2]:
with open("db_curated_server_info.txt", "r") as file:
    lines = file.readlines()

config = {}

for line in lines:
    line = line.strip()
    if line and not line.startswith("#"):
        key, value = line.split("=", 1)
        config[key] = value.strip()

server_name = config.get("server_name")
srever_port = int(config.get("srever_port"))
ssh_password = config.get("ssh_password")
ssh_username = config.get("ssh_username")
db_adress = config.get("db_adress")
db_port = int(config.get("db_port"))

In [3]:
tunnel = SSHTunnelForwarder(
    (server_name, srever_port),
    ssh_password=ssh_password,
    ssh_username=ssh_username,
    remote_bind_address=(db_adress, db_port),
)
tunnel.start()
print(tunnel.local_bind_port)

41191


In [4]:
conn = connection.MySQLConnection(
    user="db_user",
    password="db_password",
    host="localhost",
    port=tunnel.local_bind_port,
    database="db_name",
)
cursor = conn.cursor()

In [5]:
query = "SHOW TABLES;"
cursor.execute(query)
cursor.fetchall()

[('alternative_name',),
 ('histone',),
 ('histone_description',),
 ('histone_has_publication',),
 ('publication',),
 ('sequence',),
 ('sequence_has_publication',)]

In [6]:
# add_histone = (
#     "INSERT INTO histone "
#     "(id, level, taxonomic_span, taxonomic_span_id, description, parent) "
#     "VALUES (%(id)s, %(level)s, %(taxonomic_span)s, %(taxonomic_span_id)s, %(description)s, %(parent)s)"
# )
add_histone_description = (
    "INSERT INTO histone_description "
    "(summary, taxonomy, genes, evolution, expression, knock_out, function, sequence, localization, deposition, structure, interactions, disease, caveats) "
    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
)
# add_publication = (
#     "INSERT INTO publication "
#     "(id, title, doi, author, year) "
#     "VALUES (%(id)s, %(title)s, %(doi)s, %(author)s, %(year)s)"
# )
# add_sequence = (
#     "INSERT INTO sequence "
#     "(accession, variant, gi, ncbi_gene_id, hgnc_gene_name, taxonomy_id, organism, phylum, class, taxonomy_group, info, sequence, variant_under_consideration) "
#     "VALUES (%(accession)s, %(variant)s, %(gi)s, %(ncbi_gene_id)s, %(hgnc_gene_name)s, %(taxonomy_id)s, %(organism)s, %(phylum)s, %(class)s, %(taxonomy_group)s, %(info)s, %(sequence)s, %(variant_under_consideration)s)"
# )
# add_sequence_has_publication = (
#     "INSERT INTO sequence_has_publication "
#     "(sequence_accession, publication_id) "
#     "VALUES (%s, %s)"
# )
# add_alternate_names = (
#     "INSERT INTO alternative_name "
#     "(name, taxonomy, gene, splice, histone) "
#     "VALUES (%(name)s, %(taxonomy)s, %(gene)s, %(splice)s, %(histone)s)"
# )
# add_histone_has_publication = (
#     "INSERT INTO histone_has_publication "
#     "(histone_id, publication_id) "
#     "VALUES (%s, %s)"
# )

# To Do H3-like

## <span style="color:green">Update description to H3.Y.2_(Primates)</span>

### <span style="color:green">Update summary</span>

```H3.Y.2_(Primates) is a primate-specific histone H3 variant (also known as H3.X) encoded by the gene homologous to human H3Y2 [wiedemann_identification_2010].```

## <span style="color:green">Add description to H3.Y.1_(Homo_sapiens)</span>

### <span style="color:green">Add summary</span>

```H3.Y.1_(Homo_sapiens) is a human histone H3 variant encoded by the H3Y1 gene. Human H3.Y.1, together with H3.Y.2, promotes cell growth, regulates cell cycle genes, and is implicated in primate-specific brain function [wiedemann_identification_2010]. It also facilitates sustained expression of DUX4-target genes [talbert_histone_2021, resnick_dux4-induced_2019].```

### <span style="color:green">Add function</span>

```Although the functions of H3.Y.1 remain poorly understood, it is known to promote cell growth and regulate the expression of genes involved in cell cycle control and mitosis [wiedemann_identification_2010]. The presence of H3.Y.1 (along with H3.Y.2) in hippocampal neurons suggests a potential role in primate-specific brain functions [wiedemann_identification_2010]. Furthermore, H3.Y.1 and H3.Y.2 are induced by the transcription factor DUX4 to facilitate the persistence and reactivation of DUX4 target genes following its transient expression [talbert_histone_2021, resnick_dux4-induced_2019]. Interestinglly, H3.Y has been identified as a specific marker of 8-cell-like cells (8CLCs) and is also detected in vivo within the nuclei of human 8-cell embryos at the peak of zygotic genome activation (ZGA) [taubenschmid-stowers_8c-like_2022]. During blastomere division, H3.Y shows strong association with condensed chromosomes at prophase and metaphase stages [taubenschmid-stowers_8c-like_2022]. Taubenschmid-Stowers et al. suggest that H3.Y may be necessary for large-scale genome activation during early human embryogenesis [taubenschmid-stowers_8c-like_2022].```

### <span style="color:green">Add sequence</span>

```The H3.Y.1 protein consists of 135 amino acids and shares high similarity with H3.Y.2 (89.7% identity), primarily differing by a shorter C-terminal tail [wiedemann_identification_2010]. Although its sequence resembles that of H3.3, H3.Y.1 contains specific amino acid substitutions at known post-translational modification sites of canonical H3 variants: S10A, S28R, K14Q, and K79S [wiedemann_identification_2010]. Mass spectrometry analysis confirmed that H3.Y.1 undergoes acetylation at lysines 18, 23, and 27 [wiedemann_identification_2010]. The H3.Y-specific residues, such as Lys42, Leu46, Lys53, and Gln59, are located at the nucleosomal DNA entry/exit sites and may potentially influence DNA-histone interactions and nucleosome stability [kujirai_structure_2016].```

### <span style="color:green">Add expression</span>

```H3.Y.1 and H3.Y.2 are detected at low levels in certain cell lines (e.g., osteosarcoma U2OS), as well as in a range of normal (brain, testis) and malignant (bone, breast, lung, and ovarian tumors) human tissues [wiedemann_identification_2010]. In addition, H3.Y.1 and H3.Y.2 are expressed in early embryos at the cleavage stage and in testicular tissue, consistently co-expressing with DUX4 [talbert_histone_2021, resnick_dux4-induced_2019].```

### <span style="color:green">Add localization</span>

```Endogenous H3.Y.1 is predominantly localized outside dense DAPI regions (heterochromatin), associating with less condensed, transcriptionally active euchromatin, enriched H3K4me3 [wiedemann_identification_2010]. H3.Y.1 and H3.Y.2 are incorporated into highly expressed genes, particularly those induced by DUX4, where it is enriched throughout the gene body, while in constitutively expressed genes, it is primarily localized to the transcription start site (TSS) region [resnick_dux4-induced_2019].```

### <span style="color:green">Add deposition</span>

```H3.Y.1 is deposited into chromatin via the HIRA chaperone complex, which facilitates its replication-independent incorporation into actively transcribed genomic regions [resnick_dux4-induced_2019]. Despite its high similarity to H3.3, H3.Y.1 is incapable of interacting with the DAXX/ATRX complex responsible for H3.3 deposition into heterochromatin [zink_h3y_2017].```

### <span style="color:green">Add structure</span>

```The crystal structure of the H3.Y.1 nucleosome reveals that its specific amino acid residues located at the DNA entry/exit sites result in increased DNA end flexibility compared to H3.3-containing nucleosomes, as well as reduced binding of linker histone H1 [kujirai_structure_2016]. This facilitates transcription factor access to DNA and may promote transcription activation. Kujirai et al. suggest that the heterotypic H3.Y/H3.3 nucleosome, which retains the same biochemical properties as its homotypic counterpart, is likely the predominant form in cells [kujirai_structure_2016].```

### <span style="color:green">Add knock_out</span>

```Knockdown of both genes encoding H3.Y (H3Y1 and H3Y2) using siRNA suppresses the super-induction of DUX4 target genes upon reactivation and reduces the persistence of their expression, but does not affect constitutively expressed genes [resnick_dux4-induced_2019].```

## <span style="color:green">Add description to H3.Y.2_(Homo_sapiens)</span>

### <span style="color:green">Add summary</span>

```H3.Y.2_(Homo_sapiens) is a human histone H3 variant (also known as H3.X) encoded by the H3Y2 gene. The protein sequence of H3Y2 differs from that of H3Y1 by the presence of an additional 11 amino acid residues at the C-terminal tail [ding_primate-specific_2021]. Human H3.Y.2, together with H3.Y.1, promotes cell growth, regulates cell cycle genes, and is implicated in primate-specific brain function [wiedemann_identification_2010]. It also facilitates sustained expression of DUX4-target genes [talbert_histone_2021, resnick_dux4-induced_2019].```

### <span style="color:green">Add function</span>

```The functions of H3.Y.2 remain poorly understood. However, the presence of H3.Y.2 (along with H3.Y.1) in hippocampal neurons suggests a potential role in primate-specific brain functions [wiedemann_identification_2010]. In addition, H3.Y.1 and H3.Y.2 are induced by the transcription factor DUX4 to facilitate the persistence and reactivation of DUX4 target genes following its transient expression [talbert_histone_2021, resnick_dux4-induced_2019].```

### <span style="color:green">Add sequence</span>

```The H3.Y.2 protein consists of 146 amino acids and shares high similarity with H3.Y.1 (89.7% identity), primarily differing by an additional 11 amino acid residues at the C-terminal tail with no sequence homology to other proteins [wiedemann_identification_2010]. Although its sequence resembles that of H3.3, H3.Y.2 contains specific amino acid substitutions at known post-translational modification sites of canonical H3 variants: S10A, S28R, K14Q, and K79S [wiedemann_identification_2010]. The H3.Y-specific residues, such as Lys42, Leu46, Lys53, and Gln59, are located at the nucleosomal DNA entry/exit sites and may potentially influence DNA-histone interactions and nucleosome stability [kujirai_structure_2016].```

### <span style="color:green">Add expression</span>

```H3.Y.1 and H3.Y.2 are detected at low levels in certain cell lines (e.g., osteosarcoma U2OS), as well as in a range of normal (brain, testis) and malignant (bone, breast, lung, and ovarian tumors) human tissues [wiedemann_identification_2010]. In addition, H3.Y.1 and H3.Y.2 are expressed in early embryos at the cleavage stage and in testicular tissue, consistently co-expressing with DUX4 [talbert_histone_2021, resnick_dux4-induced_2019].```

### <span style="color:green">Add localization</span>

```H3.Y.1 and H3.Y.2 are incorporated into highly expressed genes, particularly those induced by DUX4, where it is enriched throughout the gene body, while in constitutively expressed genes, it is primarily localized to the transcription start site (TSS) region [resnick_dux4-induced_2019].```

### <span style="color:green">Add deposition</span>

```H3.Y.2 is deposited into chromatin via the HIRA chaperone complex, which facilitates its replication-independent incorporation into actively transcribed genomic regions [resnick_dux4-induced_2019]. Despite its high similarity to H3.3, H3.Y.2 is incapable of interacting with the DAXX/ATRX complex responsible for H3.3 deposition into heterochromatin [zink_h3y_2017].```

### <span style="color:green">Add structure</span>

```The crystal structure of the H3.Y.1 nucleosome reveals that its specific amino acid residues located at the DNA entry/exit sites result in increased DNA end flexibility compared to H3.3-containing nucleosomes, as well as reduced binding of linker histone H1 [talbert_histone_2021, kujirai_structure_2016]. This facilitates transcription factor access to DNA and may promote transcription activation. Kujirai et al. suggest that the heterotypic H3.Y/H3.3 nucleosome, which retains the same biochemical properties as its homotypic counterpart, is likely the predominant form in cells [kujirai_structure_2016].```

### <span style="color:green">Add knock_out</span>

```Knockdown of both genes encoding H3.Y (H3Y1 and H3Y2) using siRNA suppresses the super-induction of DUX4 target genes upon reactivation and reduces the persistence of their expression, but does not affect constitutively expressed genes [resnick_dux4-induced_2019].```

## <span style="color:black">Update description to H3.Y.2_(Primates)</span>

### <span style="color:black">Update summary</span>

```H3.Y.2_(Primates) is a primate-specific histone H3 variant (also known as H3.X) encoded by the gene homologous to human H3Y2 [wiedemann_identification_2010].```

In [7]:
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    "WHERE h.id='H3.Y.2_(Primates)'"
)
cursor.execute(query)
df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])
df

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,id.1,summary,taxonomy,genes,...,knock_out,function,sequence,localization,deposition,structure,interactions,disease,caveats,relations
0,H3.Y.2_(Primates),variant,Primates,9443,253,H3.Y_(Primates),253,H3.Y.2_(Primates) is a primate-specific histon...,,,...,,,,,,,,,,


In [9]:
summary_desc = "H3.Y.2_(Primates) is a primate-specific histone H3 variant (also known as H3.X) encoded by the gene homologous to human H3Y2 [wiedemann_identification_2010]."
query = f"UPDATE histone_description SET summary='{summary_desc}' WHERE id=253"
print(query)
cursor.execute(query)

UPDATE histone_description SET summary='H3.Y.2_(Primates) is a primate-specific histone H3 variant (also known as H3.X) encoded by the gene homologous to human H3Y2 [wiedemann_identification_2010].' WHERE id=253


In [10]:
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    "WHERE h.id='H3.Y.2_(Primates)'"
)
cursor.execute(query)
df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])
df["summary"].values

array(['H3.Y.2_(Primates) is a primate-specific histone H3 variant (also known as H3.X) encoded by the gene homologous to human H3Y2 [wiedemann_identification_2010].'],
      dtype=object)

In [11]:
# Make sure data is committed to the database
conn.commit()

## <span style="color:black">Add description to H3.Y.1_(Homo_sapiens)</span>

### <span style="color:black">Add summary</span>

```H3.Y.1_(Homo_sapiens) is a human histone H3 variant encoded by the H3Y1 gene. Human H3.Y.1, together with H3.Y.2, promotes cell growth, regulates cell cycle genes, and is implicated in primate-specific brain function [wiedemann_identification_2010]. It also facilitates sustained expression of DUX4-target genes [talbert_histone_2021, resnick_dux4-induced_2019].```

### <span style="color:black">Add function</span>

```Although the functions of H3.Y.1 remain poorly understood, it is known to promote cell growth and regulate the expression of genes involved in cell cycle control and mitosis [wiedemann_identification_2010]. The presence of H3.Y.1 (along with H3.Y.2) in hippocampal neurons suggests a potential role in primate-specific brain functions [wiedemann_identification_2010]. Furthermore, H3.Y.1 and H3.Y.2 are induced by the transcription factor DUX4 to facilitate the persistence and reactivation of DUX4 target genes following its transient expression [talbert_histone_2021, resnick_dux4-induced_2019]. Interestinglly, H3.Y has been identified as a specific marker of 8-cell-like cells (8CLCs) and is also detected in vivo within the nuclei of human 8-cell embryos at the peak of zygotic genome activation (ZGA) [taubenschmid-stowers_8c-like_2022]. During blastomere division, H3.Y shows strong association with condensed chromosomes at prophase and metaphase stages [taubenschmid-stowers_8c-like_2022]. Taubenschmid-Stowers et al. suggest that H3.Y may be necessary for large-scale genome activation during early human embryogenesis [taubenschmid-stowers_8c-like_2022].```

### <span style="color:black">Add sequence</span>

```The H3.Y.1 protein consists of 135 amino acids and shares high similarity with H3.Y.2 (89.7% identity), primarily differing by a shorter C-terminal tail [wiedemann_identification_2010]. Although its sequence resembles that of H3.3, H3.Y.1 contains specific amino acid substitutions at known post-translational modification sites of canonical H3 variants: S10A, S28R, K14Q, and K79S [wiedemann_identification_2010]. Mass spectrometry analysis confirmed that H3.Y.1 undergoes acetylation at lysines 18, 23, and 27 [wiedemann_identification_2010]. The H3.Y-specific residues, such as Lys42, Leu46, Lys53, and Gln59, are located at the nucleosomal DNA entry/exit sites and may potentially influence DNA-histone interactions and nucleosome stability [kujirai_structure_2016].```

### <span style="color:black">Add expression</span>

```H3.Y.1 and H3.Y.2 are detected at low levels in certain cell lines (e.g., osteosarcoma U2OS), as well as in a range of normal (brain, testis) and malignant (bone, breast, lung, and ovarian tumors) human tissues [wiedemann_identification_2010]. In addition, H3.Y.1 and H3.Y.2 are expressed in early embryos at the cleavage stage and in testicular tissue, consistently co-expressing with DUX4 [talbert_histone_2021, resnick_dux4-induced_2019].```

### <span style="color:black">Add localization</span>

```Endogenous H3.Y.1 is predominantly localized outside dense DAPI regions (heterochromatin), associating with less condensed, transcriptionally active euchromatin, enriched H3K4me3 [wiedemann_identification_2010]. H3.Y.1 and H3.Y.2 are incorporated into highly expressed genes, particularly those induced by DUX4, where it is enriched throughout the gene body, while in constitutively expressed genes, it is primarily localized to the transcription start site (TSS) region [resnick_dux4-induced_2019].```

### <span style="color:black">Add deposition</span>

```H3.Y.1 is deposited into chromatin via the HIRA chaperone complex, which facilitates its replication-independent incorporation into actively transcribed genomic regions [resnick_dux4-induced_2019]. Despite its high similarity to H3.3, H3.Y.1 is incapable of interacting with the DAXX/ATRX complex responsible for H3.3 deposition into heterochromatin [zink_h3y_2017].```

### <span style="color:black">Add structure</span>

```The crystal structure of the H3.Y.1 nucleosome reveals that its specific amino acid residues located at the DNA entry/exit sites result in increased DNA end flexibility compared to H3.3-containing nucleosomes, as well as reduced binding of linker histone H1 [kujirai_structure_2016]. This facilitates transcription factor access to DNA and may promote transcription activation. Kujirai et al. suggest that the heterotypic H3.Y/H3.3 nucleosome, which retains the same biochemical properties as its homotypic counterpart, is likely the predominant form in cells [kujirai_structure_2016].```

### <span style="color:black">Add knock_out</span>

```Knockdown of both genes encoding H3.Y (H3Y1 and H3Y2) using siRNA suppresses the super-induction of DUX4 target genes upon reactivation and reduces the persistence of their expression, but does not affect constitutively expressed genes [resnick_dux4-induced_2019].```

In [12]:
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    "WHERE h.id='H3.Y.1_(Homo_sapiens)'"
)
cursor.execute(query)
df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])
df

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,id.1,summary,taxonomy,genes,...,knock_out,function,sequence,localization,deposition,structure,interactions,disease,caveats,relations
0,H3.Y.1_(Homo_sapiens),variant,Homo sapiens,9606,172,H3.Y.1_(Primates),172,,,,...,,,,,,,,,,


In [13]:
df.to_dict(orient='records')

[{'id': 172,
  'level': 'variant',
  'taxonomic_span': 'Homo sapiens',
  'taxonomic_span_id': '9606',
  'description': 172,
  'parent': 'H3.Y.1_(Primates)',
  'summary': 'null',
  'taxonomy': 'null',
  'genes': 'null',
  'evolution': 'null',
  'expression': 'null',
  'knock_out': 'null',
  'function': 'null',
  'sequence': 'null',
  'localization': 'null',
  'deposition': 'null',
  'structure': 'null',
  'interactions': 'null',
  'disease': 'null',
  'caveats': 'null',
  'relations': None}]

In [14]:
desc_dict = {
    "summary": "H3.Y.1_(Homo_sapiens) is a human histone H3 variant encoded by the H3Y1 gene. Human H3.Y.1, together with H3.Y.2, promotes cell growth, regulates cell cycle genes, and is implicated in primate-specific brain function [wiedemann_identification_2010]. It also facilitates sustained expression of DUX4-target genes [talbert_histone_2021, resnick_dux4-induced_2019].",
    "expression": "H3.Y.1 and H3.Y.2 are detected at low levels in certain cell lines (e.g., osteosarcoma U2OS), as well as in a range of normal (brain, testis) and malignant (bone, breast, lung, and ovarian tumors) human tissues [wiedemann_identification_2010]. In addition, H3.Y.1 and H3.Y.2 are expressed in early embryos at the cleavage stage and in testicular tissue, consistently co-expressing with DUX4 [talbert_histone_2021, resnick_dux4-induced_2019].",
    "knock_out": "Knockdown of both genes encoding H3.Y (H3Y1 and H3Y2) using siRNA suppresses the super-induction of DUX4 target genes upon reactivation and reduces the persistence of their expression, but does not affect constitutively expressed genes [resnick_dux4-induced_2019].",
    "function":"Although the functions of H3.Y.1 remain poorly understood, it is known to promote cell growth and regulate the expression of genes involved in cell cycle control and mitosis [wiedemann_identification_2010]. The presence of H3.Y.1 (along with H3.Y.2) in hippocampal neurons suggests a potential role in primate-specific brain functions [wiedemann_identification_2010]. Furthermore, H3.Y.1 and H3.Y.2 are induced by the transcription factor DUX4 to facilitate the persistence and reactivation of DUX4 target genes following its transient expression [talbert_histone_2021, resnick_dux4-induced_2019]. Interestinglly, H3.Y has been identified as a specific marker of 8-cell-like cells (8CLCs) and is also detected in vivo within the nuclei of human 8-cell embryos at the peak of zygotic genome activation (ZGA) [taubenschmid-stowers_8c-like_2022]. During blastomere division, H3.Y shows strong association with condensed chromosomes at prophase and metaphase stages [taubenschmid-stowers_8c-like_2022]. Taubenschmid-Stowers et al. suggest that H3.Y may be necessary for large-scale genome activation during early human embryogenesis [taubenschmid-stowers_8c-like_2022].",
    "sequence": "The H3.Y.1 protein consists of 135 amino acids and shares high similarity with H3.Y.2 (89.7% identity), primarily differing by a shorter C-terminal tail [wiedemann_identification_2010]. Although its sequence resembles that of H3.3, H3.Y.1 contains specific amino acid substitutions at known post-translational modification sites of canonical H3 variants: S10A, S28R, K14Q, and K79S [wiedemann_identification_2010]. Mass spectrometry analysis confirmed that H3.Y.1 undergoes acetylation at lysines 18, 23, and 27 [wiedemann_identification_2010]. The H3.Y-specific residues, such as Lys42, Leu46, Lys53, and Gln59, are located at the nucleosomal DNA entry/exit sites and may potentially influence DNA-histone interactions and nucleosome stability [kujirai_structure_2016].",
    "localization": "Endogenous H3.Y.1 is predominantly localized outside dense DAPI regions (heterochromatin), associating with less condensed, transcriptionally active euchromatin, enriched H3K4me3 [wiedemann_identification_2010]. H3.Y.1 and H3.Y.2 are incorporated into highly expressed genes, particularly those induced by DUX4, where it is enriched throughout the gene body, while in constitutively expressed genes, it is primarily localized to the transcription start site (TSS) region [resnick_dux4-induced_2019].",
    "deposition": "H3.Y.1 is deposited into chromatin via the HIRA chaperone complex, which facilitates its replication-independent incorporation into actively transcribed genomic regions [resnick_dux4-induced_2019]. Despite its high similarity to H3.3, H3.Y.1 is incapable of interacting with the DAXX/ATRX complex responsible for H3.3 deposition into heterochromatin [zink_h3y_2017].",
    "structure": "The crystal structure of the H3.Y.1 nucleosome reveals that its specific amino acid residues located at the DNA entry/exit sites result in increased DNA end flexibility compared to H3.3-containing nucleosomes, as well as reduced binding of linker histone H1 [kujirai_structure_2016]. This facilitates transcription factor access to DNA and may promote transcription activation. Kujirai et al. suggest that the heterotypic H3.Y/H3.3 nucleosome, which retains the same biochemical properties as its homotypic counterpart, is likely the predominant form in cells [kujirai_structure_2016].",
}
desk_str = ', '.join([f'{k}="{v}"' for k, v in desc_dict.items()])
query = (
    f"UPDATE histone_description SET {desk_str} "
    "WHERE id = 172"
)
print(query)
cursor.execute(query)

In [15]:
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    "WHERE h.id='H3.Y.1_(Homo_sapiens)'"
)
cursor.execute(query)
df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])
df

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,id.1,summary,taxonomy,genes,...,knock_out,function,sequence,localization,deposition,structure,interactions,disease,caveats,relations
0,H3.Y.1_(Homo_sapiens),variant,Homo sapiens,9606,172,H3.Y.1_(Primates),172,H3.Y.1_(Homo_sapiens) is a human histone H3 va...,,,...,Knockdown of both genes encoding H3.Y (H3Y1 an...,Although the functions of H3.Y.1 remain poorly...,The H3.Y.1 protein consists of 135 amino acids...,Endogenous H3.Y.1 is predominantly localized o...,H3.Y.1 is deposited into chromatin via the HIR...,The crystal structure of the H3.Y.1 nucleosome...,,,,


In [16]:
df.to_dict(orient='records')

[{'id': 172,
  'level': 'variant',
  'taxonomic_span': 'Homo sapiens',
  'taxonomic_span_id': '9606',
  'description': 172,
  'parent': 'H3.Y.1_(Primates)',
  'summary': 'H3.Y.1_(Homo_sapiens) is a human histone H3 variant encoded by the H3Y1 gene. Human H3.Y.1, together with H3.Y.2, promotes cell growth, regulates cell cycle genes, and is implicated in primate-specific brain function [wiedemann_identification_2010]. It also facilitates sustained expression of DUX4-target genes [talbert_histone_2021, resnick_dux4-induced_2019].',
  'taxonomy': 'null',
  'genes': 'null',
  'evolution': 'null',
  'expression': 'H3.Y.1 and H3.Y.2 are detected at low levels in certain cell lines (e.g., osteosarcoma U2OS), as well as in a range of normal (brain, testis) and malignant (bone, breast, lung, and ovarian tumors) human tissues [wiedemann_identification_2010]. In addition, H3.Y.1 and H3.Y.2 are expressed in early embryos at the cleavage stage and in testicular tissue, consistently co-expressing wi

In [17]:
# Make sure data is committed to the database
conn.commit()

## <span style="color:black">Add description to H3.Y.2_(Homo_sapiens)</span>

### <span style="color:black">Add summary</span>

```H3.Y.2_(Homo_sapiens) is a human histone H3 variant (also known as H3.X) encoded by the H3Y2 gene. The protein sequence of H3Y2 differs from that of H3Y1 by the presence of an additional 11 amino acid residues at the C-terminal tail [ding_primate-specific_2021]. Human H3.Y.2, together with H3.Y.1, promotes cell growth, regulates cell cycle genes, and is implicated in primate-specific brain function [wiedemann_identification_2010]. It also facilitates sustained expression of DUX4-target genes [talbert_histone_2021, resnick_dux4-induced_2019].```

### <span style="color:black">Add function</span>

```The functions of H3.Y.2 remain poorly understood. However, the presence of H3.Y.2 (along with H3.Y.1) in hippocampal neurons suggests a potential role in primate-specific brain functions [wiedemann_identification_2010]. In addition, H3.Y.1 and H3.Y.2 are induced by the transcription factor DUX4 to facilitate the persistence and reactivation of DUX4 target genes following its transient expression [talbert_histone_2021, resnick_dux4-induced_2019].```

### <span style="color:black">Add sequence</span>

```The H3.Y.2 protein consists of 146 amino acids and shares high similarity with H3.Y.1 (89.7% identity), primarily differing by an additional 11 amino acid residues at the C-terminal tail with no sequence homology to other proteins [wiedemann_identification_2010]. Although its sequence resembles that of H3.3, H3.Y.2 contains specific amino acid substitutions at known post-translational modification sites of canonical H3 variants: S10A, S28R, K14Q, and K79S [wiedemann_identification_2010]. The H3.Y-specific residues, such as Lys42, Leu46, Lys53, and Gln59, are located at the nucleosomal DNA entry/exit sites and may potentially influence DNA-histone interactions and nucleosome stability [kujirai_structure_2016].```

### <span style="color:black">Add expression</span>

```H3.Y.1 and H3.Y.2 are detected at low levels in certain cell lines (e.g., osteosarcoma U2OS), as well as in a range of normal (brain, testis) and malignant (bone, breast, lung, and ovarian tumors) human tissues [wiedemann_identification_2010]. In addition, H3.Y.1 and H3.Y.2 are expressed in early embryos at the cleavage stage and in testicular tissue, consistently co-expressing with DUX4 [talbert_histone_2021, resnick_dux4-induced_2019].```

### <span style="color:black">Add localization</span>

```H3.Y.1 and H3.Y.2 are incorporated into highly expressed genes, particularly those induced by DUX4, where it is enriched throughout the gene body, while in constitutively expressed genes, it is primarily localized to the transcription start site (TSS) region [resnick_dux4-induced_2019].```

### <span style="color:black">Add deposition</span>

```H3.Y.2 is deposited into chromatin via the HIRA chaperone complex, which facilitates its replication-independent incorporation into actively transcribed genomic regions [resnick_dux4-induced_2019]. Despite its high similarity to H3.3, H3.Y.2 is incapable of interacting with the DAXX/ATRX complex responsible for H3.3 deposition into heterochromatin [zink_h3y_2017].```

### <span style="color:black">Add structure</span>

```The crystal structure of the H3.Y.1 nucleosome reveals that its specific amino acid residues located at the DNA entry/exit sites result in increased DNA end flexibility compared to H3.3-containing nucleosomes, as well as reduced binding of linker histone H1 [talbert_histone_2021, kujirai_structure_2016]. This facilitates transcription factor access to DNA and may promote transcription activation. Kujirai et al. suggest that the heterotypic H3.Y/H3.3 nucleosome, which retains the same biochemical properties as its homotypic counterpart, is likely the predominant form in cells [kujirai_structure_2016].```

### <span style="color:black">Add knock_out</span>

```Knockdown of both genes encoding H3.Y (H3Y1 and H3Y2) using siRNA suppresses the super-induction of DUX4 target genes upon reactivation and reduces the persistence of their expression, but does not affect constitutively expressed genes [resnick_dux4-induced_2019].```

In [18]:
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    "WHERE h.id='H3.Y.2_(Homo_sapiens)'"
)
cursor.execute(query)
df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])
df

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,id.1,summary,taxonomy,genes,...,knock_out,function,sequence,localization,deposition,structure,interactions,disease,caveats,relations
0,H3.Y.2_(Homo_sapiens),variant,Homo sapiens,9606,173,H3.Y.2_(Primates),173,,,,...,,,,,,,,,,


In [19]:
df.to_dict(orient='records')

[{'id': 173,
  'level': 'variant',
  'taxonomic_span': 'Homo sapiens',
  'taxonomic_span_id': '9606',
  'description': 173,
  'parent': 'H3.Y.2_(Primates)',
  'summary': 'null',
  'taxonomy': 'null',
  'genes': 'null',
  'evolution': 'null',
  'expression': 'null',
  'knock_out': 'null',
  'function': 'null',
  'sequence': 'null',
  'localization': 'null',
  'deposition': 'null',
  'structure': 'null',
  'interactions': 'null',
  'disease': 'null',
  'caveats': 'null',
  'relations': None}]

In [20]:
desc_dict = {
    "summary": "H3.Y.2_(Homo_sapiens) is a human histone H3 variant (also known as H3.X) encoded by the H3Y2 gene. The protein sequence of H3Y2 differs from that of H3Y1 by the presence of an additional 11 amino acid residues at the C-terminal tail [ding_primate-specific_2021]. Human H3.Y.2, together with H3.Y.1, promotes cell growth, regulates cell cycle genes, and is implicated in primate-specific brain function [wiedemann_identification_2010]. It also facilitates sustained expression of DUX4-target genes [talbert_histone_2021, resnick_dux4-induced_2019].",
    "expression": "H3.Y.1 and H3.Y.2 are detected at low levels in certain cell lines (e.g., osteosarcoma U2OS), as well as in a range of normal (brain, testis) and malignant (bone, breast, lung, and ovarian tumors) human tissues [wiedemann_identification_2010]. In addition, H3.Y.1 and H3.Y.2 are expressed in early embryos at the cleavage stage and in testicular tissue, consistently co-expressing with DUX4 [talbert_histone_2021, resnick_dux4-induced_2019].",
    "knock_out": "Knockdown of both genes encoding H3.Y (H3Y1 and H3Y2) using siRNA suppresses the super-induction of DUX4 target genes upon reactivation and reduces the persistence of their expression, but does not affect constitutively expressed genes [resnick_dux4-induced_2019].",
    "function": "The functions of H3.Y.2 remain poorly understood. However, the presence of H3.Y.2 (along with H3.Y.1) in hippocampal neurons suggests a potential role in primate-specific brain functions [wiedemann_identification_2010]. In addition, H3.Y.1 and H3.Y.2 are induced by the transcription factor DUX4 to facilitate the persistence and reactivation of DUX4 target genes following its transient expression [talbert_histone_2021, resnick_dux4-induced_2019].",
    "sequence": "The H3.Y.2 protein consists of 146 amino acids and shares high similarity with H3.Y.1 (89.7% identity), primarily differing by an additional 11 amino acid residues at the C-terminal tail with no sequence homology to other proteins [wiedemann_identification_2010]. Although its sequence resembles that of H3.3, H3.Y.2 contains specific amino acid substitutions at known post-translational modification sites of canonical H3 variants: S10A, S28R, K14Q, and K79S [wiedemann_identification_2010]. The H3.Y-specific residues, such as Lys42, Leu46, Lys53, and Gln59, are located at the nucleosomal DNA entry/exit sites and may potentially influence DNA-histone interactions and nucleosome stability [kujirai_structure_2016].",
    "localization": "H3.Y.1 and H3.Y.2 are incorporated into highly expressed genes, particularly those induced by DUX4, where it is enriched throughout the gene body, while in constitutively expressed genes, it is primarily localized to the transcription start site (TSS) region [resnick_dux4-induced_2019].",
    "deposition": "H3.Y.2 is deposited into chromatin via the HIRA chaperone complex, which facilitates its replication-independent incorporation into actively transcribed genomic regions [resnick_dux4-induced_2019]. Despite its high similarity to H3.3, H3.Y.2 is incapable of interacting with the DAXX/ATRX complex responsible for H3.3 deposition into heterochromatin [zink_h3y_2017].",
    "structure": "The crystal structure of the H3.Y.1 nucleosome reveals that its specific amino acid residues located at the DNA entry/exit sites result in increased DNA end flexibility compared to H3.3-containing nucleosomes, as well as reduced binding of linker histone H1 [talbert_histone_2021, kujirai_structure_2016]. This facilitates transcription factor access to DNA and may promote transcription activation. Kujirai et al. suggest that the heterotypic H3.Y/H3.3 nucleosome, which retains the same biochemical properties as its homotypic counterpart, is likely the predominant form in cells [kujirai_structure_2016].",
}
desk_str = ', '.join([f'{k}="{v}"' for k, v in desc_dict.items()])
query = (
    f"UPDATE histone_description SET {desk_str} "
    "WHERE id = 173"
)
print(query)
cursor.execute(query)

UPDATE histone_description SET summary="H3.Y.2_(Homo_sapiens) is a human histone H3 variant (also known as H3.X) encoded by the H3Y2 gene. The protein sequence of H3Y2 differs from that of H3Y1 by the presence of an additional 11 amino acid residues at the C-terminal tail [ding_primate-specific_2021]. Human H3.Y.2, together with H3.Y.1, promotes cell growth, regulates cell cycle genes, and is implicated in primate-specific brain function [wiedemann_identification_2010]. It also facilitates sustained expression of DUX4-target genes [talbert_histone_2021, resnick_dux4-induced_2019].", expression="H3.Y.1 and H3.Y.2 are detected at low levels in certain cell lines (e.g., osteosarcoma U2OS), as well as in a range of normal (brain, testis) and malignant (bone, breast, lung, and ovarian tumors) human tissues [wiedemann_identification_2010]. In addition, H3.Y.1 and H3.Y.2 are expressed in early embryos at the cleavage stage and in testicular tissue, consistently co-expressing with DUX4 [talber

In [21]:
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    "WHERE h.id='H3.Y.2_(Homo_sapiens)'"
)
cursor.execute(query)
df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])
df

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,id.1,summary,taxonomy,genes,...,knock_out,function,sequence,localization,deposition,structure,interactions,disease,caveats,relations
0,H3.Y.2_(Homo_sapiens),variant,Homo sapiens,9606,173,H3.Y.2_(Primates),173,H3.Y.2_(Homo_sapiens) is a human histone H3 va...,,,...,Knockdown of both genes encoding H3.Y (H3Y1 an...,The functions of H3.Y.2 remain poorly understo...,The H3.Y.2 protein consists of 146 amino acids...,H3.Y.1 and H3.Y.2 are incorporated into highly...,H3.Y.2 is deposited into chromatin via the HIR...,The crystal structure of the H3.Y.1 nucleosome...,,,,


In [22]:
df.to_dict(orient='records')

[{'id': 173,
  'level': 'variant',
  'taxonomic_span': 'Homo sapiens',
  'taxonomic_span_id': '9606',
  'description': 173,
  'parent': 'H3.Y.2_(Primates)',
  'summary': 'H3.Y.2_(Homo_sapiens) is a human histone H3 variant (also known as H3.X) encoded by the H3Y2 gene. The protein sequence of H3Y2 differs from that of H3Y1 by the presence of an additional 11 amino acid residues at the C-terminal tail [ding_primate-specific_2021]. Human H3.Y.2, together with H3.Y.1, promotes cell growth, regulates cell cycle genes, and is implicated in primate-specific brain function [wiedemann_identification_2010]. It also facilitates sustained expression of DUX4-target genes [talbert_histone_2021, resnick_dux4-induced_2019].',
  'taxonomy': 'null',
  'genes': 'null',
  'evolution': 'null',
  'expression': 'H3.Y.1 and H3.Y.2 are detected at low levels in certain cell lines (e.g., osteosarcoma U2OS), as well as in a range of normal (brain, testis) and malignant (bone, breast, lung, and ovarian tumors) h

In [23]:
# Make sure data is committed to the database
conn.commit()

# Close connections

In [24]:
cursor.close()
conn.close()
tunnel.stop()