In [1]:
import io

import pandas as pd
from Bio import Entrez, SeqIO
from mysql.connector import connection
from sshtunnel import SSHTunnelForwarder

# Укажите ваш email (обязательно для использования Entrez)
Entrez.email = "your.email@example.com"

In [2]:
with open("db_curated_server_info.txt", "r") as file:
    lines = file.readlines()

config = {}

for line in lines:
    line = line.strip()
    if line and not line.startswith("#"):
        key, value = line.split("=", 1)
        config[key] = value.strip()

server_name = config.get("server_name")
srever_port = int(config.get("srever_port"))
ssh_password = config.get("ssh_password")
ssh_username = config.get("ssh_username")
db_adress = config.get("db_adress")
db_port = int(config.get("db_port"))

In [3]:
tunnel = SSHTunnelForwarder(
    (server_name, srever_port),
    ssh_password=ssh_password,
    ssh_username=ssh_username,
    remote_bind_address=(db_adress, db_port),
)
tunnel.start()
print(tunnel.local_bind_port)

45747


In [4]:
conn = connection.MySQLConnection(
    user="db_user",
    password="db_password",
    host="localhost",
    port=tunnel.local_bind_port,
    database="db_name",
)
cursor = conn.cursor()

In [5]:
query = "SHOW TABLES;"
cursor.execute(query)
cursor.fetchall()

[('alternative_name',),
 ('histone',),
 ('histone_description',),
 ('histone_has_publication',),
 ('publication',),
 ('sequence',),
 ('sequence_has_publication',)]

In [6]:
add_histone = (
    "INSERT INTO histone "
    "(id, level, taxonomic_span, taxonomic_span_id, description, parent) "
    "VALUES (%(id)s, %(level)s, %(taxonomic_span)s, %(taxonomic_span_id)s, %(description)s, %(parent)s)"
)
add_histone_description = (
    "INSERT INTO histone_description "
    "(summary, taxonomy, genes, evolution, expression, knock_out, function, sequence, localization, deposition, structure, interactions, disease, caveats) "
    "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
)
add_publication = (
    "INSERT INTO publication "
    "(id, title, doi, author, year) "
    "VALUES (%(id)s, %(title)s, %(doi)s, %(author)s, %(year)s)"
)
# add_sequence = (
#     "INSERT INTO sequence "
#     "(accession, variant, gi, ncbi_gene_id, hgnc_gene_name, taxonomy_id, organism, phylum, class, taxonomy_group, info, sequence, variant_under_consideration) "
#     "VALUES (%(accession)s, %(variant)s, %(gi)s, %(ncbi_gene_id)s, %(hgnc_gene_name)s, %(taxonomy_id)s, %(organism)s, %(phylum)s, %(class)s, %(taxonomy_group)s, %(info)s, %(sequence)s, %(variant_under_consideration)s)"
# )
# add_sequence_has_publication = (
#     "INSERT INTO sequence_has_publication "
#     "(sequence_accession, publication_id) "
#     "VALUES (%s, %s)"
# )
# add_alternate_names = (
#     "INSERT INTO alternative_name "
#     "(name, taxonomy, gene, splice, histone) "
#     "VALUES (%(name)s, %(taxonomy)s, %(gene)s, %(splice)s, %(histone)s)"
# )
add_histone_has_publication = (
    "INSERT INTO histone_has_publication "
    "(histone_id, publication_id) "
    "VALUES (%s, %s)"
)

In [10]:
def get_taxonomy_data(record):
    import re
    import sys

    taxonomy_data = {}
    taxonomy_data["organism"] = record.annotations["organism"]
    try:
        for a in record.features[0].qualifiers["db_xref"]:
            text = re.search("(\S+):(\S+)", a).group(1)
            taxid = re.search("(\S+):(\S+)", a).group(2)
            if text == "taxon":
                print(f"Fetched taxid from NCBI {taxid}")
                taxonomy_data["taxonomy_id"] = int(taxid)
            else:
                continue
    except:
        print("!!!!!!Unable to get TAXID for this record setting it to 1")
        taxonomy_data["taxonomy_id"] = 1  # unable to identify

    lineage = dict()
    for i in range(10):
        try:
            handle = Entrez.efetch(
                id=taxonomy_data["taxonomy_id"], db="taxonomy", retmode="xml"
            )
            tax_data = Entrez.read(handle)
            lineage = {
                d["Rank"]: d["ScientificName"]
                for d in tax_data[0]["LineageEx"]
                if d["Rank"] in ["class", "phylum"]
            }
            break
        except:
            print(
                "Unexpected error: {}, Retrying, attempt {}".format(
                    sys.exc_info()[0], i
                )
            )
            if i == 9:
                print(
                    f"FATAL ERROR could not get class and phylum from NCBI after 10 attempts for taxid:{taxonomy_data['taxonomy_id']}. Will add None for class and phylum!"
                )
            else:
                continue
    taxonomy_data["phylum"] = lineage.get("phylum", None)
    taxonomy_data["class"] = lineage.get("class", None)
    if taxonomy_data["phylum"] is not None:
        taxonomy_data["phylum"] = str(taxonomy_data["phylum"])
    if taxonomy_data["class"] is not None:
        taxonomy_data["class"] = str(taxonomy_data["class"])
    return taxonomy_data

## <span style="color:black">ВАЖНО!!!</span>

Некоторые источники сообщают о том, что CENP-A нуклеосома не во всех фаазах клеточного цикла имеет октамерную структуру in vivo. Цитата из [статьи](https://pubmed.ncbi.nlm.nih.gov/23324462/) ([bui_cenp-nucleosome_2013, bui_cell-cycle-dependent_2012]):

>"Unlike H3 nucleosomes, which exist as invariant octamers, our data revealed that native CENP-A nucleosomes adopt a stable tetrameric structure for the majority of the cell cycle, but alter in shape to an octameric structure at the transition from G1/S to S-phase. In G2 phase, CENP-A nucleosomes convert back to tetramers, suggesting that they are structurally flexible."

## <span style="color:black">Дополнительно</span>

1. Синтез CENP-A происходит в G2 фазе [mcnulty_alpha_2018].
2. Экспресия CENP-A происходит в раннюю G1 фазу. Вместе с тем, активно экспрессируется и шаперон HJURP, который встраивает CENP-A в нуклеосомы.
3. HJURP важен не просто для вставки CENP-A в нуклеосому, но и для определения локализации центромеры. Это происходит благодаря взаимодействию между HJURP и плотным комплексом, который состоит из белков Mis18α, Mis18β и M18BP1. Можно сказать, что комплекс Mis18 првлекает к себе HJURP, который в свою очередь уже связан с тетрамером CENP-A:H4.
4. CENP-A-нуклеосомы стабильны в течение всего клеточного цикла и не разбираются во время репликации ДНК в S-фазе. Во время S-фазы гистоны CENP-A перераспределяются (для наследования) полуконсервативно: после репликации каждая дочерняя хромосома получает примерно половину родительских CENP-A-нуклеосом [mcnulty_alpha_2018].

# To Do cenH3

## <span style="color:green">Update summary of cenH3_(Insecta)</span>

```cenH3_(Insecta) is a centromere-specific histone variant in insects, functionally analogous to CENP-A (Centromere Protein A) in other eukaryotes. It is essential for centromere specification and proper chromosome segregation during cell division. Notably, cenH3 is absent in four holocentric insect clades, suggesting alternative centromere organization mechanisms in these lineages [talbert_histone_2021, senaratne_formation_2021, sridhar_kinetochore_2022, cortes-silva_cenh3-independent_2020]. Some insect families, such as Drosophilidae and Culicidae, possess multiple cenH3 paralogs with evidence of functional specialization. For more overview of these families, see the cenH3_(Drosophilidae) and cenH3_(Culicidae) classes.```

## <span style="color:green">Update description of cenH3_(Drosophilidae)</span>

### <span style="color:green">Add deposition</span>

```In Drosophila melanogaster, the deposition of cenH3 into nucleosomes is mediated by the chaperone CAL1 in a DNA sequence-independent manner. CAL1 facilitates nucleosome formation through direct interaction with cenH3-H4 histones and additionally recruits the FACT complex (comprising Dre4 and SSRP1 subunits) along with RNA polymerase II (RNAPII) [chen_establishment_2015, chen_cal1_2014]. FACT destabilizes H3-containing nucleosomes, enabling RNAPII-mediated transcription of DNA, a prerequisite for the replacement of H3 with CENP-A/H4 [chen_establishment_2015]. In the absence of FACT, transcription is abolished, preventing CENP-A incorporation. Importantly, CAL1 mediates the assembly of octameric nucleosomes with left-handed DNA wrapping, analogous to canonical nucleosomes [chen_cal1_2014].```

### <span style="color:green">Add function</span>

```Similar to other animals, Drosophila cenH3 functions primarily in kinetochore assembly through CENP-C recruitment. However, in contrast to vertebrates where CENP-A directly binds CENP-C, Drosophila depends on the CAL1 chaperone to bridge cenH3-CENP-C interaction for proper kinetochore formation and chromosome segregation [chen_cal1_2014].```

## <span style="color:green">Update description of cenH3</span>

### <span style="color:green">Update function</span>

```Despite the high diversity of cenH3 proteins, they represent a key factor involved in centromere identity and recruiting constitutive centromere-associated network (CCAN), ensuring accurate chromosome segregation, across a wide range of organisms. For example, the deep evolutionary conservation of cenH3 histone function in plants demonstarted by the ability of Zea mays cenH3 to functionally replace its counterpart in Arabidopsis thaliana [maheshwari_naturally_2015]. However, certain functional differences of cenH3 in chromosome segregation are observed across different species [wong_epigenetic_2020, steiner_diversity_2015]. Distinction is mainly related to cenH3 distribution on the chromosome, timing of cenH3 replenishment, heterochromatin dependence and chaperone specificity. In most mammals (including humans), cenH3 is incorporated into nucleosomes positioned within satellite repeat regions. In contrast, yeast species exhibit both regional (fission yeast) and point (budding yeast) centromeres, the latter consisting of a single cenH3 nucleosome complex assembled on specific DNA sequences. Nematode holocentromeres are characterized by the distribution of cenH3 along the entire chromosome length. Key factors of centromere identity in speicies with regional centromere (e.g., vertebrates) are the epigenetic context and transcription of centromeric DNA, whereas centromeres in budding yeast (S. cerevisiae) are determined by specific DNA sequences [wong_epigenetic_2020, hara_critical_2017, hori_histone_2014]. Moreover, studies in human and chicken cells have demonstrated the existence of natural and artificially created functional neocentromeres lacking alpha-satellite DNA [hara_critical_2017]. At the same time, despite the possibility of epigenetic drift, cenH3 positioning in maize centromeres is stable over generations and primarily governed by genetic changes [gent_stable_2015].```

### <span style="color:green">Add deposition</span>

```Unlike canonical histones, cenH3 deposition is replication-independent. In most speices, cenH3 synthesis and loading occurs during late mitosis and G1 phase of the cell cycle, mediated by specialized chaperones (HJURP in humans, Scm3 in budding and fission yeast, and CAL1 in Drosophila). However, the timing of deposition differs in some organisms. For instance, in budding yeast, cenH3 replenishment occurs during S-phase [wong_epigenetic_2020, pearson_stable_2004]. Notably, both budding yeast and nematodes (C. elegans) exhibit complete turnover of cenH3 each cell cycle [wong_epigenetic_2020]. The assembly and replenishment of cenH3 nucleosomes involves chromatin remodeling, resulting in the displacement of H3.3 histones and active stabilization by specific proteins (e.g., HJURP, CENP-C, CENP-B in humans) [dunleavy_h33_2011, wong_epigenetic_2020, black_epigenetic_2011]. It is possible that new cenH3 deposition doesn't always occur at the same centromeric position during every cell cycle [dunleavy_h33_2011]. Deposition of cenH3 into centromeric nucleosomes depends not only on a specific chaperone but also on numerous other factors. The study using DT40 (chicken) and HeLa S3 (human) cell lines demonstrated that chromatin-remodeling complexes FACT and CHD1 play important roles in the proper recruitment of cenH3 [okada_cenp-hcontaining_2009]. However, this observation is likely not universal across all organisms. For example, in Drosophila, cenH3 incorporation occurs independently of CHD1, yet still involves direct participation of FACT [podhraski_cenh3cid_2010, chen_establishment_2015, chen_cal1_2014].```

## <span style="color:green">Update description of cenH3_(Animals)</span>

### <span style="color:green">Update summary</span>

```cenH3_(Animals) is a centromere-specific histone variant in animals (Metazoa), often called CENP-A (Centromere Protein A) in mammals, and an important component of active centromere required for chromosome segregation.```

### <span style="color:green">Add interactions</span>

```cenH3 nucleosomes stabilize the inner kinetochore known as the CCAN complex (Constitutive Centromere-Associated Network). The initiation of kinetochore assembly involves the interaction between CENP-A and CENP-C [watanabe_cdk1-mediated_2019, walstein_assembly_2021]. CDK1-mediated phosphorylation of CENP-C enhances its binding to CENP-A nucleosomes during mitosis, as shown in both chicken and human cells [watanabe_cdk1-mediated_2019]. However, the functional importance of this interaction varies: while it is nonessential for viability in chicken cells, it is crucial for proper CENP-C kinetochore localization and long-term survival in human cells [watanabe_cdk1-mediated_2019]. It was demonstrated that binding of the CENP-C protein stiffens the cenH3 nucleosome, decreasing its flexibility and stabilizing its conformation, which promotes chromatin condensation and restricts access for transcriptional machinery [melters_intrinsic_2019]. CENP-N specifically recognizes CENP-A in the centromeric nucleosome by binding to its L1 loop through key residues (e.g., E3, T4, E7 in humans) and stabilizes the interaction via electrostatic contacts with nucleosomal DNA [chittori_structural_2018]. This interaction has co-evolved across species to ensure accurate kinetochore assembly and epigenetic centromere inheritance [chittori_structural_2018].```

### <span style="color:green">Update genes</span>

```The nematodes Caenorhabditis elegans and C. remanei possess two genes encoding cenH3: HCP-3 (also known as CeCENP-A) and CPAR-1 [monen_separase_2015, monen_differential_2005]. While HCP-3 functions as the predominant centromeric histone - being highly expressed and essential for precise chromosome segregation during mitotic divisions - CPAR-1 displays distinct characteristics: it exhibits lower expression levels but shows specific enrichment on meiotic chromosomes. Importantly, CPAR-1 undergoes separase-mediated proteolytic cleavage at the meiosis I metaphase-to-anaphase transition, implying a specialized, though not yet fully characterized, role in meiotic processes [monen_separase_2015].```

### <span style="color:green">Update function</span>

```The active centromeric chromatin, enriched with cenH3, defines the region of kinetochore interaction for spindle formation. However, certain functional differences in chromosome segregation are observed across different species. For example, in vertebrates, CENP-A is concentrated in a compact centromeric region, whereas in nematodes, HCP-3 is distributed diffusely, corresponding to a holocentric organization [hara_critical_2017]. Notably, in certain holocentric organisms, cenH3 appears dispensable for meiotic chromosome segregation while remaining essential for mitosis. As demonstrated in Caenorhabditis elegans, cenH3 loading is specifically eliminated following meiosis I, and RNAi-mediated depletion of CENP-A during meiosis fails to disrupt proper chromosome segregation [monen_differential_2005]. This stands in striking contrast to mitotic divisions, where cenH3 is absolutely required for kinetochore assembly and faithful chromosome segregation. Moreover, C. elegans and C. remanei possess a second cenH3 histone gene that likely performs specialized functions distinct from canonical cenH3 roles [monen_separase_2015]. For more detailed information see the "gene" section.```

## <span style="color:green">Update description of cenH3_(Homo_sapiens)</span>

### <span style="color:green">Update summary cenH3_(Homo_sapiens)</span>

### <span style="color:green">Update interactions of cenH3_(Homo_sapiens)</span>

```CENP-A nucleosomes stabilize the inner kinetochore known as the CCAN complex (Constitutive Centromere-Associated Network) through direct interaction with CENP-C and CENP-N proteins of this complex [cao_constitutive_2018, pesenti_structure_2022, sridhar_kinetochore_2022, xu_gross_2023]. Human CENP-C has been demonstrated to bind with the CENP-A C-terminal hydrophobic tail, the acidic patch of H2A-H2B and histone H4, thereby facilitating its engagement with all four histone subunits present on the nucleosome surface [allu_structure_2019]. The binding of CENP-C to CENP-A is critical for long-term viability in human RPE-1 cells [watanabe_cdk1-mediated_2019]. This interaction is stabilized by the process of CENP-C phosphorylation mediated by CDK1, which strengthens the complex's structure through the occurrence of an intramolecular interaction [watanabe_cdk1-mediated_2019, walstein_assembly_2021, ariyoshi_cryoem_2021]. Human CENP-N has been observed to bind to CENP-A nucleosomes through a direct recognition of the L1 loop and RG loop of CENP-A [chittori_structural_2018, tian_molecular_2018, ariyoshi_cryoem_2021]. However, cryo-EM studies have revealed that CENP-C and CENP-N bind to a single CENP-A nucleosome in a non-simultaneous manner, thereby demonstrating an asymmetric structure where CENP-C and CENP-N bind to opposite sides of the nucleosome [ariyoshi_cryoem_2021]. Furthermore, in 94% of cases, phosphorylated CENP-C exhibits exclusive binding to the RG-loop of CENP-A, suggesting that this particular interaction is predominant [ariyoshi_cryoem_2021]. In addition, in vitro reconstruction of the human CENP-C protein showed that CENP-C can bind to two centromeric nucleosomes simultaneously [walstein_assembly_2021]. Nevertheless, interactions with CENP-C or CENP-N do not determine the stability of CENP-A nucleosomes in chromatin [cao_constitutive_2018].```

### <span style="color:green">Update function of cenH3_(Homo_sapiens)</span>

```While CENP-A is a crucial centromeric marker, the formation of a functional kinetochore and proper chromosome segregation requires an epigenetic context, including the targeting of the chaperone HJURP and histone PTMs [hara_critical_2017, hori_histone_2014]. Human CENP-A plays an important role in stabilization and retention inner kinetochore during G1 phase of cell cycle [pesenti_structure_2022, hoffmann_cenp-is_2016]. However, mitosis can proceed without CENP-A as long as the CENP-B protein remains stably bound to centromeric sequences, facilitating the assembly of the inner kinetochore [hoffmann_cenp-is_2016].```

### <span style="color:green">Add to deposition of cenH3_(Homo_sapiens)</span>

```Notably, in contrast to maintenance, de novo centromere formation (e.g., neocentromere establishment) depends on HJURP-mediated recruitment of CENP-C [tachiwana_hjurp_2015]. The deposition of CENP-A depends not only on protein factors but also on the characteristics of alpha-satellite DNA. CENP-B boxes stabilize CENP-A nucleosomes through interaction with the CENP-B protein [mcnulty_alpha_2018]. During early G1 phase, RNA polymerase II-mediated transcription of alpha-satellite DNA generates non-coding RNAs that facilitate recruitment of the CENP-A/HJURP complex [mcnulty_alpha_2018]. Furthermore, post-translational histone modifications (H3K4me2, H3K36me2, H3K9ac, H4K16ac) establish a permissive chromatin environment and prevent heterochromatin spreading [mcnulty_alpha_2018].```

### <span style="color:green">Add structure to cenH3_(Homo_sapiens)</span>

```CENP-A confers enhanced flexibility and elasticity to nucleosomes, influencing centromeric chromatin organization and ensuring accurate chromosome segregation during mitosis [fedulova_molecular_2024, pitman_minimal_2020, melters_intrinsic_2019, tachiwana_crystal_2011]. Partial nucleosome unwrapping in CENP-A-containing nucleosomes is promoted by its shortened αN-helix [roulland_flexible_2016]. Cryo-EM analysis of human CENP-A nucleosomes revealed asymmetric DNA end flexibility, with the right DNA end being highly dynamic and partially detached from the histone core while the left end remained stably bound, a sequence-dependent behavior confirmed by molecular dynamics simulations [kono_free_2019, boopathi_phase-plate_2020]. Notably, this intrinsic asymmetry can be overcome by antibody stabilization. The single-chain antibody fragment PL2-6 symmetrically stabilizes both DNA ends through allosteric interactions, despite the inherent sequence-dependent asymmetry [zhou_atomic_2019, dogan_cenp-nucleosome_2021]. Molecular dynamics simulations revealed that CENP-A-containing tetramers exhibit enhanced stability but distinct dynamics compared to canonical octameric structures, explaining the unusual properties of CENP-A nucleosomes [zhao_oligomerization_2019].```

### <span style="color:green">Add sequence to cenH3_(Homo_sapiens)</span>

```The conserved residue K124 locaded in histone fold of CENP-A plays a crucial role in regulating centromeric function. During the cell cycle, K124 undergoes cyclic modifications [bui_internal_2017]. In G1/S phase, K124ac compacts the nucleosome, interfering with CENP-C binding (crucial for kinetochore assembly) while simultaneously promoting chromatin decondensation for centromere replication [bui_internal_2017]. During S phase, K124me stabilizes nucleosomes. Furthermore, K124 mutations disrupt both mitosis and replication, demonstrating its critical role in centromere epigenetic regulation [bui_internal_2017].```



## <span style="color:black">Update summary of cenH3_(Insecta)</span>

```cenH3_(Insecta) is a centromere-specific histone variant in insects, functionally analogous to CENP-A (Centromere Protein A) in other eukaryotes. It is essential for centromere specification and proper chromosome segregation during cell division. Notably, cenH3 is absent in four holocentric insect clades, suggesting alternative centromere organization mechanisms in these lineages [talbert_histone_2021, senaratne_formation_2021, sridhar_kinetochore_2022, cortes-silva_cenh3-independent_2020]. Some insect families, such as Drosophilidae and Culicidae, possess multiple cenH3 paralogs with evidence of functional specialization. For more overview of these families, see the cenH3_(Drosophilidae) and cenH3_(Culicidae) classes.```

In [8]:
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    "WHERE h.id='cenH3_(Insecta)'"
)
cursor.execute(query)
df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])
df

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,id.1,summary,taxonomy,genes,...,knock_out,function,sequence,localization,deposition,structure,interactions,disease,caveats,relations
0,cenH3_(Insecta),variant,Insecta,50557,237,cenH3_(Animals),237,cenH3_(Insecta) is a centromere-specific histo...,,"In some insect lineages, cenH3 has undergone g...",...,,,,,,,,,,


In [10]:
summary_desc = "cenH3_(Insecta) is a centromere-specific histone variant in insects, functionally analogous to CENP-A (Centromere Protein A) in other eukaryotes. It is essential for centromere specification and proper chromosome segregation during cell division. Notably, cenH3 is absent in four holocentric insect clades, suggesting alternative centromere organization mechanisms in these lineages [talbert_histone_2021, senaratne_formation_2021, sridhar_kinetochore_2022, cortes-silva_cenh3-independent_2020]. Some insect families, such as Drosophilidae and Culicidae, possess multiple cenH3 paralogs with evidence of functional specialization. For more overview of these families, see the cenH3_(Drosophilidae) and cenH3_(Culicidae) classes."
query = f"UPDATE histone_description SET summary='{summary_desc}' WHERE id=237"
print(query)
cursor.execute(query)

UPDATE histone_description SET summary='cenH3_(Insecta) is a centromere-specific histone variant in insects, functionally analogous to CENP-A (Centromere Protein A) in other eukaryotes. It is essential for centromere specification and proper chromosome segregation during cell division. Notably, cenH3 is absent in four holocentric insect clades, suggesting alternative centromere organization mechanisms in these lineages [talbert_histone_2021, senaratne_formation_2021, sridhar_kinetochore_2022, cortes-silva_cenh3-independent_2020]. Some insect families, such as Drosophilidae and Culicidae, possess multiple cenH3 paralogs with evidence of functional specialization. For more overview of these families, see the cenH3_(Drosophilidae) and cenH3_(Culicidae) classes.' WHERE id=237


In [12]:
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    "WHERE h.id='cenH3_(Insecta)'"
)
cursor.execute(query)
df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])
df["summary"].values[0]

'cenH3_(Insecta) is a centromere-specific histone variant in insects, functionally analogous to CENP-A (Centromere Protein A) in other eukaryotes. It is essential for centromere specification and proper chromosome segregation during cell division. Notably, cenH3 is absent in four holocentric insect clades, suggesting alternative centromere organization mechanisms in these lineages [talbert_histone_2021, senaratne_formation_2021, sridhar_kinetochore_2022, cortes-silva_cenh3-independent_2020]. Some insect families, such as Drosophilidae and Culicidae, possess multiple cenH3 paralogs with evidence of functional specialization. For more overview of these families, see the cenH3_(Drosophilidae) and cenH3_(Culicidae) classes.'

In [13]:
# Make sure data is committed to the database
conn.commit()

## <span style="color:black">Update description of cenH3_(Drosophilidae)</span>

### <span style="color:black">Add deposition</span>

```In Drosophila melanogaster, the deposition of cenH3 into nucleosomes is mediated by the chaperone CAL1 in a DNA sequence-independent manner. CAL1 facilitates nucleosome formation through direct interaction with cenH3-H4 histones and additionally recruits the FACT complex (comprising Dre4 and SSRP1 subunits) along with RNA polymerase II (RNAPII) [chen_establishment_2015, chen_cal1_2014]. FACT destabilizes H3-containing nucleosomes, enabling RNAPII-mediated transcription of DNA, a prerequisite for the replacement of H3 with CENP-A/H4 [chen_establishment_2015]. In the absence of FACT, transcription is abolished, preventing CENP-A incorporation. Importantly, CAL1 mediates the assembly of octameric nucleosomes with left-handed DNA wrapping, analogous to canonical nucleosomes [chen_cal1_2014].```

### <span style="color:black">Add function</span>

```Similar to other animals, Drosophila cenH3 functions primarily in kinetochore assembly through CENP-C recruitment. However, in contrast to vertebrates where CENP-A directly binds CENP-C, Drosophila depends on the CAL1 chaperone to bridge cenH3-CENP-C interaction for proper kinetochore formation and chromosome segregation [chen_cal1_2014].```

In [14]:
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    "WHERE h.id='cenH3_(Drosophilidae)'"
)
cursor.execute(query)
df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])
df

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,id.1,summary,taxonomy,genes,...,knock_out,function,sequence,localization,deposition,structure,interactions,disease,caveats,relations
0,cenH3_(Drosophilidae),variant,Drosophilidae,7214,238,cenH3_(Insecta),238,cenH3_(Drosophilidae) is a centromere-specific...,,"In Drosophila, six cenH3 paralogs (Cid1–Cid6) ...",...,,,,,,,,,,


In [16]:
deposition_desc = "In Drosophila melanogaster, the deposition of cenH3 into nucleosomes is mediated by the chaperone CAL1 in a DNA sequence-independent manner. CAL1 facilitates nucleosome formation through direct interaction with cenH3-H4 histones and additionally recruits the FACT complex (comprising Dre4 and SSRP1 subunits) along with RNA polymerase II (RNAPII) [chen_establishment_2015, chen_cal1_2014]. FACT destabilizes H3-containing nucleosomes, enabling RNAPII-mediated transcription of DNA, a prerequisite for the replacement of H3 with CENP-A/H4 [chen_establishment_2015]. In the absence of FACT, transcription is abolished, preventing CENP-A incorporation. Importantly, CAL1 mediates the assembly of octameric nucleosomes with left-handed DNA wrapping, analogous to canonical nucleosomes [chen_cal1_2014]."
function_desc = "Similar to other animals, Drosophila cenH3 functions primarily in kinetochore assembly through CENP-C recruitment. However, in contrast to vertebrates where CENP-A directly binds CENP-C, Drosophila depends on the CAL1 chaperone to bridge cenH3-CENP-C interaction for proper kinetochore formation and chromosome segregation [chen_cal1_2014]."
query = f"UPDATE histone_description SET deposition='{deposition_desc}', function='{function_desc}' WHERE id=238"
print(query)
cursor.execute(query)

UPDATE histone_description SET deposition='In Drosophila melanogaster, the deposition of cenH3 into nucleosomes is mediated by the chaperone CAL1 in a DNA sequence-independent manner. CAL1 facilitates nucleosome formation through direct interaction with cenH3-H4 histones and additionally recruits the FACT complex (comprising Dre4 and SSRP1 subunits) along with RNA polymerase II (RNAPII) [chen_establishment_2015, chen_cal1_2014]. FACT destabilizes H3-containing nucleosomes, enabling RNAPII-mediated transcription of DNA, a prerequisite for the replacement of H3 with CENP-A/H4 [chen_establishment_2015]. In the absence of FACT, transcription is abolished, preventing CENP-A incorporation. Importantly, CAL1 mediates the assembly of octameric nucleosomes with left-handed DNA wrapping, analogous to canonical nucleosomes [chen_cal1_2014].', function='Similar to other animals, Drosophila cenH3 functions primarily in kinetochore assembly through CENP-C recruitment. However, in contrast to vertebr

In [19]:
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    "WHERE h.id='cenH3_(Drosophilidae)'"
)
cursor.execute(query)
df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])
df[["deposition", "function"]]

Unnamed: 0,deposition,function
0,"In Drosophila melanogaster, the deposition of ...","Similar to other animals, Drosophila cenH3 fun..."


In [20]:
# Make sure data is committed to the database
conn.commit()

## <span style="color:black">Update description of cenH3</span>

### <span style="color:black">Update function</span>

```Despite the high diversity of cenH3 proteins, they represent a key factor involved in centromere identity and recruiting constitutive centromere-associated network (CCAN), ensuring accurate chromosome segregation, across a wide range of organisms. For example, the deep evolutionary conservation of cenH3 histone function in plants demonstarted by the ability of Zea mays cenH3 to functionally replace its counterpart in Arabidopsis thaliana [maheshwari_naturally_2015]. However, certain functional differences of cenH3 in chromosome segregation are observed across different species [wong_epigenetic_2020, steiner_diversity_2015]. Distinction is mainly related to cenH3 distribution on the chromosome, timing of cenH3 replenishment, heterochromatin dependence and chaperone specificity. In most mammals (including humans), cenH3 is incorporated into nucleosomes positioned within satellite repeat regions. In contrast, yeast species exhibit both regional (fission yeast) and point (budding yeast) centromeres, the latter consisting of a single cenH3 nucleosome complex assembled on specific DNA sequences. Nematode holocentromeres are characterized by the distribution of cenH3 along the entire chromosome length. Key factors of centromere identity in speicies with regional centromere (e.g., vertebrates) are the epigenetic context and transcription of centromeric DNA, whereas centromeres in budding yeast (S. cerevisiae) are determined by specific DNA sequences [wong_epigenetic_2020, hara_critical_2017, hori_histone_2014]. Moreover, studies in human and chicken cells have demonstrated the existence of natural and artificially created functional neocentromeres lacking alpha-satellite DNA [hara_critical_2017]. At the same time, despite the possibility of epigenetic drift, cenH3 positioning in maize centromeres is stable over generations and primarily governed by genetic changes [gent_stable_2015].```

### <span style="color:black">Add deposition</span>

```Unlike canonical histones, cenH3 deposition is replication-independent. In most speices, cenH3 synthesis and loading occurs during late mitosis and G1 phase of the cell cycle, mediated by specialized chaperones (HJURP in humans, Scm3 in budding and fission yeast, and CAL1 in Drosophila). However, the timing of deposition differs in some organisms. For instance, in budding yeast, cenH3 replenishment occurs during S-phase [wong_epigenetic_2020, pearson_stable_2004]. Notably, both budding yeast and nematodes (C. elegans) exhibit complete turnover of cenH3 each cell cycle [wong_epigenetic_2020]. The assembly and replenishment of cenH3 nucleosomes involves chromatin remodeling, resulting in the displacement of H3.3 histones and active stabilization by specific proteins (e.g., HJURP, CENP-C, CENP-B in humans) [dunleavy_h33_2011, wong_epigenetic_2020, black_epigenetic_2011]. It is possible that new cenH3 deposition doesn't always occur at the same centromeric position during every cell cycle [dunleavy_h33_2011]. Deposition of cenH3 into centromeric nucleosomes depends not only on a specific chaperone but also on numerous other factors. The study using DT40 (chicken) and HeLa S3 (human) cell lines demonstrated that chromatin-remodeling complexes FACT and CHD1 play important roles in the proper recruitment of cenH3 [okada_cenp-hcontaining_2009]. However, this observation is likely not universal across all organisms. For example, in Drosophila, cenH3 incorporation occurs independently of CHD1, yet still involves direct participation of FACT [podhraski_cenh3cid_2010, chen_establishment_2015, chen_cal1_2014].```

In [23]:
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    "WHERE h.id='cenH3'"
)
cursor.execute(query)
df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])
df

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,id.1,summary,taxonomy,genes,...,knock_out,function,sequence,localization,deposition,structure,interactions,disease,caveats,relations
0,cenH3,variant_group,Eukaryotes,2759,96,H3,96,cenH3 is a centromere-specific histone variant...,"cenH3 present in most eukaryotes. However, it ...",,...,,"Despite the high diversity of cenH3 proteins, ...",cenH3 has an extended L1-loop and its N-termin...,,,,,CenH3 plays pro-viral and restriction role in ...,,


In [26]:
deposition_desc = "Unlike canonical histones, cenH3 deposition is replication-independent. In most speices, cenH3 synthesis and loading occurs during late mitosis and G1 phase of the cell cycle, mediated by specialized chaperones (HJURP in humans, Scm3 in budding and fission yeast, and CAL1 in Drosophila). However, the timing of deposition differs in some organisms. For instance, in budding yeast, cenH3 replenishment occurs during S-phase [wong_epigenetic_2020, pearson_stable_2004]. Notably, both budding yeast and nematodes (C. elegans) exhibit complete turnover of cenH3 each cell cycle [wong_epigenetic_2020]. The assembly and replenishment of cenH3 nucleosomes involves chromatin remodeling, resulting in the displacement of H3.3 histones and active stabilization by specific proteins (e.g., HJURP, CENP-C, CENP-B in humans) [dunleavy_h33_2011, wong_epigenetic_2020, black_epigenetic_2011]. It is possible that new cenH3 deposition doesn't always occur at the same centromeric position during every cell cycle [dunleavy_h33_2011]. Deposition of cenH3 into centromeric nucleosomes depends not only on a specific chaperone but also on numerous other factors. The study using DT40 (chicken) and HeLa S3 (human) cell lines demonstrated that chromatin-remodeling complexes FACT and CHD1 play important roles in the proper recruitment of cenH3 [okada_cenp-hcontaining_2009]. However, this observation is likely not universal across all organisms. For example, in Drosophila, cenH3 incorporation occurs independently of CHD1, yet still involves direct participation of FACT [podhraski_cenh3cid_2010, chen_establishment_2015, chen_cal1_2014]."
function_desc = "Despite the high diversity of cenH3 proteins, they represent a key factor involved in centromere identity and recruiting constitutive centromere-associated network (CCAN), ensuring accurate chromosome segregation, across a wide range of organisms. For example, the deep evolutionary conservation of cenH3 histone function in plants demonstarted by the ability of Zea mays cenH3 to functionally replace its counterpart in Arabidopsis thaliana [maheshwari_naturally_2015]. However, certain functional differences of cenH3 in chromosome segregation are observed across different species [wong_epigenetic_2020, steiner_diversity_2015]. Distinction is mainly related to cenH3 distribution on the chromosome, timing of cenH3 replenishment, heterochromatin dependence and chaperone specificity. In most mammals (including humans), cenH3 is incorporated into nucleosomes positioned within satellite repeat regions. In contrast, yeast species exhibit both regional (fission yeast) and point (budding yeast) centromeres, the latter consisting of a single cenH3 nucleosome complex assembled on specific DNA sequences. Nematode holocentromeres are characterized by the distribution of cenH3 along the entire chromosome length. Key factors of centromere identity in speicies with regional centromere (e.g., vertebrates) are the epigenetic context and transcription of centromeric DNA, whereas centromeres in budding yeast (S. cerevisiae) are determined by specific DNA sequences [wong_epigenetic_2020, hara_critical_2017, hori_histone_2014]. Moreover, studies in human and chicken cells have demonstrated the existence of natural and artificially created functional neocentromeres lacking alpha-satellite DNA [hara_critical_2017]. At the same time, despite the possibility of epigenetic drift, cenH3 positioning in maize centromeres is stable over generations and primarily governed by genetic changes [gent_stable_2015]."
query = f'UPDATE histone_description SET deposition="{deposition_desc}", function="{function_desc}" WHERE id=96'
print(query)
cursor.execute(query)

UPDATE histone_description SET deposition="Unlike canonical histones, cenH3 deposition is replication-independent. In most speices, cenH3 synthesis and loading occurs during late mitosis and G1 phase of the cell cycle, mediated by specialized chaperones (HJURP in humans, Scm3 in budding and fission yeast, and CAL1 in Drosophila). However, the timing of deposition differs in some organisms. For instance, in budding yeast, cenH3 replenishment occurs during S-phase [wong_epigenetic_2020, pearson_stable_2004]. Notably, both budding yeast and nematodes (C. elegans) exhibit complete turnover of cenH3 each cell cycle [wong_epigenetic_2020]. The assembly and replenishment of cenH3 nucleosomes involves chromatin remodeling, resulting in the displacement of H3.3 histones and active stabilization by specific proteins (e.g., HJURP, CENP-C, CENP-B in humans) [dunleavy_h33_2011, wong_epigenetic_2020, black_epigenetic_2011]. It is possible that new cenH3 deposition doesn't always occur at the same ce

In [27]:
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    "WHERE h.id='cenH3'"
)
cursor.execute(query)
df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])
df[["deposition", "function"]]

Unnamed: 0,deposition,function
0,"Unlike canonical histones, cenH3 deposition is...","Despite the high diversity of cenH3 proteins, ..."


In [28]:
# Make sure data is committed to the database
conn.commit()

## <span style="color:black">Update description of cenH3_(Animals)</span>

### <span style="color:black">Update summary</span>

```cenH3_(Animals) is a centromere-specific histone variant in animals (Metazoa), often called CENP-A (Centromere Protein A) in mammals, and an important component of active centromere required for chromosome segregation.```

### <span style="color:black">Add interactions</span>

```cenH3 nucleosomes stabilize the inner kinetochore known as the CCAN complex (Constitutive Centromere-Associated Network). The initiation of kinetochore assembly involves the interaction between CENP-A and CENP-C [watanabe_cdk1-mediated_2019, walstein_assembly_2021]. CDK1-mediated phosphorylation of CENP-C enhances its binding to CENP-A nucleosomes during mitosis, as shown in both chicken and human cells [watanabe_cdk1-mediated_2019]. However, the functional importance of this interaction varies: while it is nonessential for viability in chicken cells, it is crucial for proper CENP-C kinetochore localization and long-term survival in human cells [watanabe_cdk1-mediated_2019]. It was demonstrated that binding of the CENP-C protein stiffens the cenH3 nucleosome, decreasing its flexibility and stabilizing its conformation, which promotes chromatin condensation and restricts access for transcriptional machinery [melters_intrinsic_2019]. CENP-N specifically recognizes CENP-A in the centromeric nucleosome by binding to its L1 loop through key residues (e.g., E3, T4, E7 in humans) and stabilizes the interaction via electrostatic contacts with nucleosomal DNA [chittori_structural_2018]. This interaction has co-evolved across species to ensure accurate kinetochore assembly and epigenetic centromere inheritance [chittori_structural_2018].```

### <span style="color:black">Update genes</span>

```The nematodes Caenorhabditis elegans and C. remanei possess two genes encoding cenH3: HCP-3 (also known as CeCENP-A) and CPAR-1 [monen_separase_2015, monen_differential_2005]. While HCP-3 functions as the predominant centromeric histone - being highly expressed and essential for precise chromosome segregation during mitotic divisions - CPAR-1 displays distinct characteristics: it exhibits lower expression levels but shows specific enrichment on meiotic chromosomes. Importantly, CPAR-1 undergoes separase-mediated proteolytic cleavage at the meiosis I metaphase-to-anaphase transition, implying a specialized, though not yet fully characterized, role in meiotic processes [monen_separase_2015].```

### <span style="color:black">Update function</span>

```The active centromeric chromatin, enriched with cenH3, defines the region of kinetochore interaction for spindle formation. However, certain functional differences in chromosome segregation are observed across different species. For example, in vertebrates, CENP-A is concentrated in a compact centromeric region, whereas in nematodes, HCP-3 is distributed diffusely, corresponding to a holocentric organization [hara_critical_2017]. Notably, in certain holocentric organisms, cenH3 appears dispensable for meiotic chromosome segregation while remaining essential for mitosis. As demonstrated in Caenorhabditis elegans, cenH3 loading is specifically eliminated following meiosis I, and RNAi-mediated depletion of CENP-A during meiosis fails to disrupt proper chromosome segregation [monen_differential_2005]. This stands in striking contrast to mitotic divisions, where cenH3 is absolutely required for kinetochore assembly and faithful chromosome segregation. Moreover, C. elegans and C. remanei possess a second cenH3 histone gene that likely performs specialized functions distinct from canonical cenH3 roles [monen_separase_2015]. For more detailed information see the "gene" section.```

In [29]:
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    "WHERE h.id='cenH3_(Animals)'"
)
cursor.execute(query)
df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])
df

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,id.1,summary,taxonomy,genes,...,knock_out,function,sequence,localization,deposition,structure,interactions,disease,caveats,relations
0,cenH3_(Animals),variant,Homo sapiens,9606,94,cenH3,94,cenH3_(Animals) is a centromere-specific histo...,,CENP-A nucleosomes are essential for chromosom...,...,,The nematodes Caenorhabditis elegans and C. re...,,,,,,,,


In [30]:
df[["summary", "interactions", "genes", "function"]]

Unnamed: 0,summary,interactions,genes,function
0,cenH3_(Animals) is a centromere-specific histo...,,CENP-A nucleosomes are essential for chromosom...,The nematodes Caenorhabditis elegans and C. re...


In [33]:
summary_desc = "cenH3_(Animals) is a centromere-specific histone variant in animals (Metazoa), often called CENP-A (Centromere Protein A) in mammals, and an important component of active centromere required for chromosome segregation."
interactions_desc = "cenH3 nucleosomes stabilize the inner kinetochore known as the CCAN complex (Constitutive Centromere-Associated Network). The initiation of kinetochore assembly involves the interaction between CENP-A and CENP-C [watanabe_cdk1-mediated_2019, walstein_assembly_2021]. CDK1-mediated phosphorylation of CENP-C enhances its binding to CENP-A nucleosomes during mitosis, as shown in both chicken and human cells [watanabe_cdk1-mediated_2019]. However, the functional importance of this interaction varies: while it is nonessential for viability in chicken cells, it is crucial for proper CENP-C kinetochore localization and long-term survival in human cells [watanabe_cdk1-mediated_2019]. It was demonstrated that binding of the CENP-C protein stiffens the cenH3 nucleosome, decreasing its flexibility and stabilizing its conformation, which promotes chromatin condensation and restricts access for transcriptional machinery [melters_intrinsic_2019]. CENP-N specifically recognizes CENP-A in the centromeric nucleosome by binding to its L1 loop through key residues (e.g., E3, T4, E7 in humans) and stabilizes the interaction via electrostatic contacts with nucleosomal DNA [chittori_structural_2018]. This interaction has co-evolved across species to ensure accurate kinetochore assembly and epigenetic centromere inheritance [chittori_structural_2018]."
genes_desc = "The nematodes Caenorhabditis elegans and C. remanei possess two genes encoding cenH3: HCP-3 (also known as CeCENP-A) and CPAR-1 [monen_separase_2015, monen_differential_2005]. While HCP-3 functions as the predominant centromeric histone - being highly expressed and essential for precise chromosome segregation during mitotic divisions - CPAR-1 displays distinct characteristics: it exhibits lower expression levels but shows specific enrichment on meiotic chromosomes. Importantly, CPAR-1 undergoes separase-mediated proteolytic cleavage at the meiosis I metaphase-to-anaphase transition, implying a specialized, though not yet fully characterized, role in meiotic processes [monen_separase_2015]."
function_desc = 'The active centromeric chromatin, enriched with cenH3, defines the region of kinetochore interaction for spindle formation. However, certain functional differences in chromosome segregation are observed across different species. For example, in vertebrates, CENP-A is concentrated in a compact centromeric region, whereas in nematodes, HCP-3 is distributed diffusely, corresponding to a holocentric organization [hara_critical_2017]. Notably, in certain holocentric organisms, cenH3 appears dispensable for meiotic chromosome segregation while remaining essential for mitosis. As demonstrated in Caenorhabditis elegans, cenH3 loading is specifically eliminated following meiosis I, and RNAi-mediated depletion of CENP-A during meiosis fails to disrupt proper chromosome segregation [monen_differential_2005]. This stands in striking contrast to mitotic divisions, where cenH3 is absolutely required for kinetochore assembly and faithful chromosome segregation. Moreover, C. elegans and C. remanei possess a second cenH3 histone gene that likely performs specialized functions distinct from canonical cenH3 roles [monen_separase_2015]. For more detailed information see the "gene" section.'
query = f"UPDATE histone_description SET summary='{summary_desc}', interactions='{interactions_desc}', genes='{genes_desc}', function='{function_desc}' WHERE id=94"
print(query)
cursor.execute(query)

UPDATE histone_description SET summary='cenH3_(Animals) is a centromere-specific histone variant in animals (Metazoa), often called CENP-A (Centromere Protein A) in mammals, and an important component of active centromere required for chromosome segregation.', interactions='cenH3 nucleosomes stabilize the inner kinetochore known as the CCAN complex (Constitutive Centromere-Associated Network). The initiation of kinetochore assembly involves the interaction between CENP-A and CENP-C [watanabe_cdk1-mediated_2019, walstein_assembly_2021]. CDK1-mediated phosphorylation of CENP-C enhances its binding to CENP-A nucleosomes during mitosis, as shown in both chicken and human cells [watanabe_cdk1-mediated_2019]. However, the functional importance of this interaction varies: while it is nonessential for viability in chicken cells, it is crucial for proper CENP-C kinetochore localization and long-term survival in human cells [watanabe_cdk1-mediated_2019]. It was demonstrated that binding of the C

In [34]:
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    "WHERE h.id='cenH3_(Animals)'"
)
cursor.execute(query)
df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])
df[["summary", "interactions", "genes", "function"]]

Unnamed: 0,summary,interactions,genes,function
0,cenH3_(Animals) is a centromere-specific histo...,cenH3 nucleosomes stabilize the inner kinetoch...,The nematodes Caenorhabditis elegans and C. re...,"The active centromeric chromatin, enriched wit..."


In [35]:
# Make sure data is committed to the database
conn.commit()

## <span style="color:black">Update description of cenH3_(Homo_sapiens)</span>

### <span style="color:black">Update summary cenH3_(Homo_sapiens)</span>

### <span style="color:black">Update interactions of cenH3_(Homo_sapiens)</span>

```CENP-A nucleosomes stabilize the inner kinetochore known as the CCAN complex (Constitutive Centromere-Associated Network) through direct interaction with CENP-C and CENP-N proteins of this complex [cao_constitutive_2018, pesenti_structure_2022, sridhar_kinetochore_2022, xu_gross_2023]. Human CENP-C has been demonstrated to bind with the CENP-A C-terminal hydrophobic tail, the acidic patch of H2A-H2B and histone H4, thereby facilitating its engagement with all four histone subunits present on the nucleosome surface [allu_structure_2019]. The binding of CENP-C to CENP-A is critical for long-term viability in human RPE-1 cells [watanabe_cdk1-mediated_2019]. This interaction is stabilized by the process of CENP-C phosphorylation mediated by CDK1, which strengthens the complex's structure through the occurrence of an intramolecular interaction [watanabe_cdk1-mediated_2019, walstein_assembly_2021, ariyoshi_cryoem_2021]. Human CENP-N has been observed to bind to CENP-A nucleosomes through a direct recognition of the L1 loop and RG loop of CENP-A [chittori_structural_2018, tian_molecular_2018, ariyoshi_cryoem_2021]. However, cryo-EM studies have revealed that CENP-C and CENP-N bind to a single CENP-A nucleosome in a non-simultaneous manner, thereby demonstrating an asymmetric structure where CENP-C and CENP-N bind to opposite sides of the nucleosome [ariyoshi_cryoem_2021]. Furthermore, in 94% of cases, phosphorylated CENP-C exhibits exclusive binding to the RG-loop of CENP-A, suggesting that this particular interaction is predominant [ariyoshi_cryoem_2021]. In addition, in vitro reconstruction of the human CENP-C protein showed that CENP-C can bind to two centromeric nucleosomes simultaneously [walstein_assembly_2021]. Nevertheless, interactions with CENP-C or CENP-N do not determine the stability of CENP-A nucleosomes in chromatin [cao_constitutive_2018].```

### <span style="color:black">Update function of cenH3_(Homo_sapiens)</span>

```While CENP-A is a crucial centromeric marker, the formation of a functional kinetochore and proper chromosome segregation requires an epigenetic context, including the targeting of the chaperone HJURP and histone PTMs [hara_critical_2017, hori_histone_2014]. Human CENP-A plays an important role in stabilization and retention inner kinetochore during G1 phase of cell cycle [pesenti_structure_2022, hoffmann_cenp-is_2016]. However, mitosis can proceed without CENP-A as long as the CENP-B protein remains stably bound to centromeric sequences, facilitating the assembly of the inner kinetochore [hoffmann_cenp-is_2016].```

### <span style="color:black">Add to deposition of cenH3_(Homo_sapiens)</span>

```Notably, in contrast to maintenance, de novo centromere formation (e.g., neocentromere establishment) depends on HJURP-mediated recruitment of CENP-C [tachiwana_hjurp_2015]. The deposition of CENP-A depends not only on protein factors but also on the characteristics of alpha-satellite DNA. CENP-B boxes stabilize CENP-A nucleosomes through interaction with the CENP-B protein [mcnulty_alpha_2018]. During early G1 phase, RNA polymerase II-mediated transcription of alpha-satellite DNA generates non-coding RNAs that facilitate recruitment of the CENP-A/HJURP complex [mcnulty_alpha_2018]. Furthermore, post-translational histone modifications (H3K4me2, H3K36me2, H3K9ac, H4K16ac) establish a permissive chromatin environment and prevent heterochromatin spreading [mcnulty_alpha_2018].```

### <span style="color:black">Add structure to cenH3_(Homo_sapiens)</span>

```CENP-A confers enhanced flexibility and elasticity to nucleosomes, influencing centromeric chromatin organization and ensuring accurate chromosome segregation during mitosis [fedulova_molecular_2024, pitman_minimal_2020, melters_intrinsic_2019, tachiwana_crystal_2011]. Partial nucleosome unwrapping in CENP-A-containing nucleosomes is promoted by its shortened αN-helix [roulland_flexible_2016]. Cryo-EM analysis of human CENP-A nucleosomes revealed asymmetric DNA end flexibility, with the right DNA end being highly dynamic and partially detached from the histone core while the left end remained stably bound, a sequence-dependent behavior confirmed by molecular dynamics simulations [kono_free_2019, boopathi_phase-plate_2020]. Notably, this intrinsic asymmetry can be overcome by antibody stabilization. The single-chain antibody fragment PL2-6 symmetrically stabilizes both DNA ends through allosteric interactions, despite the inherent sequence-dependent asymmetry [zhou_atomic_2019, dogan_cenp-nucleosome_2021]. Molecular dynamics simulations revealed that CENP-A-containing tetramers exhibit enhanced stability but distinct dynamics compared to canonical octameric structures, explaining the unusual properties of CENP-A nucleosomes [zhao_oligomerization_2019].```

### <span style="color:black">Add sequence to cenH3_(Homo_sapiens)</span>

```The conserved residue K124 locaded in histone fold of CENP-A plays a crucial role in regulating centromeric function. During the cell cycle, K124 undergoes cyclic modifications [bui_internal_2017]. In G1/S phase, K124ac compacts the nucleosome, interfering with CENP-C binding (crucial for kinetochore assembly) while simultaneously promoting chromatin decondensation for centromere replication [bui_internal_2017]. During S phase, K124me stabilizes nucleosomes. Furthermore, K124 mutations disrupt both mitosis and replication, demonstrating its critical role in centromere epigenetic regulation [bui_internal_2017].```

In [36]:
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    "WHERE h.id='cenH3_(Homo_sapiens)'"
)
cursor.execute(query)
df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])
df

Unnamed: 0,id,level,taxonomic_span,taxonomic_span_id,description,parent,id.1,summary,taxonomy,genes,...,knock_out,function,sequence,localization,deposition,structure,interactions,disease,caveats,relations
0,cenH3_(Homo_sapiens),variant,Homo sapiens,9606,158,cenH3_(Mammalia),158,cenH3(Homo_sapiens) is a centromere-specific h...,,,...,CENP-A depletion at different stages of the ce...,CENP-A nucleosomes are essential for chromosom...,,CENP-A in human chromosomes usually localized ...,Deposition of CENP-A into centromeric nucleoso...,CENP-A confers enhanced flexibility and elasti...,CENP-A nucleosomes stabilize the inner kinetoc...,,,


In [37]:
df[["summary", "interactions", "function", "deposition", "structure", "sequence"]]

Unnamed: 0,summary,interactions,function,deposition,structure,sequence
0,cenH3(Homo_sapiens) is a centromere-specific h...,CENP-A nucleosomes stabilize the inner kinetoc...,CENP-A nucleosomes are essential for chromosom...,Deposition of CENP-A into centromeric nucleoso...,CENP-A confers enhanced flexibility and elasti...,


In [39]:
current_deposition_desc = df["deposition"].values[0]
current_deposition_desc

'Deposition of CENP-A into centromeric nucleosomes during the late telophase/early G1 phase of the cell cycle mediated by four major and some minor factors and regulators, which are important for identifying centromeric localization and limit the process to a single round [pan_mechanism_2019, xu_gross_2023]. The specific CENP-A chaperone HJURP (Holliday Junction Recognition Protein) plays a key role in stabilizing the binding of CENP-A to histone H4 [foltz_centromere_2009, dunleavy_hjurp_2009, shuaib_hjurp_2010]. To target centromeres and load a new CENP-A HJURP form a stoichiometric complex with the two-subunit Mis18 complex (Mis18α and Mis18β) and Mis18-binding protein 1 (M18BP1) [hayashi_mis16_2004, pan_mechanism_2019]. Among minor proteins required for CENP-A loading there are RSF1, MgcRacGAP, Condensin II, and KAT7 [pan_mechanism_2019]. Furthermore, loading of CENP-A into centromeric nucleosomes contributed by CENP-I and CENP-B proteins that are specifically binds to the centromer

In [43]:
summary_desc = "cenH3_(Homo_sapiens) is a centromere-specific histone variant in human, often called CENP-A (Centromere Protein A), and an important component of active centromere required for chromosome segregation. For a general description see cenH3_(Animals) class."
interactions_desc = "CENP-A nucleosomes stabilize the inner kinetochore known as the CCAN complex (Constitutive Centromere-Associated Network) through direct interaction with CENP-C and CENP-N proteins of this complex [cao_constitutive_2018, pesenti_structure_2022, sridhar_kinetochore_2022, xu_gross_2023]. Human CENP-C has been demonstrated to bind with the CENP-A C-terminal hydrophobic tail, the acidic patch of H2A-H2B and histone H4, thereby facilitating its engagement with all four histone subunits present on the nucleosome surface [allu_structure_2019]. The binding of CENP-C to CENP-A is critical for long-term viability in human RPE-1 cells [watanabe_cdk1-mediated_2019]. This interaction is stabilized by the process of CENP-C phosphorylation mediated by CDK1, which strengthens the complex's structure through the occurrence of an intramolecular interaction [watanabe_cdk1-mediated_2019, walstein_assembly_2021, ariyoshi_cryoem_2021]. Human CENP-N has been observed to bind to CENP-A nucleosomes through a direct recognition of the L1 loop and RG loop of CENP-A [chittori_structural_2018, tian_molecular_2018, ariyoshi_cryoem_2021]. However, cryo-EM studies have revealed that CENP-C and CENP-N bind to a single CENP-A nucleosome in a non-simultaneous manner, thereby demonstrating an asymmetric structure where CENP-C and CENP-N bind to opposite sides of the nucleosome [ariyoshi_cryoem_2021]. Furthermore, in 94% of cases, phosphorylated CENP-C exhibits exclusive binding to the RG-loop of CENP-A, suggesting that this particular interaction is predominant [ariyoshi_cryoem_2021]. In addition, in vitro reconstruction of the human CENP-C protein showed that CENP-C can bind to two centromeric nucleosomes simultaneously [walstein_assembly_2021]. Nevertheless, interactions with CENP-C or CENP-N do not determine the stability of CENP-A nucleosomes in chromatin [cao_constitutive_2018]."
function_desc = "While CENP-A is a crucial centromeric marker, the formation of a functional kinetochore and proper chromosome segregation requires an epigenetic context, including the targeting of the chaperone HJURP and histone PTMs [hara_critical_2017, hori_histone_2014]. Human CENP-A plays an important role in stabilization and retention inner kinetochore during G1 phase of cell cycle [pesenti_structure_2022, hoffmann_cenp-is_2016]. However, mitosis can proceed without CENP-A as long as the CENP-B protein remains stably bound to centromeric sequences, facilitating the assembly of the inner kinetochore [hoffmann_cenp-is_2016]."
deposition_desc = current_deposition_desc + " Notably, in contrast to maintenance, de novo centromere formation (e.g., neocentromere establishment) depends on HJURP-mediated recruitment of CENP-C [tachiwana_hjurp_2015]. The deposition of CENP-A depends not only on protein factors but also on the characteristics of alpha-satellite DNA. CENP-B boxes stabilize CENP-A nucleosomes through interaction with the CENP-B protein [mcnulty_alpha_2018]. During early G1 phase, RNA polymerase II-mediated transcription of alpha-satellite DNA generates non-coding RNAs that facilitate recruitment of the CENP-A/HJURP complex [mcnulty_alpha_2018]. Furthermore, post-translational histone modifications (H3K4me2, H3K36me2, H3K9ac, H4K16ac) establish a permissive chromatin environment and prevent heterochromatin spreading [mcnulty_alpha_2018]."
structure_desc = "CENP-A confers enhanced flexibility and elasticity to nucleosomes, influencing centromeric chromatin organization and ensuring accurate chromosome segregation during mitosis [fedulova_molecular_2024, pitman_minimal_2020, melters_intrinsic_2019, tachiwana_crystal_2011]. Partial nucleosome unwrapping in CENP-A-containing nucleosomes is promoted by its shortened αN-helix [roulland_flexible_2016]. Cryo-EM analysis of human CENP-A nucleosomes revealed asymmetric DNA end flexibility, with the right DNA end being highly dynamic and partially detached from the histone core while the left end remained stably bound, a sequence-dependent behavior confirmed by molecular dynamics simulations [kono_free_2019, boopathi_phase-plate_2020]. Notably, this intrinsic asymmetry can be overcome by antibody stabilization. The single-chain antibody fragment PL2-6 symmetrically stabilizes both DNA ends through allosteric interactions, despite the inherent sequence-dependent asymmetry [zhou_atomic_2019, dogan_cenp-nucleosome_2021]. Molecular dynamics simulations revealed that CENP-A-containing tetramers exhibit enhanced stability but distinct dynamics compared to canonical octameric structures, explaining the unusual properties of CENP-A nucleosomes [zhao_oligomerization_2019]."
sequence_desc = "The conserved residue K124 locaded in histone fold of CENP-A plays a crucial role in regulating centromeric function. During the cell cycle, K124 undergoes cyclic modifications [bui_internal_2017]. In G1/S phase, K124ac compacts the nucleosome, interfering with CENP-C binding (crucial for kinetochore assembly) while simultaneously promoting chromatin decondensation for centromere replication [bui_internal_2017]. During S phase, K124me stabilizes nucleosomes. Furthermore, K124 mutations disrupt both mitosis and replication, demonstrating its critical role in centromere epigenetic regulation [bui_internal_2017]."
query = f'UPDATE histone_description SET summary="{summary_desc}", interactions="{interactions_desc}", function="{function_desc}", deposition="{deposition_desc}", structure="{structure_desc}", sequence="{sequence_desc}" WHERE id=158'
print(query)
cursor.execute(query)

UPDATE histone_description SET summary="cenH3_(Homo_sapiens) is a centromere-specific histone variant in human, often called CENP-A (Centromere Protein A), and an important component of active centromere required for chromosome segregation. For a general description see cenH3_(Animals) class.", interactions="CENP-A nucleosomes stabilize the inner kinetochore known as the CCAN complex (Constitutive Centromere-Associated Network) through direct interaction with CENP-C and CENP-N proteins of this complex [cao_constitutive_2018, pesenti_structure_2022, sridhar_kinetochore_2022, xu_gross_2023]. Human CENP-C has been demonstrated to bind with the CENP-A C-terminal hydrophobic tail, the acidic patch of H2A-H2B and histone H4, thereby facilitating its engagement with all four histone subunits present on the nucleosome surface [allu_structure_2019]. The binding of CENP-C to CENP-A is critical for long-term viability in human RPE-1 cells [watanabe_cdk1-mediated_2019]. This interaction is stabili

In [44]:
query = (
    "SELECT * FROM histone h LEFT JOIN histone_description hd "
    "ON h.description = hd.id "
    "WHERE h.id='cenH3_(Homo_sapiens)'"
)
cursor.execute(query)
df = pd.DataFrame(cursor.fetchall(), columns=[i[0] for i in cursor.description])
df[["summary", "interactions", "function", "deposition", "structure", "sequence"]]

Unnamed: 0,summary,interactions,function,deposition,structure,sequence
0,cenH3_(Homo_sapiens) is a centromere-specific ...,CENP-A nucleosomes stabilize the inner kinetoc...,"While CENP-A is a crucial centromeric marker, ...",Deposition of CENP-A into centromeric nucleoso...,CENP-A confers enhanced flexibility and elasti...,The conserved residue K124 locaded in histone ...


In [45]:
# Make sure data is committed to the database
conn.commit()

# Close connections

In [46]:
cursor.close()
conn.close()
tunnel.stop()