In [1]:
import os
import shutil

import pandas as pd

In [2]:
# S3 prefix that holds the prediction files to analyse; list_files() requires the trailing "/".
# NOTE(review): "asbtract" looks like a typo, but it is part of a runtime path - confirm the
# actual bucket layout before changing it.
s3_prefix = "s3://aegovan-data/pubmed_asbtract/predictions_multi_00/"
# S3 object that is later downloaded and loaded into data_training_full_df.
s3_data ="s3://aegovan-data/human_output/human_interactions_ppi_v2.json"

In [3]:
# Local scratch layout: downloaded prediction files go to <local_temp>/pred_results,
# other downloaded working files go to <local_temp>/wk.
local_temp = "temp"
local_temp_pred_dir, local_temp_wk_dir = (
    os.path.join(local_temp, sub_dir) for sub_dir in ("pred_results", "wk")
)

In [4]:
# Start from a clean scratch area: drop whatever a previous run left behind,
# then recreate the prediction and working directories.
# Done in Python rather than via `!rm -rf $var` so that no unquoted variable is
# interpolated into a shell command and the cell does not depend on a POSIX shell.
shutil.rmtree(local_temp, ignore_errors=True)
os.makedirs(local_temp_pred_dir, exist_ok=True)
os.makedirs(local_temp_wk_dir, exist_ok=True)

In [5]:
#!aws s3 cp s3://aegovan-data/pubmed_asbtract/predictions_multi_95/pubmed19n0538.json.txt.json.prediction.json .

In [6]:
import boto3
import glob
from multiprocessing.dummy import Pool as ThreadPool
import argparse
import datetime 
import os


def uploadfile(localpath, s3path):
    """
    Uploads a single local file to s3.

    :param localpath: The local path of the file to upload
    :param s3path: The s3 path in format s3://mybucket/mydir/mysample.txt. When the
        key part is empty (s3://mybucket/) or ends with "/", it is treated as a
        prefix and the base name of localpath is appended to it.
    """
    bucket, key = get_bucketname_key(s3path)

    # "s3://mybucket/" yields an empty key. That is a prefix (the bucket root) just
    # like "dir/", so it also needs the file name appended; an empty object key
    # cannot be uploaded.
    if not key or key.endswith("/"):
        key = "{}{}".format(key, os.path.basename(localpath))

    s3 = boto3.client('s3')
    s3.upload_file(localpath, bucket, key)

def get_bucketname_key(uripath):
    """
    Splits an s3 uri into its bucket name and object key.

    :param uripath: The s3 path in format s3://mybucket/mydir/mysample.txt
    :return: A (bucket_name, key) tuple. The key is everything after the first "/"
        that follows the bucket name; when the uri contains no such "/"
        (s3://mybucket) the key is "/".
    """
    assert uripath.startswith("s3://")

    remainder = uripath[len("s3://"):]
    bucket_name, separator, key = remainder.partition("/")

    # No "/" after the bucket name: the whole remainder is the bucket.
    if not separator:
        key = "/"

    return bucket_name, key


def download_file(s3path, local_dir):
    """
    Downloads one s3 object into a local directory.

    The local file is named after the last "/"-separated segment of s3path.

    :param s3path: The s3 path in format s3://mybucket/mydir/mysample.txt
    :param local_dir: The local directory that receives the file
    """
    bucket_name, object_key = get_bucketname_key(s3path)
    s3_client = boto3.client('s3')

    destination = os.path.join(local_dir, s3path.rsplit("/", 1)[-1])
    s3_client.download_file(bucket_name, object_key, destination)
    
def download_object(s3path):
    """
    Reads one s3 object fully into memory and reports how much was read.

    :param s3path: The s3 path in format s3://mybucket/mydir/mysample.txt
    :return: The length of the content read from the object body
    """
    bucket_name, object_key = get_bucketname_key(s3path)

    response = boto3.client('s3').get_object(Bucket=bucket_name, Key=object_key)
    payload = response['Body'].read()

    return len(payload)



def list_files(s3path_prefix):
    """
    Lists the objects stored under an s3 prefix.

    :param s3path_prefix: The s3 prefix in format s3://mybucket/mydir/ ; it must
        start with "s3://" and end with "/"
    :return: A generator of (bucket_name, key) tuples, one per object returned by
        filtering the bucket on the prefix
    """
    assert s3path_prefix.startswith("s3://")
    assert s3path_prefix.endswith("/")

    bucket_name, key_prefix = get_bucketname_key(s3path_prefix)

    s3_bucket = boto3.resource('s3').Bucket(name=bucket_name)
    matching_objects = s3_bucket.objects.filter(Prefix=key_prefix)

    return ((summary.bucket_name, summary.key) for summary in matching_objects)





def upload_files(local_dir, s3_prefix, num_threads=20):
    """
    Uploads every path matched by "<local_dir>/*" to s3 using a pool of threads.

    :param local_dir: The local directory whose direct children are uploaded
    :param s3_prefix: The s3 destination, passed unchanged to uploadfile for every path
    :param num_threads: The number of worker threads in the pool
    """
    local_paths = glob.glob("{}/*".format(local_dir))
    upload_args = ((local_path, s3_prefix) for local_path in local_paths)

    with ThreadPool(num_threads) as workers:
        workers.starmap(uploadfile, upload_args)
    


def download_files(s3_prefix, local_dir, num_threads=20):
    """
    Downloads every object listed under an s3 prefix into one local directory.

    :param s3_prefix: The s3 prefix in format s3://mybucket/mydir/
    :param local_dir: The local directory that receives all files
    :param num_threads: The number of worker threads in the pool
    """
    download_args = (
        ("s3://{}/{}".format(bucket_name, object_key), local_dir)
        for bucket_name, object_key in list_files(s3_prefix)
    )

    with ThreadPool(num_threads) as workers:
        workers.starmap(download_file, download_args)
        
        

def download_objects(s3_prefix, num_threads=20):
    """
    Reads every object listed under an s3 prefix into memory using a pool of threads.

    :param s3_prefix: The s3 prefix in format s3://mybucket/mydir/
    :param num_threads: The number of worker threads in the pool
    :return: The summed content length of all objects divided by 1024
    """
    object_uris = (
        "s3://{}/{}".format(bucket_name, object_key)
        for bucket_name, object_key in list_files(s3_prefix)
    )

    with ThreadPool(num_threads) as workers:
        content_lengths = workers.map(download_object, object_uris)

    total_length = sum(content_lengths)
    return total_length / 1024
        

def get_directory_size(start_path):
    """
    Sums the sizes of all files found below a directory, walking it recursively.

    :param start_path: The directory to walk
    :return: The total of os.path.getsize over every file that is not a symbolic link
    """
    size_total = 0
    for current_dir, _sub_dirs, file_names in os.walk(start_path):
        file_paths = (os.path.join(current_dir, name) for name in file_names)
        # Symbolic links are left out of the total.
        size_total += sum(
            os.path.getsize(path) for path in file_paths if not os.path.islink(path)
        )
    return size_total

def get_s3file_size(bucket, key):
    """
    Looks up the size of one s3 object through a head_object call.

    :param bucket: The bucket name
    :param key: The object key
    :return: The ContentLength value of the head_object response
    """
    head_response = boto3.client('s3').head_object(Bucket=bucket, Key=key)
    return head_response['ContentLength']
    
def download_files_min_files(s3_prefix, local_dir, min_file_size=310, num_threads=20):
    """
    Downloads the objects under an s3 prefix whose size is strictly greater than min_file_size.

    :param s3_prefix: The s3 prefix in format s3://mybucket/mydir/
    :param local_dir: The local directory that receives all files
    :param min_file_size: Objects are skipped unless get_s3file_size reports more than this value
    :param num_threads: The number of worker threads in the pool
    """
    def exceeds_min_size(bucket_name, object_key):
        return get_s3file_size(bucket_name, object_key) > min_file_size

    download_args = (
        ("s3://{}/{}".format(bucket_name, object_key), local_dir)
        for bucket_name, object_key in list_files(s3_prefix)
        if exceeds_min_size(bucket_name, object_key)
    )

    with ThreadPool(num_threads) as workers:
        workers.starmap(download_file, download_args)
        


In [7]:
%%time

# Fetch every object under s3_prefix into the local prediction directory
# (download_files uses its default of 20 threads).
download_files(s3_prefix, local_temp_pred_dir)

CPU times: user 3min 53s, sys: 1min 52s, total: 5min 46s
Wall time: 4min 31s


In [8]:
!ls -l $local_temp_dir | wc -l

31


In [9]:
# Minimum confidence per predicted label. A row is kept only when its
# predicted_confidence is strictly greater than the value listed for its label;
# labels without an entry here are not kept at all.
threshold_config = {
    "acetylation": 0.83,
    "deubiquitination": 0.50,
    "methylation": 0.85,
    "phosphorylation": 0.98,
    "demethylation": 0.0,
    "dephosphorylation": 0.85,
    "ubiquitination": 0.3,
}

In [10]:
%%time 

# Tallies over ALL predictions, before any thresholding:
#   total_counts[label] -> {"count": rows predicted as label,
#                           "min": lowest predicted_confidence seen for label}
total_counts = {}

# Above-threshold rows of each prediction file, in processing order. They are
# concatenated once after the loop: re-concatenating the growing result on every
# iteration copies all previously kept rows again for each file.
high_quality_per_file = []

for f in os.listdir(local_temp_pred_dir):
    df = pd.read_json(os.path.join(local_temp_pred_dir, f), orient="records", lines=True)

    count_dict = df.groupby(["predicted"])["predicted"].count().to_dict()
    min_dict = df.groupby(["predicted"])["predicted_confidence"].min().to_dict()

    for k in count_dict:
        label_totals = total_counts.setdefault(k, {})
        label_totals["count"] = label_totals.get("count", 0) + count_dict[k]
        label_totals["min"] = min(label_totals.get("min", 1.0), min_dict[k])

    # Keep only rows whose confidence is strictly above the threshold of their
    # predicted label; labels without an entry in threshold_config are dropped.
    high_quality_frames = [
        df[(df["predicted"] == label) & (df["predicted_confidence"] > threshold)]
        for label, threshold in threshold_config.items()
    ]
    high_quality_df = pd.concat(high_quality_frames)
    high_quality_per_file.append(high_quality_df)

# Files read later come first in the result (reverse processing order), and each
# file's own index labels are kept as they are.
# pd.concat raises on an empty list, so an empty directory yields None instead.
full_df = pd.concat(high_quality_per_file[::-1]) if high_quality_per_file else None
        
    
    


CPU times: user 5min 30s, sys: 1min 17s, total: 6min 47s
Wall time: 8min 52s


In [11]:
# Tabulate the per-label tallies: one column per predicted label, with the
# "count" and "min" entries as rows.
df_counts = pd.DataFrame(total_counts)

In [12]:
# Show the tallies computed over all predictions (before thresholding).
df_counts.head()

Unnamed: 0,acetylation,dephosphorylation,deubiquitination,methylation,other,phosphorylation,ubiquitination,demethylation
count,2835.0,17319.0,1145.0,13143.0,9854011.0,404850.0,354.0,1.0
min,0.166537,0.168799,0.162342,0.17894,0.1734827,0.170796,0.181956,0.377643


In [13]:
# Number of retained (above-threshold) rows per predicted label.
full_df.groupby(["predicted"])["predicted"].count().to_dict()

{'acetylation': 16,
 'demethylation': 1,
 'dephosphorylation': 180,
 'deubiquitination': 92,
 'methylation': 1909,
 'phosphorylation': 1252,
 'ubiquitination': 36}

In [14]:
# Confidence distribution of the retained rows per label; every "min" is expected
# to lie above the threshold configured for that label.
full_df.groupby(["predicted"])["predicted_confidence"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
predicted,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
acetylation,16.0,0.853635,0.01794,0.834194,0.841396,0.846593,0.860633,0.90637
demethylation,1.0,0.377643,,0.377643,0.377643,0.377643,0.377643,0.377643
dephosphorylation,180.0,0.898379,0.031899,0.850141,0.871749,0.892653,0.921433,0.962255
deubiquitination,92.0,0.560631,0.044044,0.50008,0.522097,0.547255,0.592547,0.695426
methylation,1909.0,0.888543,0.023966,0.850032,0.868216,0.886418,0.907715,0.946525
phosphorylation,1252.0,0.98418,0.002718,0.98001,0.981914,0.983852,0.986243,0.991165
ubiquitination,36.0,0.315345,0.01199,0.300192,0.30499,0.313962,0.319787,0.347559


In [15]:
# (rows, columns) of the retained predictions.
full_df.shape

(3486, 16)

In [16]:
# Fetch the s3_data object into the local working directory.
download_file(s3_data, local_temp_wk_dir)



In [17]:
# download_file stores the object under the last "/"-separated segment of its
# s3 path, so rebuild that same local path here before loading the JSON.
data_file = os.path.join(local_temp_wk_dir, s3_data.split("/")[-1])
data_training_full_df = pd.read_json(data_file)

In [18]:
# (rows, columns) of the frame loaded from s3_data.
data_training_full_df.shape

(3381, 7)

In [19]:
# Shape of the retained predictions again, for comparison with data_training_full_df.
full_df.shape

(3486, 16)

In [20]:
# Peek at the first two records loaded from s3_data.
data_training_full_df.head(n=2)

Unnamed: 0,interactionId,interactionType,isNegative,participants,pubmedId,pubmedTitle,pubmedabstract
0,1585513,phosphorylation,False,"[{'uniprotid': 'Q10728', 'alias': [['mypt1_rat...",17126281,,Zipper-interacting protein kinase (ZIP kinase)...
1,1585516,phosphorylation,False,"[{'uniprotid': 'O43293-1', 'alias': [['o43293-...",17126281,,Zipper-interacting protein kinase (ZIP kinase)...


In [21]:
# Peek at the first two retained predictions.
full_df.head(n=2)

Unnamed: 0,abstract,acetylation,confidence_scores,demethylation,dephosphorylation,deubiquitination,methylation,normalised_abstract,other,participant1Id,participant2Id,phosphorylation,predicted,predicted_confidence,pubmedId,ubiquitination
395,Fragile X syndrome is one of the most frequent...,0.80431,"{'acetylation': -0.21777061220000002, 'demethy...",0.621809,0.245314,0.514101,33.977565,Fragile X syndrome is one of the most frequent...,1.754072,P00995,Q06787,0.44677,methylation,0.874934,8372665,0.47048
2783,The experiments reported here document that th...,0.286923,"{'acetylation': -1.2485398501, 'demethylation'...",0.181673,0.359273,0.251972,0.231727,The experiments reported here document that th...,4.234165,P06400,P15172,308.552267,phosphorylation,0.981568,8381715,0.248378


In [22]:
# Shape of the retained predictions whose pubmedId does NOT occur in data_training_full_df.
full_df[~full_df.pubmedId.isin(data_training_full_df.pubmedId)].shape

(3253, 16)

In [23]:
# Flag every retained prediction whose pubmedId also occurs in data_training_full_df.
# Note: this adds the column to full_df in place.
full_df["PubmedInTrainingData"] = full_df.pubmedId.isin(data_training_full_df.pubmedId)

In [24]:
# Per-label confidence distribution restricted to rows whose pubmedId is not in data_training_full_df.
full_df.query("not PubmedInTrainingData").groupby(["predicted"])["predicted_confidence"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
predicted,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
acetylation,14.0,0.848446,0.009895,0.834194,0.841258,0.845236,0.854201,0.869485
dephosphorylation,133.0,0.888944,0.023922,0.850141,0.870096,0.887317,0.902896,0.943967
deubiquitination,88.0,0.557122,0.040684,0.50008,0.520021,0.545226,0.589662,0.659376
methylation,1888.0,0.888159,0.02365,0.850032,0.868155,0.885885,0.907304,0.944177
phosphorylation,1099.0,0.983993,0.002639,0.98001,0.98181,0.98362,0.986003,0.991165
ubiquitination,31.0,0.314249,0.011414,0.300192,0.304916,0.313602,0.317595,0.347559


In [25]:

# Display settings for the cells below: show all columns, allow up to 10000
# characters per cell (so long text columns are not truncated) and up to 100 rows.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 10000)
pd.set_option('display.max_rows', 100)

# Default font size for any matplotlib figures.
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size': 12})

In [26]:
# Persist the retained predictions to predictions.csv in the current working
# directory, with a header row and without the index column.
full_df.to_csv("predictions.csv", index=False, header=True)

In [27]:
# Draw 20 retained predictions for manual inspection. The fixed random_state makes
# the same rows come back on every run, so the displayed sample is reproducible.
review_columns = [
    "abstract",
    "normalised_abstract",
    "participant1Id",
    "participant2Id",
    "pubmedId",
    "predicted",
    "predicted_confidence",
]
full_df[review_columns].sample(n=20, random_state=42)

Unnamed: 0,abstract,normalised_abstract,participant1Id,participant2Id,pubmedId,predicted,predicted_confidence
7511,"Gene activity is regulated by transcriptional and epigenetic mechanisms. A paper in 2005 by Zhang et al.(1) showed that STAT3 binds to the DNA methyl transferase, DNMT1 and their data indicated that STAT3 may cause epigenetic gene silencing by targeting DNMT1 to the PTPN6 promoter. Now, a paper by Lee et al.(2) has fleshed out the mechanism. They provide evidence that acetylation of STAT3 regulates the binding of DNMT1, CpG DNA methylation and regulation of several genes, including that encoding the estrogen receptor (ESR1) in breast cancer cells.","Gene activity is regulated by transcriptional and epigenetic mechanisms. A paper in 2005 by Zhang et al.(1) showed that P40763 binds to the DNA methyl transferase, P26358 and their data indicated that P40763 may cause epigenetic gene silencing by targeting P26358 to the P29350 promoter. Now, a paper by Lee et al.(2) has fleshed out the mechanism. They provide evidence that acetylation of P40763 regulates the binding of P26358, CpG DNA methylation and regulation of several genes, including that encoding the P03372 (P03372) in breast cancer cells.",P26358,P40763,24058781,methylation,0.870079
13650,"The cells in the preimplantation mammalian embryo undergo several rounds of fast cell division. Whether the known DNA repair pathways are active during these early stages of development where cell division is of primary importance, has not been fully established. Because of the important role of phosphorylated H2A.X (gammaH2A.X) in the DNA damage response as well as its putative role in assembly of embryonic chromatin, we analysed its distribution in the preimplantation mouse embryo. We found that H2A.X is highly phosphorylated throughout preimplantation development in the absence of any induced DNA damage. Moreover, gammaH2A.X levels vary significantly throughout the cell cycle. Interestingly, after the 4-cell stage, we detected high levels of H2A.X phosphorylation in mitosis, where telomeres appeared focally enriched with gammaH2A.X. In contrast, 53BP1, which is known to be recruited to DNA damage sites, is undetectable at mitotic chromosomes at these stages and its localisation changes upon blastocyst formation from mainly nuclear to cytoplasmic. We also show that 53BP1 and gammaH2A.X rarely colocalise, suggesting that the high levels of phosphorylation of H2A.X in the embryo might not be directly linked to the DNA damage response in the embryo. Our data suggest that phosphorylation of H2A.X is an important event in the fast dividing cells of the early embryo in the absence of any induced DNA damage. We discuss the possible consequences of these findings on the genome-wide chromatin remodelling that ocurs in the preimplantation mammalian embryo.","The cells in the preimplantation mammalian embryo undergo several rounds of fast cell division. Whether the known DNA repair pathways are active during these early stages of development where cell division is of primary importance, has not been fully established. 
Because of the important role of phosphorylated P16104 (gammaH2A.X) in the DNA damage response as well as its putative role in assembly of embryonic chromatin, we analysed its distribution in the preimplantation mouse embryo. We found that P16104 is highly phosphorylated throughout preimplantation development in the absence of any induced DNA damage. Moreover, gammaH2A.X levels vary significantly throughout the cell cycle. Interestingly, after the 4-cell stage, we detected high levels of P16104 phosphorylation in mitosis, where telomeres appeared focally enriched with gammaH2A.X. In contrast, Q12888, which is known to be recruited to DNA damage sites, is undetectable at mitotic chromosomes at these stages and its localisation changes upon blastocyst formation from mainly nuclear to cytoplasmic. We also show that Q12888 and gammaH2A.X rarely colocalise, suggesting that the high levels of phosphorylation of P16104 in the embryo might not be directly linked to the DNA damage response in the embryo. Our data suggest that phosphorylation of P16104 is an important event in the fast dividing cells of the early embryo in the absence of any induced DNA damage. We discuss the possible consequences of these findings on the genome-wide chromatin remodelling that ocurs in the preimplantation mammalian embryo.",P16104,Q12888,19598117,phosphorylation,0.981332
6593,"The non-genomic membrane bound oestrogen receptor (mER) regulates intracellular signals through receptor-ligand interactions. The mER, along with G-protein coupled oestrogen receptor GPR 30 (GPER), induces diverse cell signalling pathways in murine lymphocytes. The mER isoform ER-alpha46 has recently been demonstrated in human B and T lymphocytes as an analogue receptor for chemokine CCL18, the signalling events of which are not clearly understood. Ligand-induced mER and GPER signalling events are shared with BCR, CD19 mediated intracellular signalling through phospholipase C, PIP2/IP3/PI3 mediated activation of Akt, MAP kinase, and mTOR. Oestrogen has the ability to induce CD40-mediated activation of B cells. The complete signalling pathways of mER, GPR30 and their interaction with other signals are targeted areas for novel drug development in B cells during infection, autoimmunity and cancer. Therefore, an in depth investigation is critical for determining shared signal outputs during B cell activation. Here, we focus on the mode of action of membrane bound ER in B cells as therapeutic checkpoints.","The non-genomic Q99527 (Q99527) regulates intracellular signals through receptor-ligand interactions. The Q99527, along with Q99527 (Q99527), induces diverse cell signalling pathways in murine lymphocytes. The Q99527 isoform P0337246 has recently been demonstrated in human B and T lymphocytes as an analogue receptor for chemokine P55774, the signalling events of which are not clearly understood. Ligand-induced Q99527 and Q99527 signalling events are shared with BCR, P15391 mediated intracellular signalling through phospholipase C, PIP2/P19957 mediated activation of Akt, MAP kinase, and P42345. Oestrogen has the ability to induce P25942-mediated activation of B cells. The complete signalling pathways of Q99527, Q99527 and their interaction with other signals are targeted areas for novel drug development in B cells during infection, autoimmunity and cancer. 
Therefore, an in depth investigation is critical for determining shared signal outputs during B cell activation. Here, we focus on the mode of action of membrane bound ER in B cells as therapeutic checkpoints.",P03372,Q99527,27189345,phosphorylation,0.982047
895,"The retinoblastoma binding protein KDM5A removes methyl marks from lysine 4 of histone H3 (H3K4). Misregulation of KDM5A contributes to the pathogenesis of lung and gastric cancers. In addition to its catalytic jumonji C domain, KDM5A contains three PHD reader domains, commonly recognized as chromatin recruitment modules. It is unknown whether any of these domains in KDM5A have functions beyond recruitment and whether they regulate the catalytic activity of the demethylase. Here using biochemical and nuclear magnetic resonance (NMR)-based structural studies, we show that the PHD1 preferentially recognizes unmethylated H3K4 histone tail, product of KDM5A-mediated demethylation of tri-methylated H3K4 (H3K4me3). Binding of unmodified H3 peptide to the PHD1 stimulates catalytic domain-mediated removal of methyl marks from H3K4me3 peptide and nucleosome substrates. This positive-feedback mechanism--enabled by the functional coupling between a reader and a catalytic domain in KDM5A--suggests a model for the spread of demethylation on chromatin.","The retinoblastoma binding protein P29375 removes methyl marks from lysine 4 of histone H3 (H3K4). Misregulation of P29375 contributes to the pathogenesis of lung and gastric cancers. In addition to its catalytic jumonji C domain, P29375 contains three PHD reader domains, commonly recognized as chromatin recruitment modules. It is unknown whether any of these domains in P29375 have functions beyond recruitment and whether they regulate the catalytic activity of the demethylase. Here using biochemical and nuclear magnetic resonance (NMR)-based structural studies, we show that the Q96KS0 preferentially recognizes unmethylated H3K4 histone tail, product of P29375-mediated demethylation of tri-methylated H3K4 (H3K4me3). Binding of unmodified H3 peptide to the Q96KS0 stimulates catalytic domain-mediated removal of methyl marks from H3K4me3 peptide and nucleosome substrates. 
This positive-feedback mechanism--enabled by the functional coupling between a reader and a catalytic domain in P29375--suggests a model for the spread of demethylation on chromatin.",P29375,Q96KS0,25686748,methylation,0.866325
15505,"Aberrations in DNA methylation patterns promote changes in gene expression patterns and are invariably associated with neoplasia. DNA methylation is carried out and maintained by several DNA methyltransferases (DNMTs) among which DNMT1 functions as a maintenance methylase while DNMT3a and 3b serve as de novo enzymes. Although DNMT3b has been shown to preferentially target the methylation of DNA sequences residing in pericentric heterochromatin whether it is involved in gene specific methylation remains an open question. To address this issue, we have silenced the expression of DNMT3b in the prostate-derived PC3 cells through RNA interference and subsequently studied the accompanied cellular changes as well as the expression profiles of selected genes.","Aberrations in DNA methylation patterns promote changes in gene expression patterns and are invariably associated with neoplasia. DNA methylation is carried out and maintained by several DNA methyltransferases (DNMTs) among which P26358 functions as a maintenance methylase while 1788;1789 serve as de novo enzymes. Although Q9UBC3 has been shown to preferentially target the methylation of DNA sequences residing in pericentric heterochromatin whether it is involved in gene specific methylation remains an open question. To address this issue, we have silenced the expression of Q9UBC3 in the prostate-derived PC3 cells through RNA interference and subsequently studied the accompanied cellular changes as well as the expression profiles of selected genes.",P26358,Q9UBC3,18798999,methylation,0.939795
12473,"Protein kinase C delta (PKC) is a serine (Ser)/threonine kinase, which regulates numerous cellular processes, including proliferation, differentiation, migration and apoptosis. In the current study, Chinese hamster ovary cells were transfected with either a constitutively activated PKC or a dominant negative PKC, phosphoprotein enrichment, two-dimensional difference gel electrophoresis and mass spectrometry was combined to globally identified candidates of PKC cascade. We found that Bcl-2 associated athanogene 3 (BAG3) was one of the targets of PKC cascade, and BAG3 interacted with PKC in vivo. In addition, we clarified that BAG3 was phosphorylate at Ser187 site in a PKC-dependent manner in vivo. BAG3 has been implicated in multiple cellular functions, including proliferation, differentiation, apoptosis, migration, invasion, macroautophagy and so on. We generated wild-type (WT)-, Ser187Ala (S187A)- or Ser187Asp (S187D)-BAG3 stably expressing FRO cells, and noticed that phosphorylation state of BAG3 influenced FRO morphology. Finally, for the first time, we showed that BAG3 was implicated in epithelial-mesenchymal transition (EMT) procedure, and phosphorylation state at Ser187 site had a critical role in EMT regulation by BAG3. Collectively, the current study indicates that BAG3 is a novel substrate of PKC, and PKC-mediated phosphorylation of BAG3 is implicated in EMT and invasiveness of thyroid cancer cells.","Q05655 (Q05655) is a serine (Ser)/threonine kinase, which regulates numerous cellular processes, including proliferation, differentiation, migration and apoptosis. In the current study, Chinese hamster ovary cells were transfected with either a constitutively activated Q05655 or a dominant negative Q05655, phosphoprotein enrichment, two-dimensional difference gel electrophoresis and mass spectrometry was combined to globally identified candidates of Q05655 cascade. 
We found that 9531;596 (O95817) was one of the targets of Q05655 cascade, and O95817 interacted with Q05655 in vivo. In addition, we clarified that O95817 was phosphorylate at Ser187 site in a Q05655-dependent manner in vivo. O95817 has been implicated in multiple cellular functions, including proliferation, differentiation, apoptosis, migration, invasion, macroautophagy and so on. We generated wild-type (WT)-, Ser187Ala (S187A)- or Ser187Asp (S187D)-O95817 stably expressing FRO cells, and noticed that phosphorylation state of O95817 influenced FRO morphology. Finally, for the first time, we showed that O95817 was implicated in epithelial-mesenchymal transition (EMT) procedure, and phosphorylation state at Ser187 site had a critical role in EMT regulation by O95817. Collectively, the current study indicates that O95817 is a novel substrate of Q05655, and Q05655-mediated phosphorylation of O95817 is implicated in EMT and invasiveness of thyroid cancer cells.",O95817,Q05655,23108398,phosphorylation,0.983797
12965,"The Dnmt3a DNA methyltransferase is responsible for establishing DNA methylation patterns during mammalian development. We show here that the mouse Dnmt3a DNA methyltransferase is able to transfer the methyl group from S-adenosyl-l-methionine (AdoMet) to a cysteine residue in its catalytic center. This reaction is irreversible and relatively slow. The yield of auto-methylation is increased by addition of Dnmt3L, which functions as a stimulator of Dnmt3a and enhances its AdoMet binding. Auto-methylation was observed in binary Dnmt3a AdoMet complexes. In the presence of CpG containing dsDNA, which is the natural substrate for Dnmt3a, the transfer of the methyl group from AdoMet to the flipped target base was preferred and auto-methylation was not detected. Therefore, this reaction might constitute a regulatory mechanism which could inactivate unused DNA methyltransferases in the cell, or it could simply be an aberrant side reaction caused by the high methyl group transfer potential of AdoMet.","The Q9Y6K1 DNA methyltransferase is responsible for establishing DNA methylation patterns during mammalian development. We show here that the mouse Q9Y6K1 DNA methyltransferase is able to transfer the methyl group from S-adenosyl-l-methionine (AdoMet) to a cysteine residue in its catalytic center. This reaction is irreversible and relatively slow. The yield of auto-methylation is increased by addition of Q9UJW3, which functions as a stimulator of Q9Y6K1 and enhances its AdoMet binding. Auto-methylation was observed in binary Q9Y6K1 AdoMet complexes. In the presence of CpG containing dsDNA, which is the natural substrate for Q9Y6K1, the transfer of the methyl group from AdoMet to the flipped target base was preferred and auto-methylation was not detected. 
Therefore, this reaction might constitute a regulatory mechanism which could inactivate unused DNA methyltransferases in the cell, or it could simply be an aberrant side reaction caused by the high methyl group transfer potential of AdoMet.",Q9UJW3,Q9Y6K1,21481189,methylation,0.933695
10041,"Proteins are frequently modified by post-translational methylation of lysine residues, catalyzed by S-adenosylmethionine-dependent lysine methyltransferases (KMTs). Lysine methylation of histone proteins has been extensively studied, but it has recently become evident that methylation of non-histone proteins is also abundant and important. The human methyltransferase METTL20 belongs to a group of 10 established and putative human KMTs. We here found METTL20 to be associated with mitochondria and determined that recombinant METTL20 methylated a single protein in extracts from human cells. Using an methyltransferase activity-based purification scheme, we identified the -subunit of the mitochondrially localized electron transfer flavoprotein (ETF) as the substrate of METTL20. Furthermore, METTL20 was found to specifically methylate two adjacent lysine residues, Lys(200) and Lys(203), in ETF both in vitro and in cells. Interestingly, the residues methylated by METTL20 partially overlap with the so-called ""recognition loop"" in ETF, which has been shown to mediate its interaction with various dehydrogenases. Accordingly, we found that METTL20-mediated methylation of ETF in vitro reduced its ability to receive electrons from the medium chain acyl-CoA dehydrogenase and the glutaryl-CoA dehydrogenase. In conclusion, the present study establishes METTL20 as the first human KMT localized to mitochondria and suggests that it may regulate cellular metabolism through modulating the interaction between its substrate ETF and dehydrogenases. Based on the previous naming of similar enzymes, we suggest the renaming of human METTL20 to ETF-KMT.","Proteins are frequently modified by post-translational methylation of lysine residues, catalyzed by S-adenosylmethionine-dependent lysine methyltransferases (KMTs). 
Lysine methylation of histone proteins has been extensively studied, but it has recently become evident that methylation of non-histone proteins is also abundant and important. The human methyltransferase Q8IXQ9 belongs to a group of 10 established and putative human KMTs. We here found Q8IXQ9 to be associated with mitochondria and determined that recombinant Q8IXQ9 methylated a single protein in extracts from human cells. Using an methyltransferase activity-based purification scheme, we identified the -subunit of the mitochondrially localized electron transfer flavoprotein (ETF) as the substrate of Q8IXQ9. Furthermore, Q8IXQ9 was found to specifically methylate two adjacent lysine residues, Lys(200) and Lys(203), in ETF both in vitro and in cells. Interestingly, the residues methylated by Q8IXQ9 partially overlap with the so-called ""recognition loop"" in ETF, which has been shown to mediate its interaction with various dehydrogenases. Accordingly, we found that Q8IXQ9-mediated methylation of ETF in vitro reduced its ability to receive electrons from the P11310 and the Q92947. In conclusion, the present study establishes Q8IXQ9 as the first human KMT localized to mitochondria and suggests that it may regulate cellular metabolism through modulating the interaction between its substrate ETF and dehydrogenases. Based on the previous naming of similar enzymes, we suggest the renaming of human Q8IXQ9 to ETF-KMT.",Q8IXQ9,Q92947,25416781,methylation,0.917449
12186,"Dimethylation of histone H3 Arg2 (H3R2me2) maintains transcriptional silencing by inhibiting Set1 mediated trimethylation of H3K4. Here we demonstrate that Arg2 is also monomethylated (H3R2me1) in yeast but that its functional characteristics are distinct from H3R2me2: (i) H3R2me1 does not inhibit histone H3 Lys4 (H3K4) methylation; (ii) it is present throughout the coding region of genes; and (iii) it correlates with active transcription. Collectively, these results indicate that different H3R2 methylation states have defined roles in gene expression.","Dimethylation of histone H3 P78540 (H3R2me2) maintains transcriptional silencing by inhibiting O15047 mediated trimethylation of H3K4. Here we demonstrate that P78540 is also monomethylated (H3R2me1) in yeast but that its functional characteristics are distinct from H3R2me2: (i) H3R2me1 does not inhibit histone H3 Lys4 (H3K4) methylation; (ii) it is present throughout the coding region of genes; and (iii) it correlates with active transcription. Collectively, these results indicate that different H3R2 methylation states have defined roles in gene expression.",O15047,P78540,19270702,methylation,0.874299
5037,"RUNX1 (previously termed AML1) is a frequent target of human leukaemia-associated gene aberrations, and it encodes the DNA-binding subunit of the Core-Binding Factor transcription factor complex. RUNX1 expression is essential for the initiation of definitive haematopoiesis, for steady-state thrombopoiesis, and for normal lymphocytes development. Recent studies revealed that protein arginine methyltransferase 1 (PRMT1), which accounts for the majority of the type I PRMT activity in cells, methylates two arginine residues in RUNX1 (R206 and R210), and these modifications inhibit corepressor-binding to RUNX1 thereby enhancing its transcriptional activity. In order to elucidate the biological significance of these methylations, we established novel knock-in mouse lines with non-methylable, double arginine-to-lysine (RTAMR-to-KTAMK) mutations in RUNX1. Homozygous Runx1(KTAMK) (/) (KTAMK) mice are born alive and appear normal during adulthood. However, Runx1(KTAMK) (/) (KTAMK) mice showed a reduction in CD3(+) T lymphoid cells and a decrease in CD4(+) T cells in peripheral lymphoid organs, in comparison to their wild-type littermates, leading to a reduction in the CD4(+) to CD8(+) T-cell ratio. These findings suggest that arginine-methylation of RUNX1 in the RTAMR-motif is dispensable for the development of definitive haematopoiesis and for steady-state platelet production, however this modification affects the role of RUNX1 in the maintenance of the peripheral CD4(+) T-cell population.","Q01196 (previously termed Q01196) is a frequent target of human leukaemia-associated gene aberrations, and it encodes the DNA-binding subunit of the Core-Binding Factor transcription factor complex. Q01196 expression is essential for the initiation of definitive haematopoiesis, for steady-state thrombopoiesis, and for normal lymphocytes development. 
Recent studies revealed that Q99873 (Q99873), which accounts for the majority of the type I PRMT activity in cells, methylates two arginine residues in Q01196 (R206 and R210), and these modifications inhibit corepressor-binding to Q01196 thereby enhancing its transcriptional activity. In order to elucidate the biological significance of these methylations, we established novel knock-in mouse lines with non-methylable, double arginine-to-lysine (RTAMR-to-KTAMK) mutations in Q01196. Homozygous Q01196(KTAMK) (/) (KTAMK) mice are born alive and appear normal during adulthood. However, Q01196(KTAMK) (/) (KTAMK) mice showed a reduction in CD3(+) T lymphoid cells and a decrease in P01730(+) T cells in peripheral lymphoid organs, in comparison to their wild-type littermates, leading to a reduction in the P01730(+) to P01732(+) T-cell ratio. These findings suggest that arginine-methylation of Q01196 in the RTAMR-motif is dispensable for the development of definitive haematopoiesis and for steady-state platelet production, however this modification affects the role of Q01196 in the maintenance of the peripheral P01730(+) T-cell population.",Q01196,Q99873,26010396,methylation,0.929019
