In [2]:
import pandas as pd
import os

In [3]:
# S3 prefix that holds the prediction files to analyse. It must end with "/"
# because list_files() asserts that on its argument.
# NOTE(review): "asbtract" is spelled this way in the path itself; presumably it
# matches the real S3 key, so verify against the bucket before "correcting" it.
s3_prefix = "s3://aegovan-data/pubmed_asbtract/predictions_multi_00/"
# S3 location of the JSON file that is later loaded as data_training_full_df.
s3_data ="s3://aegovan-data/human_output/human_interactions_ppi_v2.json"

In [4]:
# Local scratch layout: prediction files are downloaded into
# <local_temp>/pred_results, the training data file into <local_temp>/wk.
local_temp = "temp"
local_temp_pred_dir = os.path.join( local_temp, "pred_results")
local_temp_wk_dir = os.path.join( local_temp, "wk")

In [None]:
# Start from a clean scratch area: delete any previous temp tree, then
# recreate both working sub-directories (IPython expands $name from the
# notebook namespace before handing the line to the shell).
!rm -rf $local_temp
!mkdir -p $local_temp_pred_dir
!mkdir -p $local_temp_wk_dir

In [None]:
#!aws s3 cp s3://aegovan-data/pubmed_asbtract/predictions_multi_95/pubmed19n0538.json.txt.json.prediction.json .

In [18]:
import boto3
import glob
from multiprocessing.dummy import Pool as ThreadPool
import argparse
import datetime 
import os


def uploadfile(localpath, s3path):
    """
    Upload one local file to S3.

    :param localpath: Path of the local file to upload.
    :param s3path: Destination in the format s3://mybucket/mydir/mysample.txt.
        When the key part ends with "/", the base name of ``localpath`` is
        appended to it, so the file keeps its local name under that prefix.
    """
    bucket, key = get_bucketname_key(s3path)

    # A trailing slash denotes a "directory" target: reuse the local file name.
    if key.endswith("/"):
        key = "{prefix}{name}".format(prefix=key, name=os.path.basename(localpath))

    boto3.client('s3').upload_file(localpath, bucket, key)

def get_bucketname_key(uripath):
    """
    Split an S3 URI into its bucket name and object key.

    :param uripath: URI in the format s3://mybucket/some/key. Anything that
        does not start with "s3://" fails the assertion.
    :return: Tuple ``(bucket_name, key)``. The key is everything after the
        first "/" that follows the bucket name; when the URI has no such
        slash (e.g. "s3://mybucket") the key is "/".
    """
    assert uripath.startswith("s3://")

    remainder = uripath[len("s3://"):]
    bucket_name, separator, tail = remainder.partition("/")

    # No slash after the bucket name: fall back to "/" as the key.
    key = tail if separator else "/"
    return bucket_name, key


def download_file(s3path, local_dir):
    """
    Download a single S3 object into a local directory.

    :param s3path: Source object in the format s3://mybucket/mydir/mysample.txt.
    :param local_dir: Directory that receives the file. The local file name is
        the last "/"-separated segment of ``s3path``.
    """
    bucket, key = get_bucketname_key(s3path)
    client = boto3.client('s3')

    file_name = s3path.rsplit("/", 1)[-1]
    destination = os.path.join(local_dir, file_name)

    client.download_file(bucket, key, destination)
    
def download_object(s3path):
    """
    Read an S3 object fully into memory and report how large it was.

    :param s3path: Object location in the format s3://mybucket/mydir/mysample.txt.
    :return: Length, in bytes, of the object's body. The content itself is
        discarded.
    """
    bucket, key = get_bucketname_key(s3path)

    response = boto3.client('s3').get_object(Bucket=bucket, Key=key)
    payload = response['Body'].read()

    return len(payload)



def list_files(s3path_prefix):
    """
    Enumerate the objects stored under an S3 prefix.

    :param s3path_prefix: Prefix in the format s3://mybucket/mydir/ ; it must
        start with "s3://" and end with "/" (both are asserted).
    :return: Generator of ``(bucket_name, key)`` tuples, one per object whose
        key starts with the prefix.
    """
    assert s3path_prefix.startswith("s3://")
    assert s3path_prefix.endswith("/")

    bucket_name, key_prefix = get_bucketname_key(s3path_prefix)

    matching = boto3.resource('s3').Bucket(name=bucket_name).objects.filter(Prefix=key_prefix)

    return ((summary.bucket_name, summary.key) for summary in matching)





def upload_files(local_dir, s3_prefix, num_threads=20):
    """
    Upload every entry matched by ``<local_dir>/*`` to S3 using a thread pool.

    :param local_dir: Local directory whose direct entries are uploaded.
    :param s3_prefix: Destination passed unchanged to ``uploadfile`` for each entry.
    :param num_threads: Number of worker threads in the pool.
    """
    pattern = "{}/*".format(local_dir)
    jobs = ((path, s3_prefix) for path in glob.glob(pattern))

    with ThreadPool(num_threads) as workers:
        workers.starmap(uploadfile, jobs)
    


def download_files(s3_prefix, local_dir, num_threads=20):
    """
    Download every object under an S3 prefix into a local directory.

    :param s3_prefix: Prefix in the format s3://mybucket/mydir/ (must end with "/").
    :param local_dir: Existing local directory that receives the files; each
        file is named after the last path segment of its key.
    :param num_threads: Number of worker threads used for the downloads.
    """
    input_tuples = (
        ("s3://{}/{}".format(s3_bucket, s3_key), local_dir)
        for s3_bucket, s3_key in list_files(s3_prefix)
        # Keys ending in "/" are zero-byte "folder" placeholders. They have no
        # file name, so download_file would target local_dir itself and fail,
        # aborting the whole batch.
        if not s3_key.endswith("/")
    )

    with ThreadPool(num_threads) as pool:
        pool.starmap(download_file, input_tuples)
        
        

def download_objects(s3_prefix, num_threads=20):
    """
    Read every object under an S3 prefix into memory and total their sizes.

    :param s3_prefix: Prefix in the format s3://mybucket/mydir/ (must end with "/").
    :param num_threads: Number of worker threads used for the reads.
    :return: Sum of all object body lengths in bytes, divided by 1024.
    """
    object_uris = (
        "s3://{}/{}".format(bucket_name, object_key)
        for bucket_name, object_key in list_files(s3_prefix)
    )

    with ThreadPool(num_threads) as workers:
        byte_counts = workers.map(download_object, object_uris)

    return sum(byte_counts) / 1024
        

def get_directory_size(start_path):
    """
    Total the sizes of all regular files below a directory.

    :param start_path: Directory to walk recursively.
    :return: Sum of ``os.path.getsize`` over every file found, in bytes.
        Symbolic links are not counted; the result is 0 when nothing is found.
    """
    return sum(
        os.path.getsize(file_path)
        for root, _subdirs, names in os.walk(start_path)
        for file_path in (os.path.join(root, name) for name in names)
        # symbolic links are skipped
        if not os.path.islink(file_path)
    )

def get_s3file_size(bucket, key):
    """
    Look up the size of an S3 object without downloading it.

    :param bucket: Bucket name.
    :param key: Object key inside the bucket.
    :return: The ``ContentLength`` value of the object's HEAD response.
    """
    head = boto3.client('s3').head_object(Bucket=bucket, Key=key)
    return head['ContentLength']
    
def download_files_min_files(s3_prefix, local_dir, min_file_size=310, num_threads=20):
    """
    Download the objects under an S3 prefix that are larger than a size limit.

    :param s3_prefix: Prefix in the format s3://mybucket/mydir/ (must end with "/").
    :param local_dir: Local directory that receives the files.
    :param min_file_size: Only objects whose size is strictly greater than this
        value (as reported by ``get_s3file_size``) are downloaded.
    :param num_threads: Number of worker threads used for the downloads.
    """
    listing = list_files(s3_prefix)

    def _large_enough_jobs():
        # One HEAD request per listed object decides whether it is fetched.
        for bucket_name, object_key in listing:
            if get_s3file_size(bucket_name, object_key) > min_file_size:
                yield "s3://{}/{}".format(bucket_name, object_key), local_dir

    with ThreadPool(num_threads) as workers:
        workers.starmap(download_file, _large_enough_jobs())
        


In [None]:
%%time

# Fetch every object under s3_prefix into the local prediction directory.
download_files(s3_prefix, local_temp_pred_dir)

In [5]:
!ls -l $local_temp_dir | wc -l

30


In [38]:
# Per-class confidence cut-offs: a prediction is kept by the filtering loop
# only when its predicted_confidence is strictly greater than the value
# listed for its predicted class.
threshold_config = {
    "acetylation": 0.83,
    "deubiquitination": 0.50,
    "methylation": 0.85,
    "phosphorylation": 0.98,
    "demethylation": 0.0,
    "dephosphorylation": 0.85,
    "ubiquitination": 0.3,
}

In [61]:
%%time 

full_df = None
total_counts = {}
for f in os.listdir(local_temp_pred_dir):
    df = pd.read_json(os.path.join(local_temp_pred_dir, f), orient="records", lines=True )
    
    count_dict = df.groupby(["predicted"])["predicted"].count().to_dict()
    min_dict = df.groupby(["predicted"])["predicted_confidence"].min().to_dict()
    
    for k in count_dict:
        if k not in total_counts:
            total_counts[k] = {}
        total_counts[k]["count"] = total_counts[k].get("count", 0) + count_dict[k]
        total_counts[k]["min"] = min(total_counts[k].get("min", 1.0) , min_dict[k])



    # Filter below threshold items
    high_quality_frames = []
    for k,t in threshold_config.items():
        high_quality_frames.append(df.query("predicted == '{}' and predicted_confidence > {}".format(k, t)))
        
    high_quality_df = pd.concat(high_quality_frames)
    
    
    
    if full_df is None:
        full_df = high_quality_df
    else:
        full_df = pd.concat([high_quality_df, full_df])
        
    
    


CPU times: user 5min 48s, sys: 1min 25s, total: 7min 14s
Wall time: 8min 44s


In [None]:
total_counts

In [57]:
# Number of retained (above-threshold) predictions per predicted class.
full_df.groupby(["predicted"])["predicted"].count().to_dict()

{'acetylation': 16,
 'demethylation': 1,
 'dephosphorylation': 180,
 'deubiquitination': 92,
 'methylation': 1909,
 'phosphorylation': 1252,
 'ubiquitination': 36}

In [55]:
# Summary statistics of predicted_confidence for the retained predictions,
# broken down by predicted class.
full_df.groupby(["predicted"])["predicted_confidence"].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
predicted,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
acetylation,16.0,0.853635,0.01794,0.834194,0.841396,0.846593,0.860633,0.90637
demethylation,1.0,0.377643,,0.377643,0.377643,0.377643,0.377643,0.377643
dephosphorylation,180.0,0.898379,0.031899,0.850141,0.871749,0.892653,0.921433,0.962255
deubiquitination,92.0,0.560631,0.044044,0.50008,0.522097,0.547255,0.592547,0.695426
methylation,1909.0,0.888543,0.023966,0.850032,0.868216,0.886418,0.907715,0.946525
phosphorylation,1252.0,0.98418,0.002718,0.98001,0.981914,0.983852,0.986243,0.991165
ubiquitination,36.0,0.315345,0.01199,0.300192,0.30499,0.313962,0.319787,0.347559


In [42]:
full_df.shape

(3486, 16)

In [43]:
# Download the s3_data JSON file into the local working directory.
download_file(s3_data, local_temp_wk_dir)



In [44]:
# download_file() names the local copy after the last path segment of the S3
# URI, so rebuild that same path here before loading it.
data_file = os.path.join(local_temp_wk_dir, s3_data.split("/")[-1])
data_training_full_df = pd.read_json(data_file)

In [45]:
data_training_full_df.shape

(3381, 7)

In [46]:
full_df.shape

(3486, 16)

In [47]:
data_training_full_df.head(n=2)

Unnamed: 0,interactionId,interactionType,isNegative,participants,pubmedId,pubmedTitle,pubmedabstract
0,1585513,phosphorylation,False,"[{'uniprotid': 'Q10728', 'alias': [['mypt1_rat'], ['Protein phosphatase 1 regulatory subunit 12A'], ['Ppp1r12a'], ['Mbs'], ['Mypt1'], ['Myosin phosphatase-targeting subunit 1'], ['Protein phosphatase myosin-binding subunit'], ['MBSP'], ['Serine/threonine protein phosphatase PP1 smooth muscle regulatory subunit M110'], ['Protein phosphatase subunit 1M']], 'alternative_uniprots': ['Q62937', 'Q9WU33']}, {'uniprotid': 'O43293-2', 'alias': [['o43293-2'], ['Death-associated protein kinase 3'], ['ZIPK-S'], ['DAPK3'], ['ZIPK'], ['DAP-like kinase'], ['MYPT1 kinase'], ['Zipper-interacting protein kinase']], 'alternative_uniprots': []}]",17126281,,"Zipper-interacting protein kinase (ZIP kinase) has been thought to be involved in apoptosis and the C-terminal leucine zipper motif is important for its function. Recent studies have revealed that ZIP kinase also plays a role in regulating myosin phosphorylation. Here, we found novel ZIP kinase isoform in which the C-terminal non-kinase domain containing a leucine zipper is eliminated (hZIPK-S). hZIPK-S binds to myosin phosphatase targeting subunit 1(MYPT1) similar to the long isoform (hZIPK-L). In addition, we found that hZIPK-S as well as hZIPK-L bind to myosin. These results indicate that a leucine zipper is not critical for the binding of ZIP kinase to MYPT1 and myosin. Consistently, hZIPK-S localized with stress-fibers where they co-localized with myosin. The residues 278-311, the C-terminal side of the kinase domain common to the both isoforms, is involved in the binding to MYPT1, while the myosin binding domain is within the kinase domain. These results suggest that the newly found hZIPK-S as well as the long isoform play an important role in the regulation of myosin phosphorylation."
1,1585516,phosphorylation,False,"[{'uniprotid': 'O43293-1', 'alias': [['o43293-1'], ['Death-associated protein kinase 3'], ['ZIPK-L'], ['DAPK3'], ['ZIPK'], ['DAP-like kinase'], ['MYPT1 kinase'], ['Zipper-interacting protein kinase']], 'alternative_uniprots': []}, {'uniprotid': 'Q10728', 'alias': [['mypt1_rat'], ['Protein phosphatase 1 regulatory subunit 12A'], ['Ppp1r12a'], ['Mbs'], ['Mypt1'], ['Myosin phosphatase-targeting subunit 1'], ['Protein phosphatase myosin-binding subunit'], ['MBSP'], ['Serine/threonine protein phosphatase PP1 smooth muscle regulatory subunit M110'], ['Protein phosphatase subunit 1M']], 'alternative_uniprots': ['Q62937', 'Q9WU33']}]",17126281,,"Zipper-interacting protein kinase (ZIP kinase) has been thought to be involved in apoptosis and the C-terminal leucine zipper motif is important for its function. Recent studies have revealed that ZIP kinase also plays a role in regulating myosin phosphorylation. Here, we found novel ZIP kinase isoform in which the C-terminal non-kinase domain containing a leucine zipper is eliminated (hZIPK-S). hZIPK-S binds to myosin phosphatase targeting subunit 1(MYPT1) similar to the long isoform (hZIPK-L). In addition, we found that hZIPK-S as well as hZIPK-L bind to myosin. These results indicate that a leucine zipper is not critical for the binding of ZIP kinase to MYPT1 and myosin. Consistently, hZIPK-S localized with stress-fibers where they co-localized with myosin. The residues 278-311, the C-terminal side of the kinase domain common to the both isoforms, is involved in the binding to MYPT1, while the myosin binding domain is within the kinase domain. These results suggest that the newly found hZIPK-S as well as the long isoform play an important role in the regulation of myosin phosphorylation."


In [48]:
full_df.head(n=2)

Unnamed: 0,abstract,acetylation,confidence_scores,demethylation,dephosphorylation,deubiquitination,methylation,normalised_abstract,other,participant1Id,participant2Id,phosphorylation,predicted,predicted_confidence,pubmedId,ubiquitination
395,"Fragile X syndrome is one of the most frequent causes of hereditary mental retardation. In the past, its diagnosis depended primarily on cytogenetic demonstration of chromosome fragile site Xq27.3. Recently, the gene FMR-1 has been found responsible for this disease. Here a combined method was used to study fragile X syndrome. A fragment (pP1fr) of DNA was subcloned from pE5.1 by polymerase chain reaction. With this probe, DNA samples from two cytogenetically proved families were analyzed by restriction fragment length polymorphisms. It was demonstrated that EcoRI polymorphism was an easy and accurate method for diagnosis of the fragile X syndrome. To study methylation status of patients, another methylation-sensitive enzyme, BssHII, could be used together with EcoRI. The PstI polymorphism of one family was also studied and showed one kb fragment as normal, and detected more precise changes in length. Prominent mosaicism necessary was characteristic in PstI polymorphism. The DNA diagnosis of fragile X syndrome was a reliable method.",0.80431,"{'acetylation': -0.21777061220000002, 'demethylation': -0.4751224399, 'dephosphorylation': -1.4052172422, 'deubiquitination': -0.6653347515, 'methylation': 3.5257004499, 'other': 0.5619400702, 'phosphorylation': -0.8057110734, 'ubiquitination': -0.7540013894}",0.621809,0.245314,0.514101,33.977565,"Fragile X syndrome is one of the most frequent causes of hereditary mental retardation. In the past, its diagnosis depended primarily on cytogenetic demonstration of chromosome fragile site Xq27.3. Recently, the gene Q06787 has been found responsible for this disease. Here a combined method was used to study fragile X syndrome. A fragment (pP1fr) of DNA was subcloned from pE5.1 by polymerase chain reaction. With this probe, DNA samples from two cytogenetically proved families were analyzed by restriction fragment length polymorphisms. 
It was demonstrated that EcoRI polymorphism was an easy and accurate method for diagnosis of the fragile X syndrome. To study methylation status of patients, another methylation-sensitive enzyme, BssHII, could be used together with EcoRI. The P00995 polymorphism of one family was also studied and showed one kb fragment as normal, and detected more precise changes in length. Prominent mosaicism necessary was characteristic in P00995 polymorphism. The DNA diagnosis of fragile X syndrome was a reliable method.",1.754072,P00995,Q06787,0.44677,methylation,0.874934,8372665,0.47048
2783,"The experiments reported here document that the tumor suppressor retinoblastoma protein (pRB) plays an important role in the production and maintenance of the terminally differentiated phenotype of muscle cells. We show that pRB inactivation, through either phosphorylation, binding to T antigen, or genetic alteration, inhibits myogenesis. Moreover, inactivation of pRB in terminally differentiated cells allows them to reenter the cell cycle. In addition to its involvement in the myogenic activities of MyoD, pRB is also required for the cell growth-inhibitory activity of this myogenic factor. We also show that pRB and MyoD directly bind to each other, both in vivo and in vitro, through a region that involves the pocket and the basic-helix-loop-helix domains, respectively. All the results obtained are consistent with the proposal that the effects of MyoD on the cell cycle and of pRB on the myogenic pathway result from the direct binding of the two molecules.",0.286923,"{'acetylation': -1.2485398501, 'demethylation': -1.7055489540000002, 'dephosphorylation': -1.0236738801, 'deubiquitination': -1.378437835, 'methylation': -1.4621940672, 'other': 1.4431860685, 'phosphorylation': 5.7318912506, 'ubiquitination': -1.392804879}",0.181673,0.359273,0.251972,0.231727,"The experiments reported here document that the tumor suppressor retinoblastoma protein (P06400) plays an important role in the production and maintenance of the terminally differentiated phenotype of muscle cells. We show that P06400 inactivation, through either phosphorylation, binding to T antigen, or genetic alteration, inhibits myogenesis. Moreover, inactivation of P06400 in terminally differentiated cells allows them to reenter the cell cycle. In addition to its involvement in the myogenic activities of P15172, P06400 is also required for the cell growth-inhibitory activity of this myogenic factor. 
We also show that P06400 and P15172 directly bind to each other, both in vivo and in vitro, through a region that involves the pocket and the basic-helix-loop-helix domains, respectively. All the results obtained are consistent with the proposal that the effects of P15172 on the cell cycle and of P06400 on the myogenic pathway result from the direct binding of the two molecules.",4.234165,P06400,P15172,308.552267,phosphorylation,0.981568,8381715,0.248378


In [49]:
# Shape of the retained predictions whose pubmedId does not occur in the
# training data.
full_df[~full_df.pubmedId.isin(data_training_full_df.pubmedId)].shape

(3253, 16)

In [50]:
# Flag each retained prediction whose pubmedId also appears in the training
# data. This adds a column to full_df in place.
full_df["PubmedInTrainingData"] = full_df.pubmedId.isin(data_training_full_df.pubmedId)

In [51]:

# Display settings for the cells that follow: show all columns, allow up to
# 10000 characters per cell (so abstracts are not truncated) and at most 100 rows.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', 10000)
pd.set_option('display.max_rows', 100)

# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top so a fresh kernel run shows all dependencies up front.
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size': 12})

In [52]:
# Write the retained predictions to predictions.csv in the current working
# directory, with a header row and without the DataFrame index.
full_df.to_csv("predictions.csv", index=False, header=True)

In [54]:
# Show 20 randomly chosen retained predictions for manual inspection.
# NOTE(review): no random_state is passed, so each run shows different rows;
# consider fixing a seed if the displayed sample needs to be reproducible.
full_df[["abstract", "normalised_abstract", "participant1Id","participant2Id", "pubmedId", "predicted" ,"predicted_confidence" ]].sample(n=20)

Unnamed: 0,abstract,normalised_abstract,participant1Id,participant2Id,pubmedId,predicted,predicted_confidence
20286,"The mitotic cell cycle can be described as an alternation between two states. During mitosis, MPF (mitosis promoting factor) is high and keeps inactive its numerous molecular antagonists. In interphase, MPF is inactivated, and the antagonists prevail. The transition between the two states is ensured by 'helper' molecules that favor one state over the other. It has long been assumed that active MPF (a dimer of cyclin B and cyclin-dependent kinase 1) induces exit from mitosis by activating APC:Cdc20, a ubiquitin ligase responsible for cyclin B degradation. The molecular details have not been fully worked out yet, but recent results show that MPF and the ubiquitin ligase are not involved in a simple negative feedback loop. While it is proven that MPF activates APC, new data suggest that MPF inhibits Cdc20, i.e., that MPF and Cdc20 are antagonists. We introduce this new idea into a published model for cell cycle regulation in Xenopus laevis, and study its dynamical behavior. We show that the new wiring permits oscillations with a simpler and smaller network than previously envisaged and that the antagonism between MPF and Cdc20 suggests a new interpretation of the spindle checkpoint.","The mitotic cell cycle can be described as an alternation between two states. During mitosis, Q13421 (Q13421) is high and keeps inactive its numerous molecular antagonists. In interphase, Q13421 is inactivated, and the antagonists prevail. The transition between the two states is ensured by 'helper' molecules that favor one state over the other. It has long been assumed that active Q13421 (a dimer of cyclin B and P06493) induces exit from mitosis by activating APC:Q12834, a ubiquitin ligase responsible for cyclin B degradation. The molecular details have not been fully worked out yet, but recent results show that Q13421 and the ubiquitin ligase are not involved in a simple negative feedback loop. 
While it is proven that Q13421 activates APC, new data suggest that Q13421 inhibits Q12834, i.e., that Q13421 and Q12834 are antagonists. We introduce this new idea into a published model for cell cycle regulation in Xenopus laevis, and study its dynamical behavior. We show that the new wiring permits oscillations with a simpler and smaller network than previously envisaged and that the antagonism between Q13421 and Q12834 suggests a new interpretation of the spindle checkpoint.",Q12834,Q13421,15970669,ubiquitination,0.305353
895,"The retinoblastoma binding protein KDM5A removes methyl marks from lysine 4 of histone H3 (H3K4). Misregulation of KDM5A contributes to the pathogenesis of lung and gastric cancers. In addition to its catalytic jumonji C domain, KDM5A contains three PHD reader domains, commonly recognized as chromatin recruitment modules. It is unknown whether any of these domains in KDM5A have functions beyond recruitment and whether they regulate the catalytic activity of the demethylase. Here using biochemical and nuclear magnetic resonance (NMR)-based structural studies, we show that the PHD1 preferentially recognizes unmethylated H3K4 histone tail, product of KDM5A-mediated demethylation of tri-methylated H3K4 (H3K4me3). Binding of unmodified H3 peptide to the PHD1 stimulates catalytic domain-mediated removal of methyl marks from H3K4me3 peptide and nucleosome substrates. This positive-feedback mechanism--enabled by the functional coupling between a reader and a catalytic domain in KDM5A--suggests a model for the spread of demethylation on chromatin.","The retinoblastoma binding protein P29375 removes methyl marks from lysine 4 of histone H3 (H3K4). Misregulation of P29375 contributes to the pathogenesis of lung and gastric cancers. In addition to its catalytic jumonji C domain, P29375 contains three PHD reader domains, commonly recognized as chromatin recruitment modules. It is unknown whether any of these domains in P29375 have functions beyond recruitment and whether they regulate the catalytic activity of the demethylase. Here using biochemical and nuclear magnetic resonance (NMR)-based structural studies, we show that the Q96KS0 preferentially recognizes unmethylated H3K4 histone tail, product of P29375-mediated demethylation of tri-methylated H3K4 (H3K4me3). Binding of unmodified H3 peptide to the Q96KS0 stimulates catalytic domain-mediated removal of methyl marks from H3K4me3 peptide and nucleosome substrates. 
This positive-feedback mechanism--enabled by the functional coupling between a reader and a catalytic domain in P29375--suggests a model for the spread of demethylation on chromatin.",P29375,Q96KS0,25686748,methylation,0.866325
1462,"Protein arginine methylation has been recently identified as an important form of post-translational modification (PTM). It is carried out by the protein arginine methyltransferase (PRMT) family of enzymes, which in mammals consists of nine members. Among them, PRMT1 is the major arginine methyltransferase and participates in transcription, signal transduction, development and cancer. The function of PRMT1 in craniofacial development remains unclear. We generated Wnt1-Cre;Prmt1","Protein arginine methylation has been recently identified as an important form of post-translational modification (PTM). It is carried out by the protein arginine methyltransferase (PRMT) family of enzymes, which in mammals consists of nine members. Among them, Q99873 is the major arginine methyltransferase and participates in transcription, signal transduction, development and cancer. The function of Q99873 in craniofacial development remains unclear. We generated P04628-Cre;Prmt1",P04628,Q99873,29727702,methylation,0.909696
2895,"MDM2 is an E3 ubiquitin ligase which mediates ubiquitylation and proteasome-dependent degradation of the p53 tumor suppressor protein. Phosphorylation of MDM2 by the protein kinase AKT is thought to regulate MDM2 function in response to survival signals, but there has been uncertainty concerning the identity of the sites phosphorylated by AKT. In the present study, we identify Ser-166, a site previously reported as an AKT target, and Ser-188, a novel site which is the major site of phosphorylation of MDM2 by AKT in vitro. Analysis of MDM2 in cultured cells confirms that Ser-166 and Ser-188 are phosphorylated by AKT in a physiological context.","Q00987 is an E3 ubiquitin ligase which mediates ubiquitylation and proteasome-dependent degradation of the P04637 tumor suppressor protein. Phosphorylation of Q00987 by the protein kinase P31749 is thought to regulate Q00987 function in response to survival signals, but there has been uncertainty concerning the identity of the sites phosphorylated by P31749. In the present study, we identify Ser-166, a site previously reported as an P31749 target, and Ser-188, a novel site which is the major site of phosphorylation of Q00987 by P31749 in vitro. Analysis of Q00987 in cultured cells confirms that Ser-166 and Ser-188 are phosphorylated by P31749 in a physiological context.",P31749,Q00987,15527798,phosphorylation,0.988527
4929,"Homeobox (HOX) transcript antisense RNA (HOTAIR), a long intergenic noncoding RNA (lincRNA), has been reported to play an oncogenic role in various cancers including small cell lung cancer (SCLC). However, it is not known whether HOTAIR can modulate chemoresistance in SCLC. The aim of this study is to investigate the roles of HOTAIR in chemoresistance of SCLC and its possible molecular mechanism. Knockdown of HOTAIR was carried out in SCLC multidrug-resistant cell lines (H69AR and H446AR) and the parental cell lines (H69 and H446) to assess its influence on chemoresistance. The results showed that downregulation of HOTAIR increased cell sensitivity to anticancer drugs through increasing cell apoptosis and cell cycle arrest, and suppressed tumor growth in vivo. Moreover, HOXA1 methylation increased in the resistant cells using bisulfite sequencing PCR. Depletion of HOTAIR reduced HOXA1 methylation by decreasing DNMT1 and DNMT3b expression. The interaction between HOTAIR and HOXA1 was validated by RNA immunoprecipitation. Taken together, our study suggested that HOTAIR mediates chemoresistance of SCLC by regulating HOXA1 methylation and could be utilized as a potential target for new adjuvant therapies against chemoresistance.","Homeobox (HOX) transcript antisense RNA (100124700), a long intergenic noncoding RNA (lincRNA), has been reported to play an oncogenic role in various cancers including small cell lung cancer (7864). However, it is not known whether 100124700 can modulate chemoresistance in 7864. The aim of this study is to investigate the roles of 100124700 in chemoresistance of 7864 and its possible molecular mechanism. Knockdown of 100124700 was carried out in 7864 multidrug-resistant cell lines (H69AR and H446AR) and the parental cell lines (H69 and H446) to assess its influence on chemoresistance. 
The results showed that downregulation of 100124700 increased cell sensitivity to anticancer drugs through increasing cell apoptosis and cell cycle arrest, and suppressed tumor growth in vivo. Moreover, P49639 methylation increased in the resistant cells using bisulfite sequencing PCR. Depletion of 100124700 reduced P49639 methylation by decreasing P26358 and Q9UBC3 expression. The interaction between 100124700 and P49639 was validated by RNA immunoprecipitation. Taken together, our study suggested that 100124700 mediates chemoresistance of 7864 by regulating P49639 methylation and could be utilized as a potential target for new adjuvant therapies against chemoresistance.",100124700,P49639,26707824,methylation,0.914474
12847,"Here we demonstrate that the dual leucine zipper kinase (DLK) promotes robust regeneration of peripheral axons after nerve injury in mice. Peripheral axon regeneration is accelerated by prior injury; however, DLK KO neurons do not respond to a preconditioning lesion with enhanced regeneration invivo or invitro. Assays for activation of transcription factors in injury-induced proregenerative pathways reveal that loss of DLK abolishes upregulation of p-STAT3 and p-cJun in the cell body after axonal injury. DLK is not required for the phosphorylation of STAT3 at the site of nerve injury but is necessary for retrograde transport of p-STAT3 to the cell body. These data demonstrate that DLK enhances regeneration by promoting a retrograde injury signal that is required for the activation of the neuronal proregenerative program.","Here we demonstrate that the Q12852 (Q12852) promotes robust regeneration of peripheral axons after nerve injury in mice. Peripheral axon regeneration is accelerated by prior injury; however, Q12852 KO neurons do not respond to a preconditioning lesion with enhanced regeneration invivo or invitro. Assays for activation of transcription factors in injury-induced proregenerative pathways reveal that loss of Q12852 abolishes upregulation of p-P40763 and p-cJun in the cell body after axonal injury. Q12852 is not required for the phosphorylation of P40763 at the site of nerve injury but is necessary for retrograde transport of p-P40763 to the cell body. These data demonstrate that Q12852 enhances regeneration by promoting a retrograde injury signal that is required for the activation of the neuronal proregenerative program.",P40763,Q12852,22726832,phosphorylation,0.983138
2687,"MTA2 is a member of metastasis associated family, which is highly expressed in several solid tumors and associated with tumor cells migration and invasion. Here, we report that MTA2 is acetylated at K152 and histone acetyltransferase p300 binds to and acetylates MTA2. Furthermore, mutation of the MTA2 acetylation site inhibits the growth of colorectal cancer cells and migration and invasion of Rat1 fibroblasts. These results reveal a novel post-translational regulation of MTA2 by the way of p300-dependent acetylation, which is important for tumor cells growth and migration and provides a potential target for clinical cancer research.","O94776 is a member of metastasis associated family, which is highly expressed in several solid tumors and associated with tumor cells migration and invasion. Here, we report that O94776 is acetylated at K152 and histone acetyltransferase Q09472 binds to and acetylates O94776. Furthermore, mutation of the O94776 acetylation site inhibits the growth of colorectal cancer cells and migration and invasion of Rat1 fibroblasts. These results reveal a novel post-translational regulation of O94776 by the way of Q09472-dependent acetylation, which is important for tumor cells growth and migration and provides a potential target for clinical cancer research.",O94776,Q09472,24468085,acetylation,0.834194
147,"Insulin has both short- and long-term effects on cellular metabolism. The short-term effects are known to involve the insulin receptor, a protein kinase capable of phosphorylating itself and other proteins. The role of the receptor was elucidated by studies of a mutant insulin receptor which lacked kinase activity and inhibited several actions of insulin. The long-term effects of insulin could be demonstrated by its growth-promoting effect on hepatoma cells, and by the suppression in transfected hepatoma cells of hepatitis B virus antigen production in a dose-dependent manner. The process whereby insulin appears to regulate gene expression is not clearly understood.","P01308 has both short- and long-term effects on cellular metabolism. The short-term effects are known to involve the P06213, a protein kinase capable of phosphorylating itself and other proteins. The role of the receptor was elucidated by studies of a mutant P06213 which lacked kinase activity and inhibited several actions of P01308. The long-term effects of P01308 could be demonstrated by its growth-promoting effect on hepatoma cells, and by the suppression in transfected hepatoma cells of hepatitis B virus antigen production in a dose-dependent manner. The process whereby P01308 appears to regulate gene expression is not clearly understood.",P01308,P06213,2184955,phosphorylation,0.982965
11845,"Protein arginine methyltransferase 5 (PRMT5) has been implicated as a key modulator of lymphomagenesis. Whether PRMT5 has overt oncogenic function in the context of leukemia/lymphoma and whether it represents a therapeutic target remains to be established. We demonstrate that inactivation of PRMT5 inhibits colony-forming activity by multiple oncogenic drivers, including cyclin D1, c-MYC, NOTCH1, and MLL-AF9. Furthermore, we demonstrate that PRMT5 overexpression specifically cooperates with cyclin D1 to drive lymphomagenesis in a mouse model, revealing inherent neoplastic activity. Molecular analysis of lymphomas revealed that arginine methylation of p53 selectively suppresses expression of crucial proapoptotic and antiproliferative target genes, thereby sustaining tumor cell self-renewal and proliferation and bypassing the need for the acquisition of inactivating p53 mutations. Critically, analysis of human tumor specimens reveals a strong correlation between cyclin D1 overexpression and p53 methylation, supporting the biomedical relevance of this pathway.","O14744 (O14744) has been implicated as a key modulator of lymphomagenesis. Whether O14744 has overt oncogenic function in the context of leukemia/lymphoma and whether it represents a therapeutic target remains to be established. We demonstrate that inactivation of O14744 inhibits colony-forming activity by multiple oncogenic drivers, including P24385, P01106, P46531, and Q03164. Furthermore, we demonstrate that O14744 overexpression specifically cooperates with P24385 to drive lymphomagenesis in a mouse model, revealing inherent neoplastic activity. Molecular analysis of lymphomas revealed that arginine methylation of P04637 selectively suppresses expression of crucial proapoptotic and antiproliferative target genes, thereby sustaining tumor cell self-renewal and proliferation and bypassing the need for the acquisition of inactivating P04637 mutations. 
Critically, analysis of human tumor specimens reveals a strong correlation between P24385 overexpression and P04637 methylation, supporting the biomedical relevance of this pathway.",O14744,P04637,25582697,methylation,0.901107
1617,"Gastric cancer (GC) is one of the most common malignant tumors in the world and microRNAs (miRNAs) play an important role in GC. In this study, we found miR497 played an important role and served as a novel biomarker in GC. Quantitative real-time PCR (qRT-PCR) was used to measure the miR497 expression in GC cell lines and 86paired GC samples and we also analyzed its correlation with GC clinicopathological parameters. A series of cellular function experiments were applied to validate the effects of miR497 on GC. In addition, methylation-specific PCR (MSP) was applied to detect the gene methylation status. Finally, the correlation between miR497 and the target gene was analyzed by western blotting assay. miR497 was reduced obviously in GC cells and tissues and significantly associated with the pathologic stage. Low expression of miR497 significantly inhibited the proliferation, invasion and migration of GC cell lines and accelerated apoptosis. Moreover, we found that the aberrant expression of miR497 may be ascribed to DNA methylation. microRNA.org and luciferase reporter assay suggested that RAF1 was a direct target of miR497 in GC. This study suggested that miR497 could serve as a tumor suppressor and a potential early diagnostic marker of GC by targeting Raf-1 proto-oncogene, serine/threonine kinase (RAF1).","Gastric cancer (GC) is one of the most common malignant tumors in the world and microRNAs (miRNAs) play an important role in GC. In this study, we found 574456 played an important role and served as a novel biomarker in GC. Quantitative real-time PCR (qRT-PCR) was used to measure the 574456 expression in GC cell lines and 86paired GC samples and we also analyzed its correlation with GC clinicopathological parameters. A series of cellular function experiments were applied to validate the effects of 574456 on GC. In addition, methylation-specific PCR (MSP) was applied to detect the gene methylation status. 
Finally, the correlation between 574456 and the target gene was analyzed by western blotting assay. 574456 was reduced obviously in GC cells and tissues and significantly associated with the pathologic stage. Low expression of 574456 significantly inhibited the proliferation, invasion and migration of GC cell lines and accelerated apoptosis. Moreover, we found that the aberrant expression of 574456 may be ascribed to DNA methylation. microRNA.org and luciferase reporter assay suggested that P04049 was a direct target of 574456 in GC. This study suggested that 574456 could serve as a tumor suppressor and a potential early diagnostic marker of GC by targeting P04049 proto-oncogene, serine/threonine kinase (P04049).",574456,P04049,28586056,methylation,0.885787
