In [1]:
import numpy as np
import pandas as pd

from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score, auc
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import classification_report
from collections import Counter
import re
from tqdm import trange

In [2]:
database_data = pd.read_csv('database_file汇总.csv').drop_duplicates()
database_data

Unnamed: 0,miRNA,miRBase Update ID,disease,Cancer Subtype
0,hsa-miR-106a,hsa-miR-106a-5p,lymphoma,B cell lymphoma
1,hsa-let-7g,hsa-let-7g-5p,lymphoma,B cell lymphoma
2,hsa-miR-17-5p,hsa-miR-17-5p,lymphoma,B cell lymphoma
3,hsa-let-7f,hsa-let-7f-5p,lymphoma,B cell lymphoma
4,hsa-let-7a,hsa-let-7a-5p,lymphoma,B cell lymphoma
...,...,...,...,...
98742,hsa-mir-34a,,Glioblastoma,
98747,hsa-mir-15b,,Stomach Neoplasms,
98748,hsa-mir-16-1,,Stomach Neoplasms,
98749,hsa-mir-16-2,,Stomach Neoplasms,


In [3]:
def new_MDA(results_df, task, balanced = True):
    new_MDA = results_df[results_df['y_true'] == 0][results_df['y_pred'] == 1]
    if balanced:
        new_MDA.to_csv(task + '_balanced_new_MDA.csv')
    else:
        new_MDA.to_csv(task + '_unbalanced_new_MDA.csv')
        
    new_MDA_simplied = new_MDA[['miRNA_y', 'disease_y', 'y_prob']]

    temp = []
    for item in new_MDA_simplied['disease_y'].tolist():
        item = item.replace("['",'').replace("']", "")
        temp.append(item)
    new_MDA_simplied['disease_newMDA'] = temp
    
    return new_MDA, new_MDA_simplied

In [4]:
def database_data_miRNA_in_newMDA_results(database_data, new_MDA_simplied):
    database_data_miRNA_in_results = database_data[database_data['miRNA'].isin(new_MDA_simplied['miRNA_y'])]
    database_data_miRNA_in_results_2 = database_data[database_data['miRBase Update ID'].isin(new_MDA_simplied['miRNA_y'])]
    if database_data_miRNA_in_results_2.shape[0] != 0:
        print('miRBase Update ID shoud be used!!!!!!')
        database_data_miRNA_in_results = pd.concat([database_data_miRNA_in_results, ], axis = 0)
    return database_data_miRNA_in_results

In [5]:
def newMDA_in_database(new_MDA_simplied, database_data_miRNA_in_results):
    
    for i in trange(new_MDA_simplied.shape[0]):
        print('-------------------------{}-th new MDA--------------------------------------------'.format(i))
        miRNA = new_MDA_simplied.iloc[i]['miRNA_y']
        disease_of_miRNA = new_MDA_simplied.iloc[i]['disease_newMDA']
        print('miRNA = {}  ||  disease_of_miRNA = {}'.format(miRNA, disease_of_miRNA))
        disease_of_miRNA = disease_of_miRNA.split(' ')
        database_data_of_miRNA = database_data_miRNA_in_results[database_data_miRNA_in_results['miRNA'] == miRNA]

        for str_ in disease_of_miRNA:
            res = database_data_of_miRNA.loc[database_data_of_miRNA['disease'].str.contains(str_)]
            print(str_, '--> ', res.values, '\n')

In [6]:
def run(task, database_data):
    results = pd.read_csv(task + '_balanced_case_study_0.csv', index_col = 0)
    new_MDA_origin, new_MDA_simplied = new_MDA(results, task, True)
    database_data_miRNA_in_results = database_data_miRNA_in_newMDA_results(database_data, new_MDA_simplied)
    newMDA_in_database(new_MDA_simplied, database_data_miRNA_in_results)
    return results, new_MDA, new_MDA_simplied, database_data_miRNA_in_results

In [7]:
results_Tp, new_MDA_Tp, new_MDA_simplied_Tp, database_data_miRNA_in_results_Tp = run(task = 'Tp', database_data = database_data)

  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
 13%|█▎        | 10/77 [00:00<00:01, 66.69it/s]

-------------------------0-th new MDA--------------------------------------------
miRNA = hsa-mir-324  ||  disease_of_miRNA = Vascular Calcification
Vascular -->  [] 

Calcification -->  [] 

-------------------------1-th new MDA--------------------------------------------
miRNA = hsa-mir-371a  ||  disease_of_miRNA = Vascular Calcification
Vascular -->  [] 

Calcification -->  [] 

-------------------------2-th new MDA--------------------------------------------
miRNA = hsa-mir-548c  ||  disease_of_miRNA = Uterine Cervical Neoplasms
Uterine -->  [] 

Cervical -->  [] 

Neoplasms -->  [['hsa-mir-548c' nan 'Breast Neoplasms' nan]
 ['hsa-mir-548c' nan 'Ovarian Neoplasms' nan]
 ['hsa-mir-548c' nan 'Rectal Neoplasms' nan]] 

-------------------------3-th new MDA--------------------------------------------
miRNA = hsa-mir-640  ||  disease_of_miRNA = Skin Neoplasms
Skin -->  [] 

Neoplasms -->  [['hsa-mir-640' nan 'Gastric Neoplasms' nan]
 ['hsa-mir-640' nan 'Stomach Neoplasms' nan]] 

------

 31%|███       | 24/77 [00:00<00:00, 56.74it/s]



thyroid -->  [] 

carcinoma -->  [] 

-------------------------12-th new MDA--------------------------------------------
miRNA = hsa-mir-320d  ||  disease_of_miRNA = Pancreatic Neoplasms
Pancreatic -->  [] 

Neoplasms -->  [['hsa-mir-320d' nan 'Breast Neoplasms' nan]] 

-------------------------13-th new MDA--------------------------------------------
miRNA = hsa-mir-503  ||  disease_of_miRNA = Ovary Syndrome
Ovary -->  [] 

Syndrome -->  [['hsa-mir-503' nan 'Nephrotic Syndrome' nan]] 

-------------------------14-th new MDA--------------------------------------------
miRNA = hsa-mir-561  ||  disease_of_miRNA = Osteoporosis
Osteoporosis -->  [] 

-------------------------15-th new MDA--------------------------------------------
miRNA = hsa-mir-647  ||  disease_of_miRNA = Osteoarthritis
Osteoarthritis -->  [] 

-------------------------16-th new MDA--------------------------------------------
miRNA = hsa-mir-1181  ||  disease_of_miRNA = Oligodendroglioma
Oligodendroglioma -->  [] 

--

 39%|███▉      | 30/77 [00:00<00:00, 57.54it/s]


miRNA = hsa-mir-1827  ||  disease_of_miRNA = Lupus Erythematosus, Systemic
Lupus -->  [] 

Erythematosus, -->  [] 

Systemic -->  [] 

-------------------------29-th new MDA--------------------------------------------
miRNA = hsa-mir-582  ||  disease_of_miRNA = Lung Neoplasms
Lung -->  [['hsa-mir-582' nan 'Lung Neoplasms' nan]] 

Neoplasms -->  [['hsa-mir-582' nan 'Lung Neoplasms' nan]
 ['hsa-mir-582' nan 'Prostate Neoplasms' nan]] 

-------------------------30-th new MDA--------------------------------------------
miRNA = hsa-mir-151a  ||  disease_of_miRNA = Lung Diseases
Lung -->  [['hsa-mir-151a' nan 'Carcinoma, Lung, Non-Small-Cell' nan]
 ['hsa-mir-151a' nan 'Adenocarcinoma, Lung' nan]
 ['hsa-mir-151a' nan 'Carcinoma, Non-Small-Cell Lung' nan]] 

Diseases -->  [['hsa-mir-151a' nan 'Cardiovascular Diseases [unspecific]' nan]] 

-------------------------31-th new MDA--------------------------------------------
miRNA = hsa-mir-212  ||  disease_of_miRNA = Long QT Syndrome
Long -->  []

 57%|█████▋    | 44/77 [00:00<00:00, 48.98it/s]

 [] 

Chronic-Phase -->  [] 

-------------------------34-th new MDA--------------------------------------------
miRNA = hsa-mir-522  ||  disease_of_miRNA = Leukemia, Myelogenous, Chronic, BCR-ABL Positive
Leukemia, -->  [] 

Myelogenous, -->  [] 

Chronic, -->  [] 

BCR-ABL -->  [] 

Positive -->  [] 

-------------------------35-th new MDA--------------------------------------------
miRNA = hsa-mir-2861  ||  disease_of_miRNA = Leukemia, B-Cell
Leukemia, -->  [] 

B-Cell -->  [] 

-------------------------36-th new MDA--------------------------------------------
miRNA = hsa-mir-641  ||  disease_of_miRNA = Leiomyosarcoma
Leiomyosarcoma -->  [] 

-------------------------37-th new MDA--------------------------------------------
miRNA = hsa-mir-571  ||  disease_of_miRNA = Laryngeal Neoplasms
Laryngeal -->  [] 

Neoplasms -->  [] 

-------------------------38-th new MDA--------------------------------------------
miRNA = hsa-mir-655  ||  disease_of_miRNA = Kidney Neoplasms
Kidney -->  [] 

 79%|███████▉  | 61/77 [00:01<00:00, 52.93it/s]

-->  [] 

Infection -->  [] 

-------------------------47-th new MDA--------------------------------------------
miRNA = hsa-mir-622  ||  disease_of_miRNA = Graves Disease
Graves -->  [] 

Disease -->  [] 

-------------------------48-th new MDA--------------------------------------------
miRNA = hsa-mir-758  ||  disease_of_miRNA = Graft vs Host Disease
Graft -->  [] 

vs -->  [] 

Host -->  [] 

Disease -->  [['hsa-mir-758' nan 'Cardiovascular Diseases [unspecific]' nan]] 

-------------------------49-th new MDA--------------------------------------------
miRNA = hsa-mir-320a  ||  disease_of_miRNA = Glomerulonephritis, IGA
Glomerulonephritis, -->  [] 

IGA -->  [] 

-------------------------50-th new MDA--------------------------------------------
miRNA = hsa-mir-598  ||  disease_of_miRNA = Glomerulonephritis
Glomerulonephritis -->  [] 

-------------------------51-th new MDA--------------------------------------------
miRNA = hsa-mir-648  ||  disease_of_miRNA = Frontotemporal Lobar D

 88%|████████▊ | 68/77 [00:01<00:00, 36.01it/s]

Carcinoma, -->  [['hsa-mir-370' nan 'Carcinoma, Gastric' nan]
 ['hsa-mir-370' nan 'Carcinoma, Oral' nan]
 ['hsa-mir-370' nan 'Carcinoma, Lung, Non-Small-Cell' nan]
 ['hsa-mir-370' nan 'Carcinoma, Hepatocellular' nan]
 ['hsa-mir-370' nan 'Carcinoma, Lung' nan]
 ['hsa-mir-370' nan 'Squamous Cell Carcinoma, Esophageal' nan]
 ['hsa-mir-370' nan
  'Squamous Cell Carcinoma, Laryngeal or Hypopharyngeal' nan]] 

Renal -->  [] 

Cell -->  [['hsa-mir-370' nan 'Carcinoma, Lung, Non-Small-Cell' nan]
 ['hsa-mir-370' nan 'Squamous Cell Carcinoma, Esophageal' nan]
 ['hsa-mir-370' nan
  'Squamous Cell Carcinoma, Laryngeal or Hypopharyngeal' nan]] 

-------------------------67-th new MDA--------------------------------------------
miRNA = hsa-mir-603  ||  disease_of_miRNA = Carcinoma, Renal Cell
Carcinoma, -->  [['hsa-mir-603' nan 'Carcinoma, Breast, Triple Negative' nan]] 

Renal -->  [] 

Cell -->  [] 

-------------------------68-th new MDA--------------------------------------------
miRNA = hsa-mir

100%|██████████| 77/77 [00:01<00:00, 50.01it/s]

Adenoma -->  [] 

-------------------------74-th new MDA--------------------------------------------
miRNA = hsa-mir-451b  ||  disease_of_miRNA = Acute Lung Injury
Acute -->  [] 

Lung -->  [['hsa-mir-451b' nan 'Carcinoma, Lung, Non-Small-Cell' nan]
 ['hsa-mir-451b' nan 'Carcinoma, Non-Small-Cell Lung' nan]] 

Injury -->  [] 

-------------------------75-th new MDA--------------------------------------------
miRNA = hsa-mir-501  ||  disease_of_miRNA = Acquired Immunodeficiency Syndrome
Acquired -->  [] 

Immunodeficiency -->  [] 

Syndrome -->  [] 

-------------------------76-th new MDA--------------------------------------------
miRNA = hsa-mir-1273a  ||  disease_of_miRNA = Abortion, Habitual
Abortion, -->  [] 

Habitual -->  [] 






In [10]:
results_Tm, new_MDA_Tm, new_MDA_simplied_Tm, database_data_miRNA_in_results_Tm = run(task = 'Tm', database_data = database_data)

  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  4%|▍         | 2/51 [00:00<00:02, 18.52it/s]

-------------------------0-th new MDA--------------------------------------------
miRNA = hsa-mir-551a  ||  disease_of_miRNA = Waldenstrom Macroglobulinemia
Waldenstrom -->  [] 

Macroglobulinemia -->  [] 

-------------------------1-th new MDA--------------------------------------------
miRNA = hsa-mir-611  ||  disease_of_miRNA = Waldenstrom Macroglobulinemia
Waldenstrom -->  [] 

Macroglobulinemia -->  [] 

-------------------------2-th new MDA--------------------------------------------
miRNA = hsa-mir-324  ||  disease_of_miRNA = Vascular Calcification
Vascular -->  [] 

Calcification -->  [] 

-------------------------3-th new MDA--------------------------------------------
miRNA = hsa-mir-371a  ||  disease_of_miRNA = Vascular Calcification
Vascular -->  [] 

Calcification -->  [] 

-------------------------4-th new MDA--------------------------------------------
miRNA = hsa-mir-300  ||  disease_of_miRNA = Sepsis
Sepsis -->  [] 

-------------------------5-th new MDA---------------

 24%|██▎       | 12/51 [00:00<00:01, 25.51it/s]

[['hsa-mir-146a' nan 'Pancreatic Neoplasms' nan]
 ['hsa-mir-146a' nan 'Prostate Neoplasms' nan]
 ['hsa-mir-146a' nan 'Gastric Neoplasms' nan]
 ['hsa-mir-146a' nan 'Breast Neoplasms' nan]
 ['hsa-mir-146a' nan 'Thyroid Neoplasms' nan]
 ['hsa-mir-146a' nan 'Ovarian Neoplasms' nan]
 ['hsa-mir-146a' nan 'Gastrointestinal Neoplasms' nan]
 ['hsa-mir-146a' nan 'Esophageal Neoplasms' nan]
 ['hsa-mir-146a' nan 'Nasopharyngeal Neoplasms' nan]
 ['hsa-mir-146a' nan 'Lung Neoplasms' nan]
 ['hsa-mir-146a' nan 'Digestive System Neoplasms' nan]
 ['hsa-mir-146a' nan 'Neoplasms [unspecific]' nan]
 ['hsa-mir-146a' nan 'Skin Neoplasms' nan]
 ['hsa-mir-146a' nan 'Endometrial Neoplasms' nan]
 ['hsa-mir-146a' nan 'Head And Neck Neoplasms' nan]
 ['hsa-mir-146a' nan 'Bladder Neoplasms' nan]
 ['hsa-mir-146a' nan 'Cervical Neoplasms' nan]
 ['hsa-mir-146a' nan 'Colon Neoplasms' nan]
 ['hsa-mir-146a' nan 'Liver Neoplasms' nan]
 ['hsa-mir-146a' nan 'Prostatic Neoplasms' nan]
 ['hsa-mir-146a' nan 'Uterine Cervical Ne

 51%|█████     | 26/51 [00:00<00:00, 34.11it/s]

-->  [] 

-------------------------16-th new MDA--------------------------------------------
miRNA = hsa-mir-18a  ||  disease_of_miRNA = Neurofibromatosis 2
Neurofibromatosis -->  [] 

2 -->  [['hsa-mir-18a' nan 'Diabetes Mellitus, Type 2' nan]] 

-------------------------17-th new MDA--------------------------------------------
miRNA = hsa-mir-431  ||  disease_of_miRNA = Myocytes, Cardiac
Myocytes, -->  [] 

Cardiac -->  [] 

-------------------------18-th new MDA--------------------------------------------
miRNA = hsa-mir-181d  ||  disease_of_miRNA = Musculoskeletal Abnormalities
Musculoskeletal -->  [] 

Abnormalities -->  [] 

-------------------------19-th new MDA--------------------------------------------
miRNA = hsa-mir-769  ||  disease_of_miRNA = Mastocytosis, Systemic
Mastocytosis, -->  [] 

Systemic -->  [] 

-------------------------20-th new MDA--------------------------------------------
miRNA = hsa-mir-151a  ||  disease_of_miRNA = Lung Diseases
Lung -->  [['hsa-mir-151a'

 61%|██████    | 31/51 [00:00<00:00, 34.83it/s]

miRNA = hsa-mir-516a  ||  disease_of_miRNA = Ischemic Preconditioning
Ischemic -->  [] 

Preconditioning -->  [] 

-------------------------27-th new MDA--------------------------------------------
miRNA = hsa-mir-29a  ||  disease_of_miRNA = Huntington Disease
Huntington -->  [] 

Disease -->  [['hsa-mir-29a' nan 'Parkinson Disease' nan]
 ['hsa-mir-29a' nan 'Alzheimer Disease' nan]
 ['hsa-mir-29a' nan 'Machado-Joseph Disease' nan]
 ['hsa-mir-29a' nan 'Liver Diseases [unspecific]' nan]
 ['hsa-mir-29a' nan 'Crohn Disease' nan]
 ['hsa-mir-29a' nan 'Kidney Diseases [unspecific]' nan]
 ['hsa-mir-29a' nan 'Vascular Disease [unspecific]' nan]
 ['hsa-mir-29a' nan 'Heart Valve Disease' nan]
 ['hsa-mir-29a' nan 'Mitochondrial Metabolism Disease' nan]
 ['hsa-mir-29a' nan 'Autoimmune Diseases [unspecific]' nan]
 ['hsa-mir-29a' nan 'Kidney Diseases' nan]
 ['hsa-mir-29a' nan 'Liver Diseases' nan]] 

-------------------------28-th new MDA--------------------------------------------
miRNA = hsa-mir-19

100%|██████████| 51/51 [00:00<00:00, 52.70it/s]

Fibrosis -->  [] 

-------------------------36-th new MDA--------------------------------------------
miRNA = hsa-mir-423  ||  disease_of_miRNA = Endometriosis
Endometriosis -->  [] 

-------------------------37-th new MDA--------------------------------------------
miRNA = hsa-mir-15b  ||  disease_of_miRNA = Diabetic Nephropathies
Diabetic -->  [['hsa-mir-15b' nan 'Diabetic Nephropathy' nan]] 

Nephropathies -->  [] 

-------------------------38-th new MDA--------------------------------------------
miRNA = hsa-mir-378f  ||  disease_of_miRNA = Diabetes Complications
Diabetes -->  [] 

Complications -->  [] 

-------------------------39-th new MDA--------------------------------------------
miRNA = hsa-mir-198  ||  disease_of_miRNA = Cystic Fibrosis
Cystic -->  [] 

Fibrosis -->  [] 

-------------------------40-th new MDA--------------------------------------------
miRNA = hsa-mir-1184  ||  disease_of_miRNA = Colorectal Neoplasms, Hereditary Nonpolyposis
Colorectal -->  [] 

Neoplasms




In [11]:
results_Td, new_MDA_Td, new_MDA_simplied_Td, database_data_miRNA_in_results_Td = run(task = 'Td', database_data = database_data)

  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
  4%|▍         | 3/68 [00:00<00:02, 27.30it/s]

-------------------------0-th new MDA--------------------------------------------
miRNA = hsa-mir-324  ||  disease_of_miRNA = Vascular Calcification
Vascular -->  [] 

Calcification -->  [] 

-------------------------1-th new MDA--------------------------------------------
miRNA = hsa-mir-582  ||  disease_of_miRNA = Thyroid Neoplasms
Thyroid -->  [] 

Neoplasms -->  [['hsa-mir-582' nan 'Lung Neoplasms' nan]
 ['hsa-mir-582' nan 'Prostate Neoplasms' nan]] 

-------------------------2-th new MDA--------------------------------------------
miRNA = hsa-mir-342  ||  disease_of_miRNA = Stroke
Stroke -->  [] 

-------------------------3-th new MDA--------------------------------------------
miRNA = hsa-mir-640  ||  disease_of_miRNA = Skin Neoplasms
Skin -->  [] 

Neoplasms -->  [['hsa-mir-640' nan 'Gastric Neoplasms' nan]
 ['hsa-mir-640' nan 'Stomach Neoplasms' nan]] 

-------------------------4-th new MDA--------------------------------------------
miRNA = hsa-mir-98  ||  disease_of_miRNA = S

 19%|█▉        | 13/68 [00:00<00:01, 31.11it/s]

Salivary -->  [['hsa-mir-15a' nan 'Salivary Gland Neoplasms' nan]] 

Gland -->  [['hsa-mir-15a' nan 'Salivary Gland Neoplasms' nan]] 

Neoplasms -->  [['hsa-mir-15a' nan 'Pituitary Neoplasms' nan]
 ['hsa-mir-15a' nan 'Prostate Neoplasms' nan]
 ['hsa-mir-15a' nan 'Kidney Neoplasms' nan]
 ['hsa-mir-15a' nan 'Breast Neoplasms' nan]
 ['hsa-mir-15a' nan 'Neoplasms [unspecific]' nan]
 ['hsa-mir-15a' nan 'Hematologic Neoplasms' nan]
 ['hsa-mir-15a' nan 'Gastric Neoplasms' nan]
 ['hsa-mir-15a' nan 'Colon Neoplasms' nan]
 ['hsa-mir-15a' nan 'Laryngeal Neoplasms' nan]
 ['hsa-mir-15a' nan 'Head And Neck Neoplasms' nan]
 ['hsa-mir-15a' nan 'Lung Neoplasms' nan]
 ['hsa-mir-15a' nan 'Myeloproliferative Neoplasms' nan]
 ['hsa-mir-15a' nan 'Pancreatic Neoplasms' nan]
 ['hsa-mir-15a' nan 'Salivary Gland Neoplasms' nan]
 ['hsa-mir-15a' nan 'Bladder Neoplasms' nan]
 ['hsa-mir-15a' nan 'Esophageal Neoplasms' nan]
 ['hsa-mir-15a' nan 'Prostatic Neoplasms' nan]
 ['hsa-mir-15a' nan 'Head and Neck Neoplasms' 

 29%|██▉       | 20/68 [00:00<00:01, 31.47it/s]

Pigmented -->  [] 

-------------------------14-th new MDA--------------------------------------------
miRNA = hsa-mir-4257  ||  disease_of_miRNA = Nervous System Diseases
Nervous -->  [] 

System -->  [] 

Diseases -->  [] 

-------------------------15-th new MDA--------------------------------------------
miRNA = hsa-mir-449b  ||  disease_of_miRNA = Myocytes, Cardiac
Myocytes, -->  [] 

Cardiac -->  [] 

-------------------------16-th new MDA--------------------------------------------
miRNA = hsa-mir-181d  ||  disease_of_miRNA = Musculoskeletal Abnormalities
Musculoskeletal -->  [] 

Abnormalities -->  [] 

-------------------------17-th new MDA--------------------------------------------
miRNA = hsa-mir-938  ||  disease_of_miRNA = Muscular Dystrophy, Facioscapulohumeral
Muscular -->  [] 

Dystrophy, -->  [] 

Facioscapulohumeral -->  [] 

-------------------------18-th new MDA--------------------------------------------
miRNA = hsa-mir-449b  ||  disease_of_miRNA = Mouth Neoplasms
M

 34%|███▍      | 23/68 [00:00<00:01, 26.29it/s]

miRNA = hsa-mir-769  ||  disease_of_miRNA = Mastocytosis, Systemic
Mastocytosis, -->  [] 

Systemic -->  [] 

-------------------------21-th new MDA--------------------------------------------
miRNA = hsa-mir-654  ||  disease_of_miRNA = Lymphoma, Large-Cell, Anaplastic
Lymphoma, -->  [] 

Large-Cell, -->  [] 

Anaplastic -->  [] 

-------------------------22-th new MDA--------------------------------------------
miRNA = hsa-mir-1293  ||  disease_of_miRNA = Lymphoma
Lymphoma -->  [] 

-------------------------23-th new MDA--------------------------------------------
miRNA = hsa-mir-151a  ||  disease_of_miRNA = Lung Diseases
Lung -->  [['hsa-mir-151a' nan 'Carcinoma, Lung, Non-Small-Cell' nan]
 ['hsa-mir-151a' nan 'Adenocarcinoma, Lung' nan]
 ['hsa-mir-151a' nan 'Carcinoma, Non-Small-Cell Lung' nan]] 

Diseases -->  [['hsa-mir-151a' nan 'Cardiovascular Diseases [unspecific]' nan]] 

-------------------------24-th new MDA--------------------------------------------
miRNA = hsa-mir-19a  ||

 49%|████▊     | 33/68 [00:00<00:01, 33.17it/s]

Acute -->  [['hsa-mir-150' nan 'Leukemia, Myeloid, Acute' nan]
 ['hsa-mir-150' nan 'Leukemia, Lymphoblastic, Acute' nan]
 ['hsa-mir-150' nan 'Leukemia, Acute' nan]
 ['hsa-mir-150' nan 'Acute Myocardial Infarction' nan]] 

-------------------------27-th new MDA--------------------------------------------
miRNA = hsa-mir-641  ||  disease_of_miRNA = Leiomyosarcoma
Leiomyosarcoma -->  [] 

-------------------------28-th new MDA--------------------------------------------
miRNA = hsa-mir-1280  ||  disease_of_miRNA = Leiomyosarcoma
Leiomyosarcoma -->  [] 

-------------------------29-th new MDA--------------------------------------------
miRNA = hsa-mir-655  ||  disease_of_miRNA = Kidney Neoplasms
Kidney -->  [] 

Neoplasms -->  [['hsa-mir-655' nan 'Pituitary Neoplasms' nan]] 

-------------------------30-th new MDA--------------------------------------------
miRNA = hsa-mir-520f  ||  disease_of_miRNA = Ischemia
Ischemia -->  [] 

-------------------------31-th new MDA-----------------------

 66%|██████▌   | 45/68 [00:01<00:00, 38.52it/s]

 -->  [] 

-------------------------36-th new MDA--------------------------------------------
miRNA = hsa-mir-448  ||  disease_of_miRNA = Helplessness, Learned
Helplessness, -->  [] 

Learned -->  [] 

-------------------------37-th new MDA--------------------------------------------
miRNA = hsa-mir-147a  ||  disease_of_miRNA = Hearing Loss
Hearing -->  [] 

Loss -->  [] 

-------------------------38-th new MDA--------------------------------------------
miRNA = hsa-mir-611  ||  disease_of_miRNA = HCMV Infection
HCMV -->  [] 

Infection -->  [] 

-------------------------39-th new MDA--------------------------------------------
miRNA = hsa-mir-758  ||  disease_of_miRNA = Graft vs Host Disease
Graft -->  [] 

vs -->  [] 

Host -->  [] 

Disease -->  [['hsa-mir-758' nan 'Cardiovascular Diseases [unspecific]' nan]] 

-------------------------40-th new MDA--------------------------------------------
miRNA = hsa-mir-200c  ||  disease_of_miRNA = Gout
Gout -->  [] 

-------------------------4

 74%|███████▎  | 50/68 [00:01<00:00, 35.78it/s]

Endometriosis -->  [] 

-------------------------47-th new MDA--------------------------------------------
miRNA = hsa-mir-30c  ||  disease_of_miRNA = Encephalomyelitis, Autoimmune, Experimental
Encephalomyelitis, -->  [] 

Autoimmune, -->  [] 

Experimental -->  [] 

-------------------------48-th new MDA--------------------------------------------
miRNA = hsa-mir-1293  ||  disease_of_miRNA = Digestive System Neoplasms
Digestive -->  [] 

System -->  [] 

Neoplasms -->  [] 

-------------------------49-th new MDA--------------------------------------------
miRNA = hsa-mir-873  ||  disease_of_miRNA = Diabetic Retinopathy
Diabetic -->  [] 

Retinopathy -->  [] 

-------------------------50-th new MDA--------------------------------------------
miRNA = hsa-mir-15b  ||  disease_of_miRNA = Diabetic Nephropathies
Diabetic -->  [['hsa-mir-15b' nan 'Diabetic Nephropathy' nan]] 

Nephropathies -->  [] 

-------------------------51-th new MDA--------------------------------------------
miRNA = 

 87%|████████▋ | 59/68 [00:01<00:00, 35.18it/s]

Disorders, -->  [] 

Pervasive -->  [] 

-------------------------54-th new MDA--------------------------------------------
miRNA = hsa-mir-337  ||  disease_of_miRNA = Cervical Neoplasms
Cervical -->  [['hsa-mir-337' nan 'Carcinoma, Cervical' nan]] 

Neoplasms -->  [['hsa-mir-337' nan 'Gastric Neoplasms' nan]
 ['hsa-mir-337' nan 'Salivary Gland Neoplasms' nan]
 ['hsa-mir-337' nan 'Colorectal Neoplasms' nan]] 

-------------------------55-th new MDA--------------------------------------------
miRNA = hsa-mir-625  ||  disease_of_miRNA = Central Nervous System Diseases
Central -->  [] 

Nervous -->  [] 

System -->  [] 

Diseases -->  [] 

-------------------------56-th new MDA--------------------------------------------
miRNA = hsa-mir-655  ||  disease_of_miRNA = Carcinoma, Squamous Cell
Carcinoma, -->  [['hsa-mir-655' nan 'Carcinoma, Renal Cell' nan]
 ['hsa-mir-655' nan 'Squamous Cell Carcinoma, Oral' nan]
 ['hsa-mir-655' nan 'Carcinoma, Hepatocellular' nan]
 ['hsa-mir-655' nan 'Squamou

100%|██████████| 68/68 [00:01<00:00, 36.35it/s]

Adrenocortical -->  [] 

Adenoma -->  [] 

-------------------------65-th new MDA--------------------------------------------
miRNA = hsa-mir-181d  ||  disease_of_miRNA = Adenocarcinoma
Adenocarcinoma -->  [] 

-------------------------66-th new MDA--------------------------------------------
miRNA = hsa-mir-451b  ||  disease_of_miRNA = Acute Lung Injury
Acute -->  [] 

Lung -->  [['hsa-mir-451b' nan 'Carcinoma, Lung, Non-Small-Cell' nan]
 ['hsa-mir-451b' nan 'Carcinoma, Non-Small-Cell Lung' nan]] 

Injury -->  [] 

-------------------------67-th new MDA--------------------------------------------
miRNA = hsa-mir-1273a  ||  disease_of_miRNA = Abortion, Habitual
Abortion, -->  [] 

Habitual -->  [] 






# 整理

In [14]:
results_Td

Unnamed: 0,miRNA_x,disease_x,y_true,y_pred,y_prob,miRNA_y,disease_y
8048,116,383,1.0,1.0,0.999104,hsa-mir-483,['Wounds and Injuries']
8049,9,383,1.0,1.0,0.999097,hsa-mir-143,['Wounds and Injuries']
8037,148,383,1.0,1.0,0.999097,hsa-mir-9,['Wounds and Injuries']
8036,14,383,1.0,1.0,0.999079,hsa-mir-21,['Wounds and Injuries']
8034,10,383,1.0,1.0,0.998415,hsa-mir-145,['Wounds and Injuries']
...,...,...,...,...,...,...,...
8554,482,1,0.0,0.0,0.001152,hsa-mir-570,"['Abortion, Habitual']"
8543,24,1,0.0,0.0,0.001151,hsa-mir-126,"['Abortion, Habitual']"
8551,164,1,0.0,0.0,0.001149,hsa-let-7i,"['Abortion, Habitual']"
8553,454,1,0.0,0.0,0.001148,hsa-mir-767,"['Abortion, Habitual']"


In [13]:
results_Td[results_Td['y_true'] == 1][results_Td['y_pred'] == 1]

  """Entry point for launching an IPython kernel.


Unnamed: 0,miRNA_x,disease_x,y_true,y_pred,y_prob,miRNA_y,disease_y
8048,116,383,1.0,1.0,0.999104,hsa-mir-483,['Wounds and Injuries']
8049,9,383,1.0,1.0,0.999097,hsa-mir-143,['Wounds and Injuries']
8037,148,383,1.0,1.0,0.999097,hsa-mir-9,['Wounds and Injuries']
8036,14,383,1.0,1.0,0.999079,hsa-mir-21,['Wounds and Injuries']
8034,10,383,1.0,1.0,0.998415,hsa-mir-145,['Wounds and Injuries']
...,...,...,...,...,...,...,...
3543,4,2,1.0,1.0,0.998640,hsa-mir-198,['Acquired Immunodeficiency Syndrome']
3546,6,2,1.0,1.0,0.998612,hsa-mir-29b,['Acquired Immunodeficiency Syndrome']
8537,2,1,1.0,1.0,0.999107,hsa-mir-196a,"['Abortion, Habitual']"
8541,3,1,1.0,1.0,0.999105,hsa-mir-499a,"['Abortion, Habitual']"


# Nouse

In [80]:
new_MDA_Tp_balanced_simplied_2 = pd.DataFrame(new_MDA_Tp_balanced_simplied.groupby('miRNA_y')['disease_newMDA'].apply(list))
database_data_miRNA_in_Tp_balanced_simplied = pd.DataFrame(database_data_miRNA_in_Tp_balanced.groupby('miRNA')['disease'].apply(list))
new_MDA_Tp_balanced__database_data__merged = pd.merge(new_MDA_Tp_balanced_simplied_2, database_data_miRNA_in_Tp_balanced_simplied, left_index = True, right_index = True)
new_MDA_Tp_balanced__database_data__merged

Unnamed: 0_level_0,disease_newMDA,disease
miRNA_y,Unnamed: 1_level_1,Unnamed: 2_level_1
hsa-mir-100,[Cocaine-Related Disorders],"[Carcinoma, Adrenocortical, Muscle Atrophy, At..."
hsa-mir-10b,[Sepsis],"[Breast Neoplasms, Leukemia, Huntington Diseas..."
hsa-mir-1181,[Oligodendroglioma],"[Hepatitis C Virus Infection, Pancreatic Neopl..."
hsa-mir-1245a,[Hepatitis],[Breast Neoplasms]
hsa-mir-1273a,"[Abortion, Habitual]","[Adenovirus Infection, Adenoviridae Infections]"
...,...,...
hsa-mir-758,[Graft vs Host Disease],"[HELLP Syndrome, Systemic Lupus Erythematosus,..."
hsa-mir-769,"[Mastocytosis, Systemic]","[Lupus Vulgaris, Squamous Cell Carcinoma, Oral..."
hsa-mir-873,[Diabetic Retinopathy],"[Ectopic Pregnancy, Liver Cirrhosis, Johne Dis..."
hsa-mir-922,"[Nevus, Pigmented]","[Breast Neoplasms, Alzheimer Disease, Carcinom..."


In [81]:
new_MDA_table = new_MDA_Tp_balanced__database_data__merged['disease_newMDA']
database_data_table = new_MDA_Tp_balanced__database_data__merged['disease']
new_MDA_table

miRNA_y
hsa-mir-100      [Cocaine-Related Disorders]
hsa-mir-10b                         [Sepsis]
hsa-mir-1181             [Oligodendroglioma]
hsa-mir-1245a                    [Hepatitis]
hsa-mir-1273a           [Abortion, Habitual]
                            ...             
hsa-mir-758          [Graft vs Host Disease]
hsa-mir-769         [Mastocytosis, Systemic]
hsa-mir-873           [Diabetic Retinopathy]
hsa-mir-922               [Nevus, Pigmented]
hsa-mir-942                       [Ischemia]
Name: disease_newMDA, Length: 75, dtype: object

In [133]:
temp.loc[temp[0].str.contains(s)].values

array([['Muscular Disorders, Atrophic']], dtype=object)

In [111]:
newMDA_disease = new_MDA_table[0][0].split(' ')
temp = pd.DataFrame(database_data_table[0])
for s in newMDA_disease:
    print(s)
    print(temp.loc[temp[0].str.contains(s)])

Cocaine-Related
Empty DataFrame
Columns: [0]
Index: []
Disorders
                               0
66  Muscular Disorders, Atrophic


In [94]:
database_disease = ','.join(database_data_table[0])
database_disease

'Carcinoma, Adrenocortical,Muscle Atrophy,Atherosclerosis,Adrenal Cortex Neoplasms,Breast Neoplasms,Acute Coronary Syndrome,Coronary Atherosclerosis,Diabetes Mellitus, Type 1,Esophageal Neoplasms,Recurrent Spontaneous Abortion,Leukemia-Lymphoma, Precursor T-Cell Lymphoblastic,Carcinoma, Lung, Non-Small-Cell,Squamous Cell Carcinoma, Esophageal,Urinary Bladder Cancer,Carcinoma, Hepatocellular,Bladder Neoplasms,Gastric Neoplasms,Leukemia, Myeloid, Acute,Prostate Neoplasms,Carcinoma, Renal Cell,Ovarian Neoplasms,Osteosarcoma,Heart Failure,Leukemia, Lymphoblastic, Acute,Vulvar Squamous Cell Carcinoma,Neoplasms [unspecific],Pancreatic Neoplasms,Cardiovascular Diseases [unspecific],Colorectal Carcinoma,Inflammation,Carcinoma, Rectal,Peritoneal Dialysis Failure,Vascular Injuries,Carcinoma, Ovarian,Infection [unspecific],Carcinoma, Bladder,Carcinoma, Gastric,Squamous Cell Carcinoma, Head and Neck,Melanoma,Carcinoma, Colon,Pneumonia,Nasopharyngeal Neoplasms,Endometrial Neoplasms,Lung Neoplasms,K

In [90]:
for i in range(len(new_MDA_table[0])):
    for j in range(len(database_data_table[0])):
        a = new_MDA_table[0][i]
        b = database_data_table[0][j]
        temp = re.findall(a, b)
        print(a, '\t', b)

Cocaine-Related Disorders 	 Carcinoma, Adrenocortical
Cocaine-Related Disorders 	 Muscle Atrophy
Cocaine-Related Disorders 	 Atherosclerosis
Cocaine-Related Disorders 	 Adrenal Cortex Neoplasms
Cocaine-Related Disorders 	 Breast Neoplasms
Cocaine-Related Disorders 	 Acute Coronary Syndrome
Cocaine-Related Disorders 	 Coronary Atherosclerosis
Cocaine-Related Disorders 	 Diabetes Mellitus, Type 1
Cocaine-Related Disorders 	 Esophageal Neoplasms
Cocaine-Related Disorders 	 Recurrent Spontaneous Abortion
Cocaine-Related Disorders 	 Leukemia-Lymphoma, Precursor T-Cell Lymphoblastic
Cocaine-Related Disorders 	 Carcinoma, Lung, Non-Small-Cell
Cocaine-Related Disorders 	 Squamous Cell Carcinoma, Esophageal
Cocaine-Related Disorders 	 Urinary Bladder Cancer
Cocaine-Related Disorders 	 Carcinoma, Hepatocellular
Cocaine-Related Disorders 	 Bladder Neoplasms
Cocaine-Related Disorders 	 Gastric Neoplasms
Cocaine-Related Disorders 	 Leukemia, Myeloid, Acute
Cocaine-Related Disorders 	 Prostate Neopl