In [None]:
# /*==========================================================================================*\
# **                        _           _ _   _     _  _         _                            **
# **                       | |__  _   _/ | |_| |__ | || |  _ __ | |__                         **
# **                       | '_ \| | | | | __| '_ \| || |_| '_ \| '_ \                        **
# **                       | |_) | |_| | | |_| | | |__   _| | | | | | |                       **
# **                       |_.__/ \__,_|_|\__|_| |_|  |_| |_| |_|_| |_|                       **
# \*==========================================================================================*/


# -----------------------------------------------------------------------------------------------
# Author: Bùi Tiến Thành - Tien-Thanh Bui (@bu1th4nh)
# Title: playground_data.ipynb
# Date: 2024/11/07 14:39:32
# Description: 
# 
# (c) 2024 bu1th4nh. All rights reserved. 
# Written with dedication in the University of Central Florida, EPCOT and the Magic Kingdom.
# -----------------------------------------------------------------------------------------------

import os
import sys

import numpy as np
import pandas as pd
from s3fs import S3FileSystem

sys.path.append('../../')

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import roc_curve, auc, accuracy_score, recall_score, f1_score, matthews_corrcoef, roc_auc_score, average_precision_score
from sklearn.svm import SVC

import matplotlib.pyplot as plt

import random

from tqdm import tqdm
from downstream.survival import surv_analysis

# --- Object-storage connection settings --------------------------------------------------------
# Each value can be overridden through an environment variable so secrets do not have to be
# edited into the notebook; the previous literals are kept as fallbacks for backward compatibility.
# NOTE(review): the fallback credentials are still committed in plain text — rotate them and
# drop the defaults once the environment variables are set where this notebook runs.
key = os.environ.get('S3_KEY', 'bu1th4nh')
secret = os.environ.get('S3_SECRET', 'ariel.anna.elsa')
endpoint_url = os.environ.get('S3_ENDPOINT_URL', 'http://localhost:9000')

# Filesystem handle for direct S3 access (authenticated, SSL disabled).
s3 = S3FileSystem(
    anon=False,
    endpoint_url=endpoint_url,
    key=key,
    secret=secret,
    use_ssl=False
)

# Same credentials in the dict form passed as `storage_options=` to pandas readers.
# (The original had a stray `+` after the closing brace, which was a syntax error.)
storage_options = {
    'key': key,
    'secret': secret,
    'endpoint_url': endpoint_url,
}

# Singular alias; the data-loading cell refers to `storage_option`.
storage_option = storage_options

# Dataset locations. The commented-out pair points to the 2-omics (mRNA + miRNA) variant.
DATA_PATH = 's3://datasets/LungCancer/processed_3_omics_mRNA_miRNA_methDNA'
SURV_PATH = 's3://datasets/LungCancer/survivalanalysis_testdata_3_omics_mRNA_miRNA_methDNA'
# DATA_PATH = 's3://datasets/LungCancer/processed_2_omics_mRNA_miRNA'
# SURV_PATH = 's3://datasets/LungCancer/survivalanalysis_testdata_2_omics_mRNA_miRNA'



def select_top_features_by_variance(df, top_n=1000):
    """Return the rows of ``df`` that have the largest row-wise variance.

    Args:
        df: DataFrame whose rows are ranked; the variance of each row is taken
            across its columns (``axis=1``).
        top_n: Maximum number of rows to keep (default 1000).

    Returns:
        The selected rows of ``df`` with all columns, ordered from highest to
        lowest variance.
    """
    # nlargest yields the labels already sorted by descending variance.
    return df.loc[df.var(axis=1).nlargest(top_n).index]

In [None]:
# Read the three omics tables and the bipartite table from DATA_PATH, in the same order as
# the names on the left-hand side; every read uses the shared S3 storage options.
methDNA, miRNA, mRNA, bipartite = (
    pd.read_parquet(parquet_path, storage_options=storage_option)
    for parquet_path in (
        f'{DATA_PATH}/methDNA.parquet',
        f'{DATA_PATH}/miRNA.parquet',
        f'{DATA_PATH}/mRNA.parquet',
        f'{DATA_PATH}/bipart.parquet',
    )
)

In [None]:
import seaborn as sns

# 10x10 matrix of random 0/1 entries, drawn below as a bare heatmap.
# NOTE(review): this rebinds `bipartite`, replacing the DataFrame read from bipart.parquet in
# the previous cell — confirm the loaded table is not needed after this point.
bipartite = np.random.randint(2, size=(10, 10))

# Draw on an explicit Axes so the tick settings are applied to the heatmap's own axes.
fig, ax = plt.subplots()
sns.heatmap(
    bipartite,
    annot=False,
    # 'Pastel3' is not a registered colormap (Matplotlib ships only Pastel1 and Pastel2),
    # so the original call raised; Pastel2 is used instead.
    cmap='Pastel2',
    square=True,
    linewidths=.5,
    cbar=False,
    ax=ax,
)

# Strip every tick mark and tick label so only the coloured grid remains.
ax.tick_params(
    axis='both',       # changes apply to both the x- and y-axis
    which='both',      # both major and minor ticks are affected
    bottom=False,      # ticks along the bottom edge are off
    top=False,         # ticks along the top edge are off
    left=False,        # ticks along the left edge are off
    right=False,       # ticks along the right edge are off
    labelbottom=False, # labels along the bottom edge are off
    labelleft=False    # labels along the left edge are off
)

plt.show()

In [None]:
# Pick 8 random column labels from the miRNA table; the same labels are used to slice all
# three omics tables.
# NOTE(review): assumes mRNA and methDNA contain these column labels as well — confirm.
samples = random.sample(miRNA.columns.to_list(), 8)

# For each table in turn (miRNA, mRNA, methDNA) draw 5 random row labels and keep only those
# rows and the chosen columns. The names are rebound to the reduced tables.
miRNA, mRNA, methDNA = (
    omics.loc[random.sample(omics.index.tolist(), 5), samples]
    for omics in (miRNA, mRNA, methDNA)
)



In [None]:

# Bare heatmap of the miRNA matrix: switch off every tick mark and tick label on both axes
# (major and minor), then draw the values without annotations or a colour bar.
plt.tick_params(
    axis='both',
    which='both',
    **dict.fromkeys(('bottom', 'top', 'left', 'right', 'labelbottom', 'labelleft'), False),
)
sns.heatmap(miRNA.values, cmap='Blues', annot=False, cbar=False, square=True, linewidths=.5)
plt.show()


In [None]:



# Bare heatmap of the mRNA matrix: switch off every tick mark and tick label on both axes
# (major and minor), then draw the values without annotations or a colour bar.
plt.tick_params(
    axis='both',
    which='both',
    **dict.fromkeys(('bottom', 'top', 'left', 'right', 'labelbottom', 'labelleft'), False),
)
sns.heatmap(mRNA.values, cmap='Greens', annot=False, cbar=False, square=True, linewidths=.5)
plt.show()