# DeepASM
## Import packages

In [19]:
import sys

# Python packages for data, stats, and visualization
from matplotlib import pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
import seaborn as sns 

# Machine learning libraries
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler
import tensorflow as tf
from tensorflow import keras
#from tensorflow.keras.models import load_model
from tensorflow.keras import layers
import sklearn
from sklearn import metrics
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

# Kernel functions
from sklearn.neighbors import KernelDensity
from numpy import asarray
from matplotlib import pyplot
from numpy import exp

# Dimensionality reduction
from sklearn.decomposition import PCA, KernelPCA, NMF, TruncatedSVD
from sklearn.manifold import TSNE, LocallyLinearEmbedding, SpectralEmbedding


 
# Figure parameters: apply all notebook-wide matplotlib defaults in one call
mpl.rcParams.update({
    'figure.figsize': (10, 10),
    'axes.titlesize': 15,
    'axes.labelsize': 12,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
})

# Colors of matplotlib's current property cycle, exposed as `colors`
_prop_cycle_by_key = plt.rcParams['axes.prop_cycle'].by_key()
colors = _prop_cycle_by_key['color']

In [20]:
# Report the interpreter and library versions, plus how many GPUs TensorFlow can see
print(sys.version)

for _label, _value in (
    ("TensorFlow version:", tf.__version__),
    ("Keras version:", keras.__version__),
    ("Numpy version:", np.__version__),
    # The trailing space in this label is kept so the printed line is unchanged
    ("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU'))),
):
    print(_label, _value)

3.7.12 | packaged by conda-forge | (default, Oct 26 2021, 06:08:53) 
[GCC 9.4.0]
TensorFlow version: 2.7.0
Keras version: 2.7.0
Numpy version: 1.19.5
Num GPUs Available:  1


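## Download the training data

List the training JSON files stored in the `deepasm` bucket, copy them locally with `gsutil`, and load them into a single DataFrame.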

In [21]:
!gsutil ls gs://deepasm/$GENOMIC_INTERVAL*bp/encode_training_data/*.json > list_to_download.txt
files_to_download_df = pd.read_csv('list_to_download.txt', header=None)

imported_df = pd.DataFrame()

for index_file in range(1):
    file_name_bucket = files_to_download_df[0][index_file]
    local_file_name = "training_" + str(index_file) + ".json"
    print("Downloading the file as:", local_file_name)
    !gsutil cp $file_name_bucket $local_file_name
    print("Appending file...")
    imported_df = imported_df.append(pd.read_json(local_file_name, lines = True))

Downloading the file as: training_0.json
Copying gs://deepasm/1000bp/encode_training_data/encode_training-000000000000.json...
\ [1 files][257.9 MiB/257.9 MiB]                                                
Operation completed over 1 objects/257.9 MiB.                                    
Appending file...


In [22]:
# Show the loaded training data (the bare last expression is rendered as the cell output)
imported_df

Unnamed: 0,asm_snp,sample_category,sample,chr,region_inf,region_sup,snp_id,snp_pos,wilcoxon_corr_pvalue,asm_region_effect,...,dnase,encode_ChiP_V2,tf_motifs,global_cpg_fm,tot_nb_cpg,tot_nb_reads,read_fm,cpg_fm,cpg_cov,cpg_pos
0,0,1,fibroblast,1,119313001,119314000,rs113337685,119313548,0.85234,0.013,...,0,0,0,0.632,24030712,420944333,"[0, 0, 1, 0.5, 0.667, 1, 1, 1, 1, 1, 1, 1, 0, ...","[0.463, 0.981, 1, 0.796, 0.927, 0.943000000000...","[54, 53, 58, 54, 55, 53]","[119313416, 119313421, 119313464, 119313512, 1..."
1,0,1,fibroblast,1,211213001,211214000,rs4314946,211213286,0.86570,-0.050,...,0,0,0,0.632,24030712,420944333,"[1, 0, 0, 0.5, 0, 0, 0, 0, 0, 0, 0, 0.25, 0, 0...","[0.182, 0.2, 0.23800000000000002, 0.2380000000...","[22, 10, 21, 21, 19, 21, 15, 14, 23, 10, 34, 1...","[211213024, 211213259, 211213269, 211213313, 2..."
2,0,1,fibroblast,4,17661001,17662000,rs2302389,17661889,0.06020,0.600,...,2,0,1,0.632,24030712,420944333,"[1, 0, 0, 0, 1, 1, 1, 1, 0.75, 1, 1, 0.667, 1,...","[0.857, 0.794, 0.967, 0.862, 0.677, 0.903, 0.9...","[28, 34, 61, 29, 31, 31, 32, 31, 33, 44, 40, 4...","[17661167, 17661172, 17661178, 17661234, 17661..."
3,0,1,fibroblast,5,55314001,55315000,rs11741161,55314248,1.00000,0.000,...,1,0,2,0.632,24030712,420944333,"[1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, ...","[0.444, 0.975, 0.97, 0.714, 1, 1, 1, 0.976, 0....","[27, 40, 33, 28, 31, 38, 38, 41, 29, 33, 65]","[55314039, 55314070, 55314084, 55314091, 55314..."
4,0,1,fibroblast,1,81647001,81648000,rs4650362,81647246,0.36542,-0.158,...,1,0,0,0.632,24030712,420944333,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0.667, 0, 1, 0.25,...","[0.191, 0.13, 0.396, 0.396, 0.341, 0.245, 0.42...","[47, 46, 48, 48, 44, 53, 63, 36]","[81647176, 81647266, 81647334, 81647369, 81647..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
214668,0,0,t_cell_male_adult,20,42136001,42137000,rs189211171,42136458,0.22130,-0.190,...,3,10,71,0.768,21515317,254014923,"[0.222, 0, 0, 0, 0, 0, 0, 0.08700000000000001,...","[0.022, 0.011, 0.026000000000000002, 0.028, 0....","[93, 91, 38, 71, 76, 92, 90, 90, 82, 84, 94, 7...","[42136026, 42136050, 42136057, 42136059, 42136..."
214669,0,0,t_cell_male_adult,5,10455001,10456000,rs16884729,10455757,0.96124,-0.009,...,3,0,29,0.768,21515317,254014923,"[1, 1, 0.5, 1, 1, 0.714, 1, 1, 1, 0.8, 1, 1, 1...","[0.9580000000000001, 1, 0.934, 0.925, 0.971, 0...","[48, 34, 61, 40, 35, 97, 33, 52, 31, 42, 88, 4...","[10455097, 10455116, 10455134, 10455139, 10455..."
214670,0,0,t_cell_male_adult,19,21860001,21861000,rs12973337,21860723,0.97654,-0.035,...,2,0,18,0.768,21515317,254014923,"[1, 1, 1, 0, 0.875, 1, 1, 1, 1, 1, 1, 0, 1, 0,...","[0.976, 0.719, 0.6960000000000001, 0.739, 0.61...","[41, 64, 69, 69, 62, 68, 45, 68, 71, 67, 61, 7...","[21860047, 21860417, 21860423, 21860471, 21860..."
214671,0,0,t_cell_male_adult,17,62076001,62077000,rs62070903,62076077,0.77281,-0.008,...,4,0,0,0.768,21515317,254014923,"[1, 1, 1, 1, 0.667, 1, 1, 1, 0.167, 0.5, 0.111...","[0.218, 0.585, 0.034, 0, 0.935, 0.033, 0.146, ...","[55, 53, 58, 51, 46, 30, 48, 31, 54, 47, 30, 2...","[62076007, 62076009, 62076012, 62076052, 62076..."
