In [1]:
import pandas as pd
import numpy as np

import scanpy as sc # to read seurat object (so.Robj)
import scipy
import anndata

import matplotlib.pyplot as plt
import seaborn as sns
import fastcluster

from tqdm import tqdm

# Use seaborn's theme as the matplotlib default, with enlarged fonts.
# This must be a single call: `sns.set` is an alias of `sns.set_theme`, so a
# following bare `sns.set_theme()` would reset font_scale back to its default.
sns.set_theme(font_scale = 1.5)

import os

from neo4j import GraphDatabase

In [2]:
# Connection settings are read from the environment so that credentials do not
# have to be committed with the notebook; the fallbacks keep the previous
# local-development values working unchanged.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "snf")

driver = GraphDatabase.driver(uri = NEO4J_URI, auth = (NEO4J_USER, NEO4J_PASSWORD))
# Fail here, not in a later cell, if the database cannot be reached.
driver.verify_connectivity()

## Constraints

In [3]:
# Enforce a uniqueness constraint on `id` for :cardiomyocite nodes.
from neo4j.exceptions import ClientError

query = "CREATE CONSTRAINT ON (c:cardiomyocite) ASSERT c.id  IS UNIQUE"

try:
    with driver.session() as session:
        # consume() waits for the server's answer so that a failure is raised
        # inside this try block rather than later.
        info = session.run(query).consume()

except ClientError as err:
    # Only an "already exists" answer is expected here; anything else
    # (e.g. a syntax error) is a real problem and must not be hidden.
    if "AlreadyExists" not in (err.code or ""):
        raise
    with driver.session() as session:
        response = session.run("CALL db.constraints").data()
    print("uniqueness constraint already exists:", response)

uniqueness constraint already exists: [{'name': 'constraint_97965fd0', 'description': 'CONSTRAINT ON ( cardiomyocite:cardiomyocite ) ASSERT (cardiomyocite.id) IS UNIQUE', 'details': "Constraint( id=4, name='constraint_97965fd0', type='UNIQUENESS', schema=(:cardiomyocite {id}), ownedIndex=3 )"}]


## Deploying Nodes

In [3]:
# --- load the AnnData file ---
print("ETA: ~40 sec")
h5ad = "../scrna_data/stage5.h5ad"
seurat_clusters = sc.read_h5ad(h5ad)
print("h5ad import successful \n")

# --- per-cell metadata pulled from the observation table ---
obs_keys = [
    'orig.ident', 'nCount_RNA', 'nFeature_RNA',
    'percent.mt', 'percent.rpl', 'percent.rps',
    'time', 'location',
    'RNA_snn_res.0.5', 'seurat_clusters', 'RNA_snn_res.1.8',
]
metadata = sc.get.obs_df(seurat_clusters, keys = obs_keys)

# --- gene names (used as the expression columns) ---
gene_index = seurat_clusters.var.index.to_numpy()
print("first 5 genes:", gene_index[0:5])

# --- sample label and identifier of every cell ---
orig_ident = metadata["orig.ident"].to_numpy()
ident_index = metadata.index.to_numpy()
print("sample metadata:", orig_ident[0:5])

# --- expression matrix, stored as a scipy CSC sparse matrix ---
expr_data = scipy.sparse.csc_matrix(seurat_clusters.X)
print("\nexpr_data:", expr_data.shape)

# --- sparse-backed DataFrame (chosen because the data is too big otherwise),
#     rows labelled by cell id and columns by gene name ---
gene_expr_data = pd.DataFrame.sparse.from_spmatrix(
    expr_data,
    index = ident_index,
    columns = gene_index,
)
print("gene df:", gene_expr_data.shape)

# sample label of each cell, kept next to its expression values for filtering
gene_expr_data["ident"] = orig_ident


ETA: ~40 sec



This is where adjacency matrices should go now.
  warn(


h5ad import successful 

first 5 genes: ['TTN' 'LOC100513133' 'LOC110257246' 'CTDSP1' 'ANKRD1']
sample metadata: ['AR1_MI28_P30_8064AZ' 'AR1_MI28_P30_8064AZ' 'AR1_MI28_P30_8064AZ'
 'AR1_MI28_P30_8064AZ' 'AR1_MI28_P30_8064AZ']

expr_data: (121239, 2000)
gene df: (121239, 2000)


Filter Data/Metadata

In [4]:
# Distinct sample labels found in `orig_ident`. Despite its name, this list
# holds every label in the dataset, not only the double-injury ones.
double_injury_stages = list(set(orig_ident))


def _stages_with(tag):
    """Return the labels in `double_injury_stages` that contain the substring `tag`."""
    return [stage for stage in double_injury_stages if tag in stage]


# ---- control model ----
day1 = _stages_with("CTL-P1")
control_day28 = _stages_with("CTL-P28")
control_day56 = _stages_with("CTL-P56")
control_model = [day1, control_day28, control_day56]

print("Control Model:")
print("day1:", day1)
print("control_day28:", control_day28[0:5])
print("control_day56:", control_day56[0:5])

# ---- single (first) injury model ----
first_injury_day28 = _stages_with("AR1_P28")
first_injury_day56 = _stages_with("AR1_P56")
single_injury_model = [day1, first_injury_day28, first_injury_day56]

print("\nFirst Injury Model:")
print("day1:", day1)
print("first_injury_day28:", first_injury_day28[0:5])
print("first_injury_day56:", first_injury_day56[0:5])

# ---- double injury model ----
# Day 28 reuses the first-injury list (same list object, not a copy).
double_injury_day28 = first_injury_day28
double_injury_day30 = _stages_with("AR1_MI28_P30")
double_injury_day35 = _stages_with("AR1_MI28_P35")
double_injury_day42 = _stages_with("AR1_MI28_P42")
double_injury_day56 = _stages_with("AR1_MI28_P56")
double_injury_model = [
    day1,
    double_injury_day28,
    double_injury_day30,
    double_injury_day35,
    double_injury_day42,
    double_injury_day56,
]

print("\nDouble Injury Model:")
print("day1:", day1)
print("first_injury_day28:", first_injury_day28[0:5])
print("double_injury_day30:", double_injury_day30[0:5])
print("double_injury_day35:", double_injury_day35[0:5])
print("double_injury_day42:", double_injury_day42[0:5])
print("double_injury_day56:", double_injury_day56[0:5])

Control Model:
day1: ['CTL-P1_8026_p1', 'CTL-P1_8094', 'CTL-P1_8095']
control_day28: ['CTL-P28_8046_RZ', 'CTL-P28_8046_BZ']
control_day56: ['CTL-P56_8052_RZ', 'CTL-P56_8052_AZ']

First Injury Model:
day1: ['CTL-P1_8026_p1', 'CTL-P1_8094', 'CTL-P1_8095']
first_injury_day28: ['AR1_P28_8030_RZ', 'AR1_P28_8014BZ', 'AR1_P28_8014RZ', 'AR1_P28_\t8030_CZ']
first_injury_day56: ['AR1_P56_8097CZ', 'AR1_P56_8096RZ', 'AR1_P56_8097RZ', 'AR1_P56_8096CZ']

Double Injury Model:
day1: ['CTL-P1_8026_p1', 'CTL-P1_8094', 'CTL-P1_8095']
first_injury_day28: ['AR1_P28_8030_RZ', 'AR1_P28_8014BZ', 'AR1_P28_8014RZ', 'AR1_P28_\t8030_CZ']
double_injury_day30: ['AR1_MI28_P30_8064RZ', 'AR1_MI28_P30_8064AZ', 'AR1_MI28_P30_8064CZ']
double_injury_day35: ['AR1_MI28_P35_8065AZ', 'AR1_MI28_P35_8095RZ', 'AR1_MI28_P35_8065RZ', 'AR1_MI28_P35_8065CZ', 'AR1_MI28_P35_8095BZ']
double_injury_day42: ['AR1_MI28_P42_8094AZ', 'AR1_MI28_P42_8094BZ', 'AR1_MI28_P42_8094RZ']
double_injury_day56: ['AR1_MI28_P56_7995_BZ', 'AR1_MI28_P56_806

In [5]:
# Flatten each model's per-day groups into one flat list of sample labels,
# keeping the order of the groups and of the labels inside each group.
control = [stage for day_group in control_model for stage in day_group]
double_injury = [stage for day_group in double_injury_model for stage in day_group]
single_injury = [stage for day_group in single_injury_model for stage in day_group]

print(double_injury)

['CTL-P1_8026_p1', 'CTL-P1_8094', 'CTL-P1_8095', 'AR1_P28_8030_RZ', 'AR1_P28_8014BZ', 'AR1_P28_8014RZ', 'AR1_P28_\t8030_CZ', 'AR1_MI28_P30_8064RZ', 'AR1_MI28_P30_8064AZ', 'AR1_MI28_P30_8064CZ', 'AR1_MI28_P35_8065AZ', 'AR1_MI28_P35_8095RZ', 'AR1_MI28_P35_8065RZ', 'AR1_MI28_P35_8065CZ', 'AR1_MI28_P35_8095BZ', 'AR1_MI28_P35_8095AZ', 'AR1_MI28_P42_8094AZ', 'AR1_MI28_P42_8094BZ', 'AR1_MI28_P42_8094RZ', 'AR1_MI28_P56_7995_BZ', 'AR1_MI28_P56_8060AZ', 'AR1_MI28_P56_8060IZ', 'AR1_MI28_P56_8060RZ', 'AR1_MI28_P56_7995_RZ']


In [6]:
# Keep only the first metadata column and drop every other one.
extra_columns = list(metadata.columns[1:])
metadata = metadata.drop(columns = extra_columns)
metadata

Unnamed: 0,orig.ident
AR1_MI28_P30_8064AZ_TTTGTTGTCCATCTGC,AR1_MI28_P30_8064AZ
AR1_MI28_P30_8064AZ_TTTGTTGTCAAATGAG,AR1_MI28_P30_8064AZ
AR1_MI28_P30_8064AZ_TTTGTTGCATGGCCAC,AR1_MI28_P30_8064AZ
AR1_MI28_P30_8064AZ_TTTGTTGAGGGCAGAG,AR1_MI28_P30_8064AZ
AR1_MI28_P30_8064AZ_TTTGGTTTCACGTCCT,AR1_MI28_P30_8064AZ
...,...
FH_Embryo_FH3_TAAGCACTCATTGCGA,FH_Embryo_FH3
FH_Embryo_FH3_TAAGCCACAAGGTCGA,FH_Embryo_FH3
FH_Embryo_FH3_TCACATTTCTGGGCGT,FH_Embryo_FH3
FH_Embryo_FH3_TAAGCCATCGAAACAA,FH_Embryo_FH3


In [7]:
# Derive two per-cell columns from the sample label: the experiment arm and
# the day token (e.g. "P30").
experiment = []
cell_day = []

for i in tqdm(metadata["orig.ident"], desc = "adding node data"):
    # Experiment arm. The order of the checks matters: some labels (e.g. the
    # day-1 controls) are listed in more than one model and the first match wins.
    if i in control:
        experiment.append("control")
    elif i in single_injury:
        experiment.append("single_injury")
    elif i in double_injury:
        experiment.append("double_injury")
    else:
        experiment.append("fetal")

    # Day token: the three characters starting at the first "P", with any
    # underscore stripped from the ends ("P1_" -> "P1"); "NA" when the label has no "P".
    day_start = i.find("P")
    if day_start != -1:
        day = i[day_start: day_start + 3].strip("_")
        cell_day.append(day)
    else:
        cell_day.append("NA")

metadata["cell_day"] = cell_day
metadata["experiment"] = experiment
metadata["id"] = metadata.index.to_list()
# Move the cell id from the index into an "index" column so rows can be
# addressed by position.
metadata = metadata.reset_index()
metadata

adding node data: 100%|██████████| 121239/121239 [00:00<00:00, 850220.07it/s]


Unnamed: 0,index,orig.ident,cell_day,experiment,id
0,AR1_MI28_P30_8064AZ_TTTGTTGTCCATCTGC,AR1_MI28_P30_8064AZ,P30,double_injury,AR1_MI28_P30_8064AZ_TTTGTTGTCCATCTGC
1,AR1_MI28_P30_8064AZ_TTTGTTGTCAAATGAG,AR1_MI28_P30_8064AZ,P30,double_injury,AR1_MI28_P30_8064AZ_TTTGTTGTCAAATGAG
2,AR1_MI28_P30_8064AZ_TTTGTTGCATGGCCAC,AR1_MI28_P30_8064AZ,P30,double_injury,AR1_MI28_P30_8064AZ_TTTGTTGCATGGCCAC
3,AR1_MI28_P30_8064AZ_TTTGTTGAGGGCAGAG,AR1_MI28_P30_8064AZ,P30,double_injury,AR1_MI28_P30_8064AZ_TTTGTTGAGGGCAGAG
4,AR1_MI28_P30_8064AZ_TTTGGTTTCACGTCCT,AR1_MI28_P30_8064AZ,P30,double_injury,AR1_MI28_P30_8064AZ_TTTGGTTTCACGTCCT
...,...,...,...,...,...
121234,FH_Embryo_FH3_TAAGCACTCATTGCGA,FH_Embryo_FH3,,fetal,FH_Embryo_FH3_TAAGCACTCATTGCGA
121235,FH_Embryo_FH3_TAAGCCACAAGGTCGA,FH_Embryo_FH3,,fetal,FH_Embryo_FH3_TAAGCCACAAGGTCGA
121236,FH_Embryo_FH3_TCACATTTCTGGGCGT,FH_Embryo_FH3,,fetal,FH_Embryo_FH3_TCACATTTCTGGGCGT
121237,FH_Embryo_FH3_TAAGCCATCGAAACAA,FH_Embryo_FH3,,fetal,FH_Embryo_FH3_TAAGCCATCGAAACAA


Deploy Nodes

In [9]:
def create_nodes(tx, uid, orig_ident, cell_day, experiment) ->  None:
    """Create or update the `cardiomyocite` node whose `id` is `uid`.

    The node is matched on `id` alone and the remaining properties are then
    set. Matching on every property at once would try to create a second node
    with the same `id` whenever any other property differs from what is
    already stored.

    Args:
        tx: Transaction-like object exposing `run(query, **parameters)`.
        uid: Value stored in the node's `id` property.
        orig_ident: Value stored in the node's `orig_ident` property.
        cell_day: Value stored in the node's `cell_day` property.
        experiment: Value stored in the node's `experiment` property.
    """
    query = (
        "MERGE (p:cardiomyocite {id: $uid}) "
        "SET p.orig_ident = $orig_ident, p.cell_day = $cell_day, p.experiment = $experiment"
    )
    tx.run(query, uid = uid, orig_ident = orig_ident, cell_day = cell_day, experiment = experiment)

In [10]:
# Write one node per metadata row. A single session is opened for the whole
# loop and closed by the `with` block, instead of opening a new, never-closed
# session for every row.
node_rows = zip(
    metadata["index"],
    metadata["orig.ident"],
    metadata["cell_day"],
    metadata["experiment"],
)

with driver.session() as session:
    for node_id, node_ident, node_day, node_experiment in tqdm(node_rows, total = len(metadata), desc = "deploying nodes"):
        session.execute_write(create_nodes, node_id, node_ident, node_day, node_experiment)


deploying nodes:   7%|▋         | 8253/121239 [00:07<01:39, 1133.05it/s]


KeyboardInterrupt: 

## Preprocessing Cosine Similarity Scores as Edges - Only BZ Cardiomyocytes in the Double Injury Model

In [8]:
# Select the cells to compare: the sample label must be listed in
# `double_injury_stages` and the cell id must contain "BZ"; the label column
# is then dropped so only expression values remain.
# NOTE(review): `double_injury_stages` is built from set(orig_ident), i.e. every
# sample label, so the first condition appears to keep all cells (CTL-P28 rows
# show up in the displayed result) — confirm whether `double_injury` was intended.
in_listed_stage = gene_expr_data["ident"].isin(double_injury_stages)
in_border_zone = gene_expr_data.index.str.contains("BZ")

double_injury_gene_expression = (
    gene_expr_data[in_listed_stage & in_border_zone]
    .drop(columns = "ident")
)
double_injury_gene_expression

Unnamed: 0,TTN,LOC100513133,LOC110257246,CTDSP1,ANKRD1,RPP30,TPM1,MYH7,DDX5,RBPMS,...,LOC110261361,ELAVL2,LOC110260055,SH3GL2,LOC102166958,TRIM69,CRSP3,SLC25A31,LOC102161303,LOC100515185
AR1_MI28_P35_8095BZ_TTTGTTGCATATCTGG,0.467425,0.332108,0.731574,2.200207,0.613615,-1.013148,-0.407266,-0.572050,0.087421,-0.048497,...,-0.004879,-0.010554,-0.012284,-0.004765,-0.013779,-0.006163,-0.004045,-0.007989,-0.004059,-0.006214
AR1_MI28_P35_8095BZ_TTTGGAGTCATTGTGG,-0.070437,-0.332790,-1.077284,2.216636,-0.051857,-1.013148,0.449856,0.599104,0.660635,-2.139322,...,-0.004879,-0.010554,-0.012284,-0.004765,-0.013779,-0.006163,-0.004045,-0.007989,-0.004059,-0.006214
AR1_MI28_P35_8095BZ_TTTGACTGTCCCACGA,-1.332163,-0.074206,1.611027,2.069917,0.302472,-1.013148,-0.478497,-0.647818,-0.531607,0.656559,...,-0.004879,-0.010554,-0.012284,-0.004765,-0.013779,-0.006163,-0.004045,-0.007989,-0.004059,-0.006214
AR1_MI28_P35_8095BZ_TTTGACTGTATTGACC,-1.814213,-0.622343,1.019114,2.143851,-0.097912,-1.013148,0.047058,0.264348,0.587192,0.207004,...,-0.004879,-0.010554,-0.012284,-0.004765,-0.013779,-0.006163,-0.004045,-0.007989,-0.004059,-0.006214
AR1_MI28_P35_8095BZ_TTTGACTAGCTAAACA,0.114186,-0.662959,-0.092506,2.221168,-0.126373,-1.013148,-1.906602,0.217391,-0.040208,0.409670,...,-0.004879,-0.010554,-0.012284,-0.004765,-0.013779,-0.006163,-0.004045,-0.007989,-0.004059,-0.006214
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CTL-P28_8046_BZ_TTAATCCTCCACAGGC,0.645346,0.711840,-4.131956,-1.158253,0.470615,-1.013148,-1.906602,-2.166894,0.831475,-2.139322,...,-0.004879,-0.010554,-0.012284,-0.004765,-0.013779,-0.006163,-0.004045,-0.007989,-0.004059,-0.006214
CTL-P28_8046_BZ_GATTGGTTCGAGCACC,0.379556,0.201963,0.139636,0.165258,0.837427,-1.013148,-0.523033,0.673109,-0.576902,0.763035,...,-0.004879,-0.010554,-0.012284,-0.004765,-0.013779,-0.006163,-0.004045,-0.007989,-0.004059,-0.006214
CTL-P28_8046_BZ_AAACCCAGTAGATTGA,0.463571,0.373372,0.039541,-0.585707,-1.167797,0.227606,-1.906602,0.566168,0.629144,0.498283,...,-0.004879,-0.010554,-0.012284,-0.004765,-0.013779,-0.006163,-0.004045,-0.007989,-0.004059,-0.006214
CTL-P28_8046_BZ_ACTACGATCAGCGCGT,-0.411874,0.783018,-0.362240,-1.052314,0.524516,0.174271,-1.906602,-0.121845,-0.563511,0.917240,...,-0.004879,-0.010554,-0.012284,-0.004765,-0.013779,-0.006163,-0.004045,-0.007989,-0.004059,-0.006214


In [9]:
# Convert the filtered expression table to a plain NumPy array (row order is
# preserved), which is the input passed to the similarity computation.
double_injury_gene_expression_numpy = double_injury_gene_expression.to_numpy()

The following data contains only cardiomyocytes that are in the double injury model and in the border zone (BZ).

In [10]:
# Pairwise cosine similarity between all rows of the expression array.
# The result is a square matrix with one row and one column per input row.
from sklearn.metrics.pairwise import cosine_similarity
similarity = cosine_similarity(double_injury_gene_expression_numpy)
similarity

array([[ 1.        ,  0.02008631, -0.02810687, ..., -0.01711679,
         0.00483678, -0.02559159],
       [ 0.02008631,  1.        ,  0.04058686, ...,  0.01291435,
        -0.02983589, -0.02592904],
       [-0.02810687,  0.04058686,  1.        , ...,  0.00542966,
         0.00530041, -0.04693524],
       ...,
       [-0.01711679,  0.01291435,  0.00542966, ...,  1.        ,
         0.06883092,  0.06913521],
       [ 0.00483678, -0.02983589,  0.00530041, ...,  0.06883092,
         1.        , -0.01571269],
       [-0.02559159, -0.02592904, -0.04693524, ...,  0.06913521,
        -0.01571269,  1.        ]])

In [11]:
# Dimensions of the similarity matrix.
similarity.shape


(17805, 17805)

Calculating the Threshold

In [12]:
# Row-major 1-D view of the similarity matrix. This yields the same values in
# the same order as a nested Python loop, without materialising hundreds of
# millions of Python float objects in a list.
flat_list = similarity.ravel()

In [13]:
# Wrap the flattened similarity scores in a Series for summary statistics.
hello = pd.Series(data = flat_list)

In [14]:
# Summary statistics (count, mean, std, min, quartiles, max) of all similarity scores.
hello.describe()

count    3.170180e+08
mean     4.605784e-03
std      3.323918e-02
min     -1.375383e-01
25%     -1.777008e-02
50%      1.077527e-03
75%      2.296992e-02
max      1.000000e+00
dtype: float64

In [15]:
# Number of scores strictly above the threshold 1.077527e-03 (the 50% value
# reported by describe()).
int((hello > 1.077527e-03).sum())

158509013

In [16]:
# Materialise the flattened scores as a new NumPy array (np.array copies its
# input), so the in-place thresholding that follows acts on this array only.
flat_list = np.array(flat_list)

In [17]:
# Zero out, in place, every score strictly below the threshold 1.077527e-03.
below_threshold = flat_list < 1.077527e-03
flat_list[below_threshold] = 0
del below_threshold  # the boolean mask is as long as the data; release it

In [18]:
# Restore the square layout of the similarity matrix. The shape is taken from
# `similarity` itself rather than hard-coded, so this keeps working when the
# number of selected cells changes.
flat_list = flat_list.reshape(similarity.shape)

In [19]:
# Display the thresholded matrix (NumPy abbreviates the printed array).
flat_list

array([[1.        , 0.02008631, 0.        , ..., 0.        , 0.00483678,
        0.        ],
       [0.02008631, 1.        , 0.04058686, ..., 0.01291435, 0.        ,
        0.        ],
       [0.        , 0.04058686, 1.        , ..., 0.00542966, 0.00530041,
        0.        ],
       ...,
       [0.        , 0.01291435, 0.00542966, ..., 1.        , 0.06883092,
        0.06913521],
       [0.00483678, 0.        , 0.00530041, ..., 0.06883092, 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.06913521, 0.        ,
        1.        ]])

In [20]:
# Label the thresholded similarity matrix with the cell ids on both axes, so
# each entry is addressed by a (cell, cell) pair.
cell_ids = double_injury_gene_expression.index
final_data = pd.DataFrame(
    data = flat_list,
    index = cell_ids,
    columns = cell_ids,
)
final_data

Unnamed: 0,AR1_MI28_P35_8095BZ_TTTGTTGCATATCTGG,AR1_MI28_P35_8095BZ_TTTGGAGTCATTGTGG,AR1_MI28_P35_8095BZ_TTTGACTGTCCCACGA,AR1_MI28_P35_8095BZ_TTTGACTGTATTGACC,AR1_MI28_P35_8095BZ_TTTGACTAGCTAAACA,AR1_MI28_P35_8095BZ_TTTCCTCCATAGAGGC,AR1_MI28_P35_8095BZ_TTTCATGTCCCAGCGA,AR1_MI28_P35_8095BZ_TTTCATGGTTATCTGG,AR1_MI28_P35_8095BZ_TTTCATGCAGGCATGA,AR1_MI28_P35_8095BZ_TTTCAGTCACTAGAGG,...,CTL-P28_8046_BZ_AAACGAATCCCGTAAA,CTL-P28_8046_BZ_TTTGGTTTCTACGGGC,CTL-P28_8046_BZ_CTGCCTATCAACTACG,CTL-P28_8046_BZ_AAACGAACATGCAGGA,CTL-P28_8046_BZ_ATCAGGTTCTGTAACG,CTL-P28_8046_BZ_TTAATCCTCCACAGGC,CTL-P28_8046_BZ_GATTGGTTCGAGCACC,CTL-P28_8046_BZ_AAACCCAGTAGATTGA,CTL-P28_8046_BZ_ACTACGATCAGCGCGT,CTL-P28_8046_BZ_AAACCCACAATGCAGG
AR1_MI28_P35_8095BZ_TTTGTTGCATATCTGG,1.000000,0.020086,0.000000,0.006983,0.030046,0.030198,0.000000,0.000000,0.025581,0.036798,...,0.044584,0.000000,0.032795,0.021407,0.000000,0.026495,0.000000,0.000000,0.004837,0.000000
AR1_MI28_P35_8095BZ_TTTGGAGTCATTGTGG,0.020086,1.000000,0.040587,0.071119,0.009934,0.020271,0.000000,0.063010,0.042199,0.000000,...,0.000000,0.000000,0.038688,0.000000,0.000000,0.000000,0.000000,0.012914,0.000000,0.000000
AR1_MI28_P35_8095BZ_TTTGACTGTCCCACGA,0.000000,0.040587,1.000000,0.014326,0.015996,0.029914,0.004493,0.055542,0.001675,0.072074,...,0.002595,0.007139,0.004742,0.000000,0.000000,0.000000,0.000000,0.005430,0.005300,0.000000
AR1_MI28_P35_8095BZ_TTTGACTGTATTGACC,0.006983,0.071119,0.014326,1.000000,0.038164,0.000000,0.038847,0.046242,0.024361,0.037917,...,0.000000,0.000000,0.000000,0.049563,0.000000,0.013685,0.000000,0.000000,0.025487,0.000000
AR1_MI28_P35_8095BZ_TTTGACTAGCTAAACA,0.030046,0.009934,0.015996,0.038164,1.000000,0.019954,0.031426,0.039679,0.025915,0.056774,...,0.000000,0.000000,0.023063,0.000000,0.000000,0.052052,0.000000,0.000000,0.001511,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
CTL-P28_8046_BZ_TTAATCCTCCACAGGC,0.026495,0.000000,0.000000,0.013685,0.052052,0.000000,0.000000,0.033463,0.008447,0.000000,...,0.000000,0.000000,0.000000,0.010401,0.000000,1.000000,0.063848,0.000000,0.046216,0.000000
CTL-P28_8046_BZ_GATTGGTTCGAGCACC,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.006977,0.000000,0.021305,0.000000,...,0.058177,0.056592,0.039528,0.024927,0.001447,0.063848,1.000000,0.000000,0.000000,0.000000
CTL-P28_8046_BZ_AAACCCAGTAGATTGA,0.000000,0.012914,0.005430,0.000000,0.000000,0.022351,0.000000,0.000000,0.010179,0.000000,...,0.015941,0.069346,0.034999,0.103545,0.065709,0.000000,0.000000,1.000000,0.068831,0.069135
CTL-P28_8046_BZ_ACTACGATCAGCGCGT,0.004837,0.000000,0.005300,0.025487,0.001511,0.043322,0.000000,0.125044,0.000000,0.026337,...,0.000000,0.044030,0.030643,0.037302,0.008159,0.046216,0.000000,0.068831,1.000000,0.000000


## Implementing the Cosine Similarity as Edges

In [38]:
# Position -> cell id lookup, in the row order of `final_data`.
lookup = list(final_data.index)
print(len(lookup))
print(lookup[:5])

17805
['AR1_MI28_P35_8095BZ_TTTGTTGCATATCTGG', 'AR1_MI28_P35_8095BZ_TTTGGAGTCATTGTGG', 'AR1_MI28_P35_8095BZ_TTTGACTGTCCCACGA', 'AR1_MI28_P35_8095BZ_TTTGACTGTATTGACC', 'AR1_MI28_P35_8095BZ_TTTGACTAGCTAAACA']


In [None]:
# NOTE(review): this re-defines `create_nodes` from the earlier "Deploy Nodes"
# section; whichever definition runs last is the one in effect.
def create_nodes(tx, uid, orig_ident, cell_day, experiment) ->  None:
    """Create or update the `cardiomyocite` node whose `id` is `uid`.

    The node is matched on `id` alone and the remaining properties are then
    set. Matching on every property at once would try to create a second node
    with the same `id` whenever any other property differs from what is
    already stored.

    Args:
        tx: Transaction-like object exposing `run(query, **parameters)`.
        uid: Value stored in the node's `id` property.
        orig_ident: Value stored in the node's `orig_ident` property.
        cell_day: Value stored in the node's `cell_day` property.
        experiment: Value stored in the node's `experiment` property.
    """
    query = (
        "MERGE (p:cardiomyocite {id: $uid}) "
        "SET p.orig_ident = $orig_ident, p.cell_day = $cell_day, p.experiment = $experiment"
    )
    tx.run(query, uid = uid, orig_ident = orig_ident, cell_day = cell_day, experiment = experiment)

In [40]:
# Preview: print the row label and the first three similarity values of the
# first three rows, then stop iterating.
numbered_rows = enumerate(final_data.itertuples())
for position, row in tqdm(numbered_rows):
    if position >= 3:
        break
    print(row[:4])

3it [00:00, 653.62it/s]

('AR1_MI28_P35_8095BZ_TTTGTTGCATATCTGG', 1.0000000000000013, 0.020086308215873143, 0.0)
('AR1_MI28_P35_8095BZ_TTTGGAGTCATTGTGG', 0.020086308215873143, 0.9999999999999998, 0.04058686319469985)
('AR1_MI28_P35_8095BZ_TTTGACTGTCCCACGA', 0.0, 0.04058686319469985, 0.9999999999999989)





In [23]:
# NOTE(review): appears to be a leftover kernel smoke test unrelated to the
# analysis — confirm and remove.
print("hello")

hello
