In [None]:
import pickle
import pandas as pd
import hashlib

import numpy as np
import os
from ete3 import NCBITaxa
from datetime import datetime

from dotenv import load_dotenv
import ontology_mapper

import shutil
from tqdm import tqdm

from IPython.display import clear_output


## File paths

In [None]:


# Directory holding the input files, relative to the notebook's working directory.
PARENT_DATA_PATH = '../data'
# Pickle file that is opened with pickle.load() further down to obtain `data`.
DATA_PATH = '../data/genelab_data_in_csv_format.pickle'
# Name of the metadata CSV; joined onto PARENT_DATA_PATH when it is read with pd.read_csv().
FILENAME = '20231220.csv'



## Setting the BioPortal API key and resetting the Neo4j output directories

In [None]:
# Load configuration (API key, output location) from the project-level .env file.
load_dotenv('../.env')

# BioPortal API key: fail early with a clear message when it is missing.
apikey = os.getenv("BIOPORTAL_API_KEY")
if not apikey:
    raise Exception("BIOPORTAL_API_KEY is not set in the .env file!")

# Root directory for the generated node/relationship files.
# Validate it the same way as the API key: os.path.join(None, ...) would
# otherwise fail with an opaque TypeError.
NEO4J_DATA = os.getenv('NEO4J_DATA')
if not NEO4J_DATA:
    raise Exception("NEO4J_DATA is not set in the .env file!")

node_dir = os.path.join(NEO4J_DATA, 'nodes')
rel_dir = os.path.join(NEO4J_DATA, 'relationships')

# WARNING: destructive. Any existing 'nodes' and 'relationships' directories
# are deleted so every run starts from empty output directories.
shutil.rmtree(node_dir, ignore_errors=True)
shutil.rmtree(rel_dir, ignore_errors=True)

os.makedirs(NEO4J_DATA, exist_ok=True)
os.makedirs(node_dir, exist_ok=True)
os.makedirs(rel_dir, exist_ok=True)




## Load the metadata and apply the selection criteria

In [None]:
# Selection criteria: assay technologies to keep and taxonomic kingdoms to drop.
spoke_relevant_assays = ['RNA Sequencing (RNA-Seq)', 'DNA microarray', 'mass spectrometry', 'nucleotide sequencing', '16S']
organism_to_exclude = ['Viridiplantae', 'Fungi']

# header=1: the column names are taken from the second line of the file.
metadata_df = pd.read_csv(os.path.join(PARENT_DATA_PATH, FILENAME), header=1)

# Annotate every metadata row with the superkingdom and kingdom found in the
# NCBI lineage of its organism. Rows that cannot be resolved keep None.
ncbi = NCBITaxa()
lineage_superkingdom = [None]*metadata_df.shape[0]
lineage_kingdom = [None]*metadata_df.shape[0]
for index, row in metadata_df.iterrows():
    try:
        # The taxon id is the last path segment of the accession value.
        tax_id = row['organism.term accession number'].split('/')[-1]
        lineage = ncbi.get_lineage(tax_id)
        lineage_names = ncbi.get_taxid_translator(lineage)
        # One rank lookup for the whole lineage instead of one per member.
        lineage_ranks = ncbi.get_rank(lineage)
        for item in lineage_names:
            rank = lineage_ranks[item]
            if rank == 'superkingdom':
                lineage_superkingdom[index] = lineage_names[item]
            if rank == 'kingdom':
                lineage_kingdom[index] = lineage_names[item]
    except Exception:
        # Deliberately best-effort (e.g. a missing accession has no .split),
        # but no longer a bare `except:` so Ctrl-C / kernel interrupts are
        # not swallowed.
        continue
metadata_df['lineage_superkingdom'] = lineage_superkingdom
metadata_df['lineage_kingdom'] = lineage_kingdom

# Drop rows flagged as not GL-processed. Compare on the string form:
# read_csv may infer this column as boolean, in which case a comparison
# against the string 'False' would never match and nothing would be dropped.
metadata_df = metadata_df[metadata_df['GL-processed'].astype(str) != 'False']
metadata_df_ = metadata_df[metadata_df['study assay technology type'].isin(spoke_relevant_assays)]

# Remove excluded kingdoms, 'Not Applicable' organisms and rows lacking a
# kingdom or organism. Chained (non in-place) operations yield a new frame,
# which avoids SettingWithCopyWarning on a filtered slice.
metadata_df_without_plants_fungi = metadata_df_[~metadata_df_['lineage_kingdom'].isin(organism_to_exclude)]
metadata_df_without_plants_fungi = metadata_df_without_plants_fungi[metadata_df_without_plants_fungi.organism != 'Not Applicable']
metadata_df_without_plants_fungi = metadata_df_without_plants_fungi.dropna(subset=['lineage_kingdom', 'organism'])

# material_type_id = text before the first '_' in the last path segment of
# the material type accession. `.copy()` makes the column assignment target
# an independent frame.
metadata_df_without_plants_fungi_ = metadata_df_without_plants_fungi.dropna(subset=['material type.term accession number']).copy()
metadata_df_without_plants_fungi_['material_type_id'] = metadata_df_without_plants_fungi_['material type.term accession number'].apply(lambda x:x.split('/')[-1].split('_')[0])

# Keep rows whose material type id starts with 'D', equals 'UBERON', or
# contains 'fma', 'EHDA' or 'owl'.
# NOTE(review): these look like anatomy/ontology prefixes - confirm the intended list.
material_type_id = metadata_df_without_plants_fungi_['material_type_id']
metadata_df_without_plants_fungi_after_uberon_filter = metadata_df_without_plants_fungi_[
    material_type_id.str.startswith('D')
    | (material_type_id == 'UBERON')
    | material_type_id.str.contains('fma')
    | material_type_id.str.contains('EHDA')
    | material_type_id.str.contains('owl')
]

# Clear whatever this cell printed so far.
clear_output()


## Open the saved GeneLab tables (stored as a pickle)

In [None]:
%%time

# Load the pickled object at DATA_PATH into `data`. Later cells iterate over it
# and read item['metadata']['accession'] and item['data'] from each element.
# SECURITY: pickle.load can execute arbitrary code while unpickling; only open
# files that come from a trusted source.
with open(DATA_PATH, 'rb') as f:
    data = pickle.load(f)

In [None]:
# Accession of every loaded dataset, in the same order as `data`.
study_ids = [entry['metadata']['accession'] for entry in data]
# Assay id per dataset: 'GLDS' followed by whatever trails the last 'OSD' in the accession.
assay_ids = ['GLDS' + accession.split('OSD')[-1] for accession in study_ids]


In [None]:
# For every dataset, collect the text following 'Log2fc_' in each column whose
# name contains that marker, and pair the resulting list with the dataset's
# assay id.
assay_factor_list = [
    (
        assay_id,
        [
            column.split('Log2fc_')[-1]
            for column in dataset['data'].columns
            if 'Log2fc_' in column
        ],
    )
    for assay_id, dataset in zip(assay_ids, data)
]

assay_factor_list_df = pd.DataFrame(assay_factor_list, columns=['assay_id', 'factor_list'])
assay_factor_list_df
                    

In [None]:


def _unique_oriented_contrasts(contrasts):
    """Deduplicate mirrored contrasts and orient Space Flight first.

    Each contrast is a string of the form '(A)v(B)'.

    1. A contrast is skipped when it is the mirror image '(B)v(A)' of a
       contrast seen earlier in the list.
    2. A kept contrast that starts with '(Ground Control' and contains
       '(Space Flight' is rewritten with its two sides swapped.

    Args:
        contrasts: iterable of contrast strings.

    Returns:
        list[str]: the kept contrasts, in first-seen order.

    Raises:
        IndexError: if a contrast does not contain the ')v(' separator.
    """
    kept = []
    mirrored = set()  # set membership is O(1); the list order is kept in `kept`
    for contrast in contrasts:
        if contrast in mirrored:
            continue
        kept.append(contrast)
        # Splitting on ')v(' strips one parenthesis from each side; restore
        # them before building the mirrored form.
        groups = contrast.split(')v(')
        groups[0] = groups[0] + ')'
        groups[1] = '(' + groups[1]
        mirrored.add('v'.join(reversed(groups)))

    oriented = []
    for contrast in kept:
        if '(Space Flight' in contrast and contrast.startswith('(Ground Control'):
            groups = contrast.split(')v(')
            # groups[1] still ends with ')' and groups[0] still starts with
            # '(', so only the outer parentheses need to be re-added.
            oriented.append('(' + groups[1] + 'v' + groups[0] + ')')
        else:
            oriented.append(contrast)
    return oriented


# One (assay_id, unique contrast list) pair per dataset.
assay_unique_factor_list = [
    (row['assay_id'], _unique_oriented_contrasts(row.factor_list))
    for index, row in assay_factor_list_df.iterrows()
]

assay_unique_factor_list_df = pd.DataFrame(assay_unique_factor_list, columns=['assay_id', 'factor_list'])
assay_unique_factor_list_df['accession'] = study_ids

# One row per contrast. explode() turns an empty contrast list into NaN;
# drop those rows, otherwise the .split() calls below raise AttributeError.
assay_unique_factor_list_df = (
    assay_unique_factor_list_df
    .explode('factor_list')
    .dropna(subset=['factor_list'])
)

# Keep only datasets that passed the metadata selection and attach their
# assay name and technology type.
assay_unique_factor_list_df = (
    pd.merge(
        assay_unique_factor_list_df,
        metadata_df_without_plants_fungi_after_uberon_filter[['accession', 'assay name', 'study assay technology type']],
        on='accession',
    )
    .drop_duplicates(subset=['assay_id', 'factor_list'])
    .reset_index(drop=True)
)

# Split each '(a&b)v(c&d)' contrast into the factor lists of its two sides.
assay_unique_factor_list_df['factors_1'] = assay_unique_factor_list_df.factor_list.apply(lambda x:x.split(')v(')[0][1:].split('&'))
assay_unique_factor_list_df['factors_2'] = assay_unique_factor_list_df.factor_list.apply(lambda x:x.split(')v(')[1][0:-1].split('&'))


## Keeping only assay contrasts with at least one significant measurement (adjusted p-value < 0.05)


In [None]:
# A contrast is kept when at least one measurement has an adjusted p-value
# below this cutoff.
ADJ_P_VALUE_CUTOFF = 0.05

# Position of each accession in `data`, built once instead of scanning all
# accessions for every row. setdefault keeps the first occurrence, matching
# the former np.where(...)[0][0] lookup.
study_position_by_accession = {}
for position, accession in enumerate(study_ids):
    study_position_by_accession.setdefault(accession, position)

assay_unique_factor_list_with_significant_p_value = []
for index, row in tqdm(assay_unique_factor_list_df.iterrows(), total=len(assay_unique_factor_list_df)):
    dataset_table = data[study_position_by_accession[row['accession']]]['data']
    # A missing 'Adj.p.value_<contrast>' column raises KeyError here.
    adjusted_p_values = dataset_table['Adj.p.value_'+row['factor_list']]
    # .any() answers "is there a significant row?" without materialising a
    # filtered copy of the whole table; NaN compares as not significant.
    if (adjusted_p_values < ADJ_P_VALUE_CUTOFF).any():
        assay_unique_factor_list_with_significant_p_value.append(row)

# Passing the columns explicitly keeps the schema even when no contrast is
# significant (a DataFrame built from an empty list would have no columns).
assay_unique_factor_list_df = pd.DataFrame(
    assay_unique_factor_list_with_significant_p_value,
    columns=assay_unique_factor_list_df.columns,
)


6it [00:00, 45.85it/s]

11it [00:00, 34.28it/s]

21it [00:00, 56.53it/s]

30it [00:00, 65.32it/s]

38it [00:00, 66.73it/s]

46it [00:00, 68.20it/s]

54it [00:01, 49.10it/s]

61it [00:01, 52.67it/s]

71it [00:01, 62.03it/s]

82it [00:01, 71.93it/s]

92it [00:01, 78.25it/s]

104it [00:01, 80.32it/s]

113it [00:01, 78.48it/s]

122it [00:01, 79.27it/s]

132it [00:02, 30.59it/s]

139it [00:03, 19.26it/s]

144it [00:04, 12.44it/s]

148it [00:04, 12.39it/s]

151it [00:05, 10.40it/s]

154it [00:05, 10.32it/s]

157it [00:05, 11.43it/s]

159it [00:05, 10.60it/s]

161it [00:06, 10.94it/s]

163it [00:06,  7.56it/s]

165it [00:07,  5.72it/s]

167it [00:07,  6.18it/s]

168it [00:07,  5.90it/s]

169it [00:07,  6.10it/s]

170it [00:08,  5.82it/s]

172it [00:08,  6.16it/s]

173it [00:08,  5.07it/s]

174it [00:08,  5.20it/s]

175it [00:09,  5.32it/s]

177it [00:09,  7.51it/s]

178it [00:09,  7.59it/s]

179it [00:09,  6.66it/s]

181it [00:09,  8.71it/s]

183it [00:10,  6.16it/s]

184it [00:10,  5.98it/s]

185it [00:10,  5.82it/s]

186it [00:10,  5.57it/s]

187it [00:10,  5.25it/s]

188it [00:11,  4.85it/s]

189it [00:11,  4.90it/s]

191it [00:11,  6.93it/s]

192it [00:11,  6.40it/s]

193it [00:11,  5.30it/s]

194it [00:12,  5.32it/s]

196it [00:12,  5.95it/s]

198it [00:12,  7.71it/s]

199it [00:12,  7.38it/s]

201it [00:12,  7.92it/s]

202it [00:13,  6.46it/s]

203it [00:13,  6.18it/s]

204it [00:13,  5.69it/s]

205it [00:13,  6.02it/s]

206it [00:13,  5.87it/s]

207it [00:14,  5.23it/s]

208it [00:14,  4.57it/s]

209it [00:14,  4.13it/s]

210it [00:15,  3.86it/s]

211it [00:15,  4.18it/s]

212it [00:15,  4.49it/s]

213it [00:15,  4.70it/s]

214it [00:15,  5.21it/s]

215it [00:15,  5.09it/s]

216it [00:16,  5.24it/s]

217it [00:16,  5.04it/s]

218it [00:16,  4.58it/s]

219it [00:16,  4.92it/s]

220it [00:16,  5.18it/s]

222it [00:17,  7.26it/s]

224it [00:17,  7.22it/s]

226it [00:17,  7.63it/s]

227it [00:17,  6.73it/s]

228it [00:18,  5.32it/s]

229it [00:18,  4.37it/s]

230it [00:18,  3.77it/s]

231it [00:19,  3.45it/s]

232it [00:19,  3.56it/s]

233it [00:19,  3.47it/s]

234it [00:20,  3.34it/s]

235it [00:20,  3.80it/s]

236it [00:20,  4.36it/s]

237it [00:20,  4.33it/s]

238it [00:21,  3.91it/s]

239it [00:21,  3.57it/s]

240it [00:21,  3.51it/s]

241it [00:21,  3.57it/s]

242it [00:22,  3.67it/s]

243it [00:22,  3.80it/s]

244it [00:22,  3.81it/s]

246it [00:22,  5.01it/s]

248it [00:23,  5.34it/s]

249it [00:23,  4.68it/s]

250it [00:23,  4.27it/s]

251it [00:24,  3.89it/s]

252it [00:24,  4.56it/s]

253it [00:24,  4.44it/s]

254it [00:24,  4.16it/s]

256it [00:24,  6.44it/s]

258it [00:25,  6.17it/s]

259it [00:25,  5.20it/s]

260it [00:25,  4.98it/s]

261it [00:26,  5.08it/s]

262it [00:26,  5.39it/s]

263it [00:26,  5.61it/s]

264it [00:26,  5.51it/s]

265it [00:26,  5.34it/s]

267it [00:26,  6.19it/s]

268it [00:27,  5.28it/s]

269it [00:27,  4.58it/s]

270it [00:27,  4.12it/s]

272it [00:28,  4.93it/s]

273it [00:28,  4.66it/s]

275it [00:28,  5.41it/s]

277it [00:28,  5.94it/s]

278it [00:29,  5.24it/s]

279it [00:29,  5.20it/s]

280it [00:29,  5.33it/s]

281it [00:29,  5.28it/s]

282it [00:30,  5.30it/s]

283it [00:30,  5.06it/s]

284it [00:30,  4.71it/s]

285it [00:30,  5.56it/s]

287it [00:30,  6.20it/s]

288it [00:31,  5.76it/s]

289it [00:31,  5.14it/s]

290it [00:31,  5.91it/s]

291it [00:31,  6.29it/s]

292it [00:31,  5.28it/s]

293it [00:32,  4.38it/s]

294it [00:32,  4.34it/s]

296it [00:32,  5.38it/s]

298it [00:32,  6.04it/s]

299it [00:33,  5.26it/s]

300it [00:33,  4.77it/s]

301it [00:33,  4.40it/s]

302it [00:34,  3.88it/s]

303it [00:34,  3.75it/s]

305it [00:34,  5.29it/s]

306it [00:34,  4.78it/s]

307it [00:35,  4.75it/s]

308it [00:35,  5.06it/s]

309it [00:35,  4.53it/s]

310it [00:35,  3.93it/s]

311it [00:36,  3.86it/s]

313it [00:36,  5.54it/s]

314it [00:36,  5.84it/s]

315it [00:36,  5.69it/s]

316it [00:36,  4.74it/s]

317it [00:37,  4.25it/s]

318it [00:37,  3.95it/s]

319it [00:37,  3.58it/s]

320it [00:38,  3.48it/s]

321it [00:38,  4.29it/s]

322it [00:38,  3.99it/s]

323it [00:38,  3.99it/s]

324it [00:38,  4.27it/s]

325it [00:39,  3.99it/s]

326it [00:39,  3.62it/s]

327it [00:39,  3.55it/s]

328it [00:40,  3.96it/s]

330it [00:40,  5.13it/s]

331it [00:40,  5.03it/s]

332it [00:40,  4.49it/s]

333it [00:41,  4.14it/s]

334it [00:41,  3.89it/s]

335it [00:41,  3.54it/s]

336it [00:42,  3.48it/s]

337it [00:42,  3.38it/s]

338it [00:42,  3.53it/s]

339it [00:42,  3.84it/s]

340it [00:43,  3.71it/s]

341it [00:43,  3.51it/s]

342it [00:43,  3.45it/s]

343it [00:43,  3.74it/s]

344it [00:44,  4.22it/s]

345it [00:44,  4.38it/s]

346it [00:44,  4.42it/s]

347it [00:44,  4.09it/s]

348it [00:45,  3.84it/s]

349it [00:45,  3.69it/s]

350it [00:45,  3.46it/s]

351it [00:46,  3.41it/s]

352it [00:46,  3.64it/s]

353it [00:46,  3.73it/s]

354it [00:46,  4.24it/s]

355it [00:46,  4.47it/s]

357it [00:47,  5.32it/s]

358it [00:47,  4.74it/s]

359it [00:47,  4.84it/s]

360it [00:47,  4.88it/s]

361it [00:48,  5.06it/s]

362it [00:48,  5.23it/s]

363it [00:48,  5.14it/s]

364it [00:48,  4.80it/s]

365it [00:48,  5.25it/s]

367it [00:49,  6.17it/s]

368it [00:49,  5.20it/s]

369it [00:49,  5.15it/s]

371it [00:49,  6.35it/s]

372it [00:49,  6.85it/s]

373it [00:50,  6.67it/s]

374it [00:50,  6.07it/s]

375it [00:50,  5.83it/s]

376it [00:50,  5.56it/s]

377it [00:50,  4.78it/s]

378it [00:51,  4.63it/s]

379it [00:51,  4.38it/s]

380it [00:51,  3.93it/s]

381it [00:52,  4.02it/s]

383it [00:52,  5.65it/s]

385it [00:52,  6.51it/s]

386it [00:52,  5.73it/s]

387it [00:52,  5.30it/s]

388it [00:53,  5.08it/s]

389it [00:53,  4.44it/s]

390it [00:53,  4.22it/s]

391it [00:53,  4.96it/s]

392it [00:53,  5.35it/s]

393it [00:54,  4.79it/s]

394it [00:54,  4.31it/s]

395it [00:54,  4.40it/s]

396it [00:54,  4.71it/s]

397it [00:55,  4.99it/s]

398it [00:55,  5.28it/s]

399it [00:55,  5.25it/s]

400it [00:55,  5.26it/s]

402it [00:55,  6.09it/s]

403it [00:56,  4.94it/s]

404it [00:56,  4.22it/s]

405it [00:56,  3.99it/s]

406it [00:57,  4.02it/s]

407it [00:57,  4.11it/s]

408it [00:57,  4.19it/s]

409it [00:57,  4.16it/s]

410it [00:57,  4.27it/s]

411it [00:58,  4.76it/s]

412it [00:58,  4.77it/s]

413it [00:58,  4.49it/s]

414it [00:58,  4.81it/s]

415it [00:58,  5.10it/s]

416it [00:59,  5.21it/s]

417it [00:59,  5.18it/s]

418it [00:59,  5.03it/s]

419it [00:59,  4.58it/s]

420it [00:59,  4.89it/s]

421it [01:00,  5.46it/s]

423it [01:00,  6.62it/s]

424it [01:00,  5.75it/s]

425it [01:00,  5.24it/s]

426it [01:01,  4.84it/s]

427it [01:01,  4.22it/s]

428it [01:01,  4.06it/s]

429it [01:01,  4.34it/s]

430it [01:02,  4.48it/s]

431it [01:02,  4.10it/s]

432it [01:02,  3.91it/s]

433it [01:02,  3.77it/s]

434it [01:03,  3.49it/s]

435it [01:03,  3.47it/s]

437it [01:03,  4.61it/s]

438it [01:03,  4.81it/s]

439it [01:04,  4.82it/s]

440it [01:04,  4.42it/s]

441it [01:04,  4.57it/s]

442it [01:04,  4.68it/s]

443it [01:05,  4.79it/s]

444it [01:05,  4.70it/s]

445it [01:05,  4.35it/s]

446it [01:05,  4.59it/s]

448it [01:05,  6.25it/s]

449it [01:06,  5.74it/s]

451it [01:06,  7.80it/s]

452it [01:06,  7.03it/s]

454it [01:06,  6.90it/s]

455it [01:06,  6.65it/s]

456it [01:07,  6.47it/s]

465it [01:07, 21.53it/s]

472it [01:07, 29.17it/s]

487it [01:07, 54.89it/s]

513it [01:07, 102.37it/s]

541it [01:07, 146.52it/s]

559it [01:08, 64.53it/s] 

572it [01:08, 65.11it/s]

584it [01:08, 61.92it/s]

594it [01:08, 61.35it/s]

660it [01:09, 121.19it/s]

673it [01:09, 115.46it/s]

692it [01:09, 124.69it/s]

706it [01:09, 123.81it/s]

719it [01:09, 94.86it/s] 

732it [01:09, 98.57it/s]

746it [01:10, 103.28it/s]

766it [01:10, 115.58it/s]

789it [01:10, 141.20it/s]

805it [01:10, 109.86it/s]

818it [01:10, 107.69it/s]

834it [01:10, 119.08it/s]

848it [01:10, 123.20it/s]

868it [01:10, 141.73it/s]

887it [01:11, 150.52it/s]

910it [01:11, 167.02it/s]

933it [01:11, 183.71it/s]

954it [01:11, 189.61it/s]

975it [01:11, 195.27it/s]

1024it [01:11, 279.35it/s]

1053it [01:11, 261.76it/s]

1080it [01:12, 145.15it/s]

1101it [01:12, 149.02it/s]

1121it [01:12, 138.85it/s]

1139it [01:12, 141.34it/s]

1158it [01:12, 151.51it/s]

1176it [01:12, 157.47it/s]

1194it [01:12, 147.12it/s]

1215it [01:13, 127.16it/s]

1232it [01:13, 136.03it/s]

1247it [01:13, 128.65it/s]

1296it [01:13, 212.91it/s]

1335it [01:15, 45.82it/s] 

1353it [01:22, 10.29it/s]

1366it [01:27,  6.64it/s]

1375it [01:32,  5.09it/s]

1382it [01:34,  4.42it/s]

1387it [01:36,  4.03it/s]

1391it [01:38,  3.65it/s]

1394it [01:40,  3.33it/s]

1396it [01:40,  3.36it/s]

1398it [01:41,  3.17it/s]

1399it [01:42,  3.07it/s]

1400it [01:42,  2.90it/s]

1401it [01:43,  2.78it/s]

1402it [01:43,  2.61it/s]

1403it [01:44,  2.46it/s]

1404it [01:44,  2.49it/s]

1405it [01:44,  2.53it/s]

1406it [01:45,  2.46it/s]

1407it [01:45,  2.41it/s]

1408it [01:46,  2.31it/s]

1409it [01:46,  2.35it/s]

1410it [01:46,  2.44it/s]

1411it [01:47,  2.38it/s]

1413it [01:47,  3.13it/s]

1414it [01:48,  3.09it/s]

1415it [01:48,  2.85it/s]

1416it [01:49,  2.67it/s]

1417it [01:49,  2.48it/s]

1418it [01:49,  2.48it/s]

1419it [01:50,  2.52it/s]

1420it [01:50,  2.44it/s]

1422it [01:51,  3.18it/s]

1423it [01:51,  3.09it/s]

1424it [01:51,  2.83it/s]

1425it [01:52,  2.65it/s]

1426it [01:52,  2.45it/s]

1427it [01:53,  2.46it/s]

1428it [01:53,  2.53it/s]

1429it [01:54,  2.45it/s]

1431it [01:54,  3.19it/s]

1432it [01:54,  3.04it/s]

1433it [01:55,  2.80it/s]

1434it [01:55,  2.64it/s]

1435it [01:56,  2.44it/s]

1436it [01:56,  2.44it/s]

1437it [01:57,  2.41it/s]

1438it [01:57,  2.31it/s]

1439it [01:57,  2.23it/s]

1440it [01:58,  2.15it/s]

1441it [01:58,  2.19it/s]

1442it [01:59,  2.26it/s]

1443it [01:59,  2.20it/s]

1444it [02:00,  2.30it/s]

1446it [02:00,  2.99it/s]

1447it [02:01,  2.73it/s]

1448it [02:01,  2.51it/s]

1449it [02:02,  2.33it/s]

1450it [02:02,  2.32it/s]

1451it [02:02,  2.37it/s]

1452it [02:03,  2.28it/s]

1453it [02:03,  2.38it/s]

1455it [02:04,  3.06it/s]

1456it [02:04,  2.78it/s]

1457it [02:05,  2.54it/s]

1458it [02:05,  2.37it/s]

1459it [02:06,  2.35it/s]

1460it [02:06,  2.37it/s]

1461it [02:07,  2.28it/s]

1462it [02:07,  2.36it/s]

1463it [02:07,  2.98it/s]

1464it [02:07,  2.82it/s]

1465it [02:08,  2.55it/s]

1466it [02:08,  2.36it/s]

1467it [02:09,  2.23it/s]

1468it [02:09,  2.23it/s]

1469it [02:10,  2.25it/s]

1470it [02:10,  2.35it/s]

1471it [02:11,  2.31it/s]

1472it [02:11,  2.33it/s]

1473it [02:11,  2.35it/s]

1474it [02:12,  2.34it/s]

1475it [02:12,  2.41it/s]

1476it [02:13,  2.38it/s]

1477it [02:13,  2.58it/s]

1478it [02:13,  2.55it/s]

1479it [02:14,  2.53it/s]

1480it [02:14,  2.41it/s]

1481it [02:15,  2.40it/s]

1482it [02:15,  2.38it/s]

1483it [02:16,  2.33it/s]

1484it [02:16,  2.39it/s]

1485it [02:16,  2.37it/s]

1486it [02:17,  2.89it/s]

1487it [02:17,  2.79it/s]

1488it [02:17,  2.70it/s]

1489it [02:18,  2.51it/s]

1490it [02:18,  2.48it/s]

1491it [02:19,  2.46it/s]

1492it [02:19,  2.41it/s]

1493it [02:19,  2.46it/s]

1494it [02:20,  2.41it/s]

1496it [02:20,  3.07it/s]

1497it [02:21,  2.93it/s]

1498it [02:21,  2.68it/s]

1499it [02:22,  2.58it/s]

1500it [02:22,  2.41it/s]

1501it [02:23,  2.35it/s]

1502it [02:23,  2.26it/s]

1503it [02:24,  2.21it/s]

1504it [02:24,  2.11it/s]

1505it [02:25,  2.12it/s]

1506it [02:25,  2.11it/s]

1507it [02:25,  2.15it/s]

1509it [02:26,  2.74it/s]

1510it [02:26,  2.59it/s]

1511it [02:27,  2.44it/s]

1512it [02:27,  2.34it/s]

1513it [02:28,  2.19it/s]

1514it [02:28,  2.18it/s]

1515it [02:29,  2.16it/s]

1516it [02:29,  2.19it/s]

1518it [02:30,  2.76it/s]

1519it [02:30,  2.61it/s]

1520it [02:31,  2.47it/s]

1521it [02:31,  2.36it/s]

1522it [02:32,  2.23it/s]

1523it [02:32,  2.21it/s]

1524it [02:33,  2.17it/s]

1525it [02:33,  2.20it/s]

1527it [02:34,  2.79it/s]

1528it [02:34,  2.62it/s]

1529it [02:34,  2.48it/s]

1530it [02:35,  2.30it/s]

1531it [02:35,  2.26it/s]

1532it [02:36,  2.21it/s]

1533it [02:36,  2.22it/s]

1534it [02:37,  2.24it/s]

1535it [02:37,  2.18it/s]

1536it [02:38,  2.25it/s]

1537it [02:38,  2.21it/s]

1539it [02:39,  2.74it/s]

1540it [02:39,  2.56it/s]

1541it [02:40,  2.42it/s]

1542it [02:40,  2.35it/s]

1543it [02:41,  2.33it/s]

1544it [02:41,  2.25it/s]

1545it [02:41,  2.35it/s]

1546it [02:42,  2.27it/s]

1548it [02:42,  2.76it/s]

1549it [02:43,  2.57it/s]

1550it [02:43,  2.43it/s]

1551it [02:44,  2.37it/s]

1552it [02:44,  2.35it/s]

1553it [02:45,  2.25it/s]

1554it [02:45,  2.33it/s]

1555it [02:46,  2.25it/s]

1557it [02:46,  2.76it/s]

1558it [02:47,  2.57it/s]

1559it [02:47,  2.39it/s]

1560it [02:48,  2.27it/s]

1561it [02:48,  2.14it/s]

1562it [02:49,  2.11it/s]

1563it [02:49,  2.08it/s]

1564it [02:50,  2.09it/s]

1565it [02:50,  2.13it/s]

1566it [02:51,  2.05it/s]

1567it [02:51,  2.66it/s]

1568it [02:51,  2.47it/s]

1569it [02:52,  2.30it/s]

1570it [02:52,  2.17it/s]

1571it [02:53,  2.12it/s]

1572it [02:53,  2.07it/s]

1573it [02:54,  2.08it/s]

1574it [02:54,  2.11it/s]

1575it [02:55,  2.06it/s]

1577it [02:55,  2.63it/s]

1578it [02:56,  2.42it/s]

1579it [02:56,  2.26it/s]

1580it [02:57,  2.19it/s]

1581it [02:57,  2.12it/s]

1582it [02:58,  2.10it/s]

1583it [02:58,  2.13it/s]

1584it [02:59,  2.09it/s]

1586it [02:59,  2.60it/s]

1587it [03:00,  2.60it/s]

1588it [03:00,  2.41it/s]

1589it [03:00,  2.44it/s]

1590it [03:01,  2.43it/s]

1591it [03:01,  2.56it/s]

1592it [03:02,  2.44it/s]

1593it [03:02,  2.35it/s]

1594it [03:03,  2.23it/s]

1595it [03:03,  2.88it/s]

1596it [03:03,  2.88it/s]

1597it [03:04,  2.60it/s]

1598it [03:04,  2.58it/s]

1599it [03:04,  2.50it/s]

1600it [03:05,  2.56it/s]

1601it [03:05,  2.42it/s]

1602it [03:06,  2.34it/s]

1603it [03:06,  2.22it/s]

1604it [03:06,  2.90it/s]

1605it [03:07,  2.90it/s]

1606it [03:07,  2.59it/s]

1607it [03:08,  2.55it/s]

1608it [03:08,  2.48it/s]

1609it [03:08,  2.48it/s]

1610it [03:09,  2.38it/s]

1611it [03:09,  2.31it/s]

1612it [03:10,  2.21it/s]

1613it [03:10,  2.77it/s]

1614it [03:10,  2.58it/s]

1615it [03:11,  2.57it/s]

1616it [03:11,  2.56it/s]

1617it [03:11,  2.92it/s]

1618it [03:12,  2.71it/s]

1619it [03:12,  2.49it/s]

1620it [03:13,  2.30it/s]

1621it [03:13,  2.38it/s]

1622it [03:13,  2.81it/s]

1623it [03:14,  2.61it/s]

1624it [03:14,  2.60it/s]

1625it [03:15,  2.57it/s]

1626it [03:15,  2.64it/s]

1627it [03:15,  2.48it/s]

1628it [03:16,  2.36it/s]

1629it [03:16,  2.23it/s]

1630it [03:17,  2.33it/s]

1632it [03:17,  2.89it/s]

1633it [03:18,  2.80it/s]

1634it [03:18,  2.72it/s]

1635it [03:19,  2.63it/s]

1636it [03:19,  2.47it/s]

1637it [03:19,  2.33it/s]

1638it [03:20,  2.21it/s]

1639it [03:20,  2.30it/s]

1640it [03:21,  2.28it/s]

1641it [03:21,  2.19it/s]

1642it [03:22,  2.19it/s]

1643it [03:22,  2.11it/s]

1644it [03:23,  2.15it/s]

1645it [03:23,  2.06it/s]

1646it [03:24,  2.03it/s]

1647it [03:24,  2.03it/s]

1649it [03:25,  2.65it/s]

1650it [03:25,  2.43it/s]

1651it [03:26,  2.37it/s]

1652it [03:26,  2.22it/s]

1653it [03:27,  2.20it/s]

1654it [03:27,  2.12it/s]

1655it [03:28,  2.08it/s]

1656it [03:28,  2.06it/s]

1658it [03:29,  2.69it/s]

1659it [03:29,  2.48it/s]

1660it [03:30,  2.40it/s]

1661it [03:30,  2.26it/s]

1662it [03:31,  2.23it/s]

1663it [03:31,  2.14it/s]

1664it [03:32,  2.07it/s]

1665it [03:32,  2.19it/s]

1666it [03:32,  2.37it/s]

1667it [03:33,  2.35it/s]

1668it [03:33,  2.32it/s]

1669it [03:34,  2.24it/s]

1670it [03:34,  2.29it/s]

1671it [03:35,  2.40it/s]

1672it [03:35,  2.33it/s]

1674it [03:35,  3.10it/s]

1675it [03:36,  3.03it/s]

1676it [03:36,  2.78it/s]

1677it [03:37,  2.65it/s]

1678it [03:37,  2.48it/s]

1679it [03:37,  2.49it/s]

1680it [03:38,  2.53it/s]

1681it [03:38,  2.43it/s]

1683it [03:39,  3.18it/s]

1684it [03:39,  3.05it/s]

1685it [03:40,  2.81it/s]

1686it [03:40,  2.64it/s]

1687it [03:40,  2.44it/s]

1688it [03:41,  2.45it/s]

1689it [03:41,  2.49it/s]

1690it [03:42,  2.39it/s]

1691it [03:42,  2.27it/s]

1692it [03:43,  2.17it/s]

1693it [03:43,  2.20it/s]

1694it [03:44,  2.28it/s]

1695it [03:44,  2.22it/s]

1696it [03:44,  2.33it/s]

1698it [03:45,  2.95it/s]

1699it [03:45,  2.69it/s]

1700it [03:46,  2.48it/s]

1701it [03:46,  2.33it/s]

1702it [03:47,  2.32it/s]

1703it [03:47,  2.37it/s]

1704it [03:48,  2.28it/s]

1705it [03:48,  2.37it/s]

1707it [03:48,  3.01it/s]

1708it [03:49,  2.73it/s]

1709it [03:49,  2.49it/s]

1710it [03:50,  2.32it/s]

1711it [03:50,  2.31it/s]

1712it [03:51,  2.31it/s]

1713it [03:51,  2.34it/s]

1714it [03:52,  2.30it/s]

1715it [03:52,  2.44it/s]

1716it [03:52,  2.70it/s]

1717it [03:53,  2.62it/s]

1718it [03:53,  2.69it/s]

1719it [03:53,  2.68it/s]

1720it [03:54,  3.23it/s]

1721it [03:54,  2.95it/s]

1722it [03:54,  2.78it/s]

1723it [03:55,  2.56it/s]

1724it [03:55,  2.64it/s]

1725it [03:55,  2.99it/s]

1726it [03:56,  2.78it/s]

1727it [03:56,  2.81it/s]

1728it [03:57,  2.72it/s]

1729it [03:57,  2.90it/s]

1730it [03:57,  2.72it/s]

1731it [03:58,  2.64it/s]

1732it [03:58,  2.47it/s]

1733it [03:59,  2.57it/s]

1734it [03:59,  2.45it/s]

1735it [03:59,  2.38it/s]

1736it [04:00,  2.30it/s]

1737it [04:00,  2.22it/s]

1738it [04:01,  2.11it/s]

1739it [04:01,  2.14it/s]

1740it [04:02,  2.12it/s]

1741it [04:02,  2.15it/s]

1743it [04:03,  2.74it/s]

1744it [04:03,  2.59it/s]

1745it [04:04,  2.46it/s]

1746it [04:04,  2.36it/s]

1747it [04:05,  2.23it/s]

1748it [04:05,  2.22it/s]

1749it [04:06,  2.18it/s]

1750it [04:06,  2.23it/s]

1752it [04:07,  2.82it/s]

1753it [04:07,  2.63it/s]

1754it [04:07,  2.50it/s]

1755it [04:08,  2.33it/s]

1756it [04:08,  2.26it/s]

1757it [04:09,  2.20it/s]

1758it [04:09,  2.21it/s]

1759it [04:10,  2.21it/s]

1760it [04:10,  2.15it/s]

1761it [04:11,  2.25it/s]

1762it [04:11,  2.22it/s]

1764it [04:12,  2.74it/s]

1765it [04:12,  2.54it/s]

1766it [04:13,  2.40it/s]

1767it [04:13,  2.34it/s]

1768it [04:14,  2.32it/s]

1769it [04:14,  2.20it/s]

1770it [04:15,  2.28it/s]

1771it [04:15,  2.24it/s]

1773it [04:16,  2.74it/s]

1774it [04:16,  2.53it/s]

1775it [04:17,  2.35it/s]

1776it [04:17,  2.22it/s]

1777it [04:18,  2.11it/s]

1778it [04:18,  2.07it/s]

1779it [04:19,  2.03it/s]

1780it [04:19,  2.06it/s]

1781it [04:20,  2.11it/s]

1782it [04:20,  2.05it/s]

1783it [04:20,  2.63it/s]

1784it [04:21,  2.46it/s]

1785it [04:21,  2.26it/s]

1786it [04:22,  2.12it/s]

1787it [04:22,  2.08it/s]

1788it [04:23,  2.02it/s]

1789it [04:23,  2.04it/s]

1790it [04:24,  2.09it/s]

1791it [04:24,  2.06it/s]

1793it [04:25,  2.59it/s]

1794it [04:25,  2.57it/s]

1795it [04:26,  2.41it/s]

1796it [04:26,  2.41it/s]

1797it [04:26,  2.37it/s]

1798it [04:27,  2.41it/s]

1799it [04:27,  2.35it/s]

1800it [04:28,  2.28it/s]

1801it [04:28,  2.20it/s]

1802it [04:28,  2.81it/s]

1803it [04:29,  2.84it/s]

1804it [04:29,  2.55it/s]

1805it [04:30,  2.52it/s]

1806it [04:30,  2.46it/s]

1807it [04:30,  2.44it/s]

1808it [04:31,  2.34it/s]

1809it [04:31,  2.27it/s]

1810it [04:32,  2.17it/s]

1812it [04:32,  2.72it/s]

1813it [04:33,  2.66it/s]

1814it [04:33,  2.61it/s]

1815it [04:34,  2.61it/s]

1816it [04:34,  2.47it/s]

1817it [04:35,  2.35it/s]

1818it [04:35,  2.22it/s]

1819it [04:35,  2.32it/s]

1820it [04:36,  2.94it/s]

1821it [04:36,  2.72it/s]

1822it [04:36,  2.68it/s]

1823it [04:37,  2.62it/s]

1824it [04:37,  2.56it/s]

1825it [04:38,  2.43it/s]

1826it [04:38,  2.30it/s]

1827it [04:39,  2.20it/s]

1828it [04:39,  2.28it/s]

1829it [04:40,  2.27it/s]

1830it [04:40,  2.20it/s]

1831it [04:40,  2.23it/s]

1832it [04:41,  2.13it/s]

1833it [04:41,  2.15it/s]

1834it [04:42,  2.07it/s]

1835it [04:42,  2.04it/s]

1836it [04:43,  2.04it/s]

1838it [04:43,  2.69it/s]

1839it [04:44,  2.50it/s]

1840it [04:44,  2.42it/s]

1841it [04:45,  2.27it/s]

1842it [04:45,  2.23it/s]

1843it [04:46,  2.13it/s]

1844it [04:46,  2.09it/s]

1845it [04:47,  2.21it/s]

1846it [04:47,  2.37it/s]

1847it [04:48,  2.33it/s]

1848it [04:48,  2.32it/s]

1849it [04:48,  2.26it/s]

1850it [04:49,  2.33it/s]

1851it [04:49,  2.42it/s]

1852it [04:50,  2.38it/s]

1854it [04:50,  3.17it/s]

1855it [04:50,  3.02it/s]

1856it [04:51,  2.78it/s]

1857it [04:51,  2.60it/s]

1858it [04:52,  2.43it/s]

1859it [04:52,  2.45it/s]

1860it [04:53,  2.45it/s]

1861it [04:53,  2.36it/s]

1862it [04:54,  2.25it/s]

1863it [04:54,  2.17it/s]

1864it [04:54,  2.21it/s]

1865it [04:55,  2.28it/s]

1866it [04:55,  2.22it/s]

1867it [04:56,  2.32it/s]

1869it [04:56,  2.97it/s]

1870it [04:57,  2.71it/s]

1871it [04:57,  2.48it/s]

1872it [04:58,  2.32it/s]

1873it [04:58,  2.29it/s]

1874it [04:59,  2.30it/s]

1875it [04:59,  2.40it/s]

1876it [04:59,  2.34it/s]

1877it [05:00,  2.41it/s]

1878it [05:00,  2.50it/s]

1879it [05:01,  2.45it/s]

1880it [05:01,  2.56it/s]

1881it [05:01,  2.55it/s]

1882it [05:01,  3.08it/s]

1883it [05:02,  2.85it/s]

1884it [05:02,  2.79it/s]

1885it [05:03,  2.57it/s]

1886it [05:03,  2.59it/s]

1887it [05:04,  2.42it/s]

1888it [05:04,  2.35it/s]

1889it [05:04,  2.29it/s]

1890it [05:05,  2.21it/s]

1891it [05:05,  2.11it/s]

1892it [05:06,  2.13it/s]

1893it [05:06,  2.12it/s]

1894it [05:07,  2.19it/s]

1896it [05:07,  2.80it/s]

1897it [05:08,  2.64it/s]

1898it [05:08,  2.47it/s]

1899it [05:09,  2.32it/s]

1900it [05:09,  2.27it/s]

1901it [05:10,  2.22it/s]

1902it [05:10,  2.21it/s]

1903it [05:11,  2.22it/s]

1904it [05:11,  2.17it/s]

1905it [05:11,  2.25it/s]

1906it [05:12,  2.20it/s]

1908it [05:12,  2.75it/s]

1909it [05:13,  2.59it/s]

1910it [05:13,  2.41it/s]

1911it [05:14,  2.26it/s]

1912it [05:14,  2.14it/s]

1913it [05:15,  2.11it/s]

1914it [05:15,  2.07it/s]

1915it [05:16,  2.08it/s]

1916it [05:16,  2.10it/s]

1917it [05:17,  2.07it/s]

1919it [05:17,  2.61it/s]

1920it [05:18,  2.60it/s]

1921it [05:18,  2.44it/s]

1922it [05:19,  2.45it/s]

1923it [05:19,  2.43it/s]

1924it [05:20,  2.43it/s]

1925it [05:20,  2.33it/s]

1926it [05:20,  2.28it/s]

1927it [05:21,  2.19it/s]

1928it [05:21,  2.81it/s]

1929it [05:22,  2.63it/s]

1930it [05:22,  2.60it/s]

1931it [05:22,  2.57it/s]

1932it [05:23,  2.52it/s]

1933it [05:23,  2.39it/s]

1934it [05:24,  2.29it/s]

1935it [05:24,  2.18it/s]

1936it [05:25,  2.27it/s]

1937it [05:25,  2.27it/s]

1938it [05:26,  2.19it/s]

1939it [05:26,  2.22it/s]

1940it [05:26,  2.14it/s]

1941it [05:27,  2.14it/s]

1942it [05:27,  2.06it/s]

1943it [05:28,  2.03it/s]

1944it [05:28,  2.17it/s]

1945it [05:29,  2.29it/s]

1946it [05:29,  2.25it/s]

1947it [05:30,  2.25it/s]

1948it [05:30,  2.21it/s]

1949it [05:31,  2.28it/s]

1950it [05:31,  2.30it/s]

1951it [05:31,  2.24it/s]

1952it [05:32,  2.14it/s]

1953it [05:32,  2.10it/s]

1954it [05:33,  2.13it/s]

1955it [05:33,  2.20it/s]

1956it [05:34,  2.31it/s]

1957it [05:34,  2.30it/s]

1958it [05:35,  2.33it/s]

1959it [05:35,  2.26it/s]

1960it [05:35,  2.25it/s]

1961it [05:36,  2.21it/s]

1962it [05:36,  2.13it/s]

1963it [05:37,  2.12it/s]

1964it [05:37,  2.08it/s]

1980it [05:38, 14.94it/s]

2004it [05:38, 37.95it/s]

2020it [05:38, 52.53it/s]

2034it [05:38, 65.68it/s]

2054it [05:38, 89.61it/s]

2075it [05:38, 109.16it/s]

2091it [05:38, 96.18it/s] 

2111it [05:38, 115.66it/s]

2126it [05:39, 107.02it/s]

2143it [05:39, 119.47it/s]

2167it [05:39, 147.42it/s]

2184it [05:39, 132.96it/s]

2200it [05:39, 119.91it/s]

2214it [05:39, 100.05it/s]

2226it [05:39, 103.79it/s]

2238it [05:40, 35.39it/s] 

2247it [05:42, 19.11it/s]

2254it [05:43, 13.39it/s]

2259it [05:44, 11.45it/s]

2263it [05:44, 10.36it/s]

2266it [05:45, 10.38it/s]

2269it [05:45,  9.03it/s]

2271it [05:45,  8.29it/s]

2273it [05:46,  8.03it/s]

2275it [05:46,  7.75it/s]

2276it [05:46,  7.52it/s]

2277it [05:46,  7.37it/s]

2278it [05:46,  7.11it/s]

2279it [05:47,  6.69it/s]

2280it [05:47,  6.52it/s]

2281it [05:47,  6.64it/s]

2282it [05:47,  6.85it/s]

2283it [05:47,  6.54it/s]

2284it [05:47,  6.72it/s]

2285it [05:48,  6.88it/s]

2286it [05:48,  6.70it/s]

2287it [05:48,  6.88it/s]

2288it [05:48,  6.97it/s]

2289it [05:48,  6.53it/s]

2290it [05:48,  6.27it/s]

2291it [05:48,  6.33it/s]

2292it [05:49,  6.16it/s]

2293it [05:49,  5.87it/s]

2294it [05:49,  5.98it/s]

2295it [05:49,  5.69it/s]

2296it [05:49,  5.93it/s]

2297it [05:49,  6.35it/s]

2298it [05:50,  6.23it/s]

2299it [05:50,  5.90it/s]

2300it [05:50,  5.70it/s]

2301it [05:50,  5.63it/s]

2303it [05:50,  7.78it/s]

2304it [05:51,  7.06it/s]

2305it [05:51,  6.58it/s]

2306it [05:51,  6.01it/s]

2307it [05:51,  6.01it/s]

2308it [05:51,  5.70it/s]

2310it [05:52,  6.39it/s]

2312it [05:52,  6.68it/s]

2313it [05:52,  6.21it/s]

2314it [05:52,  5.90it/s]

2315it [05:52,  5.57it/s]

2316it [05:53,  5.61it/s]

2317it [05:53,  5.66it/s]

2318it [05:53,  5.74it/s]

2319it [05:53,  6.00it/s]

2320it [05:53,  5.63it/s]

2321it [05:54,  5.29it/s]

2322it [05:54,  5.43it/s]

2323it [05:54,  5.61it/s]

2324it [05:54,  5.85it/s]

2325it [05:54,  5.90it/s]

2326it [05:54,  6.08it/s]

2327it [05:55,  5.96it/s]

2328it [05:55,  5.74it/s]

2330it [05:55,  6.54it/s]

2331it [05:55,  6.16it/s]

2332it [05:55,  5.62it/s]

2333it [05:56,  5.64it/s]

2334it [05:56,  5.70it/s]

2335it [05:56,  5.88it/s]

2336it [05:56,  5.75it/s]

2338it [05:56,  7.76it/s]

2339it [05:56,  7.10it/s]

2340it [05:57,  6.42it/s]

2341it [05:57,  5.92it/s]

2342it [05:57,  5.57it/s]

2343it [05:57,  5.17it/s]

2344it [05:57,  5.05it/s]

2345it [05:58,  4.83it/s]

2346it [05:58,  5.26it/s]

2347it [05:58,  5.09it/s]

2349it [05:58,  5.72it/s]

2350it [05:59,  5.44it/s]

2351it [05:59,  5.16it/s]

2352it [05:59,  4.80it/s]

2353it [05:59,  4.72it/s]

2354it [05:59,  4.73it/s]

2355it [06:00,  4.73it/s]

2356it [06:00,  4.82it/s]

2357it [06:00,  4.67it/s]

2358it [06:00,  4.49it/s]

2360it [06:00,  6.21it/s]

2361it [06:01,  6.13it/s]

2362it [06:01,  5.96it/s]

2363it [06:01,  5.50it/s]

2364it [06:01,  5.44it/s]

2365it [06:01,  5.23it/s]

2367it [06:02,  6.64it/s]

2368it [06:02,  6.48it/s]

2369it [06:02,  5.77it/s]

2370it [06:02,  5.65it/s]

2371it [06:02,  5.41it/s]

2372it [06:03,  5.27it/s]

2373it [06:03,  5.10it/s]

2375it [06:03,  7.10it/s]

2376it [06:03,  6.95it/s]

2377it [06:03,  6.77it/s]

2378it [06:04,  6.38it/s]

2379it [06:04,  5.74it/s]

2380it [06:04,  5.58it/s]

2381it [06:04,  5.27it/s]

2383it [06:04,  7.08it/s]

2384it [06:04,  6.82it/s]

2385it [06:05,  6.26it/s]

2386it [06:05,  5.94it/s]

2387it [06:05,  5.49it/s]

2388it [06:05,  5.32it/s]

2389it [06:05,  5.00it/s]

2391it [06:06,  6.90it/s]

2393it [06:06,  8.57it/s]

2394it [06:06,  7.35it/s]

2396it [06:06,  7.26it/s]

2398it [06:07,  7.53it/s]

2400it [06:07,  8.17it/s]

2401it [06:07,  7.92it/s]

2402it [06:07,  7.18it/s]

2403it [06:07,  6.93it/s]

2404it [06:07,  6.37it/s]

2405it [06:08,  6.44it/s]

2406it [06:08,  6.50it/s]

2407it [06:08,  6.52it/s]

2408it [06:08,  6.30it/s]

2409it [06:08,  6.45it/s]

2410it [06:08,  6.64it/s]

2411it [06:08,  6.75it/s]

2412it [06:09,  6.52it/s]

2414it [06:09,  7.60it/s]

2415it [06:09,  7.24it/s]

2416it [06:09,  7.06it/s]

2417it [06:09,  7.00it/s]

2418it [06:09,  6.84it/s]

2419it [06:10,  6.63it/s]

2420it [06:10,  6.69it/s]

2421it [06:10,  6.30it/s]

2423it [06:10,  6.93it/s]

2424it [06:10,  6.81it/s]

2425it [06:11,  6.20it/s]

2426it [06:11,  6.21it/s]

2427it [06:11,  6.09it/s]

2429it [06:11,  7.41it/s]

2430it [06:11,  7.30it/s]

2431it [06:11,  6.86it/s]

2432it [06:12,  6.59it/s]

2433it [06:12,  6.25it/s]

2434it [06:12,  5.81it/s]

2436it [06:12,  6.48it/s]

2437it [06:12,  6.22it/s]

2438it [06:13,  5.78it/s]

2439it [06:13,  5.45it/s]

2440it [06:13,  5.56it/s]

2441it [06:13,  5.54it/s]

2442it [06:13,  5.53it/s]

2443it [06:14,  5.66it/s]

2444it [06:14,  5.33it/s]

2445it [06:14,  5.05it/s]

2446it [06:14,  5.04it/s]

2448it [06:14,  5.71it/s]

2449it [06:15,  5.96it/s]

2450it [06:15,  6.16it/s]

2452it [06:15,  6.53it/s]

2454it [06:15,  7.68it/s]

2455it [06:15,  7.37it/s]

2456it [06:16,  6.84it/s]

2457it [06:16,  6.11it/s]

2458it [06:16,  5.88it/s]

2459it [06:16,  5.65it/s]

2460it [06:16,  5.34it/s]

2461it [06:17,  5.33it/s]

2462it [06:17,  5.59it/s]

2463it [06:17,  5.67it/s]

2464it [06:17,  5.93it/s]

2465it [06:17,  5.46it/s]

2466it [06:17,  5.15it/s]

2467it [06:18,  5.57it/s]

2469it [06:18,  6.30it/s]

2470it [06:18,  6.19it/s]

2471it [06:18,  5.77it/s]

2472it [06:18,  5.73it/s]

2473it [06:19,  5.52it/s]

2475it [06:19,  8.13it/s]

2476it [06:19,  7.31it/s]

2477it [06:19,  6.15it/s]

2478it [06:19,  5.33it/s]

2479it [06:20,  5.02it/s]

2480it [06:20,  4.75it/s]

2481it [06:20,  4.61it/s]

2482it [06:20,  5.04it/s]

2484it [06:20,  7.03it/s]

2486it [06:21,  7.82it/s]

2488it [06:21,  8.33it/s]

2490it [06:21,  9.37it/s]

2492it [06:21,  9.30it/s]

2494it [06:21, 10.02it/s]

2496it [06:22,  7.75it/s]

2498it [06:22,  8.92it/s]

2500it [06:22,  8.25it/s]

2501it [06:22,  7.74it/s]

2502it [06:23,  7.55it/s]

2503it [06:23,  7.44it/s]

2504it [06:23,  6.63it/s]

2505it [06:23,  6.33it/s]

2507it [06:23,  7.85it/s]

2508it [06:23,  7.26it/s]

2509it [06:24,  6.49it/s]

2510it [06:24,  6.02it/s]

2515it [06:24, 13.66it/s]

2529it [06:24,  6.58it/s]




In [None]:
def strip_strings_in_list(lst):
    """Return a new list with leading/trailing whitespace removed from every item.

    Each element of ``lst`` must provide a ``strip()`` method (e.g. ``str``).
    The input list itself is left unmodified.
    """
    stripped = []
    for item in lst:
        stripped.append(item.strip())
    return stripped

# Run strip_strings_in_list over every cell of both factor columns so the
# strings inside each cell lose their surrounding whitespace.
for factor_column in ('factors_1', 'factors_2'):
    assay_unique_factor_list_df[factor_column] = (
        assay_unique_factor_list_df[factor_column].apply(strip_strings_in_list)
    )


In [None]:
# Gather all 'material type' values of each accession into a single list.
grouped = (
    metadata_df_without_plants_fungi_after_uberon_filter
    .groupby('accession')[['material type']]
    .agg(list)
)

# One output row per accession: [accession, first material, second material or None].
# Only accessions with exactly one or two material entries are kept; any other
# count produces no row, so that accession is absent from the resulting frame.
# NOTE(review): confirm that skipping accessions with more than two entries is intended.
new_rows = [
    [accession, materials[0], materials[1] if len(materials) == 2 else None]
    for accession, materials in grouped['material type'].items()
    if len(materials) in (1, 2)
]

metadata_df_with_material_type = pd.DataFrame(
    new_rows,
    columns=['accession', 'material_1', 'material_2'],
)

clear_output()


In [None]:
# Build the assay node table in one pipeline:
#   1. join the per-assay factor lists with the per-accession material columns
#      on 'accession' (pandas' default merge type),
#   2. hash each factor_list string with MD5,
#   3. derive the node identifier as "<assay_id>-<factor_list_hash>",
#   4. rename the descriptive columns to the names used in the node file.
assay_node_df = (
    assay_unique_factor_list_df
    .merge(metadata_df_with_material_type, on='accession')
    .assign(
        factor_list_hash=lambda frame: frame['factor_list'].apply(
            lambda text: hashlib.md5(text.encode()).hexdigest()
        )
    )
    .assign(identifier=lambda frame: frame['assay_id'] + '-' + frame['factor_list_hash'])
    .rename(columns={
        'assay name': 'name',
        'study assay technology type': 'technology',
    })
)



In [None]:
# Study nodes: one row per accession. The descriptive attributes are written
# as empty strings here.
# The frame is built with non-mutating calls: the previous version modified a
# column slice of assay_node_df in place (rename/.loc/drop_duplicates with
# inplace=True), which raises SettingWithCopyWarning.
study_df = (
    assay_node_df[['accession']]
    .rename(columns={'accession': 'identifier'})
    .drop_duplicates(subset=['identifier'])
    .assign(
        name='',
        organism='',
        taxonomy='',
        strain='',
        duration='',
        duration_unit='',
    )
)

# Study -> Assay edges: 'from' is the study accession, 'to' is the assay identifier.
SpA_df = (
    assay_node_df[['accession', 'identifier']]
    .drop_duplicates()
    .rename(columns={'accession': 'from', 'identifier': 'to'})
)

clear_output()

In [None]:
# Reduce the assay table to the columns written to the node file, in output
# order, adding an empty 'measurement' column.
assay_node_df = (
    assay_node_df
    .drop(columns=['assay_id', 'factor_list_hash', 'factor_list', 'accession'])
    .assign(measurement='')
    [['identifier', 'name', 'technology', 'measurement', 'factors_1', 'factors_2', 'material_1', 'material_2']]
)

# Assays with a single material: reuse material_1 as material_2.
# Assign the result back explicitly. Calling fillna(..., inplace=True) on
# assay_node_df['material_2'] is chained assignment: it is deprecated in
# pandas 2.x and has no effect on the frame under Copy-on-Write, which would
# leave material_2 unfilled.
assay_node_df['material_2'] = assay_node_df['material_2'].fillna(assay_node_df['material_1'])

clear_output()

In [None]:
%%time
# Call ontology_mapper.map_ontology once per material column, passing the source
# column, the target id column, the "UBERON" ontology name and the BioPortal API key.
# Order is preserved: material_1 first, then material_2.
for material_column, material_id_column in (
    ("material_1", "material_id_1"),
    ("material_2", "material_id_2"),
):
    assay_node_df = ontology_mapper.map_ontology(
        assay_node_df, material_column, material_id_column, "UBERON", apikey
    )


## Creating edge files

In [None]:
# Assay -> material edges. Each assay contributes one edge per material id
# column; the two edge lists are stacked and exact duplicate rows removed
# (e.g. when both material ids of an assay are the same).
# rename() is used without inplace=True: renaming a column slice of
# assay_node_df in place raises SettingWithCopyWarning.
AiA_df_1 = assay_node_df[['identifier', 'material_id_1']].rename(
    columns={'identifier': 'from', 'material_id_1': 'to'}
)
AiA_df_2 = assay_node_df[['identifier', 'material_id_2']].rename(
    columns={'identifier': 'from', 'material_id_2': 'to'}
)
AiA_df = pd.concat([AiA_df_1, AiA_df_2], ignore_index=True).drop_duplicates()

clear_output()


## Anatomy and Celltype nodes

In [None]:
# Unique target identifiers referenced by the assay edges.
material_df = AiA_df[['to']].drop_duplicates().rename(columns={'to': 'identifier'})

# Split nodes by identifier prefix. na=False makes a missing identifier count as
# "no match"; without it the mask contains NaN and boolean indexing raises.
# Rows matching neither prefix are left out of both tables.
# .copy() gives independent frames, because these tables are modified in the
# following cell (dtype cast / de-duplication).
is_anatomy_node = material_df['identifier'].str.startswith('UBERON', na=False)
is_celltype_node = material_df['identifier'].str.startswith('CL', na=False)
anatomy_df = material_df[is_anatomy_node].copy()
celltype_df = material_df[is_celltype_node].copy()

# Split edges the same way. Both masks are computed before AiA_df is overwritten
# with its anatomy-only subset.
# NOTE: because AiA_df is overwritten here, re-running this cell on its own
# yields an empty AiCT_df; re-run from the edge-creation cell instead.
is_anatomy_edge = AiA_df['to'].str.startswith('UBERON', na=False)
is_celltype_edge = AiA_df['to'].str.startswith('CL', na=False)
AiCT_df = AiA_df[is_celltype_edge].copy()
AiA_df = AiA_df[is_anatomy_edge].copy()




In [None]:
# Cast key columns to str and remove duplicates before export.
# Each frame is rebuilt and reassigned rather than modified in place: several of
# them are filtered slices of other frames, and writing into such slices (or
# calling drop_duplicates(inplace=True) on them) raises SettingWithCopyWarning.

# Node tables: unique on 'identifier' (first occurrence kept).
assay_node_df = assay_node_df.astype({'identifier': 'str'}).drop_duplicates(subset=['identifier'])
anatomy_df = anatomy_df.astype({'identifier': 'str'}).drop_duplicates(subset=['identifier'])
celltype_df = celltype_df.astype({'identifier': 'str'}).drop_duplicates(subset=['identifier'])
study_df = study_df.astype({'identifier': 'str'}).drop_duplicates(subset=['identifier'])

# Edge tables: unique on the full row.
edge_key_dtypes = {'from': 'str', 'to': 'str'}
AiA_df = AiA_df.astype(edge_key_dtypes).drop_duplicates()
AiCT_df = AiCT_df.astype(edge_key_dtypes).drop_duplicates()
SpA_df = SpA_df.astype(edge_key_dtypes).drop_duplicates()

clear_output()


## Saving node and edge files

In [None]:
# Date stamp appended to every output file name.
# NOTE(review): "%Y-%d-%m" is year-day-month, not ISO year-month-day; confirm this
# order is what downstream consumers of the files expect before changing it.
today_date = datetime.today()
formatted_date = today_date.strftime("%Y-%d-%m")

# (frame, target directory, file name) for each CSV, in write order:
# node tables first, then relationship tables.
csv_exports = [
    (assay_node_df, node_dir, f'Assay_{formatted_date}.csv'),
    (anatomy_df, node_dir, f'Anatomy_{formatted_date}.csv'),
    (celltype_df, node_dir, f'CellType_{formatted_date}.csv'),
    (study_df, node_dir, f'Study_{formatted_date}.csv'),
    (AiA_df, rel_dir, f'Assay-INVESTIGATED_AiA-Anatomy_{formatted_date}.csv'),
    (AiCT_df, rel_dir, f'Assay-INVESTIGATED_AiCT-CellType_{formatted_date}.csv'),
    (SpA_df, rel_dir, f'Study-PERFORMED_SpA-Assay_{formatted_date}.csv'),
]

for frame, target_dir, file_name in csv_exports:
    # index=False: the DataFrame index is not written as a column.
    frame.to_csv(os.path.join(target_dir, file_name), index=False)
