In [7]:
import polars as pl
import parse
from collections import Counter

from urllib.parse import urlparse
import os
from pathlib import Path

def get_filename_from_url(url):
    """Return the last path component of ``url``.

    Only the path part of the URL is inspected, so a query string or
    fragment never ends up in the returned name. A path ending in ``/``
    (or an empty path) yields an empty string.
    """
    url_path = urlparse(url).path
    _, filename = os.path.split(url_path)
    return filename

def parse_4dn(tsv_4dn):
    """Read the tab-separated file ``tsv_4dn`` into a polars DataFrame.

    Lines starting with ``#`` are passed to polars as comment lines and
    therefore do not appear in the returned frame.
    """
    read_options = {"separator": "\t", "comment_prefix": "#"}
    return pl.read_csv(tsv_4dn, **read_options)

def download_command_4dn(tsv_4dn, access_key_id = "<access_key_id>", access_key_secret = "<access_key_secret>"):
    """Extract the suggested download command from a metadata TSV.

    Scans ``tsv_4dn`` line by line for the marker
    ``"Suggested command to download:"`` and returns the text that follows
    it on the first matching line, stripped of surrounding whitespace.

    Parameters
    ----------
    tsv_4dn : str or path-like
        Path of the metadata file to scan.
    access_key_id, access_key_secret : str
        Values substituted for the literal ``<access_key_id>`` and
        ``<access_key_secret>`` placeholders in the command. The defaults
        leave the placeholders untouched.

    Returns
    -------
    str or None
        The formatted command, or ``None`` when no line contains the marker.
    """
    command_key = "Suggested command to download:"
    # A context manager guarantees the handle is closed, including on the
    # early return below.
    with open(tsv_4dn) as metadata:
        for line in metadata:
            # partition splits on the first occurrence of the marker and also
            # works when the marker is the very first thing on the line.
            _, marker, remainder = line.partition(command_key)
            if not marker:
                continue
            raw_command = remainder.strip()
            return (
                raw_command
                .replace("<access_key_id>", access_key_id)
                .replace("<access_key_secret>", access_key_secret)
            )
    return None

def rename(tsv_4dn, label_read = lambda read: f"R{read}", update_with = {}):
    """Yield ``(original, renamed)`` path pairs for each row of a metadata TSV.

    The original name is the file name taken from the row's
    ``File Download URL``. The new name joins ``Condition``, ``Bio Rep No``,
    ``Tech Rep No`` and the read label with underscores, removes all spaces,
    and keeps the original's full suffix chain (e.g. ``.fastq.gz``).

    Parameters
    ----------
    tsv_4dn : str or path-like
        Metadata file to read (via ``parse_4dn``).
    label_read : callable
        Maps the row's ``Paired End`` value to the read label; the default
        produces ``R1``, ``R2``, ...
    update_with : dict
        Currently unused; kept so existing callers keep working. It is never
        mutated, so the shared default is harmless.

    Yields
    ------
    tuple[pathlib.Path, pathlib.Path]
        Relative paths ``(original, renamed)``; nothing is touched on disk.
    """
    # Read the file the caller asked for rather than a hard-coded name.
    for row in parse_4dn(tsv_4dn).iter_rows(named = True):
        original = get_filename_from_url(row["File Download URL"])
        suffix = "".join(Path(original).suffixes)
        prefix = "_".join([
            row["Condition"],
            str(row["Bio Rep No"]),
            str(row["Tech Rep No"]),
            label_read(row["Paired End"])
        ]).replace(" ", "")
        # Named `renamed` so the local does not shadow this function's name.
        renamed = prefix + suffix
        yield (Path(original), Path(renamed))

# Rename each downloaded file to its descriptive name. The loop variables are
# deliberately not called `rename`: rebinding that name at module level would
# replace the `rename` function with a Path and break any later call to it.
for original_path, renamed_path in rename("4dn.tsv"):
    if original_path.exists():
        original_path.rename(renamed_path)
        print(f"Renamed {original_path} --> {renamed_path}")
    elif renamed_path.exists():
        print(f"{original_path} already renamed --> {renamed_path}")
    else:
        print(f"Neither {original_path} nor {renamed_path} found, download command may not have been run yet.")

# Last expression of the cell: display the parsed metadata table.
parse_4dn("4dn.tsv")

Neither 4DNFI7G518XA.fastq.gz nor Formaldehyde+DSG,DdeIandDpnII,HFFc6(Tier1)_1_1_R1.fastq.gz found, download command may not have been run yet.
4DNFI9H22RJ3.fastq.gz already renamed --> Formaldehyde+DSG,DdeIandDpnII,HFFc6(Tier1)_1_1_R2.fastq.gz


File Download URL,Experiment Set Accession,Experiment Accession,File Accession,Size (MB),md5sum,File Type,File Format,Bio Rep No,Tech Rep No,Biosource Type,Organism,Related File Relationship,Related File,Paired End,Set Status,File Status,Publication,Experiment Type,Replicate Info,Assay Details,Biosource,Dataset,Condition,In Experiment As,Project,Generating Lab,Experimental Lab,Contributing Lab,Notes,Open Data URL
str,str,str,str,f64,str,str,str,i64,i64,str,str,str,str,i64,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str,str
"""https://data.4dnucleome.org/fi…","""4DNESN49VY8X""","""4DNEXDL7KBH2""","""4DNFI7G518XA""",10851.94,"""c603ea160a9fe8a5198f2156cddaf1…","""reads""","""fastq""",1,1,"""immortalized cell line""","""human""","""paired with""","""4DNFI9H22RJ3""",1,"""released""","""released""","""Akgol Oksuz B et al. (2021)""","""in situ Hi-C""","""Biorep 1, Techrep 1""","""DdeI and DpnII""","""HFFc6 (Tier 1)""","""Hi-C on HFF cells - protocol v…","""Formaldehyde+DSG, DdeI and Dpn…","""raw file""","""4DN""","""Job Dekker, UMMS""","""Job Dekker, UMMS""","""N/A""","""N/A""","""https://4dn-open-data-public.s…"
"""https://data.4dnucleome.org/fi…","""4DNESN49VY8X""","""4DNEXDL7KBH2""","""4DNFI9H22RJ3""",11122.08,"""ddd6fae650a825b05cd47d318333c3…","""reads""","""fastq""",1,1,"""immortalized cell line""","""human""","""paired with""","""4DNFI7G518XA""",2,"""released""","""released""","""Akgol Oksuz B et al. (2021)""","""in situ Hi-C""","""Biorep 1, Techrep 1""","""DdeI and DpnII""","""HFFc6 (Tier 1)""","""Hi-C on HFF cells - protocol v…","""Formaldehyde+DSG, DdeI and Dpn…","""raw file""","""4DN""","""Job Dekker, UMMS""","""Job Dekker, UMMS""","""N/A""","""N/A""","""https://4dn-open-data-public.s…"


In [64]:
# Shell escape: create the two files under their original 4DN names if they do
# not exist yet (touch leaves the contents of existing files untouched).
# NOTE(review): presumably empty placeholders for exercising the rename cell
# without downloading the real FASTQ files — confirm before keeping this cell.
!touch 4DNFI7G518XA.fastq.gz 4DNFI9H22RJ3.fastq.gz