In [2]:
import pandas as pd
import os
import shutil
from tqdm.notebook import tqdm

In [11]:
import ftplib
import ssl


class ImplicitFTP_TLS(ftplib.FTP_TLS):
    """FTP_TLS subclass that automatically wraps sockets in SSL to support implicit FTPS.

    ``ftplib.FTP_TLS`` only secures the control connection after an explicit
    ``AUTH`` command. This subclass turns ``sock`` into a property so that any
    plain socket assigned to it is wrapped with ``self.context`` immediately.
    """

    def __init__(self, *args, **kwargs):
        # The backing attribute must exist BEFORE the parent constructor runs:
        # when a host is passed, the parent connects right away and assigns
        # ``self.sock`` through the setter below. Resetting ``_sock`` afterwards
        # would silently throw that freshly opened connection away.
        self._sock = None
        super().__init__(*args, **kwargs)

    @property
    def sock(self):
        """Return the current control socket (``None`` when not connected)."""
        return self._sock

    @sock.setter
    def sock(self, value):
        """Store the socket, SSL-wrapping it first unless it is ``None`` or already wrapped."""
        if value is not None and not isinstance(value, ssl.SSLSocket):
            value = self.context.wrap_socket(value)
        self._sock = value

In [38]:
def download_biosino(data_links_df, output_dir: str = './', user: str = None, passwd: str = None):
    """Download the files listed in a biosino data-links table over implicit FTPS.

    Files are written to ``<output_dir>/<run_id>/<fileName>``. A file that already
    exists locally is skipped. Each transfer goes to a ``.part`` file first and is
    renamed only on success, so a failed or interrupted download never leaves a
    placeholder that a later run would mistake for a finished file.

    param:data_links_df - pd.DataFrame - should be downloaded from biosino database
        via Export Data Links; the ``run_id`` and ``fileName`` columns are used
    param:output_dir - str - directory that receives one sub-directory per run
    param:user - str - account name; defaults to the ``BIOSINO_USER`` environment variable
    param:passwd - str - account password; defaults to the ``BIOSINO_PASSWORD``
        environment variable (credentials are deliberately not stored in the notebook)

    Returns None. Problems are reported with ``print``; a failed file does not stop
    the remaining downloads. ``KeyboardInterrupt`` propagates after the partial
    file has been removed.

    Relies on ``pd``, ``os`` and ``tqdm`` imported at the top of the notebook and on
    ``ImplicitFTP_TLS`` defined in an earlier cell.
    """
    if not isinstance(data_links_df, pd.DataFrame) or not isinstance(output_dir, str):
        print('data_links_df should be a pandas DataFrame and output_dir should be a str')
        return

    # Resolve credentials before touching the network or the file system
    if user is None:
        user = os.environ.get('BIOSINO_USER')
    if passwd is None:
        passwd = os.environ.get('BIOSINO_PASSWORD')
    if not user or not passwd:
        print('No biosino credentials: pass user/passwd or set BIOSINO_USER and BIOSINO_PASSWORD')
        return

    # Obtain run IDs: {run_id: [fileName, ...]}
    run_ids = data_links_df.groupby('run_id').agg({'fileName': list}).to_dict()['fileName']
    id_list = tuple(run_ids.keys())

    ftp_client = ImplicitFTP_TLS()
    try:
        # Connect to fms.biosino.org with FTP over TLS protocol
        try:
            ftp_client.connect(host='fms.biosino.org', port=2122)
            ftp_client.login(user=user, passwd=passwd)
            ftp_client.prot_p()
        except Exception as e:
            # Without a session every transfer would fail, so stop here instead of
            # creating directories and empty files for the whole table.
            print('An error occured while connecting to fms.biosino.org')
            print(e)
            return
        print('Connected to fms.biosino.org')

        # Download files in output directory
        for run_id in tqdm(id_list, desc='Downloading data'):
            local_dir = os.path.join(output_dir, run_id)
            os.makedirs(local_dir, exist_ok=True)
            for file in run_ids[run_id]:
                local_path = os.path.join(local_dir, file)
                if os.path.isfile(local_path):
                    continue
                path_on_server = f'/Public/byrun/{run_id[0:5]}/{run_id[0:7]}/{run_id}/{file}'
                partial_path = local_path + '.part'
                completed = False
                try:
                    print(f'Downloading {run_id}:{file}')
                    with open(partial_path, 'wb') as partial_file:
                        ftp_client.retrbinary(f'RETR {path_on_server}', partial_file.write)
                    os.replace(partial_path, local_path)
                    completed = True
                    print(f'{run_id}:{file} successfully downloaded')
                except Exception as e:
                    print(f'An error occured while downloading {path_on_server}, trying next file')
                    print(e)
                finally:
                    # Also runs on KeyboardInterrupt, which is not an Exception subclass
                    if not completed and os.path.exists(partial_path):
                        os.remove(partial_path)
    finally:
        # Say goodbye politely when connected; otherwise just release the socket
        try:
            ftp_client.quit()
        except Exception:
            ftp_client.close()

In [21]:
# Map each run_id to the list of its fileName values (shown as the cell output).
# NOTE(review): `run_list_selected` is only assigned in a later cell of this notebook,
# so this cell fails on a fresh top-to-bottom run.
run_list_selected.groupby('run_id').agg({'fileName': list}).to_dict()['fileName']

['R18050897LR01.R1.fastq.gz']

In [27]:
# Map each sample_id to the list of its fileName values and keep it in `a` for inspection.
# NOTE(review): depends on `run_list_selected`, which is assigned in a later cell.
a = run_list_selected.groupby('sample_id').agg({'fileName': list}).to_dict()['fileName']

In [28]:
# Show the sample_id keys of the mapping stored in `a`.
a.keys()

dict_keys(['OES047482', 'OES047487', 'OES047490', 'OES047530', 'OES047536', 'OES102275', 'OES102276', 'OES102277', 'OES102278', 'OES102279', 'OES102280', 'OES111222', 'OES111223', 'OES111224', 'OES111225', 'OES113474', 'OES113475'])

In [3]:
# Load 'OEP001143.txt' (read relative to the working directory) into a DataFrame;
# pd.read_table parses tab-separated text by default.
# Presumably this is the biosino "Export Data Links" table for the project - confirm.
run_list = pd.read_table('OEP001143.txt')

In [10]:
# Show the rows of run_list that belong to sample 'OES102279'.
run_list[run_list.sample_id == 'OES102279']

Unnamed: 0,project_id,experiment_id,sample_id,run_id,data_id,security,fileName,url,MD5
248,OEP001143,OEX013991,OES102279,OER210020,OED604598,Public,R18050956LR01-shenxianbin_combined_R1.fastq.gz,https://www.biosino.org/download/node/data/pub...,3cd47015b0187a54ba8b9e84abcd8d5c
249,OEP001143,OEX013991,OES102279,OER210020,OED604602,Public,R18050956LR01-shenxianbin_combined_R2.fastq.gz,https://www.biosino.org/download/node/data/pub...,ce26ee996144d8f11b1a6834ea0244dd


In [44]:
# Add a 'file' column holding the second dot-separated piece of each fileName.
# NOTE(review): the piece differs between naming schemes - 'FD0015_R1.fastq.gz' gives
# 'fastq' while 'R18050897LR01.R1.fastq.gz' gives 'R1' - confirm this is intended.
# The lambda also raises for a non-string fileName or a name without any '.'.
run_list['file'] = run_list['fileName'].apply(lambda x: x.split('.')[1])

In [5]:
# File names whose text contains 'bam' (plain substring test on str(name), not an extension check).
list_of_bams = list(filter(lambda name: 'bam' in str(name), run_list['fileName'].to_list()))

In [6]:
# Every other file name: those whose text does not contain 'bam'.
list_of_reads = list(filter(lambda name: 'bam' not in str(name), run_list['fileName'].to_list()))

In [7]:
# Rows of run_list whose fileName is one of the non-BAM names.
# Take an explicit copy: later cells add a column to this frame, and assigning to a
# filtered slice of run_list triggers pandas' SettingWithCopyWarning.
run_list_reads = run_list[run_list['fileName'].isin(list_of_reads)].copy()

In [13]:
# List the distinct experiment_id values present among the read files.
run_list_reads.experiment_id.unique()

array(['OEX010523', 'OEX010522', 'OEX013991'], dtype=object)

In [14]:
# Display the read-file table for inspection.
run_list_reads

Unnamed: 0,project_id,experiment_id,sample_id,run_id,data_id,security,fileName,url,MD5
13,OEP001143,OEX010523,OES047556,OER066269,OED230647,Public,FD0015_R1.fastq.gz,https://www.biosino.org/download/node/data/pub...,c0e26b33e404dfee3dd67a72cd319fc5
14,OEP001143,OEX010523,OES047556,OER066270,OED230618,Public,FD0015_R2.fastq.gz,https://www.biosino.org/download/node/data/pub...,a333fa564c577e9746fe5198577f5c32
15,OEP001143,OEX010523,OES047472,OER066275,OED230609,Public,FD0019_R1.fastq.gz,https://www.biosino.org/download/node/data/pub...,f1da4a9f5cbcd3609c4fe2b5511d4148
16,OEP001143,OEX010523,OES047472,OER066276,OED230735,Public,FD0019_R2.fastq.gz,https://www.biosino.org/download/node/data/pub...,406a2b765cf5c0ba9b05c7c88b74561e
17,OEP001143,OEX010523,OES047327,OER066305,OED230743,Public,FD0044_R1.fastq.gz,https://www.biosino.org/download/node/data/pub...,a28bca2317c64b53ce425402148e8daa
...,...,...,...,...,...,...,...,...,...
283,OEP001143,OEX010523,OES113476,OER221492,OED614676,Public,WGC061536R_combined_R2.fastq.gz,https://www.biosino.org/download/node/data/pub...,5a0f3078fef52ff4a450ef3f89b4c56f
284,OEP001143,OEX010523,OES047183,OER221493,OED614672,Public,WGC070940RB_combined_R1.fastq.gz,https://www.biosino.org/download/node/data/pub...,cc7b2d99284b57fb5fbdb2916e80ff81
285,OEP001143,OEX010523,OES047183,OER221494,OED614677,Public,WGC070940RB_combined_R2.fastq.gz,https://www.biosino.org/download/node/data/pub...,c940f60dc6d51539b347f2ac77e24730
286,OEP001143,OEX010523,OES047347,OER221495,OED614673,Public,WGC097350R_combined_R1.fastq.gz,https://www.biosino.org/download/node/data/pub...,274fd9882c8193e28882f0668feba506


In [16]:
# Label used as the first part of the generated file names, keyed by experiment_id.
molecular_type = {
    **dict.fromkeys(('OEX010522', 'OEX010521'), 'WES'),
    **dict.fromkeys(('OEX010523', 'OEX013991'), 'RNASeq'),
}

In [17]:
# Tissue label used as the second part of the generated file names, keyed by experiment_id.
tissue = dict.fromkeys(('OEX010522', 'OEX010521', 'OEX010523'), 'normal')
tissue['OEX013991'] = 'tumor'

In [23]:
# Display the read-file table again.
# NOTE(review): the saved output below this cell contains a warning about assigning
# 'runiverse_name', which this cell's code does not do - the output looks stale.
run_list_reads

Unnamed: 0,project_id,experiment_id,sample_id,run_id,data_id,security,fileName,url,MD5
13,OEP001143,OEX010523,OES047556,OER066269,OED230647,Public,FD0015_R1.fastq.gz,https://www.biosino.org/download/node/data/pub...,c0e26b33e404dfee3dd67a72cd319fc5
14,OEP001143,OEX010523,OES047556,OER066270,OED230618,Public,FD0015_R2.fastq.gz,https://www.biosino.org/download/node/data/pub...,a333fa564c577e9746fe5198577f5c32
15,OEP001143,OEX010523,OES047472,OER066275,OED230609,Public,FD0019_R1.fastq.gz,https://www.biosino.org/download/node/data/pub...,f1da4a9f5cbcd3609c4fe2b5511d4148
16,OEP001143,OEX010523,OES047472,OER066276,OED230735,Public,FD0019_R2.fastq.gz,https://www.biosino.org/download/node/data/pub...,406a2b765cf5c0ba9b05c7c88b74561e
17,OEP001143,OEX010523,OES047327,OER066305,OED230743,Public,FD0044_R1.fastq.gz,https://www.biosino.org/download/node/data/pub...,a28bca2317c64b53ce425402148e8daa
...,...,...,...,...,...,...,...,...,...
283,OEP001143,OEX010523,OES113476,OER221492,OED614676,Public,WGC061536R_combined_R2.fastq.gz,https://www.biosino.org/download/node/data/pub...,5a0f3078fef52ff4a450ef3f89b4c56f
284,OEP001143,OEX010523,OES047183,OER221493,OED614672,Public,WGC070940RB_combined_R1.fastq.gz,https://www.biosino.org/download/node/data/pub...,cc7b2d99284b57fb5fbdb2916e80ff81
285,OEP001143,OEX010523,OES047183,OER221494,OED614677,Public,WGC070940RB_combined_R2.fastq.gz,https://www.biosino.org/download/node/data/pub...,c940f60dc6d51539b347f2ac77e24730
286,OEP001143,OEX010523,OES047347,OER221495,OED614673,Public,WGC097350R_combined_R1.fastq.gz,https://www.biosino.org/download/node/data/pub...,274fd9882c8193e28882f0668feba506


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  run_list_reads['runiverse_name'] = 0


In [30]:
# Build "<molecular_type>-<tissue>-<fileName>" for every row in a single vectorised step.
# The prefix lookup still raises KeyError for an experiment_id missing from either mapping.
prefix_by_experiment = {
    exp_id: molecular_type[exp_id] + '-' + tissue[exp_id]
    for exp_id in run_list_reads.experiment_id.unique()
}
# .assign returns a new frame, so nothing is written into a slice of run_list
# (no SettingWithCopyWarning) and the column is never seeded with an integer placeholder.
run_list_reads = run_list_reads.assign(
    runiverse_name=(
        run_list_reads['experiment_id'].map(prefix_by_experiment)
        + '-'
        + run_list_reads['fileName'].astype(str)
    )
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  run_list_reads['runiverse_name'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(ilocs[0], value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_gu

In [76]:
# One row per sample_id, holding parallel tuples of original and generated file names.
samps_to_files = run_list_reads.groupby('sample_id').agg(
    fileName=('fileName', tuple),
    runiverse_name=('runiverse_name', tuple),
)

In [49]:
# One row per sample_id, holding the tuple of run_id values of its read files.
runs_to_samps = run_list_reads.groupby('sample_id').agg(run_id=('run_id', tuple))

In [50]:
# Display the sample -> runs table; a run_id appears once per file row, so it can repeat.
runs_to_samps

Unnamed: 0_level_0,run_id
sample_id,Unnamed: 1_level_1
OES047143,"(OER067045, OER067046)"
OES047145,"(OER067073, OER067074)"
OES047150,"(OER067095, OER067096)"
OES047154,"(OER067113, OER067114)"
OES047169,"(OER067093, OER067094)"
...,...
OES111225,"(OER220059, OER220059)"
OES113473,"(OER221489, OER221490)"
OES113474,"(OER221485, OER221486)"
OES113475,"(OER221487, OER221488)"


In [70]:
# Create one directory per sample; exist_ok keeps the cell safe to re-run.
for i in runs_to_samps.index:
    os.makedirs(f'/uftp/projects/biosino/OEP001143/{i}', exist_ok=True)

In [71]:
# Move every downloaded file from its per-run directory into its sample directory.
for sample in tqdm(runs_to_samps.index):
    # A run_id can be listed several times for one sample (once per file row);
    # visit each run directory once and move everything it still contains.
    for run in dict.fromkeys(runs_to_samps.loc[sample, 'run_id']):
        run_dir = f'/uftp/projects/biosino/OEP001143_runs/{run}/'
        if not os.path.isdir(run_dir):
            print(f'{run_dir} not found, skipping')
            continue
        # Iterating the listing (instead of taking element [0]) means an empty or
        # already processed directory is a no-op rather than an IndexError.
        for file in sorted(os.listdir(run_dir)):
            print(file)
            shutil.move(src=f'{run_dir}{file}',
                        dst=f'/uftp/projects/biosino/OEP001143/{sample}/{file}')

  0%|          | 0/105 [00:00<?, ?it/s]

RNWES025.R1.fastq.gz
RNWES025.R2.fastq.gz
RNWES039.R1.fastq.gz
RNWES039.R2.fastq.gz
RNWES050.R1.fastq.gz
RNWES050.R2.fastq.gz
RNWES059.R1.fastq.gz
RNWES059.R2.fastq.gz
RNWES049.R1.fastq.gz
RNWES049.R2.fastq.gz
WGC070940RB_combined_R1.fastq.gz
WGC070940RB_combined_R2.fastq.gz
RNWES060.R1.fastq.gz
RNWES060.R2.fastq.gz
RNWES031.R1.fastq.gz
RNWES031.R2.fastq.gz
RNWES093.R1.fastq.gz
RNWES093.R2.fastq.gz
HFWES180918A2401-58_S118_L004_R1_001.fastq.gz
HFWES180918A2401-58_S118_L004_R2_001.fastq.gz
RNWES003.R1.fastq.gz
RNWES003.R2.fastq.gz
R18051004LR01.R1.fastq.gz
R18051004LR01.R2.fastq.gz
RNWES111.R1.fastq.gz
RNWES111.R2.fastq.gz
RNWES117.R1.fastq.gz
RNWES117.R2.fastq.gz
RNWES074.R1.fastq.gz
RNWES074.R2.fastq.gz
RNWES020.R1.fastq.gz
RNWES020.R2.fastq.gz
RNWES072.R1.fastq.gz
RNWES072.R2.fastq.gz
RNWES040.R1.fastq.gz
RNWES040.R2.fastq.gz
RNWES017.R1.fastq.gz
RNWES017.R2.fastq.gz
R18050879LR01.R1.fastq.gz
R18050879LR01.R2.fastq.gz
RNWES013.R1.fastq.gz
RNWES013.R2.fastq.gz
RNWES016.R1.fastq.gz
RNW

In [88]:
# Add a 'downloaded' sub-directory to every sample directory.
for directory in os.listdir('/uftp/projects/biosino/OEP001143/'):
    # Stray regular files in the project folder cannot hold a sub-directory
    if not os.path.isdir(f'/uftp/projects/biosino/OEP001143/{directory}'):
        continue
    # exist_ok keeps the cell safe to re-run
    os.makedirs(f'/uftp/projects/biosino/OEP001143/{directory}/downloaded', exist_ok=True)

In [89]:
# Move each sample's files into its 'downloaded' folder under their generated names.
for sample in tqdm(samps_to_files.index):
    original_names = samps_to_files.loc[sample, 'fileName']
    generated_names = samps_to_files.loc[sample, 'runiverse_name']
    # Both tuples come from the same groupby, so position i in one matches position i in the other
    for file, runiverse_name in zip(original_names, generated_names):
        shutil.move(
            src=f'/uftp/projects/biosino/OEP001143/{sample}/{file}',
            dst=f'/uftp/projects/biosino/OEP001143/{sample}/downloaded/{runiverse_name}',
        )

  0%|          | 0/105 [00:00<?, ?it/s]

In [91]:
# Start 'correct_name' as a copy of the 'runiverse_name' column; a later cell rewrites it.
samps_to_files['correct_name'] = samps_to_files['runiverse_name']

In [96]:
# Display the per-sample file-name table.
samps_to_files

Unnamed: 0_level_0,fileName,runiverse_name,correct_name
sample_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
OES047143,"(RNWES025.R1.fastq.gz, RNWES025.R2.fastq.gz)","(WES-normal-RNWES025.R1.fastq.gz, WES-normal-R...","(WES-normal-RNWES025.R1.fastq.gz, WES-normal-R..."
OES047145,"(RNWES039.R1.fastq.gz, RNWES039.R2.fastq.gz)","(WES-normal-RNWES039.R1.fastq.gz, WES-normal-R...","(WES-normal-RNWES039.R1.fastq.gz, WES-normal-R..."
OES047150,"(RNWES050.R1.fastq.gz, RNWES050.R2.fastq.gz)","(WES-normal-RNWES050.R1.fastq.gz, WES-normal-R...","(WES-normal-RNWES050.R1.fastq.gz, WES-normal-R..."
OES047154,"(RNWES059.R1.fastq.gz, RNWES059.R2.fastq.gz)","(WES-normal-RNWES059.R1.fastq.gz, WES-normal-R...","(WES-normal-RNWES059.R1.fastq.gz, WES-normal-R..."
OES047169,"(RNWES049.R1.fastq.gz, RNWES049.R2.fastq.gz)","(WES-normal-RNWES049.R1.fastq.gz, WES-normal-R...","(WES-normal-RNWES049.R1.fastq.gz, WES-normal-R..."
...,...,...,...
OES111225,"(R18051006LR01_combined_R1.fastq.gz, R18051006...",(RNASeq-tumor-R18051006LR01_combined_R1.fastq....,(RNASeq-tumor-R18051006LR01_combined_R1.fastq....
OES113473,"(RNWES127.R1.fastq.gz, RNWES127.R2.fastq.gz)","(WES-normal-RNWES127.R1.fastq.gz, WES-normal-R...","(WES-normal-RNWES127.R1.fastq.gz, WES-normal-R..."
OES113474,"(R18050925LR01_R1.fastq.gz, R18050925LR01_R2.f...","(RNASeq-normal-R18050925LR01_R1.fastq.gz, RNAS...","(RNASeq-normal-R18050925LR01_R1.fastq.gz, RNAS..."
OES113475,"(R18050969LR01_R1.fastq.gz, R18050969LR01_R2.f...","(RNASeq-normal-R18050969LR01_R1.fastq.gz, RNAS...","(RNASeq-normal-R18050969LR01_R1.fastq.gz, RNAS..."


In [97]:
# Replace 'normal' with 'tumor' in every name of every sample's 'correct_name' tuple.
# A single column-wise apply avoids assigning a tuple into one cell through .loc,
# which pandas may try to interpret as a list-like of values to broadcast.
# NOTE(review): this relabels ALL samples, including those whose experiment is mapped
# to 'normal' in `tissue` - confirm that is intended.
samps_to_files['correct_name'] = samps_to_files['correct_name'].apply(
    lambda names: tuple(name.replace('normal', 'tumor') for name in names)
)

In [101]:
# Rename the files on disk to match: 'normal' becomes 'tumor' in every file name
# found in each sample's 'downloaded' folder.
for sample in samps_to_files.index:
    downloaded_dir = f'/uftp/projects/biosino/OEP001143/{sample}/downloaded/'
    for file in os.listdir(downloaded_dir):
        file_correct = file.replace('normal', 'tumor')
        shutil.move(src=f'{downloaded_dir}{file}', dst=f'{downloaded_dir}{file_correct}')

In [13]:
# Read '1_read.txt' and keep one whitespace-stripped entry per line.
with open('1_read.txt', 'r') as f:
    lines = f.readlines()

names = [line.strip() for line in lines]

In [15]:
# Keep only the rows of run_list whose sample_id is listed in `names`.
is_selected = run_list['sample_id'].isin(names)
run_list_selected = run_list.loc[is_selected]

In [16]:
# Display the selected rows for inspection.
run_list_selected

Unnamed: 0,project_id,experiment_id,sample_id,run_id,data_id,security,fileName,url,MD5
99,OEP001143,OEX010523,OES047482,OER066811,OED231851,Public,R18050897LR01.R1.fastq.gz,https://www.biosino.org/download/node/data/pub...,1dae0a1da4c740c945dd5bf49755d473
100,OEP001143,OEX010523,OES047482,OER066812,OED231831,Public,R18050897LR01.R2.fastq.gz,https://www.biosino.org/download/node/data/pub...,199ebcc46cd31b8bff09bf17bb55913d
101,OEP001143,OEX010523,OES047487,OER066817,OED231255,Public,R18050900LR01.R1.fastq.gz,https://www.biosino.org/download/node/data/pub...,83703290c6ffcdcc6f1b01e5eeea6a49
102,OEP001143,OEX010523,OES047487,OER066818,OED231824,Public,R18050900LR01.R2.fastq.gz,https://www.biosino.org/download/node/data/pub...,8f62ac8171fcda1e44c106925d09bfdf
103,OEP001143,OEX010523,OES047490,OER066821,OED231830,Public,R18050902LR01.R1.fastq.gz,https://www.biosino.org/download/node/data/pub...,2c2e3341be0c9105976d52fe25b11d21
104,OEP001143,OEX010523,OES047490,OER066822,OED231826,Public,R18050902LR01.R2.fastq.gz,https://www.biosino.org/download/node/data/pub...,8d1d890de3717c18a936e644f19fa5b9
113,OEP001143,OEX010523,OES047530,OER066863,OED231791,Public,R18050945LR01.R1.fastq.gz,https://www.biosino.org/download/node/data/pub...,0f61248785f24cb06237a4b7f243d65b
114,OEP001143,OEX010523,OES047530,OER066864,OED231787,Public,R18050945LR01.R2.fastq.gz,https://www.biosino.org/download/node/data/pub...,82efaec95a268bced2ebb17a3ff527fb
115,OEP001143,OEX010523,OES047536,OER066867,OED231790,Public,R18050949LR01.R1.fastq.gz,https://www.biosino.org/download/node/data/pub...,ff5b9f86f4774ba75fbc6fa92fb62bcf
116,OEP001143,OEX010523,OES047536,OER066868,OED231795,Public,R18050949LR01.R2.fastq.gz,https://www.biosino.org/download/node/data/pub...,14c2e865cffb304949d4fcd8eca8aa04


In [40]:
# Download the files of the selected samples; download_biosino writes each file to
# <output_dir>/<run_id>/<fileName>.
download_biosino(data_links_df=run_list_selected, output_dir='/uftp/projects/biosino/selected/')

Connected to fms.biosino.org


Downloading data:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading OER210017:R18050872LR01-zhangyuqing_combined_R1.fastq.gz
An error occured while downloading /Public/byrun/OER21/OER2100/OER210017/R18050872LR01-zhangyuqing_combined_R1.fastq.gz, trying next file
550 /Public/byrun/OER21/OER2100/OER210017/R18050872LR01-zhangyuqing_combined_R1.fastq.gz: No such file or directory.


In [41]:
# One row per selected sample_id, holding the tuple of run_id values of its files.
runs_to_samps_selected = run_list_selected.groupby('sample_id').agg(run_id=('run_id', tuple))

In [42]:
# Display the sample -> runs table for the selected samples.
runs_to_samps_selected

Unnamed: 0_level_0,run_id
sample_id,Unnamed: 1_level_1
OES047482,"(OER066811, OER066812)"
OES047487,"(OER066817, OER066818)"
OES047490,"(OER066821, OER066822)"
OES047530,"(OER066863, OER066864)"
OES047536,"(OER066867, OER066868)"
OES102275,"(OER210016, OER210016)"
OES102276,"(OER210017, OER210017)"
OES102277,"(OER210018, OER210018)"
OES102278,"(OER210019, OER210019)"
OES102279,"(OER210020, OER210020)"


In [None]:
# Create one directory per selected sample; exist_ok keeps the cell safe to re-run.
for i in runs_to_samps_selected.index:
    os.makedirs(f'/uftp/projects/biosino/selected/{i}/', exist_ok=True)

In [61]:
# Create the 'downloaded' and 'input-raw' folders of every selected sample.
# os.makedirs also creates the sample directory itself when missing, and exist_ok
# keeps the cell safe to re-run.
for i in runs_to_samps_selected.index:
    for subdir in ('downloaded', 'input-raw'):
        os.makedirs(f'/uftp/projects/biosino/selected/{i}/{subdir}', exist_ok=True)

In [63]:
# Copy each selected sample's reads from the per-run download folders into the sample's
# 'downloaded' and 'input-raw' folders as RNASeq-tumor_1/_2.fastq.gz.
# Marker in the file name -> mate number; '.R1.' is checked before '.R2.'.
mate_by_marker = {'.R1.': 1, '.R2.': 2}

for sample in tqdm(runs_to_samps_selected.index):
    # A run_id can repeat within a sample (one row per file); handle each run folder once
    for run in dict.fromkeys(runs_to_samps_selected.loc[sample, 'run_id']):
        run_dir = f'/uftp/projects/biosino/selected/{run}/'
        if not os.path.isdir(run_dir):
            continue
        # Look at every file in the folder, not just the first listing entry, so a run
        # that holds both mates yields both and an empty folder is simply skipped.
        for file in sorted(os.listdir(run_dir)):
            mate = next((number for marker, number in mate_by_marker.items() if marker in file), None)
            if mate is None:
                # NOTE(review): names such as '..._combined_R1.fastq.gz' carry no '.R1.'/'.R2.'
                # marker and are skipped here - confirm whether they should be matched as well.
                continue
            print(f'{file} copied')
            for target in ('downloaded', 'input-raw'):
                shutil.copyfile(src=f'{run_dir}{file}',
                                dst=f'/uftp/projects/biosino/selected/{sample}/{target}/RNASeq-tumor_{mate}.fastq.gz')

  0%|          | 0/17 [00:00<?, ?it/s]

R18050897LR01.R1.fastq.gz copied
R18050897LR01.R2.fastq.gz copied
R18050900LR01.R1.fastq.gz copied
R18050900LR01.R2.fastq.gz copied
R18050902LR01.R1.fastq.gz copied
R18050902LR01.R2.fastq.gz copied
R18050945LR01.R1.fastq.gz copied
R18050945LR01.R2.fastq.gz copied
R18050949LR01.R1.fastq.gz copied
R18050949LR01.R2.fastq.gz copied


In [None]:
# For every run of every selected sample: take the first entry listed in the run's folder
# under 'selected/' and move a file of that name into the sample's folder under 'OEP001143/'.
# NOTE(review): the file name is read from '/uftp/projects/biosino/selected/{run}/' but the
# move source is '/uftp/projects/biosino/OEP001143_runs/{run}/' - the two roots differ, which
# looks like a copy-paste leftover from the earlier project-wide cell. Confirm the intended
# source and destination before running (this cell has no execution count).
for sample in tqdm(runs_to_samps_selected.index):
    for run in runs_to_samps_selected.loc[sample].values[0]:
        # Only the first directory entry is used; an empty folder raises IndexError
        file = os.listdir(f'/uftp/projects/biosino/selected/{run}/')[0]
        print(os.path.basename(f'/uftp/projects/biosino/OEP001143_runs/{run}/{file}'))
        shutil.move(src=f'/uftp/projects/biosino/OEP001143_runs/{run}/{file}',
                   dst=f'/uftp/projects/biosino/OEP001143/{sample}/{file}')