In [None]:
import numpy as np
import pandas as pd
import os
import csv
from pathlib import Path
from pprint import pprint
from collections import defaultdict
from datetime import datetime
from urllib.parse import urlparse, parse_qs
from io import StringIO

### This block contains a few utility functions ###

Tool to find a uuid in a directory path

In [None]:
def is_uuid(s):
    """
    Report whether `s` is a bare 32-character lowercase hexadecimal uuid string.

    Parameters:
        s: the candidate value, normally one component of a directory path.

    Returns:
        True if `s` is a str of exactly 32 characters drawn from 0-9 and a-f,
        otherwise False.  Empty strings, None and other non-string values
        (e.g. a NaN read from a spreadsheet) yield False rather than being
        returned as-is or raising.
    """
    if not isinstance(s, str) or len(s) != 32:
        return False
    return all(c in '0123456789abcdef' for c in s)

def get_uuid(s):
    """
    Find the first uuid-like component in a path or in a Globus app URL.

    For a string starting with 'http:' or 'https:' the host must be
    'app.globus.org'; the search is then repeated on the first value of the
    URL's 'origin_path' query parameter.  Any other string is split on '/'
    and its components are examined from left to right.

    Returns:
        The first component accepted by is_uuid(), or None if there is none.

    Raises:
        RuntimeError: if `s` is a URL for a host other than app.globus.org.
    """
    if not s.startswith(('http:', 'https:')):
        # Plain path: scan the components in order
        for component in s.split('/'):
            if is_uuid(component):
                return component
        return None

    parsed = urlparse(s)
    if parsed.netloc != 'app.globus.org':
        raise RuntimeError(f'Unrecognized URL {s}')
    origin_path = parse_qs(parsed.query)['origin_path'][0]
    return get_uuid(origin_path)

#for idx, row in metadata_df.iterrows():
#    print(get_uuid(row['data_path']))

In [None]:
def get_true_stem(some_path):
    """
    Strip the directory part and every suffix from a path.

    Parameters:
        some_path: a pathlib path object (its `.stem` attribute is used).

    Returns:
        A Path holding only the base name with all suffixes removed,
        e.g. Path('/a/b/foo.tar.gz') -> Path('foo').
    """
    current = Path(some_path.stem)
    while True:
        shorter = Path(current.stem)
        if shorter == current:
            # Nothing left to strip
            return current
        current = shorter

In [None]:
def reformat_datetime(dt_str):
    """
    Re-render an ISO-format date/time string as 'YYYY-MM-DD HH:MM'.

    Parameters:
        dt_str: a string accepted by datetime.fromisoformat().

    Returns:
        The same instant formatted with minute resolution (seconds dropped).
    """
    parsed = datetime.fromisoformat(dt_str)
    return f"{parsed:%Y-%m-%d %H:%M}"

### End utility functions ###

### Begin block of mapping data used to associate UCSD SNARE uuids with contributors files ###

This is based on a text table supplied by Chris on 4/9/2021

In [None]:
# See below for frozen version of the results of this block
# Each non-blank line is split on whitespace: the second word is expected to
# be '<uuid>/<target file name>' and the third word the source file name
# without its '.tsv' extension.
tuples = []
# 'with' guarantees the key file is closed once it has been read
with open('/tmp/UCSD_SNARE_Contributor_tsvs_replacement_key.txt') as key_file:
    for line in key_file:
        words = line.split()
        if not words:
            continue  # tolerate blank lines (e.g. a trailing newline)
        uuid, target_fname = words[1].split('/')
        src_fname = words[2] + '.tsv'
        tuples.append((uuid, target_fname, src_fname))
pprint(tuples)

In [None]:
# Frozen (uuid, contributors file name, fixed contributors file name) triples.
# The cell further down that builds the fake metadata uses the first two
# elements of each triple as 'data_path' and 'contributors_path'.
UCSD_SNARE_tuples = [('1b1b6bb2e8d1c91fe7ab411348304822',
  'lapmap_20200317_contributors.tsv',
  'lapmap_20200317_fixed_contributors.tsv'),
 ('1d9ec91d0e3b5236fac5ea40d92ae1b8',
  'lapmap_20200317_contributors.tsv',
  'lapmap_20200317_fixed_contributors.tsv'),
 ('3e20c22cdeeeb0a1ba369f6160d9a27a',
  'lapmap_20200317_contributors.tsv',
  'lapmap_20200317_fixed_contributors.tsv'),
 ('433edf96a3d6dfa3d27a7a2dba4311e4',
  'lapmap_20200317_contributors.tsv',
  'lapmap_20200317_fixed_contributors.tsv'),
 ('5ecf18db4c89ce07ae9839d682f7c3d7',
  'lapmap_20200317_contributors.tsv',
  'lapmap_20200317_fixed_contributors.tsv'),
 ('6e9e4da81edfe046b3acf3b49e7593c0',
  'lapmap_20200317_contributors.tsv',
  'lapmap_20200317_fixed_contributors.tsv'),
 ('7c0779e09e3567dc488bfe475a991c8c',
  'lapmap_20200317_contributors.tsv',
  'lapmap_20200317_fixed_contributors.tsv'),
 ('8ac048296fb13f444aa3b9e7ce75ae68',
  'lapmap_20200317_contributors.tsv',
  'lapmap_20200317_fixed_contributors.tsv'),
 ('9d571c28084dd24814785965c0a6918b',
  'lapmap_20200317_contributors.tsv',
  'lapmap_20200317_fixed_contributors.tsv'),
 ('a25253e7f83d46abc0429850a1ecbc9a',
  'lapmap_20200317_contributors.tsv',
  'lapmap_20200317_fixed_contributors.tsv'),
 ('bbdda69f7d867e20486f66ac2ebbd5b4',
  'lapmap_20200317_contributors.tsv',
  'lapmap_20200317_fixed_contributors.tsv'),
 ('cfef0d2cc877edd714721cdf0b47229d',
  'lapmap_20200317_contributors.tsv',
  'lapmap_20200317_fixed_contributors.tsv'),
 ('e8a24492e83dfe73596b36c6bdca6807',
  'lapmap_20200317_contributors.tsv',
  'lapmap_20200317_fixed_contributors.tsv'),
 ('eb6515292e1a97cd6837d46958171e3f',
  'lapmap_20200317_contributors.tsv',
  'lapmap_20200317_fixed_contributors.tsv'),
 ('311467b33584cbf449a2e6926503154e',
  'lapmap_20191217_contributors.tsv',
  'lapmap_20191217_fixed_contributors.tsv'),
 ('33655ae2e5076c0c67d64a79d4102027',
  'lapmap_20191217_contributors.tsv',
  'lapmap_20191217_fixed_contributors.tsv'),
 ('44a7b949c2fc2fc88f23132b187aea98',
  'lapmap_20191217_contributors.tsv',
  'lapmap_20191217_fixed_contributors.tsv'),
 ('69fc6f9949c81de1aa323faa7c978fd8',
  'lapmap_20191217_contributors.tsv',
  'lapmap_20191217_fixed_contributors.tsv'),
 ('82664d972ad6b39d2030b4c28d2d04e4',
  'lapmap_20191217_contributors.tsv',
  'lapmap_20191217_fixed_contributors.tsv'),
 ('84a0f4f3c0ef0310a2246999419216c7',
  'lapmap_20191217_contributors.tsv',
  'lapmap_20191217_fixed_contributors.tsv'),
 ('a16519c83eb0de1d63965a7911b227ec',
  'lapmap_20191217_contributors.tsv',
  'lapmap_20191217_fixed_contributors.tsv'),
 ('e651ff64216e1a8f22c9f3585a4c544b',
  'lapmap_20191217_contributors.tsv',
  'lapmap_20191217_fixed_contributors.tsv'),
 ('0811138fb7801d5b95ca499cce6948ad',
  'bukmap_20200211_contributors.tsv',
  'bukmap_20200211_fixed_contributors.tsv'),
 ('1c9f7cf927e7dd9f4a8286aaedd86c1f',
  'bukmap_20200211_contributors.tsv',
  'bukmap_20200211_fixed_contributors.tsv'),
 ('24b35dc16ef7c1b50b8467ed4000db89',
  'bukmap_20200211_contributors.tsv',
  'bukmap_20200211_fixed_contributors.tsv'),
 ('363fba574f66ec21060939b3b9333ffc',
  'bukmap_20200211_contributors.tsv',
  'bukmap_20200211_fixed_contributors.tsv'),
 ('4dd21b1f34d608f4d3174ea562745bf1',
  'bukmap_20200211_contributors.tsv',
  'bukmap_20200211_fixed_contributors.tsv'),
 ('4fbe3d35e7bdfcda46fbce3f281e0612',
  'bukmap_20200211_contributors.tsv',
  'bukmap_20200211_fixed_contributors.tsv'),
 ('65edb64fde0094fc4143e8146b4aab07',
  'bukmap_20200211_contributors.tsv',
  'bukmap_20200211_fixed_contributors.tsv'),
 ('6d42c11aedb24a81a10e002508ee45bf',
  'bukmap_20200211_contributors.tsv',
  'bukmap_20200211_fixed_contributors.tsv'),
 ('8b6b962a12a1f3fdc9025c86364960b5',
  'bukmap_20200211_contributors.tsv',
  'bukmap_20200211_fixed_contributors.tsv'),
 ('8c73ca17d4bdd209029ae296ef369d79',
  'bukmap_20200211_contributors.tsv',
  'bukmap_20200211_fixed_contributors.tsv'),
 ('9000e0af00e1975653ed26d11557c5a5',
  'bukmap_20200211_contributors.tsv',
  'bukmap_20200211_fixed_contributors.tsv'),
 ('90c9327e4da2c5dbc6321e2dde9f8397',
  'bukmap_20200211_contributors.tsv',
  'bukmap_20200211_fixed_contributors.tsv'),
 ('a1fc283a437e8c50319f633e7b0faf42',
  'bukmap_20200211_contributors.tsv',
  'bukmap_20200211_fixed_contributors.tsv'),
 ('b8b1aef1ef402d09f16013824b5c7b4d',
  'bukmap_20200211_contributors.tsv',
  'bukmap_20200211_fixed_contributors.tsv'),
 ('ba36fb1ce457a447307acfa1f5417efc',
  'bukmap_20200211_contributors.tsv',
  'bukmap_20200211_fixed_contributors.tsv'),
 ('ce8c786f386cd6c6085b1a25add6c1ca',
  'bukmap_20200211_contributors.tsv',
  'bukmap_20200211_fixed_contributors.tsv'),
 ('331289a39c7117c3c13e92f83dd4460f',
  'bukmap_20190822_contributors.tsv',
  'bukmap_20190822_fixed_contributors.tsv'),
 ('49756755a4ccb12481d4668833cec16c',
  'bukmap_20190822_contributors.tsv',
  'bukmap_20190822_fixed_contributors.tsv'),
 ('81ab1f8ff2cb528f249b7fa6d81c58a2',
  'bukmap_20190822_contributors.tsv',
  'bukmap_20190822_fixed_contributors.tsv'),
 ('8dd7afd6b033e14890a5e475290198b3',
  'bukmap_20190822_contributors.tsv',
  'bukmap_20190822_fixed_contributors.tsv'),
 ('99ee34b1899f4d5ea0e5145d6e00c52b',
  'bukmap_20190822_contributors.tsv',
  'bukmap_20190822_fixed_contributors.tsv'),
 ('b108b7b962d9033b6d669c7cd171300c',
  'bukmap_20190822_contributors.tsv',
  'bukmap_20190822_fixed_contributors.tsv'),
 ('ca9707a6bfa016a3c0414b1a296c4eac',
  'bukmap_20190822_contributors.tsv',
  'bukmap_20190822_fixed_contributors.tsv'),
 ('dd6580b59213a0fd80554731d2c181c6',
  'bukmap_20190822_contributors.tsv',
  'bukmap_20190822_fixed_contributors.tsv'),
 ('e9b56e258a20a3136150ff3916b2e3ef',
  'bukmap_20190822_contributors.tsv',
  'bukmap_20190822_fixed_contributors.tsv'),
 ('fdcb4022b4094d1016e48c6d4e3ca783',
  'bukmap_20190822_contributors.tsv',
  'bukmap_20190822_fixed_contributors.tsv')]


Now create a fake metadata.tsv file describing this set of files.  Hopefully the file created here
already exists in the input directory for this tranche.

In [None]:
# NOTE: this cell reads `input_dir`, which this notebook only assigns in a
# later cell; that cell has to be run before this one.
fake_metadata_path = Path(input_dir) / 'UCSD_R2_SNARE' / 'tsv' / 'fake-metadata.tsv'
# One record per frozen triple; the fixed file name is not needed here
rec_l = [
    {'data_path': uuid, 'contributors_path': raw_name, 'assay_type': 'SNAREseq', 'do_not_save_this_uuid': True}
    for uuid, raw_name, fixed_name in UCSD_SNARE_tuples
]
tmp_df = pd.DataFrame(rec_l)
#display(tmp_df)
tmp_df.to_csv(fake_metadata_path, header=True, sep='\t', index=False)

### End block intended to reconstruct inputs for UCSD_R2_SNARE tranche ###

This just provides the path and filename for the multi-line metadata.tsv file to be split.  You'll have to update it for your environment.

In [None]:
# Root directory under which the per-uuid tree of metadata files is built
build_tree_root = Path("/home/welling/git/hubmap/ingest-pipeline/src/ingest-pipeline/misc/tools") / "build_tree_root"

In [None]:
# Directory holding one sub-directory of submitted metadata per data set
input_dir = "/home/welling/git/hubmap/ingest-pipeline/src/ingest-pipeline/md/all_md"
# Exactly one data set is active at a time; the others are kept for reference
#base_path = Path(input_dir) / 'UCSD_R2_snRNAseq'
#base_path = Path(input_dir) / 'UFLA_R2_10X'
#base_path = Path(input_dir) / 'UFLA_R2_CODEX'
#base_path = Path(input_dir) / 'STAN_R2_snRNA'
#base_path = Path(input_dir) / 'CALT_R2_sciATAC'
base_path = Path(input_dir, 'UCSD_R2_SNARE')
#in_fname = os.path.join(input_dir, "UFLA_CODEX_LY_Metadata_110920.tsv")
#for path in base_path.glob('**/*.xlsx'):
#    print(path.stem)

In [None]:
# Load every spreadsheet and TSV found under base_path.
#   df_d        maps the suffix-free file stem (a Path) to its dataframe
#   metadata_df is the concatenation, in load order, of every dataframe that
#               has an 'assay_type' column, or None if there is none
df_d = {}
metadata_frames = []
# (glob pattern, pandas reader, extra reader keyword arguments)
loaders = (
    ('**/*.xlsx', pd.read_excel, {}),
    ('**/*.tsv', pd.read_csv, {'sep': '\t'}),
)
for pattern, read_fn, read_kwargs in loaders:
    for path in base_path.glob(pattern):
        print(path)
        df = read_fn(path, **read_kwargs)
        true_stem = get_true_stem(path)
        df_d[true_stem] = df
        if 'assay_type' in df.columns:
            metadata_frames.append(df)
        print(f'{true_stem} -> {df_d[true_stem].columns}')

# DataFrame.append was removed in pandas 2.0; a single concat keeps the same
# row order and the original per-file index values.
metadata_df = pd.concat(metadata_frames) if metadata_frames else None

# special logic needed to straighten out CALT sciATAC
# The first dataframe whose stem mentions 'contributors' is also registered
# under the plain key 'contributors'.
for key in df_d:
    if 'contributors' in str(key):
        df_d[Path('contributors')] = df_d[key]
        break

In [None]:
# Show the first rows of every loaded dataframe under a banner naming its key
for stem, frame in df_d.items():
    print(f'#### {stem} ####')
    display(frame.head())

In [None]:
# Stop here if no loaded file had an 'assay_type' column
assert metadata_df is not None, "metadata file not found"

In [None]:
# Display the column names of the combined metadata
metadata_df.columns

In [None]:
# Display the combined metadata dataframe itself
metadata_df

### Begin block of mapping data used to guess uuids from tissue display ids, used to reconstruct Stanford snRNAseq ###

In [None]:
# Combined lookup from sample id to uuid, filled from the two tables below
samp_to_uuid_map = {}
stan_snrna_uuid_map = {  # maps sample TMC ID to uuid
    "STAN0008-LI-1-1":"a04d0138ed6b28810c5afa01d392bbd5",  # misplaced
    "STAN0008-LI-2-1":"a078805198f9f7f022b83de898a608a9",  # misplaced
    "STAN0008-LI-3-1":"c3d36358b184be55ac977abea5755447",  # misplaced
    "STAN0008-LI-4-1":"57288d8a0a9374ea83f90582df8eafa2",  # misplaced
    "STAN0007-LI-1-1":"f1937797246fa4592bff6166d7666de5",  # misplaced
    "STAN0007-LI-3-1":"e18254c67c8a0bb625f3748a2501a0bb",  # misplaced
    "STAN0007-LI-4-1":"492574f47224661fe8674f60373e44f4",  # misplaced
    "STAN0007-LI-2-1":"da93581ef554e25ec1c7a12500a56b74",  # misplaced
}
samp_to_uuid_map.update(stan_snrna_uuid_map)
calt_sciatacseq_txt = """
0bf827ea01b64963d39a10cac69bc386,CALT0006-HT-2
2d4d2f368c6f74cc3aa17177924003b8,CALT0012-HT-1
48da185436f006156d7e5c1941bfb147,CALT0005-SP-1
58ebb89caf1512e9452d1f9e0e1efa8e,CALT0003-HT
616a1aa904dfb1299f86910db2a20fbe,CALT0011-LV-1
76bfd5a517c681e5f672fecff2057111,CALT0012-HT-2
8ea82dc9f26bb2c01f19ddd19b3812b6,CALT0004-PA-2
8f6b8e19c21a664d67a467c3a08b5630,CALT0003-HT-2
93cc8c450db50a224dce243a43131d3c,CALT0010-HT-1
a0df02bda8befa491f86b0d41f2810ed,CALT0005-RL-1
a6a7f2b0b419aefb6f8ffb9bfa9ce7d5,CALT0004-RL-1
acaf75b8292db4a79dc14e3021742217,CALT0005-HT-1
ad26d1046084c5640f911a84e5cd0cee,CALT0003-HT-5
b2db3414cedf8805d20df3cf753842ca,CALT0011-HT-1
bc19b2d489ddef9e135a67bcc9746695,CALT0006-PA-1
bd435ed6aa55e9f57d783ce630d746bf,CALT0003-HT-3
bf88e07da70ee088e31c7f568e41b196,CALT0011-HT-2
d4fc9da8a21cbb323d55f38983fb3dbb,CALT0006-HT-1
dd39ed081ffc887d85fc8225c71b37dc,CALT0009-HT-1
e4b371ea3ed4c3ca77791b34b829803f,CALT0004-HT-1
ead5cc01250b4f9ea73dd91503c313a5,CALT0007-HT-1
eb4958e8b5dd073e8a4a80bd613b2d64,CALT0009-LV-1
f1b9f55b12e16d1e11a5ebbd863b5787,CALT0005-PA-1
"""

# Each usable line of the text table reads 'uuid,sample'; anything that does
# not split into exactly two fields (e.g. the blank first line) is ignored.
calt_sciatacseq_map = {}
for raw_line in calt_sciatacseq_txt.splitlines():
    fields = raw_line.strip().split(',')
    if len(fields) != 2:
        continue
    calt_uuid, calt_samp = fields
    calt_sciatacseq_map[calt_samp] = calt_uuid
samp_to_uuid_map.update(calt_sciatacseq_map)
pprint(samp_to_uuid_map)

### End block of mapping data for Stanford snRNAseq uuids ###

## The following block produces inv_uuid_map, used for the special case of UCSD snRNAseq data ##

In [None]:
#
# uuid_map maps a uuid to the name of a contributors file.  The literal below
# is a frozen table; the commented-out loop shows how it was apparently built.
# /tmp/junk.txt below was running a 'find -type f -print' on the HIVE host and
# grepping for 'contributors'
#
#uuid_map = {}
#for line in open('/tmp/junk.txt'):
#    words = line.strip().split('/')
#    uuid_map[words[1]] = words[-1]
uuid_map = {
 '0487454555924b54dd3f5b5232e3c77e': 'BUKMAP_20190529L_10X-R_contributors.tsv',
 '05197e30394fd88affff0a9c214c8c4c': 'LAPMAP_20200317J_10X-R_contributors.tsv',
 '065295e6e58b1d3555a261c1bfe3b3fe': 'LAPMAP_20200317M_10X-R_contributors.tsv',
 '06ff98c01295ca5ea504a676f73f9a09': 'BUKMAP_20200304B_10X-R_contributors.tsv',
 '0736735768692d6ca0cd96149b743be1': 'LAPMAP_20200317N_10X-R_contributors.tsv',
 '0c3ce767d87527f41fd705fd469390a0': 'BUKMAP_20200302A_10X-R_contributors.tsv',
 '17be751d961c5baf6c1dbe2e70c5d93c': 'BUKMAP_20191104B_10X-R_contributors.tsv',
 '1b83223cf1f4446c625adbfb375ab3fd': 'BUKMAP_20200205D_10X-R_contributors.tsv',
 '24eaa9730abe57c1c22f74573b846a6f': 'BUKMAP_20191029_10X-R_contributors.tsv',
 '26b642ddbae00e7ff6570ddd57557e26': 'LAPMAP_20200317I_10X-R_contributors.tsv',
 '2d27debfce3d25040af54fb77b25427b': 'BUKMAP_20200707A_10X-R_contributors.tsv',
 '3b1490026022f850e4d3c3fb5e2283c9': 'LAPMAP_20191217E_10X-R_contributors.tsv',
 '3fe18ec025f612ca2c5308d4c234da50': 'LAPMAP_20200317H_10X-R_contributors.tsv',
 '46e8ffd2350efd19f771c6fb6a51f6cc': 'BUKMAP_20200304F_10X-R_contributors.tsv',
 '488f364142c308a9692e0b529f6697dd': 'BUKMAP_20190822F_10X-R_contributors.tsv',
 '4bef8fa6eab2d3eb8734bf418c0634ef': 'BUKMAP_20190829B_10X-R_contributors.tsv',
 '4ea7a4cf1a6ff0df0cc33c1236633112': 'BUKMAP_20200205F_10X-R_contributors.tsv',
 '68e6dfa4807ca615883f73a5067115cb': 'LAPMAP_20200317L_10X-R_contributors.tsv',
 '69a0ada10f4f119f99ce5f66cf3b1a94': 'BUKMAP_20200702C_10X-R_contributors.tsv',
 '6a75230d8d1063fcc8568537212211f5': 'BUKMAP_20200302B_10X-R_contributors.tsv',
 '8e5c8f0cc61aad4fcbc5cc119bdf4c96': 'LAPMAP_20200317K_10X-R_contributors.tsv',
 '9049f48b97dc5edc737b67783a47e918': 'BUKMAP_20200702D_10X-R_contributors.tsv',
 '99c5c80509be87d2356d19a9ed8b22ff': 'BUKMAP_20191104A_10X-R_contributors.tsv',
 'a8652e9e3c545e61e1ffe9d54a8f1fd2': 'LAPMAP_20191217G_10X-R_contributors.tsv',
 'c686b93a809ec1f54a0d96bc25d3d207': 'BUKMAP_20190607L_10X-R_contributors.tsv',
 'cd887a6beabc794992876ad7ee591f69': 'BUKMAP_20200304A_10X-R_contributors.tsv',
 'cfc125d6d916f121e92a8406a0502a38': 'BUKMAP_20200707C_10X-R_contributors.tsv',
 'ec88a6b161dce97a2361b1479c69a036': 'BUKMAP_20191009_10X-R_contributors.tsv',
 'f1b130f1200ae1fabe56cb506245490c': 'BUKMAP_20191010_10X-R_contributors.tsv',
 'fa6d9c732c7f239422ec6b357136fcd4': 'BUKMAP_20200707B_10X-R_contributors.tsv',
 'fd0c0fcde5a331c9dfff52b520c7d792': 'BUKMAP_20200205A_10X-R_contributors.tsv'
}
# Echo the table so it can be checked by eye
pprint(uuid_map)

In [None]:
# Checking that we can use filenames as unique keys
# Count how many uuids point at each contributors file name, print the tally,
# and fail loudly if any name is shared: inverting uuid_map would otherwise
# silently keep only one uuid per shared name.
counts = defaultdict(int)
for fname in uuid_map.values():
    counts[fname] += 1
for fname, count in counts.items():
    print(f'{fname}: {count}')
duplicated = sorted(fname for fname, count in counts.items() if count > 1)
assert not duplicated, f'contributors file names shared by several uuids: {duplicated}'

In [None]:
# Reverse lookup: contributors file name -> uuid
inv_uuid_map = dict((fname, uuid_key) for uuid_key, fname in uuid_map.items())

## End block of special-purpose support code for UCSD snRNAseq ##

In [None]:
def fix_antibodies_df(df):
    """
    Check an antibodies dataframe and return a copy of it for writing out.

    The function once inserted 'version', 'concentration_value' and
    'concentration_unit' columns to satisfy the antibodies.tsv table schema,
    but that column list was immediately overwritten with the original one,
    so no columns were ever added.  That dead computation has been removed;
    the behavior is unchanged.

    Parameters:
        df: the antibodies dataframe.

    Returns:
        A copy of `df` with the same columns in the same order.

    Raises:
        AssertionError: if `df` has no 'conjugated_cat_number' column.
    """
    column_names = list(df.columns)
    assert 'conjugated_cat_number' in column_names, 'conjugated_cat_number is not present'
    print(column_names)
    # Copy so the caller's dataframe is never shared with the result
    return df.copy()

#print([k for k in df_d])
#display(fix_antibodies_df(df_d[Path('UFLA_antibodies_121120')]).head())

### This block does the actual job of creating the directory tree of updated .tsv files. ###

In [None]:
def _relative_path(path_str):
    """Return path_str as a Path, minus a leading '/' (a common error in submitted metadata)."""
    return Path(path_str[1:] if path_str.startswith('/') else path_str)


# Build one directory per uuid under build_tree_root holding
#   <uuid>-metadata.tsv   the single metadata row (unless flagged not to be saved)
#   extras/<contributors> the contributors table named by the row
#   extras/<antibodies>   the antibodies table, when the row names one
assert not build_tree_root.exists(), f'delete or move aside {build_tree_root}'
build_tree_root.mkdir()
# The set of datetime columns is the same for every row
datetime_cols = [col for col in metadata_df.columns if col.endswith('_datetime')]
for idx, row in metadata_df.iterrows():
    # Resolve the uuid: special-case tables first, then the data path itself
    if row['contributors_path'] in inv_uuid_map:
        uuid = inv_uuid_map[row['contributors_path']]
    elif 'tissue_id' in row and row['tissue_id'] in samp_to_uuid_map:
        uuid = samp_to_uuid_map[row['tissue_id']]
    else:
        uuid = get_uuid(row['data_path'])
    if not uuid:
        print(f'No uuid found for record {idx}')
        continue
    print(f'row {idx} -> {uuid}')
    uuid_path = build_tree_root / uuid
    uuid_path.mkdir()
    contributors_path = _relative_path(row['contributors_path'])
    # When metadata files with different columns are concatenated, rows from
    # files lacking 'antibodies_path' hold NaN there; treat that as "no file".
    if 'antibodies_path' in row and pd.notna(row['antibodies_path']):
        antibodies_path = _relative_path(row['antibodies_path'])
        row['antibodies_path'] = str(Path('extras').joinpath(antibodies_path))
    else:
        antibodies_path = None
    print(contributors_path.stem)
    print([k for k in df_d])
    assert get_true_stem(contributors_path) in df_d, f"Cannot find contributors dataframe {contributors_path}"
    row['contributors_path'] = str(Path('extras').joinpath(contributors_path))
    row['data_path'] = '.'
    for col in datetime_cols:
        if pd.notna(row[col]):  # a missing datetime cannot be reformatted
            row[col] = reformat_datetime(str(row[col]))
    # NaN is truthy, so the flag has to be tested for missing-ness explicitly:
    # a row that merely lacks the flag must still have its metadata written.
    skip_flag = row.get('do_not_save_this_uuid', False)
    if pd.isna(skip_flag) or not skip_flag:
        # NOTE(review): the flag looks like a notebook-internal control column
        # rather than metadata, so it is dropped from the saved file - confirm.
        saved_row = row.drop(labels=['do_not_save_this_uuid'], errors='ignore')
        pd.DataFrame([saved_row]).to_csv(uuid_path / f'{uuid}-metadata.tsv',
                                         header=True, sep='\t', index=False)
    (uuid_path / 'extras').mkdir()
    df_d[get_true_stem(contributors_path)].to_csv(uuid_path / row['contributors_path'],
                                                  header=True, sep='\t', index=False)
    if antibodies_path:
        df = df_d[get_true_stem(antibodies_path)]
        fix_antibodies_df(df).to_csv(uuid_path / row['antibodies_path'],
                                     header=True, sep='\t', index=False)


### This block should get reintegrated into the flow ###

In [None]:
# NOTE(review): `in_fname` is only assigned in a commented-out line earlier in
# this notebook, so it must be defined by hand before this cell will run.
# Sniff the delimiter from the first 128 characters, rewind, and load every
# row as a plain dict.
recs = []
with open(in_fname, 'r', newline='') as f:
    dialect = csv.Sniffer().sniff(f.read(128))
    f.seek(0)
    reader = csv.DictReader(f, dialect=dialect)
    recs.extend(dict(row) for row in reader)

Check what we've read.  Beware of columns named '' or ' ' - they indicate a trailing tab and thus an empty column in the input file!

In [None]:
# Column names as read from the header row
print(reader.fieldnames)

In [None]:
# First record, as a dict keyed by column name
print(recs[0])

In [None]:
# Delimiter detected by csv.Sniffer for this file
print(dialect.delimiter)  # should be tab