# Build `barcode_runs.csv` file
Tyler created the file [barcode_runs_orig-names.csv](barcode_runs_orig-names.csv), in which each sample is identified only by its original name.
This notebook builds `barcode_runs.csv`, a version of that file in which the information is separated into different columns, such as the sort bin number, date, and concentration.

In [1]:
import numpy

import pandas as pd

from IPython.display import display, HTML

# Re-shape the original barcode-runs table: pull the date, sort bin, and
# concentration out of each original sample name, then replace `sample`
# with a standardized name assembled from those parts.
#
# NOTE: the keyword order inside `.assign` matters. `sample` is rebuilt last
# because `date`, `sort_bin`, and `concentration` are all parsed from the
# ORIGINAL `sample` string, and the new `sample` reads the columns created
# just before it.
df = (
    pd.read_csv('barcode_runs_orig-names.csv')
    .assign(
        # first `_`-delimited field of the original sample name
        date=lambda x: x['sample'].str.split('_').str[0],
        # rows with a non-null `Sortseq_bin` entry are labeled SortSeq, all others TiteSeq
        sample_type=lambda x: numpy.where(x['Sortseq_bin'].notnull(), 'SortSeq', 'TiteSeq'),
        # trailing digits of the original name (raw string avoids the invalid
        # `\d` escape; `expand=False` returns a Series rather than a DataFrame)
        sort_bin=lambda x: x['sample'].str.extract(r'(\d+)$', expand=False),
        # digits between `_s` and `-`; NaN when the name has no such pattern
        concentration=lambda x: x['sample'].str.extract(r'_s(\d+)\-', expand=False),
        # standardized name: <sample_type>[_<concentration>]_bin<sort_bin>
        sample=lambda x: x['sample_type'] +
                         numpy.where(x['concentration'].notnull(), '_' + x['concentration'], '') +
                         '_bin' + x['sort_bin']
        )
    [['library', 'sample', 'sample_type', 'sort_bin', 'concentration', 'date', 'number_cells', 'R1']]
    )

# A name that does not end in digits yields a NaN `sort_bin` and hence a NaN
# `sample`; report that explicitly rather than via the uniqueness check below.
assert df['sample'].notnull().all(), 'could not build a sample name for every row'

# Each (library, sample) pair must identify exactly one row.
assert len(df.groupby(['library', 'sample'])) == len(df), \
    'library / sample combinations are not unique'

df.to_csv('barcode_runs.csv', index=False)

# Show the first few rows as an HTML table.
display(HTML(df.head().to_html()))

Unnamed: 0,library,sample,sample_type,sort_bin,concentration,date,number_cells,R1
0,lib1,SortSeq_bin1,SortSeq,1,,200416,6600000,/shared/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin1/200416_lib1_FITCbin1_TGGAACAA_L001_R1_001.fastq.gz; /fh/fast/bloom_j/SR/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin1/200416_lib1_FITCbin1_TGGAACAA_L001_R1_002.fastq.gz; /fh/fast/bloom_j/SR/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin1/200416_lib1_FITCbin1_TGGAACAA_L001_R1_003.fastq.gz; /fh/fast/bloom_j/SR/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin1/200416_lib1_FITCbin1_TGGAACAA_L001_R1_004.fastq.gz; /fh/fast/bloom_j/SR/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin1/200416_lib1_FITCbin1_TGGAACAA_L001_R1_005.fastq.gz; /fh/fast/bloom_j/SR/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin1/200416_lib1_FITCbin1_TGGAACAA_L002_R1_001.fastq.gz; /fh/fast/bloom_j/SR/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin1/200416_lib1_FITCbin1_TGGAACAA_L002_R1_002.fastq.gz; /fh/fast/bloom_j/SR/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin1/200416_lib1_FITCbin1_TGGAACAA_L002_R1_003.fastq.gz; /fh/fast/bloom_j/SR/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin1/200416_lib1_FITCbin1_TGGAACAA_L002_R1_004.fastq.gz; /fh/fast/bloom_j/SR/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin1/200416_lib1_FITCbin1_TGGAACAA_L002_R1_005.fastq.gz
1,lib1,SortSeq_bin2,SortSeq,2,,200416,3060000,/shared/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin2/200416_lib1_FITCbin2_TGGCTTCA_L001_R1_001.fastq.gz; /fh/fast/bloom_j/SR/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin2/200416_lib1_FITCbin2_TGGCTTCA_L001_R1_002.fastq.gz; /fh/fast/bloom_j/SR/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin2/200416_lib1_FITCbin2_TGGCTTCA_L002_R1_001.fastq.gz; /fh/fast/bloom_j/SR/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin2/200416_lib1_FITCbin2_TGGCTTCA_L002_R1_002.fastq.gz
2,lib1,SortSeq_bin3,SortSeq,3,,200416,2511000,/shared/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin3/200416_lib1_FITCbin3_TGGTGGTA_L001_R1_001.fastq.gz; /shared/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin3/200416_lib1_FITCbin3_TGGTGGTA_L001_R1_002.fastq.gz; /shared/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin3/200416_lib1_FITCbin3_TGGTGGTA_L002_R1_001.fastq.gz; /shared/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin3/200416_lib1_FITCbin3_TGGTGGTA_L002_R1_002.fastq.gz
3,lib1,SortSeq_bin4,SortSeq,4,,200416,2992000,/shared/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin4/200416_lib1_FITCbin4_TTCACGCA_L001_R1_001.fastq.gz; /shared/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin4/200416_lib1_FITCbin4_TTCACGCA_L001_R1_002.fastq.gz; /shared/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin4/200416_lib1_FITCbin4_TTCACGCA_L002_R1_001.fastq.gz; /shared/ngs/illumina/tstarr/200427_D00300_0952_AHFCLCBCX3/Unaligned/Project_tstarr/Sample_200416_lib1_FITCbin4/200416_lib1_FITCbin4_TTCACGCA_L002_R1_002.fastq.gz
4,lib2,SortSeq_bin1,SortSeq,1,,200416,6420000,/shared/ngs/illumina/tstarr/200427_D00300_0953_BHFCMKBCX3/Unaligned/Project_tstarr/Sample_200416_lib2_FITCbin1/200416_lib2_FITCbin1_TGGAACAA_L001_R1_001.fastq.gz; /shared/ngs/illumina/tstarr/200427_D00300_0953_BHFCMKBCX3/Unaligned/Project_tstarr/Sample_200416_lib2_FITCbin1/200416_lib2_FITCbin1_TGGAACAA_L001_R1_002.fastq.gz; /shared/ngs/illumina/tstarr/200427_D00300_0953_BHFCMKBCX3/Unaligned/Project_tstarr/Sample_200416_lib2_FITCbin1/200416_lib2_FITCbin1_TGGAACAA_L001_R1_003.fastq.gz; /shared/ngs/illumina/tstarr/200427_D00300_0953_BHFCMKBCX3/Unaligned/Project_tstarr/Sample_200416_lib2_FITCbin1/200416_lib2_FITCbin1_TGGAACAA_L001_R1_004.fastq.gz; /shared/ngs/illumina/tstarr/200427_D00300_0953_BHFCMKBCX3/Unaligned/Project_tstarr/Sample_200416_lib2_FITCbin1/200416_lib2_FITCbin1_TGGAACAA_L002_R1_001.fastq.gz; /shared/ngs/illumina/tstarr/200427_D00300_0953_BHFCMKBCX3/Unaligned/Project_tstarr/Sample_200416_lib2_FITCbin1/200416_lib2_FITCbin1_TGGAACAA_L002_R1_002.fastq.gz; /shared/ngs/illumina/tstarr/200427_D00300_0953_BHFCMKBCX3/Unaligned/Project_tstarr/Sample_200416_lib2_FITCbin1/200416_lib2_FITCbin1_TGGAACAA_L002_R1_003.fastq.gz; /shared/ngs/illumina/tstarr/200427_D00300_0953_BHFCMKBCX3/Unaligned/Project_tstarr/Sample_200416_lib2_FITCbin1/200416_lib2_FITCbin1_TGGAACAA_L002_R1_004.fastq.gz
