## Alignment of peaks from several _Vitis_ cultivars

Alignment of peak lists based on m/z relative differences (below a ppm threshold). 

Requirements:

- metabolinks


In [1]:
from collections import OrderedDict
from metabolinks import align, read_data_from_xcel
from metabolinks.similarity import mz_similarity
import metabolinks as mtl

### Set up metadata descriptions

In [2]:
# Folder that holds the Excel workbooks, and the value passed as `header`
# to the Excel reader.
data_folder = 'data'
header_row = 3


def _cultivar_entry(label, filename, sample_names):
    """Build the metadata record of one cultivar workbook.

    `sample_names` is a whitespace-separated string with one name per replicate.
    """
    return {'filename': filename,
            'names': {'sample_names': sample_names.split(), 'labels': label}}


# One row per cultivar: (label, Excel workbook, replicate sample names).
_cultivar_table = [
    ('CAN', 'CAN (14, 15, 16).xlsx', '14 15 16'),
    ('CS', 'CS (29, 30, 31).xlsx', '29 30 31'),
    ('LAB', 'LAB (8, 9, 10).xlsx', '8  9  10'),
    ('PN', 'PN (23, 24, 25).xlsx', '23 24 25'),
    ('REG', 'REG (38, 39, 40).xlsx', '38 39 40'),
    ('RIP', 'RIP (17, 18, 19).xlsx', '17 18 19'),
    ('RL', 'RL (26, 27, 28).xlsx', '26 27 28'),
    ('ROT', 'ROT (20, 21, 22).xlsx', '20 21 22'),
    ('RU', 'RU (35, 36, 37).xlsx', '35 36 37'),
    ('SYL', 'SYL (11, 12, 13).xlsx', '11 12 13'),
    ('TRI', 'TRI (32, 33, 34).xlsx', '32 33 34'),
    # these are the new cultivars
    ('CFN', 'CFN (10713_1, 10713_2, 10713_3).xlsx', '10713-1 10713-2 10713-3'),
    ('CHT', 'CHT (13514_1, 13514_2, 13514_3).xlsx', '13514-1 13514-2 13514-3'),
    ('SB', 'SB (53211_1, 53211_2, 53211_3).xlsx', '53211-1 53211-2 53211-3'),
]

# Keyed by cultivar label; each 'names' sub-dict is later unpacked as keyword
# arguments of the Excel reader.
data = {label: _cultivar_entry(label, filename, sample_names)
        for label, filename, sample_names in _cultivar_table}


### Read spectra from Excel files

In [3]:
# Every sheet of every workbook ends up in one mapping keyed by sheet name.
# A sheet name that occurs in two workbooks would overwrite the earlier entry.
all_spectra = OrderedDict()

for desc in data.values():
    fname = f"{data_folder}/{desc['filename']}"
    # desc['names'] supplies the sample_names / labels keyword arguments.
    spectra = read_data_from_xcel(fname, header=header_row, verbose=True,
                                  **desc['names'])
    all_spectra.update(spectra)

print('Peak list names (identical to Excel sheet names)')
for sheet_name in all_spectra.keys():
    print(sheet_name)

------ Reading MS-Excel file - data/CAN (14, 15, 16).xlsx

- 3 tables found in sheet "CAN - NEGATIVO":
       sample
0           I
global      1
581 features

       sample
0         I.1
global      1
590 features

       sample
0         I.2
global      1
515 features


- 3 tables found in sheet "CAN - POSITIVO":
       sample
0           I
global      1
1159 features

       sample
0         I.1
global      1
1424 features

       sample
0         I.2
global      1
1193 features

------ Reading MS-Excel file - data/CS (29, 30, 31).xlsx

- 3 tables found in sheet "CS - NEGATIVO":
       sample
0           I
global      1
768 features

       sample
0         I.1
global      1
838 features

       sample
0         I.2
global      1
710 features


- 3 tables found in sheet "CS - Positivo":
       sample
0           I
global      1
1642 features

       sample
0         I.1
global      1
1599 features

       sample
0         I.2
global      1
1631 features

------ Reading MS-Excel file 

### Alignment of peak lists

#### Align for each mode and cultivar (keep if peak appears in at least 2 samples)

In [4]:
# m/z tolerance (ppm) and minimum number of replicates a peak must appear in.
ppmtol = 1.0
min_samples = 2

# Align the replicate tables of each sheet; results are keyed by sheet name.
aligned = {}
for sheet_name, replicate_tables in all_spectra.items():
    # Separator, sheet name and the raw replicate tables, one per line.
    print('=======================================', sheet_name, replicate_tables,
          sep='\n')
    aligned[sheet_name] = align(replicate_tables, ppmtol=ppmtol,
                                min_samples=min_samples)

CAN - NEGATIVO
[                   I
m/z                 
97.58888   1557223.0
97.59021   1031039.0
97.59192    903612.0
98.36017    590302.0
98.57357   3264189.0
...              ...
861.50880   670842.0
861.71717   935351.0
865.19509   897970.0
896.37823  1150601.0
897.38085   767520.0

[581 rows x 1 columns],                I.1
m/z.1             
97.58888    468344
97.59020    321586
97.59190    296777
98.57358   1171621
98.57521    576750
...            ...
861.51608  1884861
861.72092   662095
862.51878   657480
876.48738   611114
896.38050  1109216

[590 rows x 1 columns],                  I.2
m/z.2               
97.24823    293423.0
97.58892   2070914.0
97.59035    905722.0
97.59190    916971.0
98.36016    459964.0
...              ...
832.48875   729482.0
844.33931  1788071.0
845.34441   983511.0
860.50608   693078.0
865.19671   655016.0

[515 rows x 1 columns]]
------ Aligning tables -------------
 Samples to align: [['I'], ['I.1'], ['I.2']]
- Extracting all features...
  Don

  Done, 3576 groups found
Elapsed time: 00m 02.790s

- 2985 groups were discarded (#samples < 2)
Sample coverage of features
  312 features in 2 samples
  279 features in 3 samples
m/z range (ppm) distribution
  [0.0,0.1[ : 57
  [0.1,0.2[ : 69
  [0.2,0.3[ : 69
  [0.3,0.4[ : 67
  [0.4,0.5[ : 64
  [0.5,0.6[ : 55
  [0.6,0.7[ : 53
  [0.7,0.8[ : 57
  [0.8,0.9[ : 53
  [0.9,1.0[ : 47
  > 1.0     : 0
PN - NEGATIVO
[                   I
m/z                 
97.58889    709044.0
98.57345   1549016.0
98.57511    536107.0
98.57664    561059.0
99.28832   1233276.0
...              ...
622.25013  1864094.0
623.24804   765967.0
644.23572   743980.0
680.21204  1273427.0
680.71493   928267.0

[464 rows x 1 columns],                I.1
m/z.1             
97.58889    940835
97.59197    318870
98.36009    306949
98.57346   1835189
98.57399    474535
...            ...
613.53552   648771
614.21705   613013
622.25169  1232624
680.21269  1047462
680.71687   697314

[508 rows x 1 columns],                  I.

  Done, 3189 groups found
Elapsed time: 00m 03.040s

- 2851 groups were discarded (#samples < 2)
Sample coverage of features
  208 features in 2 samples
  130 features in 3 samples
m/z range (ppm) distribution
  [0.0,0.1[ : 42
  [0.1,0.2[ : 45
  [0.2,0.3[ : 34
  [0.3,0.4[ : 47
  [0.4,0.5[ : 27
  [0.5,0.6[ : 34
  [0.6,0.7[ : 32
  [0.7,0.8[ : 32
  [0.8,0.9[ : 26
  [0.9,1.0[ : 19
  > 1.0     : 0
RL - NEGATIVO
[                   I
m/z                 
97.58879    547566.0
98.36010    892890.0
98.57353   4479187.0
98.57521   1624887.0
98.57660   1243151.0
...              ...
965.87591  1558788.0
966.00279  2820414.0
966.07491  2038824.0
966.14260  1142373.0
966.25952   914243.0

[800 rows x 1 columns],                I.1
m/z.1             
97.58878    800618
97.59182    384755
98.35222    482025
98.36014   1414146
98.57368   5317654
...            ...
965.70575  1380918
965.91319  1116341
966.00349  1818040
966.07160  1195873
966.12596   807666

[801 rows x 1 columns],                  I.

  Done, 5500 groups found
Elapsed time: 00m 05.415s

- 3779 groups were discarded (#samples < 2)
Sample coverage of features
  888 features in 2 samples
  833 features in 3 samples
m/z range (ppm) distribution
  [0.0,0.1[ : 184
  [0.1,0.2[ : 225
  [0.2,0.3[ : 214
  [0.3,0.4[ : 198
  [0.4,0.5[ : 171
  [0.5,0.6[ : 167
  [0.6,0.7[ : 154
  [0.7,0.8[ : 147
  [0.8,0.9[ : 137
  [0.9,1.0[ : 124
  > 1.0     : 0
SYL - NEGATIVE
[                   I
m/z                 
97.24820    436975.0
97.58893   2838014.0
97.59025   1233000.0
97.59190   1191591.0
98.36024    762963.0
...              ...
846.34742   589646.0
860.32863   706677.0
860.51030   849596.0
865.19840   715115.0
941.16553   584990.0

[566 rows x 1 columns],                  I.1
m/z.1               
97.58890   2469917.0
97.59020   1674475.0
97.59096   1053621.0
97.59192   1570858.0
98.36018    583743.0
...              ...
860.32979   781395.0
860.50795   662634.0
861.71875   660800.0
865.19735   764882.0
941.16748   587456.0

[611 r

  Done, 2981 groups found
Elapsed time: 00m 02.982s

- 2718 groups were discarded (#samples < 2)
Sample coverage of features
  170 features in 2 samples
   93 features in 3 samples
m/z range (ppm) distribution
  [0.0,0.1[ : 22
  [0.1,0.2[ : 37
  [0.2,0.3[ : 21
  [0.3,0.4[ : 23
  [0.4,0.5[ : 23
  [0.5,0.6[ : 32
  [0.6,0.7[ : 20
  [0.7,0.8[ : 31
  [0.8,0.9[ : 34
  [0.9,1.0[ : 20
  > 1.0     : 0
CHT - NEGATIVO
[                  I
m/z                
98.57352   262657.0
98.94944   509918.0
98.94996   466834.0
99.65800   876403.0
99.65947   467843.0
...             ...
579.02371  619178.0
597.11155  855136.0
611.28142  697755.0
652.23189  692682.0
654.21362  993151.0

[292 rows x 1 columns],                  I.1
m/z.1               
98.57347    322819.0
98.94943    381788.0
99.54108    205532.0
99.65801    551697.0
107.40325   226883.0
...              ...
577.02481  1050369.0
597.11218   544994.0
611.28293   548990.0
652.23208   687379.0
654.21296   700957.0

[235 rows x 1 columns],      

#### Save, just in case, separating modes

In [5]:
# Split the per-sheet alignments by ionization mode, using the sheet-name suffix
# (case-insensitive).
# NOTE(review): sheets whose name ends in anything else are left out of both
# dicts -- the output above shows a sheet called "SYL - NEGATIVE". Confirm that
# excluding it is intended.
aligned_pos = {}
aligned_neg = {}
for sheet_name, table in aligned.items():
    suffix_source = sheet_name.upper()
    if suffix_source.endswith('POSITIVO'):
        aligned_pos[sheet_name] = table
    if suffix_source.endswith('NEGATIVO'):
        aligned_neg[sheet_name] = table

#save_aligned_to_excel('aligned_cultivars_positive_1ppm_min2.xlsx', aligned_pos)
#save_aligned_to_excel('aligned_cultivars_negative_1ppm_min2.xlsx', aligned_neg)

The sample names are not carried through the per-cultivar alignment (reason not yet identified), so they are set manually here, together with the cultivar labels.

In [6]:
# Restore the real sample names on every per-cultivar alignment and attach the
# cultivar label. The cultivar key is the first word of the sheet name.
for aligned_by_sheet in (aligned_pos, aligned_neg):
    for name, value in aligned_by_sheet.items():
        names_info = data[name.split()[0]]['names']
        value.columns = names_info['sample_names']
        # Store the frame returned by add_labels. Binding it to the loop variable
        # only (as before) silently discards it unless add_labels happens to
        # modify its argument in place. Replacing the value of an existing key
        # while iterating is safe because the dict size does not change.
        aligned_by_sheet[name] = mtl.add_labels(value, names_info['labels'])

#### Align globally (for each mode), starting from previously aligned peak lists

In [7]:
# Second-stage alignment across cultivars, one run per ionization mode.
ppmtol = 1.0
# Each input table is already a within-cultivar alignment, so a feature seen in
# a single table must be kept.
min_samples = 1
positive = list(aligned_pos.values())
negative = list(aligned_neg.values())

aligned_all_pos, aligned_all_neg = (
    align(tables, ppmtol=ppmtol, min_samples=min_samples)
    for tables in (positive, negative)
)

------ Aligning tables -------------
 Samples to align: [[('CAN', '14'), ('CAN', '15'), ('CAN', '16')], [('CS', '29'), ('CS', '30'), ('CS', '31')], [('LAB', '8'), ('LAB', '9'), ('LAB', '10')], [('PN', '23'), ('PN', '24'), ('PN', '25')], [('REG', '38'), ('REG', '39'), ('REG', '40')], [('RIP', '17'), ('RIP', '18'), ('RIP', '19')], [('RL', '26'), ('RL', '27'), ('RL', '28')], [('ROT', '20'), ('ROT', '21'), ('ROT', '22')], [('RU', '35'), ('RU', '36'), ('RU', '37')], [('TRI', '32'), ('TRI', '33'), ('TRI', '34')], [('CFN', '10713-1'), ('CFN', '10713-2'), ('CFN', '10713-3')], [('CHT', '13514-1'), ('CHT', '13514-2'), ('CHT', '13514-3')], [('SB', '53211-1'), ('SB', '53211-2'), ('SB', '53211-3')]]
- Extracting all features...
  Done, (total 10997 features in 13 samples)
- Grouping and joining...
  Done, 5163 groups found
Elapsed time: 00m 06.069s

Sample coverage of features
 2900 features in 1 samples
  912 features in 2 samples
  557 features in 3 samples
  319 features in 4 samples
  154 featu

#### Save alignments in Excel and CSV files

In [8]:
# Both global alignments, keyed by ionization mode, ready for the (currently
# disabled) export calls below.
outdict = dict(POSITIVE=aligned_all_pos, NEGATIVE=aligned_all_neg)
#save_aligned_to_excel('aligned_1ppm_min2_1ppm.xlsx', outdict)

#aligned_all_pos.to_csv('aligned_1ppm_min2_1ppm_positive.csv', with_labels=True, sep=',')
#aligned_all_neg.to_csv('aligned_1ppm_min2_1ppm_negative.csv', with_labels=True, sep=',')

### Repeat the alignments, this time requiring a peak to be present in all replicates of each label

In [9]:
# Same per-sheet alignment as before, but a peak must now be present in all
# three replicates to be kept.
ppmtol = 1.0
min_samples = 3

aligned = {}
for sheet_name, replicate_tables in all_spectra.items():
    print('=======================================', sheet_name, sep='\n')
    aligned[sheet_name] = align(replicate_tables, ppmtol=ppmtol,
                                min_samples=min_samples)

CAN - NEGATIVO
------ Aligning tables -------------
 Samples to align: [['I'], ['I.1'], ['I.2']]
- Extracting all features...
  Done, (total 1686 features in 3 samples)
- Grouping and joining...
  Done, 1022 groups found
Elapsed time: 00m 01.054s

- 833 groups were discarded (#samples < 3)
Sample coverage of features
  189 features in 3 samples
m/z range (ppm) distribution
  [0.0,0.1[ : 1
  [0.1,0.2[ : 2
  [0.2,0.3[ : 6
  [0.3,0.4[ : 11
  [0.4,0.5[ : 47
  [0.5,0.6[ : 54
  [0.6,0.7[ : 37
  [0.7,0.8[ : 8
  [0.8,0.9[ : 12
  [0.9,1.0[ : 11
  > 1.0     : 0
CAN - POSITIVO
------ Aligning tables -------------
 Samples to align: [['I'], ['I.1'], ['I.2']]
- Extracting all features...
  Done, (total 3776 features in 3 samples)
- Grouping and joining...
  Done, 3144 groups found
Elapsed time: 00m 03.257s

- 2944 groups were discarded (#samples < 3)
Sample coverage of features
  200 features in 3 samples
m/z range (ppm) distribution
  [0.0,0.1[ : 27
  [0.1,0.2[ : 19
  [0.2,0.3[ : 28
  [0.3,0.4[ : 

  Done, 946 groups found
Elapsed time: 00m 00.849s

- 590 groups were discarded (#samples < 3)
Sample coverage of features
  356 features in 3 samples
m/z range (ppm) distribution
  [0.0,0.1[ : 15
  [0.1,0.2[ : 48
  [0.2,0.3[ : 79
  [0.3,0.4[ : 94
  [0.4,0.5[ : 20
  [0.5,0.6[ : 29
  [0.6,0.7[ : 29
  [0.7,0.8[ : 17
  [0.8,0.9[ : 14
  [0.9,1.0[ : 11
  > 1.0     : 0
ROT - POSITIVO
------ Aligning tables -------------
 Samples to align: [['I'], ['I.1'], ['I.2']]
- Extracting all features...
  Done, (total 3345 features in 3 samples)
- Grouping and joining...
  Done, 2888 groups found
Elapsed time: 00m 02.218s

- 2739 groups were discarded (#samples < 3)
Sample coverage of features
  149 features in 3 samples
m/z range (ppm) distribution
  [0.0,0.1[ : 10
  [0.1,0.2[ : 17
  [0.2,0.3[ : 21
  [0.3,0.4[ : 23
  [0.4,0.5[ : 12
  [0.5,0.6[ : 19
  [0.6,0.7[ : 16
  [0.7,0.8[ : 10
  [0.8,0.9[ : 12
  [0.9,1.0[ : 9
  > 1.0     : 0
RU - NEGATIVO
------ Aligning tables -------------
 Samples to align: [[

In [10]:
# Split the per-sheet alignments by ionization mode, using the sheet-name suffix
# (case-insensitive).
# NOTE(review): sheets whose name ends in anything else (e.g. the
# "SYL - NEGATIVE" sheet seen in earlier output) are left out of both dicts --
# confirm that this is intended.
aligned_pos = {}
aligned_neg = {}
for sheet_name, table in aligned.items():
    suffix_source = sheet_name.upper()
    if suffix_source.endswith('POSITIVO'):
        aligned_pos[sheet_name] = table
    if suffix_source.endswith('NEGATIVO'):
        aligned_neg[sheet_name] = table

#save_aligned_to_excel('aligned_cultivars_positive_1ppm_min3.xlsx', aligned_pos)
#save_aligned_to_excel('aligned_cultivars_negative_1ppm_min3.xlsx', aligned_neg)

The sample names are not carried through the per-cultivar alignment (reason not yet identified), so they are set manually here, together with the cultivar labels.

In [11]:
# Restore the real sample names on every per-cultivar alignment and attach the
# cultivar label. The cultivar key is the first word of the sheet name.
for aligned_by_sheet in (aligned_pos, aligned_neg):
    for name, value in aligned_by_sheet.items():
        names_info = data[name.split()[0]]['names']
        value.columns = names_info['sample_names']
        # Store the frame returned by add_labels. Binding it to the loop variable
        # only (as before) silently discards it unless add_labels happens to
        # modify its argument in place. Replacing the value of an existing key
        # while iterating is safe because the dict size does not change.
        aligned_by_sheet[name] = mtl.add_labels(value, names_info['labels'])

In [12]:
# Second-stage alignment across cultivars, starting from the min_samples=3
# per-cultivar alignments.
ppmtol = 1.0
# Each input table is already a within-cultivar alignment, so a feature seen in
# a single table must be kept.
min_samples = 1
positive = list(aligned_pos.values())
negative = list(aligned_neg.values())

aligned_all_pos, aligned_all_neg = (
    align(tables, ppmtol=ppmtol, min_samples=min_samples)
    for tables in (positive, negative)
)

------ Aligning tables -------------
 Samples to align: [[('CAN', '14'), ('CAN', '15'), ('CAN', '16')], [('CS', '29'), ('CS', '30'), ('CS', '31')], [('LAB', '8'), ('LAB', '9'), ('LAB', '10')], [('PN', '23'), ('PN', '24'), ('PN', '25')], [('REG', '38'), ('REG', '39'), ('REG', '40')], [('RIP', '17'), ('RIP', '18'), ('RIP', '19')], [('RL', '26'), ('RL', '27'), ('RL', '28')], [('ROT', '20'), ('ROT', '21'), ('ROT', '22')], [('RU', '35'), ('RU', '36'), ('RU', '37')], [('TRI', '32'), ('TRI', '33'), ('TRI', '34')], [('CFN', '10713-1'), ('CFN', '10713-2'), ('CFN', '10713-3')], [('CHT', '13514-1'), ('CHT', '13514-2'), ('CHT', '13514-3')], [('SB', '53211-1'), ('SB', '53211-2'), ('SB', '53211-3')]]
- Extracting all features...
  Done, (total 5112 features in 13 samples)
- Grouping and joining...
  Done, 2091 groups found
Elapsed time: 00m 01.758s

Sample coverage of features
 1094 features in 1 samples
  333 features in 2 samples
  219 features in 3 samples
  152 features in 4 samples
   81 featur

In [13]:
# Both global alignments, keyed by ionization mode, ready for the (currently
# disabled) export calls below.
outdict = dict(POSITIVE=aligned_all_pos, NEGATIVE=aligned_all_neg)
#save_aligned_to_excel('aligned_1ppm_min3_1ppm.xlsx', outdict)

#aligned_all_pos.to_csv('aligned_1ppm_min3_1ppm_positive.csv', with_labels=True, sep=',')
#aligned_all_neg.to_csv('aligned_1ppm_min3_1ppm_negative.csv', with_labels=True, sep=',')

In [14]:
# Variant of the cross-cultivar alignment: wider tolerance (2 ppm) and a
# feature is kept only if it appears in at least 3 samples.
ppmtol = 2.0
min_samples = 3
positive = list(aligned_pos.values())
negative = list(aligned_neg.values())

aligned_all_pos, aligned_all_neg = (
    align(tables, ppmtol=ppmtol, min_samples=min_samples)
    for tables in (positive, negative)
)

------ Aligning tables -------------
 Samples to align: [[('CAN', '14'), ('CAN', '15'), ('CAN', '16')], [('CS', '29'), ('CS', '30'), ('CS', '31')], [('LAB', '8'), ('LAB', '9'), ('LAB', '10')], [('PN', '23'), ('PN', '24'), ('PN', '25')], [('REG', '38'), ('REG', '39'), ('REG', '40')], [('RIP', '17'), ('RIP', '18'), ('RIP', '19')], [('RL', '26'), ('RL', '27'), ('RL', '28')], [('ROT', '20'), ('ROT', '21'), ('ROT', '22')], [('RU', '35'), ('RU', '36'), ('RU', '37')], [('TRI', '32'), ('TRI', '33'), ('TRI', '34')], [('CFN', '10713-1'), ('CFN', '10713-2'), ('CFN', '10713-3')], [('CHT', '13514-1'), ('CHT', '13514-2'), ('CHT', '13514-3')], [('SB', '53211-1'), ('SB', '53211-2'), ('SB', '53211-3')]]
- Extracting all features...
  Done, (total 5112 features in 13 samples)
- Grouping and joining...
  Done, 1895 groups found
Elapsed time: 00m 01.735s

- 1222 groups were discarded (#samples < 3)
Sample coverage of features
  188 features in 3 samples
  148 features in 4 samples
   80 features in 5 samp

In [15]:
# Both global alignments, keyed by ionization mode, ready for the (currently
# disabled) export calls below.
outdict = dict(POSITIVE=aligned_all_pos, NEGATIVE=aligned_all_neg)
#save_aligned_to_excel('aligned_1ppm_min3_2ppm.xlsx', outdict)

#aligned_all_pos.to_csv('aligned_1ppm_min3_2ppm_positive.csv', with_labels=True, sep=',')
#aligned_all_neg.to_csv('aligned_1ppm_min3_2ppm_negative.csv', with_labels=True, sep=',')

### Aligning all 39 samples together (not aligning replicates first)

First, putting all samples in the same list with the correct sample_names

In [16]:
# Flat lists of single-sample tables, one list per ionization mode, with each
# table's only column renamed to the real sample name.
posi = []
nega = []
for sheet_name, replicate_tables in all_spectra.items():
    mode = sheet_name.upper()
    if mode.endswith('POSITIVO'):
        target = posi
    elif mode.endswith('NEGATIVO'):
        target = nega
    else:
        # Sheets with any other suffix are not part of this alignment.
        continue

    # The cultivar key is the first word of the sheet name.
    sample_names = data[sheet_name.split()[0]]['names']['sample_names']
    if len(replicate_tables) < len(sample_names):
        raise ValueError(
            f'{sheet_name!r}: {len(replicate_tables)} tables found, '
            f'{len(sample_names)} sample names expected')

    for table, sample_name in zip(replicate_tables, sample_names):
        # Rename a copy: the tables stored in all_spectra stay as they were read,
        # so earlier cells give the same result when re-run after this one.
        renamed = table.copy()
        renamed.columns = [sample_name]
        target.append(renamed)

In [17]:
# Single-stage alignment of all individual samples of each mode; a feature is
# kept when it appears in at least two samples.
ppmtol = 1.0
min_samples = 2

aligned_all_positive = align(posi, ppmtol=ppmtol, min_samples=min_samples)
aligned_all_negative = align(nega, ppmtol=ppmtol, min_samples=min_samples)

------ Aligning tables -------------
 Samples to align: [['14'], ['15'], ['16'], ['29'], ['30'], ['31'], ['8'], ['9'], ['10'], ['23'], ['24'], ['25'], ['38'], ['39'], ['40'], ['17'], ['18'], ['19'], ['26'], ['27'], ['28'], ['20'], ['21'], ['22'], ['35'], ['36'], ['37'], ['32'], ['33'], ['34'], ['10713-1'], ['10713-2'], ['10713-3'], ['13514-1'], ['13514-2'], ['13514-3'], ['53211-1'], ['53211-2'], ['53211-3']]
- Extracting all features...
  Done, (total 67786 features in 39 samples)
- Grouping and joining...
  Done, 37569 groups found
Elapsed time: 00m 30.894s

- 28717 groups were discarded (#samples < 2)
Sample coverage of features
 3635 features in 2 samples
 1743 features in 3 samples
  958 features in 4 samples
  627 features in 5 samples
  433 features in 6 samples
  282 features in 7 samples
  236 features in 8 samples
  182 features in 9 samples
  128 features in 10 samples
  116 features in 11 samples
   95 features in 12 samples
   62 features in 13 samples
   39 features in 14 

In [18]:
# Cultivar labels handed to add_labels for both modes. SYL is not in the list,
# matching the 39 samples that were aligned.
_cultivar_labels = ('CAN', 'CS', 'LAB', 'PN', 'REG', 'RIP', 'RL', 'ROT', 'RU',
                    'TRI', 'CFN', 'CHT', 'SB')

# A fresh list is passed to each call, as in the original two literals.
aligned_all_positive = mtl.add_labels(aligned_all_positive, list(_cultivar_labels))
aligned_all_negative = mtl.add_labels(aligned_all_negative, list(_cultivar_labels))

In [19]:
#aligned_all_positive.to_csv('aligned_1ppm_min1-2_1ppm_positive.csv', with_labels=True, sep=',')
#aligned_all_negative.to_csv('aligned_1ppm_min1-2_1ppm_negative.csv', with_labels=True, sep=',')

### Comparing with the same alignment made with the previous version of align

In [20]:
import pandas as pd


def read_aligned_files(filename):
    """Load an aligned-peaks CSV whose first two rows are column header rows.

    The file is parsed twice: once without a header, to get the two header
    rows as plain data, and once with ``header=[0, 1]`` for the table itself.
    The columns of the table are then replaced by a MultiIndex built from
    those two rows in swapped order (second row first), which is the level
    order wanted for the CDL accessor.

    Parameters
    ----------
    filename : path of the CSV file; its first column becomes the index.

    Returns
    -------
    pandas.DataFrame with the swapped two-level column index.
    """
    raw_table = pd.read_csv(filename, header=None, index_col=[0])
    # NOTE(review): this names the index of the scratch frame only; the frame
    # returned below comes from the second read and is not affected.
    raw_table.index.name = 'm/z'

    second_header_row = raw_table.iloc[1, :]
    first_header_row = raw_table.iloc[0, :]
    swapped_levels = pd.concat([second_header_row, first_header_row],
                               axis='columns')
    column_index = pd.MultiIndex.from_frame(swapped_levels)

    aligned_table = pd.read_csv(filename, header=[0, 1], index_col=[0])
    aligned_table.columns = column_index
    return aligned_table

In [21]:
# Reference alignments saved earlier as CSV. The file names are the ones used
# by the commented-out to_csv calls above, so the files must already exist in
# the working directory.
positive_csv = 'aligned_1ppm_min1-2_1ppm_positive.csv'
negative_csv = 'aligned_1ppm_min1-2_1ppm_negative.csv'

aligned_all_pos2 = read_aligned_files(positive_csv)
aligned_all_neg2 = read_aligned_files(negative_csv)

In [22]:
# Give the two column levels explicit names before the comparison with the
# frame read back from CSV.
aligned_all_negative.columns = aligned_all_negative.columns.set_names(['Label', 'Sample'])

In [23]:
from pandas.testing import assert_frame_equal

# Raises AssertionError if the fresh negative-mode alignment differs from the
# one read back from CSV. The positive-mode pair is not compared here.
assert_frame_equal(left=aligned_all_negative, right=aligned_all_neg2)

### It gives the same results (only the negative-mode alignment is compared above). Should it?