### This script will prepare a spreadsheet template for your BOLD Specimen Upload, using data from your GeOMe FIMS spreadsheets. 

In [1]:
import pandas as pd

#### Insert your FIMS spreadsheet names below. (If you are using a GeOMe template, the file will be .xlsx and your data will be the sheet named 'Samples'. If you have the data in a .csv, make sure you use "pd.read_csv" instead of "pd.read_excel" and remove the ", sheet_name='Samples'".)

In [2]:
P01_df = pd.read_excel('FY18SurinameGGI211a_P01.xlsx', sheet_name='Samples')
P02_df = pd.read_excel('FY18SurinameGGI211a_P02.xlsx', sheet_name='Samples')
P03_df = pd.read_excel('FY18SurinameGGI211a_P03.xlsx', sheet_name='Samples')
P04_df = pd.read_excel('FY18SurinameGGI211a_P04.xlsx', sheet_name='Samples')
P05_df = pd.read_excel('FY18SurinameGGI211a_P05.xlsx', sheet_name='Samples')

#### If you have multiple FIMS to include data from, the section below will concatenate them together. Make sure you only include the dataframes you created above.

In [3]:
specimenData_df = pd.concat([P01_df,P02_df,P03_df,P04_df,P05_df])
specimenData_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 467 entries, 0 to 93
Data columns (total 14 columns):
tissueBarcode       467 non-null object
catalogNumber       467 non-null int64
materialSampleID    467 non-null object
phylum              467 non-null object
class               467 non-null object
order               467 non-null object
family              467 non-null object
genus               467 non-null object
species             467 non-null object
scientificName      467 non-null object
country             467 non-null object
locality            467 non-null object
tissuePlate         467 non-null object
tissueWell          467 non-null object
dtypes: int64(1), object(13)
memory usage: 54.7+ KB


####  This section will create the Voucher Info tab. If you have additional information to include (besides Sample ID, Field ID, Museum ID,) you will need to add those columns below. You may need to edit the fields it is pulling from, depending on where your DwC triplicate is on the spreadsheet. The DwC should populate the BOLD field 'Museum ID', and you only need to include either the Museum ID or the Field ID in your upload. (Sample ID is required.) 

In [4]:
voucherInfo_df = pd.DataFrame(specimenData_df, columns = ['materialSampleID', 'materialSampleID', 
                                                          'voucherCatalogNumber'])
voucherInfo_df.columns = ['Sample ID', 'Field ID', 'Museum ID']
voucherInfo_df.head()

Unnamed: 0,Sample ID,Field ID,Museum ID
0,SU18-001,SU18-001,
1,SU18-002,SU18-002,
2,SU18-003,SU18-003,
3,SU18-004,SU18-004,
4,SU18-005,SU18-005,


#### This next field creates the Institution Storing column. You may need to edit the data entered if your samples are not here at SI.

In [5]:
voucherInfo_df['Institution Storing'] = 'National Museum of Natural History, Smithsonian Institution'
voucherInfo_df.head()

Unnamed: 0,Sample ID,Field ID,Museum ID,Institution Storing
0,SU18-001,SU18-001,,"National Museum of Natural History, Smithsonia..."
1,SU18-002,SU18-002,,"National Museum of Natural History, Smithsonia..."
2,SU18-003,SU18-003,,"National Museum of Natural History, Smithsonia..."
3,SU18-004,SU18-004,,"National Museum of Natural History, Smithsonia..."
4,SU18-005,SU18-005,,"National Museum of Natural History, Smithsonia..."


In [6]:
for newcol in ['Collection Code']:
    voucherInfo_df[newcol]=""
voucherInfo_df.head()

Unnamed: 0,Sample ID,Field ID,Museum ID,Institution Storing,Collection Code
0,SU18-001,SU18-001,,"National Museum of Natural History, Smithsonia...",
1,SU18-002,SU18-002,,"National Museum of Natural History, Smithsonia...",
2,SU18-003,SU18-003,,"National Museum of Natural History, Smithsonia...",
3,SU18-004,SU18-004,,"National Museum of Natural History, Smithsonia...",
4,SU18-005,SU18-005,,"National Museum of Natural History, Smithsonia...",


In [7]:
voucherInfo_df = voucherInfo_df[['Sample ID', 'Field ID', 'Museum ID', 'Collection Code', 'Institution Storing']]
voucherInfo_df.head()

Unnamed: 0,Sample ID,Field ID,Museum ID,Collection Code,Institution Storing
0,SU18-001,SU18-001,,,"National Museum of Natural History, Smithsonia..."
1,SU18-002,SU18-002,,,"National Museum of Natural History, Smithsonia..."
2,SU18-003,SU18-003,,,"National Museum of Natural History, Smithsonia..."
3,SU18-004,SU18-004,,,"National Museum of Natural History, Smithsonia..."
4,SU18-005,SU18-005,,,"National Museum of Natural History, Smithsonia..."


#### The section below will create the Taxonomy tab for your upload. If you have additional information to upload, make sure you add them to the lists in field #8 (just below this one) and remove them from the field #9. (The first list in field #8 is pulling the data using the GeOMe column headers, and the second list is renaming them to the BOLD column headers.)

In [8]:
taxonomy_df = pd.DataFrame(specimenData_df, columns = ['materialSampleID', 'phylum', 'genus', 'specificEpithet'])
taxonomy_df.columns = ['Sample ID', 'Phylum', 'Genus', 'Species']
taxonomy_df.head()

Unnamed: 0,Sample ID,Phylum,Genus,Species
0,SU18-001,Chordata,Cyphocharax,
1,SU18-002,Chordata,Serrasalmus,
2,SU18-003,Chordata,Serrasalmus,
3,SU18-004,Chordata,Moenkhausia,
4,SU18-005,Chordata,Moenkhausia,


In [9]:
for newcol in ['Subfamily','Tribe','Subspecies','Identifier','Identifier email','Identification Method',
               'Taxonomy Notes','Class','Order','Family']:
    taxonomy_df[newcol]=""
taxonomy_df.head()

Unnamed: 0,Sample ID,Phylum,Genus,Species,Subfamily,Tribe,Subspecies,Identifier,Identifier email,Identification Method,Taxonomy Notes,Class,Order,Family
0,SU18-001,Chordata,Cyphocharax,,,,,,,,,,,
1,SU18-002,Chordata,Serrasalmus,,,,,,,,,,,
2,SU18-003,Chordata,Serrasalmus,,,,,,,,,,,
3,SU18-004,Chordata,Moenkhausia,,,,,,,,,,,
4,SU18-005,Chordata,Moenkhausia,,,,,,,,,,,


In [10]:
taxonomy_df = taxonomy_df[['Sample ID','Phylum','Class','Order','Family','Subfamily','Tribe','Genus','Species',
                           'Subspecies','Identifier','Identifier email','Identification Method','Taxonomy Notes']]
taxonomy_df.head()

Unnamed: 0,Sample ID,Phylum,Class,Order,Family,Subfamily,Tribe,Genus,Species,Subspecies,Identifier,Identifier email,Identification Method,Taxonomy Notes
0,SU18-001,Chordata,,,,,,Cyphocharax,,,,,,
1,SU18-002,Chordata,,,,,,Serrasalmus,,,,,,
2,SU18-003,Chordata,,,,,,Serrasalmus,,,,,,
3,SU18-004,Chordata,,,,,,Moenkhausia,,,,,,
4,SU18-005,Chordata,,,,,,Moenkhausia,,,,,,


#### The section below will create the Specimen Details tab. As above, if you have any additional data from the FIMS spreadsheet to include, make sure you add those columns to the lists in field #11 and remove them from field #12. 

In [11]:
specimenDetails_df = pd.DataFrame(specimenData_df, columns = ['materialSampleID','sex','tissueType'])
specimenDetails_df.columns = ['Sample ID','Sex','Tissue Descriptor']
specimenDetails_df.head()

Unnamed: 0,Sample ID,Sex,Tissue Descriptor
0,SU18-001,,
1,SU18-002,,
2,SU18-003,,
3,SU18-004,,
4,SU18-005,,


In [12]:
for newcol in ['Life Stage','Reproduction','Extra Info','Notes','Voucher Status',
               'External URLs','Associated Taxa','Associated Specimens']:
    specimenDetails_df[newcol]=""
specimenDetails_df.head()

Unnamed: 0,Sample ID,Sex,Tissue Descriptor,Life Stage,Reproduction,Extra Info,Notes,Voucher Status,External URLs,Associated Taxa,Associated Specimens
0,SU18-001,,,,,,,,,,
1,SU18-002,,,,,,,,,,
2,SU18-003,,,,,,,,,,
3,SU18-004,,,,,,,,,,
4,SU18-005,,,,,,,,,,


In [13]:
specimenDetails_df = specimenDetails_df[['Sample ID','Sex','Reproduction','Life Stage','Extra Info','Notes',
                                         'Voucher Status','Tissue Descriptor','External URLs','Associated Taxa',
                                         'Associated Specimens']]
specimenDetails_df.head()

Unnamed: 0,Sample ID,Sex,Reproduction,Life Stage,Extra Info,Notes,Voucher Status,Tissue Descriptor,External URLs,Associated Taxa,Associated Specimens
0,SU18-001,,,,,,,,,,
1,SU18-002,,,,,,,,,,
2,SU18-003,,,,,,,,,,
3,SU18-004,,,,,,,,,,
4,SU18-005,,,,,,,,,,


#### The following field will create the Voucher Status column. This data may need to be updated if your sample status is not 'vouchered: registered collection'. This is a controlled list and you will need to reference the BOLD handbook for other options. 

In [14]:
specimenDetails_df['Voucher Status'] = 'vouchered:registered collection'
specimenDetails_df.head()

Unnamed: 0,Sample ID,Sex,Reproduction,Life Stage,Extra Info,Notes,Voucher Status,Tissue Descriptor,External URLs,Associated Taxa,Associated Specimens
0,SU18-001,,,,,,vouchered:registered collection,,,,
1,SU18-002,,,,,,vouchered:registered collection,,,,
2,SU18-003,,,,,,vouchered:registered collection,,,,
3,SU18-004,,,,,,vouchered:registered collection,,,,
4,SU18-005,,,,,,vouchered:registered collection,,,,


#### The section below will create the Collection Data tab for your upload. Again, if you need to add or remove information, make sure you edit the column lists in fields #15 and #16.

In [15]:
collectionData_df = pd.DataFrame(specimenData_df, columns = ['materialSampleID',  
                                                             'country', 'locality', 'collectorList',
                                                             'yearCollected'])
collectionData_df.columns = ['Sample ID', 'Country/Ocean', 'Exact Site', 'Collectors', 'Collection Date']
collectionData_df.head()

Unnamed: 0,Sample ID,Country/Ocean,Exact Site,Collectors,Collection Date
0,SU18-001,Suriname,"Suriname, Sipaliwini, Bakaaboto, Saut sur la r...",,
1,SU18-002,Suriname,"Suriname, Sipaliwini, Bakaaboto, Saut sur la r...",,
2,SU18-003,Suriname,"Suriname, Sipaliwini, Bakaaboto, Saut sur la r...",,
3,SU18-004,Suriname,"Suriname, Sipaliwini, Bakaaboto, Saut sur la r...",,
4,SU18-005,Suriname,"Suriname, Sipaliwini, Bakaaboto, Saut sur la r...",,


In [16]:
for newcol in ['State/Province','Latitude','Longitude','Elevation','Region','Sector',
               'Depth','Elevation Precision','Depth Precision','GPS Source','Coordinate Accuracy',
               'Event Time','Collection Date Accuracy','Habitat','Sampling Protocol','Collection Notes','Site Code',
               'Collection Event ID']:
    collectionData_df[newcol]=""
collectionData_df.head()

Unnamed: 0,Sample ID,Country/Ocean,Exact Site,Collectors,Collection Date,State/Province,Latitude,Longitude,Elevation,Region,...,Depth Precision,GPS Source,Coordinate Accuracy,Event Time,Collection Date Accuracy,Habitat,Sampling Protocol,Collection Notes,Site Code,Collection Event ID
0,SU18-001,Suriname,"Suriname, Sipaliwini, Bakaaboto, Saut sur la r...",,,,,,,,...,,,,,,,,,,
1,SU18-002,Suriname,"Suriname, Sipaliwini, Bakaaboto, Saut sur la r...",,,,,,,,...,,,,,,,,,,
2,SU18-003,Suriname,"Suriname, Sipaliwini, Bakaaboto, Saut sur la r...",,,,,,,,...,,,,,,,,,,
3,SU18-004,Suriname,"Suriname, Sipaliwini, Bakaaboto, Saut sur la r...",,,,,,,,...,,,,,,,,,,
4,SU18-005,Suriname,"Suriname, Sipaliwini, Bakaaboto, Saut sur la r...",,,,,,,,...,,,,,,,,,,


In [17]:
collectionData_df = collectionData_df[['Sample ID','Collectors','Collection Date','Country/Ocean','State/Province',
                                       'Region','Sector','Exact Site','Latitude','Longitude','Elevation','Depth',
                                       'Elevation Precision','Depth Precision','GPS Source','Coordinate Accuracy',
                                       'Event Time','Collection Date Accuracy','Habitat','Sampling Protocol',
                                       'Collection Notes','Site Code','Collection Event ID']]
collectionData_df.head()

Unnamed: 0,Sample ID,Collectors,Collection Date,Country/Ocean,State/Province,Region,Sector,Exact Site,Latitude,Longitude,...,Depth Precision,GPS Source,Coordinate Accuracy,Event Time,Collection Date Accuracy,Habitat,Sampling Protocol,Collection Notes,Site Code,Collection Event ID
0,SU18-001,,,Suriname,,,,"Suriname, Sipaliwini, Bakaaboto, Saut sur la r...",,,...,,,,,,,,,,
1,SU18-002,,,Suriname,,,,"Suriname, Sipaliwini, Bakaaboto, Saut sur la r...",,,...,,,,,,,,,,
2,SU18-003,,,Suriname,,,,"Suriname, Sipaliwini, Bakaaboto, Saut sur la r...",,,...,,,,,,,,,,
3,SU18-004,,,Suriname,,,,"Suriname, Sipaliwini, Bakaaboto, Saut sur la r...",,,...,,,,,,,,,,
4,SU18-005,,,Suriname,,,,"Suriname, Sipaliwini, Bakaaboto, Saut sur la r...",,,...,,,,,,,,,,


#### The last field here will create you upload template. You will still need to copy/paste the data from this spreadsheet into the BOLD template (Specimen_Data_Download_BOLD3.1.xls) but everything should be there in the correct order. Note: you will only have to paste the Sample ID field into the first tab, but make sure the order of this column continues to match while you're pasting in the data to the other tabs.

In [18]:
with pd.ExcelWriter('dataForBoldUpload.xlsx') as writer:
    voucherInfo_df.to_excel(writer, sheet_name='Voucher Info', index=False)
    taxonomy_df.to_excel(writer, sheet_name='Taxonomy', index=False)
    specimenDetails_df.to_excel(writer, sheet_name='Specimen Details', index=False)
    collectionData_df.to_excel(writer, sheet_name='Collection Data', index=False)