#This is a notebook AK used to document how demographic data was pulled from from CMS001_CMS002_MergedAnnotations_190325.csv file to generate Table S1 for CMS manuscript

#Set up python tools for pandas processing, read in data file, and check with head.

In [1]:
import pandas as pd
import numpy as np

In [2]:
AllMosData = pd.read_csv('CMS001_CMS002_MergedAnnotations_190325.csv', delimiter=',')

In [3]:
AllMosData.head()

Unnamed: 0,NewIDseqName,OldIDseqName,czbiohub-mosquito_sequences_id,raw_sequence_run_directory,host,sample_type,nucleotide_type,collection_date,collection_lat,collection_long,...,blood_fed,sample_unit,known_organism,detection_method,library_prep,sequencer,rna_dna_input_ng,library_prep_batch,extraction_batch,Notes:
0,CMS001_001_Ra_S1,CMS_001_RNA_A_S1,CMS_001_RNA_A_S1,czbiohub-seqbot/fastqs/180323_NB501961_0089_AH...,mosquito,whole,RNA,2017-10-03T00:00:00Z,37.55697,-122.07938,...,blood fed,1,,,NEBNext Ultra II Directional RNA Library Prep Kit,NextSeq,27.3,,,
1,CMS001_002_Ra_S1,CMS_002_RNA_A_S1,CMS_002_RNA_A_S1,czbiohub-seqbot/fastqs/180412_NB501961_0097_AH...,mosquito,whole,RNA,2017-09-21T00:00:00Z,37.55697,-122.07938,...,blood fed,1,,,NEBNext Ultra II Directional RNA Library Prep Kit,NextSeq,59.8,,,
2,CMS001_003_Ra_S2,CMS_003_RNA_A_S2,CMS_003_RNA_A_S2,czbiohub-seqbot/fastqs/180323_NB501961_0089_AH...,mosquito,whole,RNA,2017-09-19T00:00:00Z,37.55697,-122.07938,...,blood fed,1,,,NEBNext Ultra II Directional RNA Library Prep Kit,NextSeq,88.9,,,
3,CMS001_004_Ra_S2,CMS_004_RNA_A_S2,CMS_004_RNA_A_S2,czbiohub-seqbot/fastqs/180412_NB501961_0097_AH...,mosquito,whole,RNA,2017-09-12T00:00:00Z,37.7152,-122.19431,...,blood fed,1,,,NEBNext Ultra II Directional RNA Library Prep Kit,NextSeq,88.4,,,
4,CMS001_005_Ra_S3,CMS_005_RNA_A_S3,CMS_005_RNA_A_S3,czbiohub-seqbot/fastqs/180323_NB501961_0089_AH...,mosquito,whole,RNA,2017-09-29T00:00:00Z,37.78886,-122.29903,...,blood fed,1,,,NEBNext Ultra II Directional RNA Library Prep Kit,NextSeq,240.7,,,


#Print column names to more easily view all column headers.

In [4]:
print(AllMosData.columns.values)

['NewIDseqName' 'OldIDseqName' 'czbiohub-mosquito_sequences_id'
 'raw_sequence_run_directory' 'host' 'sample_type' 'nucleotide_type'
 'collection_date' 'collection_lat' 'collection_long' 'Habitat'
 'Population' 'collected_by' 'preservation_method' 'sex' 'life_stage'
 'visual_genus' 'visual_species' 'compute_genus' 'compute_species'
 'blood_fed' 'sample_unit' 'known_organism' 'detection_method'
 'library_prep' 'sequencer' 'rna_dna_input_ng' 'library_prep_batch'
 'extraction_batch' 'Notes:']


#Select subset of data relevant for Table S1.

In [5]:
AllMosData[['NewIDseqName','visual_genus','visual_species','compute_genus','compute_species','sex','blood_fed','collected_by','collection_date','collection_lat','collection_long']]

Unnamed: 0,NewIDseqName,visual_genus,visual_species,compute_genus,compute_species,sex,blood_fed,collected_by,collection_date,collection_lat,collection_long
0,CMS001_001_Ra_S1,Culex,erythrothorax,Culex,erythrothorax,female,blood fed,ALCO,2017-10-03T00:00:00Z,37.556970,-122.079380
1,CMS001_002_Ra_S1,Culiseta,particeps,Culiseta,particeps,female,blood fed,ALCO,2017-09-21T00:00:00Z,37.556970,-122.079380
2,CMS001_003_Ra_S2,Culex,pipiens,Culex,pipiens,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380
3,CMS001_004_Ra_S2,Culiseta,incidens,Culiseta,incidens,female,blood fed,ALCO,2017-09-12T00:00:00Z,37.715200,-122.194310
4,CMS001_005_Ra_S3,Culex,erythrothorax,Culex,erythrothorax,female,blood fed,ALCO,2017-09-29T00:00:00Z,37.788860,-122.299030
5,CMS001_006_Ra_S5,Culex,erythrothorax,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380
6,CMS001_007_Ra_S12,Culiseta,particeps,Culiseta,particeps,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380
7,CMS001_008_Ra_S3,Culiseta,particeps,Culiseta,particeps,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380
8,CMS001_009_Ra_S13,Culex,erythrothorax,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380
9,CMS001_010_Ra_S1,Culex,erythrothorax,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380


#Surprising number of NaN for sex field. Use count to investigate. 

In [6]:
AllMosData.groupby('sex').count()

Unnamed: 0_level_0,NewIDseqName,OldIDseqName,czbiohub-mosquito_sequences_id,raw_sequence_run_directory,host,sample_type,nucleotide_type,collection_date,collection_lat,collection_long,...,blood_fed,sample_unit,known_organism,detection_method,library_prep,sequencer,rna_dna_input_ng,library_prep_batch,extraction_batch,Notes:
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
female,91,91,91,91,91,91,91,89,89,89,...,91,91,0,0,91,91,91,0,31,22
male,4,4,4,4,4,4,4,4,4,4,...,4,4,0,0,4,4,3,0,4,4


#These numbers match "female" and "male" grep I did as a sanity check on source data file - quite a few samples from SoCal where no sex was reported. 
##Action item? Identify a sex-specific gene (or set of genes), test if they can be recovered in ground truth males & females, then use this on unsexed samples? 

In [None]:
#Next, checked on Habitat features per discussion in 4/30/19 meeting - there's quite a bit of diversity there!

In [7]:
AllMosData.groupby('Habitat').count()[['collected_by']]

Unnamed: 0_level_0,collected_by
Habitat,Unnamed: 1_level_1
Industrial (waste water treatment plant),4
Agriculture,1
Agriculture & Residential,1
Agriculture; Marsh; Undeveloped,5
Commercial/Industrial/Transportation,2
Commerical/Industrial/Transportation,5
Golf Course,4
Marsh,11
Marsh / Wildlife Refuge / Park,32
Marsh/Swamp,15


#Next, checked on discrepancies between computed species calls and visual calls.

In [8]:
AllMosData['visual_equals_compute'] = AllMosData['visual_species'] == AllMosData['compute_species']

In [9]:
AllMosData.groupby('visual_equals_compute').count()

Unnamed: 0_level_0,NewIDseqName,OldIDseqName,czbiohub-mosquito_sequences_id,raw_sequence_run_directory,host,sample_type,nucleotide_type,collection_date,collection_lat,collection_long,...,blood_fed,sample_unit,known_organism,detection_method,library_prep,sequencer,rna_dna_input_ng,library_prep_batch,extraction_batch,Notes:
visual_equals_compute,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
False,17,17,17,17,5,12,17,3,3,3,...,4,17,0,0,17,17,4,0,8,0
True,144,144,144,144,144,144,144,143,143,143,...,144,144,0,0,144,144,135,0,88,75


#Quick print all column names again.

In [10]:
print(AllMosData.columns.values)

['NewIDseqName' 'OldIDseqName' 'czbiohub-mosquito_sequences_id'
 'raw_sequence_run_directory' 'host' 'sample_type' 'nucleotide_type'
 'collection_date' 'collection_lat' 'collection_long' 'Habitat'
 'Population' 'collected_by' 'preservation_method' 'sex' 'life_stage'
 'visual_genus' 'visual_species' 'compute_genus' 'compute_species'
 'blood_fed' 'sample_unit' 'known_organism' 'detection_method'
 'library_prep' 'sequencer' 'rna_dna_input_ng' 'library_prep_batch'
 'extraction_batch' 'Notes:' 'visual_equals_compute']


#Sort dataframe on 'visual_equals_compute' columns and head to get a look at the 17 discordant samples.

In [11]:
AllMosData.sort_values("visual_equals_compute")[['NewIDseqName', 'visual_equals_compute','visual_genus','visual_species','compute_genus','compute_species']].head(20)

Unnamed: 0,NewIDseqName,visual_equals_compute,visual_genus,visual_species,compute_genus,compute_species
160,CMS002_Water8_Rb_S11_L004,False,,,,
62,CMS001_water3_Qiagen_S26,False,,,,
24,CMS001_025_Ra_S7,False,Culex,tarsalis,Culiseta,incidens
23,CMS001_024_Ra_S15,False,Culex,tarsalis,Culiseta,particeps
44,CMS001_045_Ra_S2,False,Culex,tarsalis,Culex,erythrothorax
61,CMS001_water2_S24,False,,,,
60,CMS001_water1_S11,False,,,,
57,CMS001_058_Ra_S9,False,,,Culex,erythrothorax
63,CMS001_water4_Zymo_S27,False,,,,
64,CMS001_water5_RNA_A_S12,False,,,,


#Note: The discordant samples correspond primarily to water controls because they have no calls for either visual or compute fields. There are also 4 mosquito specimens where computed and visual calls are different: CMS001_025, CMS001_024, CMS001_045, and CMS001_058. Note that CMS001_058 was also discordant, but this was because no visual call was provided for this specimen. The other mosquito specimens with a discordant call were likely visual mis-calls or mis-annotations. Based on this, 

##Action items: 1) I'm going to use compute_genus and compute_species calls for TableS1 genus and species calls. 2) Review and confirm overall kmer results & specific discordant results with JB. 3) Should probably highlight these 4 samples with discordant visual and compute calls. 4) Develop and run directed analysis of a transcript (or a set of transcripts) to provide conventional ground truth data to validate [Reviewers will ask for this]. 



In [12]:
print(AllMosData.columns.values)

['NewIDseqName' 'OldIDseqName' 'czbiohub-mosquito_sequences_id'
 'raw_sequence_run_directory' 'host' 'sample_type' 'nucleotide_type'
 'collection_date' 'collection_lat' 'collection_long' 'Habitat'
 'Population' 'collected_by' 'preservation_method' 'sex' 'life_stage'
 'visual_genus' 'visual_species' 'compute_genus' 'compute_species'
 'blood_fed' 'sample_unit' 'known_organism' 'detection_method'
 'library_prep' 'sequencer' 'rna_dna_input_ng' 'library_prep_batch'
 'extraction_batch' 'Notes:' 'visual_equals_compute']


#Generating new dataframe "TableS1data" with subset of relevant data for TableS1.

In [26]:
TableS1data = AllMosData.loc[:,('NewIDseqName','compute_genus','compute_species','sex','blood_fed','collected_by','collection_date','collection_lat','collection_long','Habitat')]

In [27]:
TableS1data

Unnamed: 0,NewIDseqName,compute_genus,compute_species,sex,blood_fed,collected_by,collection_date,collection_lat,collection_long,Habitat
0,CMS001_001_Ra_S1,Culex,erythrothorax,female,blood fed,ALCO,2017-10-03T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
1,CMS001_002_Ra_S1,Culiseta,particeps,female,blood fed,ALCO,2017-09-21T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
2,CMS001_003_Ra_S2,Culex,pipiens,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
3,CMS001_004_Ra_S2,Culiseta,incidens,female,blood fed,ALCO,2017-09-12T00:00:00Z,37.715200,-122.194310,Industrial (waste water treatment plant)
4,CMS001_005_Ra_S3,Culex,erythrothorax,female,blood fed,ALCO,2017-09-29T00:00:00Z,37.788860,-122.299030,Residential
5,CMS001_006_Ra_S5,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
6,CMS001_007_Ra_S12,Culiseta,particeps,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
7,CMS001_008_Ra_S3,Culiseta,particeps,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
8,CMS001_009_Ra_S13,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
9,CMS001_010_Ra_S1,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park


In [28]:
TableS1data.sort_values(by = ['collected_by','compute_genus','compute_species'])

Unnamed: 0,NewIDseqName,compute_genus,compute_species,sex,blood_fed,collected_by,collection_date,collection_lat,collection_long,Habitat
10,CMS001_011_Ra_S4,Aedes,dorsalis,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
0,CMS001_001_Ra_S1,Culex,erythrothorax,female,blood fed,ALCO,2017-10-03T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
4,CMS001_005_Ra_S3,Culex,erythrothorax,female,blood fed,ALCO,2017-09-29T00:00:00Z,37.788860,-122.299030,Residential
5,CMS001_006_Ra_S5,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
8,CMS001_009_Ra_S13,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
9,CMS001_010_Ra_S1,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
11,CMS001_012_Ra_S4,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
13,CMS001_014_Ra_S5,Culex,erythrothorax,female,blood fed,ALCO,2017-10-06T00:00:00Z,37.581910,-122.048380,Marsh
15,CMS001_016_Ra_S6,Culex,erythrothorax,female,blood fed,ALCO,2017-10-06T00:00:00Z,37.581910,-122.048460,Marsh
16,CMS001_017_Ra_S6,Culex,erythrothorax,female,blood fed,ALCO,2017-10-06T00:00:00Z,37.581910,-122.048380,Marsh


#Strip unnecessary suffix string from 'NewIDseqName' values

In [29]:
TableS1data.replace(r'\_Ra.*', '', regex=True)

Unnamed: 0,NewIDseqName,compute_genus,compute_species,sex,blood_fed,collected_by,collection_date,collection_lat,collection_long,Habitat
0,CMS001_001,Culex,erythrothorax,female,blood fed,ALCO,2017-10-03T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
1,CMS001_002,Culiseta,particeps,female,blood fed,ALCO,2017-09-21T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
2,CMS001_003,Culex,pipiens,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
3,CMS001_004,Culiseta,incidens,female,blood fed,ALCO,2017-09-12T00:00:00Z,37.715200,-122.194310,Industrial (waste water treatment plant)
4,CMS001_005,Culex,erythrothorax,female,blood fed,ALCO,2017-09-29T00:00:00Z,37.788860,-122.299030,Residential
5,CMS001_006,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
6,CMS001_007,Culiseta,particeps,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
7,CMS001_008,Culiseta,particeps,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
8,CMS001_009,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
9,CMS001_010,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park


#Strip unnecessary time suffix string from 'collection_date' values

In [30]:
TableS1data.replace(r'T00.*', '', regex=True)

Unnamed: 0,NewIDseqName,compute_genus,compute_species,sex,blood_fed,collected_by,collection_date,collection_lat,collection_long,Habitat
0,CMS001_001_Ra_S1,Culex,erythrothorax,female,blood fed,ALCO,2017-10-03,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
1,CMS001_002_Ra_S1,Culiseta,particeps,female,blood fed,ALCO,2017-09-21,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
2,CMS001_003_Ra_S2,Culex,pipiens,female,blood fed,ALCO,2017-09-19,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
3,CMS001_004_Ra_S2,Culiseta,incidens,female,blood fed,ALCO,2017-09-12,37.715200,-122.194310,Industrial (waste water treatment plant)
4,CMS001_005_Ra_S3,Culex,erythrothorax,female,blood fed,ALCO,2017-09-29,37.788860,-122.299030,Residential
5,CMS001_006_Ra_S5,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
6,CMS001_007_Ra_S12,Culiseta,particeps,female,blood fed,ALCO,2017-09-19,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
7,CMS001_008_Ra_S3,Culiseta,particeps,female,blood fed,ALCO,2017-09-19,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
8,CMS001_009_Ra_S13,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
9,CMS001_010_Ra_S1,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19,37.556970,-122.079380,Marsh / Wildlife Refuge / Park


#Tidying up dataframe: renaming columns

In [38]:
TableS1data.rename(columns={'NewIDseqName':'Specimen ID','compute_genus':'Genus','compute_species':'Species','sex':'Sex','blood_fed':'Status','collected_by':'Site','collection_date':'Collection Date','collection_lat':'Latitude','collection_long':'Longitude'}, inplace=True)

In [40]:
TableS1data[['Specimen ID','Genus','Species','Sex','Status','Site','Collection Date','Latitude','Longitude','Habitat']]

Unnamed: 0,Specimen ID,Genus,Species,Sex,Status,Site,Collection Date,Latitude,Longitude,Habitat
0,CMS001_001_Ra_S1,Culex,erythrothorax,female,blood fed,ALCO,2017-10-03T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
1,CMS001_002_Ra_S1,Culiseta,particeps,female,blood fed,ALCO,2017-09-21T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
2,CMS001_003_Ra_S2,Culex,pipiens,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
3,CMS001_004_Ra_S2,Culiseta,incidens,female,blood fed,ALCO,2017-09-12T00:00:00Z,37.715200,-122.194310,Industrial (waste water treatment plant)
4,CMS001_005_Ra_S3,Culex,erythrothorax,female,blood fed,ALCO,2017-09-29T00:00:00Z,37.788860,-122.299030,Residential
5,CMS001_006_Ra_S5,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
6,CMS001_007_Ra_S12,Culiseta,particeps,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
7,CMS001_008_Ra_S3,Culiseta,particeps,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
8,CMS001_009_Ra_S13,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
9,CMS001_010_Ra_S1,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park


In [41]:
TableS1=TableS1data[['Specimen ID','Genus','Species','Sex','Status','Site','Collection Date','Latitude','Longitude','Habitat']]

In [42]:
TableS1

Unnamed: 0,Specimen ID,Genus,Species,Sex,Status,Site,Collection Date,Latitude,Longitude,Habitat
0,CMS001_001_Ra_S1,Culex,erythrothorax,female,blood fed,ALCO,2017-10-03T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
1,CMS001_002_Ra_S1,Culiseta,particeps,female,blood fed,ALCO,2017-09-21T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
2,CMS001_003_Ra_S2,Culex,pipiens,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
3,CMS001_004_Ra_S2,Culiseta,incidens,female,blood fed,ALCO,2017-09-12T00:00:00Z,37.715200,-122.194310,Industrial (waste water treatment plant)
4,CMS001_005_Ra_S3,Culex,erythrothorax,female,blood fed,ALCO,2017-09-29T00:00:00Z,37.788860,-122.299030,Residential
5,CMS001_006_Ra_S5,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
6,CMS001_007_Ra_S12,Culiseta,particeps,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
7,CMS001_008_Ra_S3,Culiseta,particeps,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
8,CMS001_009_Ra_S13,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park
9,CMS001_010_Ra_S1,Culex,erythrothorax,female,blood fed,ALCO,2017-09-19T00:00:00Z,37.556970,-122.079380,Marsh / Wildlife Refuge / Park


In [44]:
TableS1.to_csv('TableS1_CMS.csv', index=False, header=True)