# Process GLTMP Guam coral disease data

In [2]:
#open pandas DF 
import pandas as pd
Guam_data = pd.read_csv("../Raw_Data/GLTMP_corals_2010-2019.csv")
print(Guam_data.columns)

Index(['SITE_ID', 'SITE', 'SITE_NAME', 'STATION', 'STATIONID', 'LATITUDE',
       'LONGITUDE', 'DATE_TIME', 'OBSERVERID', 'OBSERVER', 'WAVEEXPOSURE',
       'DEPTH', 'STRATUM', 'STATION_TYPE', 'CORALTRANSECTID',
       'TRANSECT_NUMBER', 'TRANSECT_LENGTH', 'CORALQUADRATID', 'TRANSECT_SIDE',
       'TRANSECT_LOCATION', 'QUADRAT_LENGTH', 'QUADRAT_WIDTH', 'APHIAID',
       'CORAL_CODE', 'SCIENTIFIC_NAME', 'GENUS', 'RANK', 'FUNCTIONAL_GROUP',
       'COLONY_LENGTH', 'COLONY_WIDTH', 'OLD_DEAD', 'RECENT_DEAD', 'DZ_CODE',
       'DISEASE', 'SEVERITY', 'DZ_CODE2', 'DISEASE2', 'SEVERITY2', 'PHOTO_YN',
       'COLLECTED_YN', 'FRAGMENT_YN', 'REMNANT_YN', 'COMMENT_'],
      dtype='object')


  Guam_data = pd.read_csv("../Raw_Data/GLTMP_corals_2010-2019.csv")


In [3]:
# isolating Genus for counting
Guam_data = Guam_data[["SCIENTIFIC_NAME","GENUS","DISEASE","DISEASE2"]]
Guam_data.columns = ["species", "genus","DISEASE","DISEASE2"]
Guam_data

Unnamed: 0,species,genus,DISEASE,DISEASE2
0,Astreopora listeri,Astreopora,,
1,Favia matthaii,Favia,,
2,Leptastrea purpurea,Leptastrea,,
3,Montipora spp,Montipora,Unknown,
4,Pavona varians,Pavona,,
...,...,...,...,...
22204,Porites spp - massive,Porites,,
22205,No coral colonies,NONE,,
22206,No coral colonies,NONE,,
22207,Pocillopora verrucosa,Pocillopora,,


In [4]:
# select Disease column and setting empty cells to healthy 
Guam_data['DISEASE'][Guam_data['DISEASE'].isnull()] = "Healthy"
Guam_data['DISEASE2'][Guam_data['DISEASE2'].isnull()] = "Healthy"
Guam_data

Unnamed: 0,species,genus,DISEASE,DISEASE2
0,Astreopora listeri,Astreopora,Healthy,Healthy
1,Favia matthaii,Favia,Healthy,Healthy
2,Leptastrea purpurea,Leptastrea,Healthy,Healthy
3,Montipora spp,Montipora,Unknown,Healthy
4,Pavona varians,Pavona,Healthy,Healthy
...,...,...,...,...
22204,Porites spp - massive,Porites,Healthy,Healthy
22205,No coral colonies,NONE,Healthy,Healthy
22206,No coral colonies,NONE,Healthy,Healthy
22207,Pocillopora verrucosa,Pocillopora,Healthy,Healthy


In [6]:
#Count of corals by genus and Disease 1 column 
Grouped_Guam_data = Guam_data.groupby(['species','DISEASE']).agg(Disease_count = ("DISEASE","count")).reset_index()
Grouped_Guam_data

Unnamed: 0,species,DISEASE,Disease_count
0,Acanthastrea cf. brevis,Healthy,4
1,Acanthastrea echinata,Healthy,44
2,Acanthastrea echinata,Unknown,3
3,Acanthastrea regularis,Healthy,5
4,Acanthastrea regularis,Paling,1
...,...,...,...
306,Stylocoeniella armata,Healthy,272
307,Stylocoeniella armata,Unknown,5
308,Stylophora mordax,Bleaching,1
309,Stylophora mordax,Healthy,27


In [7]:
# Count of corals by genus and Disease 2 column 
Grouped_Guam_data_2 = Guam_data.groupby(['species','DISEASE2']).agg(Disease2_count = ("DISEASE2","count")).reset_index()
#Grouped_Guam_data_2["DISEASE"]= Grouped_Guam_data_2["DISEASE2"]
Grouped_Guam_data_2

Unnamed: 0,species,DISEASE2,Disease2_count
0,Acanthastrea cf. brevis,Healthy,4
1,Acanthastrea echinata,Healthy,47
2,Acanthastrea regularis,Healthy,6
3,Acanthastrea spp,Healthy,5
4,Acropora abrotanoides,Healthy,3
...,...,...,...
125,Psammocora stellata,Healthy,5
126,Psammocora superficialis,Healthy,7
127,Scapophyllia cylindrica,Healthy,2
128,Stylocoeniella armata,Healthy,277


## Define a function to remove empty disease names

In [8]:
def filter_disease_names(disease_names):
    """ Returns disease names that has filtered out NaNs.
    
    disease_names - list of diseases 
    """
    valid_diseases = []
    for d in disease_names:
        try: 
            if not isnan(d):
                valid_diseases.append(d)
        except TypeError:
            # if we get a TypeError then d is not an NaN(could be a string or other object)
            valid_diseases.append(d)
    return valid_diseases
    

## Combine counts for corals with more than one disease

In [12]:
from numpy import isfinite
from math import isnan
#Combine DFs 
df =pd.merge(Grouped_Guam_data,Grouped_Guam_data_2,how='outer',left_on=['species','DISEASE'],right_on=['species','DISEASE2'])
unique_diseases = set(df["DISEASE"].unique())

unique_diseases2 = set(df["DISEASE2"].unique())

combined_diseases = unique_diseases.union(unique_diseases2)
combined_diseases = list(combined_diseases)

combined_diseases = filter_disease_names(combined_diseases)  

unique_genera = set(df["species"].unique())
unique_genera = list(unique_genera)
print(unique_genera)
print(combined_diseases)

disease_table = pd.DataFrame(0, index=unique_genera,columns=combined_diseases)
for genus in unique_genera:
    for disease in combined_diseases: 
        disease_row = df.loc[(df['species'] == genus) & (df['DISEASE'] == disease)]
        empty = bool(disease_row.empty)
        if not empty:
            count = disease_row['Disease_count']
            count = int(count)
            if disease != "Healthy":
                count2 = disease_row['Disease2_count']
                if isfinite(count2.all()):
                    try:
                        count2 = int(count2)
                        count += count2
                    except ValueError:
                        pass
            disease_table.loc[genus, disease] = count
disease_table.sort_index()

disease_table['n'] = disease_table.sum(axis=1)

disease_table['total_disease_count'] = disease_table['Sub-acute tissue loss'] +\
                                        disease_table['Ulcerative white spots disease'] +\
                                        disease_table['White syndrome'] +\
                                        disease_table['Black band disease']

disease_table['genus'] = disease_table.index.str.split().str[0]
disease_table['total_disease_percent'] = disease_table['total_disease_count']/(disease_table.sum(axis=1))

disease_table = disease_table[['genus','n','total_disease_percent','total_disease_count','Healthy','Alpheus burrow', 'Cyanobacteria', 'Ulcerative white spots disease',
       'Predation - unknown', 'Pigmentation response', 'Drupella', 'Bleaching',
       'Patchy bleaching', 'Crown of thorns seastar', 'Algal overgrowth',
       'Paling', 'Coralliophila', 'Algal infestation', 
       'Sediment on coral', 'Vermited worm infestation', 'Unknown',
       'Discoloration', 'Fish bites', 'Terpios overgrowth',
       'Black band disease', 'Urchin infestation', 'Sub-acute tissue loss',
       'White syndrome']]

disease_table = disease_table.rename(columns={'Healthy':'healthy_count','total_disease_count':'total_diseased_count'})

['Pavona divaricata', 'Astreopora listeri', 'Fungia scutaria', 'Montastrea spp.', 'Montipora spp', 'Porites australiensis', 'Stylophora mordax', 'Favia granulosa', 'Cyphastrea serailia', 'Echinophyllia echinata', 'Favia spp', 'Favites crassisepta', 'Favites russelli', 'Millepora platyphylla', 'Porites lichen', 'Porites mammalata', 'Hydnophora microconos', 'Leptastrea cf. immersa', 'Pavona decussata', 'Acropora latistella', 'Montipora foveolata', 'Pocillopora eydouxi', 'Porites lutea', 'Favia favus', 'Porites spp - submassive', 'Merulina ampliata', 'Stylocoeniella armata', 'Montipora hoffmeisteri', 'Acanthastrea cf. brevis', 'Montipora informis', 'Porites deformis', 'Mussid spp', 'Pocillopora meandrina', 'Leptastrea pruinosa', 'Montipora grisea', 'Cycloseris spp', 'Porites cylindrica', 'Fungia spp', 'Acropora spp', 'Diploastrea heliopora', 'Porites vaughani', 'Galaxea fascicularis', 'Montipora nodosa', 'Cyphastrea agassizi', 'Astreopora gracilis', 'Acanthastrea regularis', 'Cyphastrea s

  disease_table['total_disease_percent'] = disease_table['total_disease_count']/(disease_table.sum(axis=1))


In [13]:
disease_table = disease_table.rename_axis('species')
disease_table.to_csv("../Processed_Data/Guam_corals.csv")

## Summarize data at genus level

In [16]:
disease_table
final_column_names = ['genus','n','total_disease_percent','total_diseased_count','healthy_count','Alpheus burrow', 'Cyanobacteria', 'Ulcerative white spots disease',
       'Predation - unknown', 'Pigmentation response', 'Drupella', 'Bleaching',
       'Patchy bleaching', 'Crown of thorns seastar', 'Algal overgrowth',
       'Paling', 'Coralliophila', 'Algal infestation', 
       'Sediment on coral', 'Vermited worm infestation', 'Unknown',
       'Discoloration', 'Fish bites', 'Terpios overgrowth',
       'Black band disease', 'Urchin infestation', 'Sub-acute tissue loss',
       'White syndrome']

genus_table = disease_table.loc[:,final_column_names]
genus_table = genus_table.groupby('genus')['n','total_disease_percent','total_diseased_count','healthy_count','Alpheus burrow', 'Cyanobacteria', 'Ulcerative white spots disease',
       'Predation - unknown', 'Pigmentation response', 'Drupella', 'Bleaching',
       'Patchy bleaching', 'Crown of thorns seastar', 'Algal overgrowth',
       'Paling', 'Coralliophila', 'Algal infestation', 
       'Sediment on coral', 'Vermited worm infestation', 'Unknown',
       'Discoloration', 'Fish bites', 'Terpios overgrowth',
       'Black band disease', 'Urchin infestation', 'Sub-acute tissue loss',
       'White syndrome'].sum().reset_index()
genus_table['total_diseased_percent'] = genus_table['total_diseased_count']/(genus_table['healthy_count'] + genus_table['total_diseased_count'])
genus_table = genus_table.dropna()
genus_table.to_csv("../Processed_Data/per_genus_disease_data/Guam_corals.csv",index_label="genus")

genus_table

  genus_table = genus_table.groupby('genus')['n','total_disease_percent','total_diseased_count','healthy_count','Alpheus burrow', 'Cyanobacteria', 'Ulcerative white spots disease',


Unnamed: 0,genus,n,total_disease_percent,total_diseased_count,healthy_count,Alpheus burrow,Cyanobacteria,Ulcerative white spots disease,Predation - unknown,Pigmentation response,...,Vermited worm infestation,Unknown,Discoloration,Fish bites,Terpios overgrowth,Black band disease,Urchin infestation,Sub-acute tissue loss,White syndrome,total_diseased_percent
0,Acanthastrea,62,0.0,0,58,0,0,0,0,0,...,0,3,0,0,0,0,0,0,0,0.0
1,Acropora,97,0.0,0,86,0,0,0,0,0,...,0,7,0,0,0,0,0,0,0,0.0
2,Astreopora,386,0.0,0,367,0,0,0,0,0,...,0,9,2,0,0,0,0,0,0,0.0
3,Coscinaraea,1,0.0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0.0
4,Cycloseris,1,0.0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0.0
5,Cyphastrea,281,0.0,0,264,0,0,0,0,0,...,0,8,1,0,0,0,0,0,0,0.0
6,Diploastrea,9,0.0,0,9,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0.0
7,Echinophyllia,4,0.0,0,4,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0.0
8,Echinopora,2,0.0,0,2,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0.0
9,Favia,1098,0.0,0,1019,0,0,0,0,1,...,0,61,0,0,0,0,0,0,0,0.0
