In [1]:
import pandas as pd
import numpy as np
from IPython.core.display import HTML, display

- Import the three data sets from FRRP, HICORDIS, and Lamb et al.
- Drop unnecessary columns from species name mapping file
- Trim Lamb et al. dataset down to only diseases
- Drop clade column from FRRP dataset

In [2]:
# Read the three disease datasets and the species-name lookup table.
frrp = pd.read_csv('../input/datasets_renamed/FRRP_renamed.csv')
hicordis = pd.read_csv('../input/datasets_renamed/HICORDIS_renamed.csv')
lamb = pd.read_csv('../input/datasets_renamed/Lamb_et_al_renamed.csv')
spp_name_mapping = pd.read_csv('../input/species_name_mapping.csv')

# Lookup dict: value of the "Species" column -> matching "Huang_Roy_tree_name".
# The "Specificity" column is not needed for the lookup and is discarded first.
spp_dict = (
    spp_name_mapping
    .drop(columns="Specificity")
    .set_index("Species")['Huang_Roy_tree_name']
    .to_dict()
)

# Keep only the genus label, the coral total and the disease columns of Lamb et al.
lamb_columns = [
    'Genus',
    'TotalCorals',
    'Total_diseased_Lamb_et_al',
    'Total_compromised_or_bleached_Lamb_et_al',
    'White_syndrome_Lamb_et_al',
    'Black_band_disease_Lamb_et_al',
    'Brown_band_Lamb_et_al',
    'Skeletal_eroding_band_Lamb_et_al',
    'Growth_anomalies_Lamb_et_al',
]
lamb = lamb[lamb_columns]

# The FRRP clade column is not used in the merged table.
frrp = frrp.drop(columns="Clade")

- Replace all species names with names found in Huang Roy tree
- Rename Genus column to Species in Lamb et al. dataset

In [3]:
# Translate every taxon label to its tree name via spp_dict; labels without an
# entry in spp_dict are left as they are.
frrp = frrp.replace({'Species': spp_dict})
hicordis = hicordis.replace({'Species': spp_dict})

# Lamb et al. labels taxa in a "Genus" column: translate it the same way, then
# rename it to "Species" so all three frames share one key column.
lamb = (
    lamb
    .replace({'Genus': spp_dict})
    .rename(columns={'Genus': 'Species'})
)

Set the Species column of each dataset as the index

In [4]:
# Move the taxon label into the index of each frame; the later merges join on it.
frrp = frrp.set_index("Species")
hicordis = hicordis.set_index("Species")
lamb = lamb.set_index("Species")

Aggregate the rows of each dataset so that there is one row per species/genus: FRRP and Lamb et al. values are summed, while HICORDIS values are averaged.

In [5]:
# Collapse duplicate taxon labels to a single row per label.
# FRRP and Lamb et al. rows are summed, whereas HICORDIS rows are averaged.
# NOTE(review): presumably HICORDIS already holds proportions while the other two
# hold values that are meant to be added up — confirm against the input files.
frrp = frrp.groupby('Species').sum()
hicordis = hicordis.groupby('Species').mean()
lamb = lamb.groupby('Species').sum()

Divide the number of each disease by the total number of corals to get prevalence rate for each species in the Lamb et al. data, and then drop the TotalCorals column afterward.

In [6]:
# Convert the Lamb et al. columns to prevalence rates by dividing every column
# by the TotalCorals value of the same row, then discard TotalCorals itself.
#
# The divisor is an independent copy and TotalCorals is removed *before* the
# division. The previous column-by-column loop also overwrote TotalCorals with
# TotalCorals / TotalCorals while `total` still referred to that column; whether
# `total` then kept the original counts depended on pandas' view/copy behaviour.
# NOTE(review): rows with TotalCorals == 0 would produce inf/NaN — confirm none exist.
total = lamb["TotalCorals"].copy()
lamb = lamb.drop(columns="TotalCorals").div(total, axis=0)

- First, combine the FRRP and HICORDIS datasets.
- Second, combine the result with the Lamb et al. data.

In [7]:
# Outer joins on the (Species) index keep every taxon that occurs in any of the
# datasets; columns a taxon has no data for are filled with NaN.
outer_on_index = dict(how="outer", left_index=True, right_index=True)

f_h_combo = frrp.merge(hicordis, **outer_on_index)
f_h_l_combo = f_h_combo.merge(lamb, **outer_on_index)

# Render the complete merged table as HTML in the notebook output.
fhl_html = f_h_l_combo.to_html()
HTML(fhl_html)

Unnamed: 0_level_0,Unknown_disease_FRRP,Black_band_disease_FRRP,Dark_spots_FRRP,Red_band_FRRP,White_band_FRRP,White_plague_FRRP,White_spot_FRRP,Yellow_band_FRRP,Algal_infection_HICORDIS,Algal_overgrowth_HICORDIS,Black_band_disease_HICORDIS,Bleaching_HICORDIS,Ciliates_HICORDIS,Corallophila_huysmansii_HICORDIS,Cyanobacteria_HICORDIS,Discoloration_HICORDIS,Endolithic_fungal_infection_HICORDIS,Endolithic_hypermycosis_HICORDIS,Gear_entanglement_HICORDIS,Growth_anomalies_HICORDIS,Macroalgal_overgrowth_HICORDIS,No_Disease_HICORDIS,Pigmentation_response_HICORDIS,Predation_HICORDIS,Recently_denuded_skeleton_HICORDIS,Swollen_patches_HICORDIS,Tissue_loss_HICORDIS,Trematodiasis_HICORDIS,White_syndrome_HICORDIS,Total_diseased_Lamb_et_al,Total_compromised_or_bleached_Lamb_et_al,White_syndrome_Lamb_et_al,Black_band_disease_Lamb_et_al,Brown_band_Lamb_et_al,Skeletal_eroding_band_Lamb_et_al,Growth_anomalies_Lamb_et_al
Species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1
ACR_Acropora,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.913043,0.043478,0.043478,0.0,0.0,0.0,0.0,0.0,0.032284,0.108322,0.00498,0.006148,0.005461,0.014493,0.00103
ACR_Acropora_cervicornis,0.948,0.0,0.0,0.0,2.212,0.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,
ACR_Acropora_cytherea,,,,,,,,,0.0,0.016073,0.0,0.035017,0.0,0.0,0.001722,0.003444,0.0,0.0,0.0,0.013777,0.001148,0.897245,0.001722,0.001722,0.001148,0.0,0.023536,0.0,0.003444,,,,,,,
ACR_Acropora_humilis,,,,,,,,,0.0,0.0,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.833333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,
ACR_Acropora_nasuta,,,,,,,,,0.0,0.428571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.571429,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,
ACR_Acropora_palmata,0.0,0.0,0.0,0.0,5.479,0.0,5.479,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,
ACR_Acropora_valida,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,,,,,
ACR_Anacropora,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.3,0.0,0.0,0.0,0.0,0.0
ACR_Astreopora,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.001681,0.194958,0.0,0.0,0.0,0.001681,0.0
ACR_Montipora,,,,,,,,,0.0,0.0,0.0,0.323571,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.025,0.0,0.604286,0.0,0.0,0.0,0.0,0.047143,0.0,0.0,0.026756,0.221129,0.008631,0.005351,0.001208,0.009322,0.00069


Write the table to the output folder as a tab-separated values file.

In [8]:
# Save the merged table as a tab-separated file; the index is written as the first column.
merged_table_path = '../output/merged_disease_table.tsv'
f_h_l_combo.to_csv(merged_table_path, sep="\t")