In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [6]:
# Coral bleaching database from https://zenodo.org/records/6780843

# Read csv file.
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk. The default chunked read raised a warning on this
# file (presumably the mixed-dtype DtypeWarning -- confirm on a fresh run).
df_coral = pd.read_csv('./data/Bleaching Database.csv', low_memory=False)

# Filter quality control tags: all thermal bleaching events with known severity
df_coral = df_coral[df_coral['QC_CODE'].isin(['AAAA', 'AAA', 'AA'])]

# Focus on the Caribbean region
df_coral = df_coral[df_coral['OCEAN_REGION'] == 'Caribbean']

# Select relevant columns
df_coral = df_coral[['OCEAN_REGION', 'COUNTRY', 'LOCATION', 'LATITUDE', 'LONGITUDE', 'MONTH', 'YEAR', 'SEVERITY_CODE']]

# Drop rows with a missing value in any of the selected columns
df_coral = df_coral.dropna()

df_coral.head()

  df_coral = pd.read_csv('./data/Bleaching Database.csv')


Unnamed: 0,OCEAN_REGION,COUNTRY,LOCATION,LATITUDE,LONGITUDE,MONTH,YEAR,SEVERITY_CODE
6,Caribbean,Netherlands Antilles,Bonaire,12.183333,-68.3,9.0,1979,2
76,Caribbean,Panama,San Blas (Kuna Yala),9.55,-78.866667,6.0,1983,3
78,Caribbean,Bahamas,Great Bahama Bank,25.0,-78.2,9.0,1983,3
113,Caribbean,Jamaica,Discovery Bay,18.4722,-77.4095,10.0,1986,2
131,Caribbean,United States,Florida,25.15,-80.283333,7.0,1987,3


In [7]:
# Add a column for datetime, assuming every observation falls on the first of the month.
# pd.to_datetime assembles the timestamps from the YEAR/MONTH/DAY columns, so a
# constant DAY column is added first (it stays on the frame afterwards).
df_coral['DAY'] = 1
# The assembled column is already datetime64[ns]; no second parse is needed.
df_coral['datetime'] = pd.to_datetime(df_coral[['YEAR', 'MONTH', 'DAY']])
df_coral['datetime']

6       1979-09-01
76      1983-06-01
78      1983-09-01
113     1986-10-01
131     1987-07-01
           ...    
37749   2017-12-01
37750   2017-12-01
37765   2017-12-01
37767   2017-12-01
37769   2017-12-01
Name: datetime, Length: 12565, dtype: datetime64[ns]

In [13]:
# Load the CO2 table
co2_raw = pd.read_csv('./data/CO2.csv')

# Countries removed from the CO2 table even though they occur in the coral data
excluded_countries = ['Mexico', 'Brazil', 'Venezuela', 'Puerto Rico']

# Keep rows whose country and year both occur in the coral data,
# minus the excluded countries
keep_rows = (
    co2_raw['Country'].isin(df_coral['COUNTRY'].unique())
    & co2_raw['Year'].isin(df_coral['YEAR'].unique())
    & ~co2_raw['Country'].isin(excluded_countries)
)

# Select relevant columns and align the key names with the coral frame
df_co2 = co2_raw.loc[keep_rows, ['Country', 'Year', 'Total']].rename(
    columns={'Country': 'COUNTRY', 'Year': 'YEAR'}
)

In [14]:
# Merge CO2 and coral data
df_merged = df_coral.merge(df_co2, on=['COUNTRY', 'YEAR'], how='left')

# Drop Na values and select years past 1998
df_merged = df_merged.dropna()
df_merged = df_merged[df_merged['YEAR'] >= 1998]
df_merged.head(10)

Unnamed: 0,OCEAN_REGION,COUNTRY,LOCATION,LATITUDE,LONGITUDE,MONTH,YEAR,SEVERITY_CODE,DAY,datetime,Total
83,Caribbean,Bahamas,Gaulin's Reef,24.17,-74.48,6.0,1998,0,1,1998-06-01,1.989552
84,Caribbean,Bahamas,Gaulin's Reef,24.14,-74.45,6.0,1998,1,1,1998-06-01,1.989552
85,Caribbean,Bahamas,Snapshot Reef,24.04,-74.54,6.0,1998,1,1,1998-06-01,1.989552
86,Caribbean,Bahamas,Gaulin's Reef,24.15,-74.47,6.0,1998,1,1,1998-06-01,1.989552
87,Caribbean,Bahamas,Telephone Pole Reef,24.03,-74.54,6.0,1998,1,1,1998-06-01,1.989552
88,Caribbean,Bahamas,Gaulin's Reef,24.16,-74.48,6.0,1998,1,1,1998-06-01,1.989552
89,Caribbean,Bahamas,Gaulin's Reef,24.16,-74.46,6.0,1998,1,1,1998-06-01,1.989552
93,Caribbean,Bahamas,China Point (S10),24.75133,-77.80767,8.0,1998,2,1,1998-08-01,1.989552
94,Caribbean,Bahamas,South Staniard 2 (D7),24.84383,-77.85733,8.0,1998,2,1,1998-08-01,1.989552
95,Caribbean,Bahamas,Long Rock (D13),24.626,-77.691,8.0,1998,2,1,1998-08-01,1.989552


In [15]:
# Persist the merged coral/CO2 table; the row index is written as the first column
df_merged.to_csv(path_or_buf='./data/Data.csv')