### MODIS (1080x2160)

In [None]:
import requests
import os
from bs4 import BeautifulSoup
import urllib.parse

def download_file(url, filename, timeout=60):
    """Stream the resource at *url* into the local file *filename*.

    The body is written to ``<filename>.part`` first and only renamed to
    *filename* once the whole transfer has succeeded, so an interrupted
    download never leaves a truncated file under the final name.

    Args:
        url: Address to fetch with an HTTP GET request.
        filename: Destination path for the downloaded bytes.
        timeout: Value passed to ``requests.get(timeout=...)``. Without one
            a stalled server can block the call indefinitely.

    Raises:
        requests.HTTPError: If the server answers with an error status
            (raised by ``response.raise_for_status()``).
    """
    part_path = f'{filename}.part'
    completed = False
    try:
        # The context manager releases the connection even when an error
        # is raised part-way through the streamed body.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(part_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(part_path, filename)
        completed = True
    finally:
        # Drop the partial file if anything above failed.
        if not completed and os.path.exists(part_path):
            os.remove(part_path)

# Make sure the 'data/' directory exists. exist_ok=True makes this a single
# call that succeeds whether or not the directory is already there, with no
# gap between checking for it and creating it.
os.makedirs('data/', exist_ok=True)
    

# MODIS SST archives (1080x2160), fetched as one tar file per year.
# Source page: http://orca.science.oregonstate.edu/1080.by.2160.monthly.hdf.sst.modis.php
modis_url = 'http://orca.science.oregonstate.edu/data/1x2/monthly/sst.modis.r2022/hdf/sst.m.{}.tar'

# range() stops before 2023, so the years fetched are 2002 through 2022.
for year in range(2002, 2023):
    filename = f'data/sst.m.{year}.tar'
    url = modis_url.format(year)
    download_file(url, filename)
    print(f"Downloaded: {filename}")


# VGPM archives built from MODIS chl and SST (1080x2160), one tar file per year.
# Source page: http://orca.science.oregonstate.edu/1080.by.2160.monthly.hdf.vgpm.m.chl.m.sst.php
base_url = 'http://orca.science.oregonstate.edu/data/1x2/monthly/vgpm.r2022.m.chl.m.sst/hdf/vgpm.m.{}.tar'

# range() stops before 2023, so the years fetched are 2002 through 2022.
for year in range(2002, 2023):
    filename = f'data/vgpm.m.{year}.tar'
    url = base_url.format(year)
    download_file(url, filename)
    print(f"Downloaded: {filename}")

### SeaWIFS/AVHRR (1024x2048)

In [None]:
# SeaWIFS/AVHRR SST archives, fetched as one tar file per year.
# Source page: http://orca.science.oregonstate.edu/1080.by.2160.monthly.hdf.sst.avhrr.php
# NOTE(review): the section heading says 1024x2048, but the page name above
# says 1080 by 2160 -- confirm which grid these files actually use.
seawifs_url = 'http://orca.science.oregonstate.edu/data/1x2/monthly/sst.avhrr/hdf/sst.a.{}.tar'

# range() stops before 2002, so the years fetched are 1997 through 2001.
for year in range(1997, 2002):
    filename = f'data/sst.a.{year}.tar'
    url = seawifs_url.format(year)
    download_file(url, filename)
    print(f"Downloaded: {filename}")


# VGPM archives built from SeaWiFS chl and AVHRR SST (1080x2160), one tar file per year.
# Source page: http://orca.science.oregonstate.edu/1080.by.2160.monthly.hdf.vgpm.s.chl.a.sst.php
new_url = 'http://orca.science.oregonstate.edu/data/1x2/monthly/vgpm.r2014.s.chl.a.sst/hdf/vgpm.s.{}.tar'

# range() stops before 2002, so the years fetched are 1997 through 2001.
for year in range(1997, 2002):
    filename = f'data/vgpm.s.{year}.tar'
    url = new_url.format(year)
    download_file(url, filename)
    print(f"Downloaded: {filename}")

### Calculating MODIS Mean

In [None]:
import os
import tarfile
import gzip
import tempfile
!pip install pyhdf
from pyhdf.SD import *
import pandas as pd
import numpy as np

# Build a DataFrame with one column of 'sst' means per year (2002-2022) from
# the downloaded MODIS tar archives, then write it to 'sst_m_means.csv'.
df = pd.DataFrame()

# Loop through each yearly tar file
for year in range(2002, 2023):
    tar_filename = f'data/sst.m.{year}.tar'
    with tarfile.open(tar_filename, "r:") as tar:
        for member in tar.getmembers():
            # Only the gzip-compressed HDF members are processed
            if not member.name.endswith('.hdf.gz'):
                continue

            f = tar.extractfile(member)
            if f is None:
                # extractfile() returns None for members that are not
                # regular files or links; there is nothing to read.
                continue

            # SD() is given a file name, so the member is decompressed into
            # a named temporary file first. The context managers close the
            # archive member, the gzip stream and the temp file on any exit.
            with f, gzip.open(f, 'rb') as gz:
                with tempfile.NamedTemporaryFile() as tmp:
                    tmp.write(gz.read())
                    tmp.flush()  # make sure every byte is on disk before SD() reads it

                    hdf_file = SD(tmp.name, SDC.READ)
                    try:
                        # Read the whole 'sst' dataset into memory
                        data = hdf_file.select('sst')[:]

                        # Average over axis 1 (the second axis) and flatten to 1D.
                        # NOTE(review): the original comment called axis 1 the
                        # "time" dimension -- confirm against the dataset layout.
                        mean_data = np.mean(data, axis=1).reshape(-1)

                        # NOTE(review): the column is keyed by year only, so if an
                        # archive holds several .hdf.gz members each one overwrites
                        # the previous and only the last is kept -- confirm intent.
                        df[str(year)] = mean_data
                    finally:
                        # Always release the HDF handle, even if reading failed
                        hdf_file.end()

# Save the DataFrame to a CSV file
df.to_csv('sst_m_means.csv', index=False)

In [None]:
# Build a long-format table (year, latitude, longitude, sst) from the MODIS
# tar archives for 2002-2022 and write it to 'sst_m_means_new.csv'.
# Per-file frames are collected in a list and concatenated once at the end;
# concatenating inside the loop re-copies all earlier rows on every pass.
frames = []

# Loop through each yearly tar file
for year in range(2002, 2023):
    tar_filename = f'data/sst.m.{year}.tar'
    with tarfile.open(tar_filename, "r:") as tar:
        for member in tar.getmembers():
            # Only the gzip-compressed HDF members are processed
            if not member.name.endswith('.hdf.gz'):
                continue

            f = tar.extractfile(member)
            if f is None:
                # extractfile() returns None for members that are not
                # regular files or links; there is nothing to read.
                continue

            # SD() is given a file name, so the member is decompressed into
            # a named temporary file first. The context managers close the
            # archive member, the gzip stream and the temp file on any exit.
            with f, gzip.open(f, 'rb') as gz:
                with tempfile.NamedTemporaryFile() as tmp:
                    tmp.write(gz.read())
                    tmp.flush()  # make sure every byte is on disk before SD() reads it

                    hdf_file = SD(tmp.name, SDC.READ)
                    try:
                        # Access the 'sst' dataset
                        data = hdf_file.select('sst')

                        # Read the attribute dictionary once and pull both centers from it.
                        # NOTE(review): assumes these attributes live on the 'sst'
                        # dataset and hold one value each -- confirm against a real file.
                        attrs = data.attributes()
                        latitude_center = attrs['Latitude Center']
                        longitude_center = attrs['Longitude Center']

                        # Average over axis 1 (the second axis) and flatten to 1D.
                        # NOTE(review): the original comment called axis 1 the
                        # "time" dimension -- confirm against the dataset layout.
                        mean_data = np.mean(data[:], axis=1).reshape(-1)

                        # One row per mean value; year and both centers repeat on every row
                        df_temp = pd.DataFrame({
                            'year': [year] * len(mean_data),
                            'latitude': [latitude_center] * len(mean_data),
                            'longitude': [longitude_center] * len(mean_data),
                            'sst': mean_data
                        })
                        frames.append(df_temp)
                    finally:
                        # Always release the HDF handle, even if reading failed
                        hdf_file.end()

# pd.concat() raises on an empty list, so fall back to an empty DataFrame
df = pd.concat(frames) if frames else pd.DataFrame()

# Save the DataFrame to a CSV file
df.to_csv('sst_m_means_new.csv', index=False)


### Calculating SeaWIFS/AVHRR Mean

In [None]:
# Waiting to resolve merge issue

import os
import tarfile
import gzip
import tempfile
!pip install pyhdf
from pyhdf.SD import *
import pandas as pd
import numpy as np

# Build a DataFrame with one column of 'sst' means per year (1997-2001) from
# the downloaded AVHRR tar archives, then write it to 'sst_a_means.csv'.
df = pd.DataFrame()

# Loop through each yearly tar file
for year in range(1997, 2002):
    tar_filename = f'data/sst.a.{year}.tar'
    with tarfile.open(tar_filename, "r:") as tar:
        for member in tar.getmembers():
            # Only the gzip-compressed HDF members are processed
            if not member.name.endswith('.hdf.gz'):
                continue

            f = tar.extractfile(member)
            if f is None:
                # extractfile() returns None for members that are not
                # regular files or links; there is nothing to read.
                continue

            # SD() is given a file name, so the member is decompressed into
            # a named temporary file first. The context managers close the
            # archive member, the gzip stream and the temp file on any exit.
            with f, gzip.open(f, 'rb') as gz:
                with tempfile.NamedTemporaryFile() as tmp:
                    tmp.write(gz.read())
                    tmp.flush()  # make sure every byte is on disk before SD() reads it

                    hdf_file = SD(tmp.name, SDC.READ)
                    try:
                        # Read the whole 'sst' dataset into memory
                        data = hdf_file.select('sst')[:]

                        # Average over axis 1 (the second axis) and flatten to 1D.
                        # NOTE(review): the original comment called axis 1 the
                        # "time" dimension -- confirm against the dataset layout.
                        mean_data = np.mean(data, axis=1).reshape(-1)

                        # NOTE(review): the column is keyed by year only, so if an
                        # archive holds several .hdf.gz members each one overwrites
                        # the previous and only the last is kept -- confirm intent.
                        df[str(year)] = mean_data
                    finally:
                        # Always release the HDF handle, even if reading failed
                        hdf_file.end()

# Save the DataFrame to a CSV file
df.to_csv('sst_a_means.csv', index=False)

In [None]:
# Build a long-format table (year, latitude, longitude, sst) from the AVHRR
# tar archives for 1997-2001 and write it to 'sst_a_means_new.csv'.
# Per-file frames are collected in a list and concatenated once at the end;
# concatenating inside the loop re-copies all earlier rows on every pass.
frames = []

# Loop through each yearly tar file
for year in range(1997, 2002):
    tar_filename = f'data/sst.a.{year}.tar'
    with tarfile.open(tar_filename, "r:") as tar:
        for member in tar.getmembers():
            # Only the gzip-compressed HDF members are processed
            if not member.name.endswith('.hdf.gz'):
                continue

            f = tar.extractfile(member)
            if f is None:
                # extractfile() returns None for members that are not
                # regular files or links; there is nothing to read.
                continue

            # SD() is given a file name, so the member is decompressed into
            # a named temporary file first. The context managers close the
            # archive member, the gzip stream and the temp file on any exit.
            with f, gzip.open(f, 'rb') as gz:
                with tempfile.NamedTemporaryFile() as tmp:
                    tmp.write(gz.read())
                    tmp.flush()  # make sure every byte is on disk before SD() reads it

                    hdf_file = SD(tmp.name, SDC.READ)
                    try:
                        # Access the 'sst' dataset
                        data = hdf_file.select('sst')

                        # Read the attribute dictionary once and pull both centers from it.
                        # NOTE(review): assumes these attributes live on the 'sst'
                        # dataset and hold one value each -- confirm against a real file.
                        attrs = data.attributes()
                        latitude_center = attrs['Latitude Center']
                        longitude_center = attrs['Longitude Center']

                        # Average over axis 1 (the second axis) and flatten to 1D.
                        # NOTE(review): the original comment called axis 1 the
                        # "time" dimension -- confirm against the dataset layout.
                        mean_data = np.mean(data[:], axis=1).reshape(-1)

                        # One row per mean value; year and both centers repeat on every row
                        df_temp = pd.DataFrame({
                            'year': [year] * len(mean_data),
                            'latitude': [latitude_center] * len(mean_data),
                            'longitude': [longitude_center] * len(mean_data),
                            'sst': mean_data
                        })
                        frames.append(df_temp)
                    finally:
                        # Always release the HDF handle, even if reading failed
                        hdf_file.end()

# pd.concat() raises on an empty list, so fall back to an empty DataFrame
df = pd.concat(frames) if frames else pd.DataFrame()

# Save the DataFrame to a CSV file
df.to_csv('sst_a_means_new.csv', index=False)