In [None]:
!pip install requests xarray pandas pyjstat datetime pydap netCDF4
import os
import sqlite3
import pandas as pd
import requests
from pyjstat import pyjstat
from urllib.parse import quote
from io import StringIO
from IPython.display import display
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker


!apt-get update && apt-get install -y iputils-ping

In [None]:
# Open (or create on first use) the on-disk SQLite file that every later cell
# uses as its cache, and keep one shared cursor for the table-existence checks.
db_path = 'greenland_fishery.db'
conn = sqlite3.connect(database=db_path)
cursor = conn.cursor()
print(f"Connected to SQLite database at {db_path}")

In [None]:
# LOAD VAR 1 TOTAL CATCH
#
# Loads the quarterly total-catch series (nation code "GRL", unit code "Ton")
# from the `total_catch` SQLite table when a usable copy is cached; otherwise
# fetches it from the Statistics Greenland API, tidies it and caches it.
# `df_clean` stays None when no data could be obtained.

df_clean = None

cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='total_catch'")
if cursor.fetchone():
    print("Loading Total Catch from SQLite database")
    df_clean = pd.read_sql_query("SELECT * FROM total_catch", conn)
    print("Loaded DataFrame from SQLite:")
    # Validate the loaded data
    if df_clean['Quarter'].isnull().any():
        print("Warning: Found NaN values in Quarter column. Dropping and recreating table...")
        cursor.execute("DROP TABLE total_catch")
        conn.commit()
        # Throw the bad frame away so the fetch below really does recreate the table.
        df_clean = None
    else:
        print("Loaded data is valid.")
else:
    print("total_catch table not found, querying API and creating table...")

if df_clean is None:
    url = "https://bank.stat.gl:443/api/v1/en/Greenland/FI/FI10/FIX008.px"
    query = {
        "query": [
            {"code": "nation", "selection": {"filter": "item", "values": ["GRL"]}},
            {"code": "unit", "selection": {"filter": "item", "values": ["Ton"]}},
            {"code": "time", "selection": {"filter": "item", "values": [str(y) for y in range(2011, 2025)]}},
            {"code": "quarter", "selection": {"filter": "item", "values": ["1", "2", "3", "4"]}}
        ],
        "response": {"format": "json-stat2"}
    }
    try:
        # A timeout keeps the notebook from hanging forever on a stalled connection.
        response = requests.post(url, json=query, timeout=60)
    except requests.exceptions.RequestException as e:
        response = None
        print(f"Failed to retrieve data from API: {e}")

    if response is not None and response.status_code == 200:
        dataset = pyjstat.Dataset.read(response.text)
        df = dataset.write('dataframe')
        print("Data successfully retrieved and converted to DataFrame!")

        # Clean DataFrame (built under a scratch name; df_clean is only set once valid)
        catch = df.drop(columns=['nation']).rename(columns={
            "time": "Year",
            "quarter": "Quarter",
            "unit": "Unit",
            "value": "Total_Catch"
        })
        # Literal replacement: "Quarter 1" -> "Q1"
        catch["Quarter"] = catch["Quarter"].str.replace("Quarter ", "Q", regex=False)
        quarter_order = ["Q1", "Q2", "Q3", "Q4"]
        # Labels outside quarter_order become NaN here and are caught by the check below.
        catch["Quarter"] = pd.Categorical(catch["Quarter"], categories=quarter_order, ordered=True)
        catch = catch[["Year", "Quarter", "Unit", "Total_Catch"]].astype({"Year": int})

        # Validate before saving to SQLite
        if catch.isnull().any().any():
            raise ValueError("DataFrame contains NaN values before saving to SQLite: " + str(catch.head()))

        df_clean = catch

        # Create table
        cursor.execute("""
            CREATE TABLE total_catch (
                Year INTEGER,
                Quarter TEXT,
                Unit TEXT,
                Total_Catch INTEGER
            )
        """)

        # Insert data into table
        df_clean.to_sql('total_catch', conn, if_exists='append', index=False)
        conn.commit()
        print("Saved Total Catch to SQLite table 'total_catch'")
    elif response is not None:
        print(f"Error {response.status_code}: {response.text}")

# Final display
print("Final Total Catch DataFrame:")
if df_clean is not None:
    display(df_clean.head())
else:
    # Without this guard a failed fetch crashed with AttributeError on None.head().
    print("Error: df_clean not created due to API failure.")

In [None]:
### Variable 2: Exports of Fish
#
# Loads quarterly fish-export values from the `fish_exports` SQLite table when
# a usable copy is cached; otherwise fetches table IEX2PROD (branch code "46")
# from the Statistics Greenland API, tidies it and caches it.
# `df_fish_clean` stays None when no data could be obtained.

df_fish_clean = None

# Check if fish_exports table exists
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='fish_exports'")
if cursor.fetchone():
    print("Loading Fish Exports from SQLite database")
    df_fish_clean = pd.read_sql_query("SELECT * FROM fish_exports", conn)
    print("Loaded DataFrame from SQLite:")
    if df_fish_clean['Quarter'].isnull().any():
        print("Warning: Found NaN values in Quarter column. Dropping and recreating table...")
        cursor.execute("DROP TABLE fish_exports")
        conn.commit()
        # Fall through to the fetch below so the table is rebuilt in this same run.
        df_fish_clean = None
    else:
        print("Loaded data is valid.")
else:
    print("fish_exports table not found, querying API and creating table...")

if df_fish_clean is None:
    url = "https://bank.stat.gl:443/api/v1/en/Greenland/IE/IE10/IEX2PROD.px"
    query = {
        "query": [
            {"code": "branch", "selection": {"filter": "item", "values": ["46"]}},
            {"code": "quarter", "selection": {"filter": "item", "values": ["1", "2", "3", "4"]}},
            {"code": "time", "selection": {"filter": "item", "values": [str(y) for y in range(2011, 2025)]}}
        ],
        "response": {"format": "json-stat2"}
    }
    try:
        # A timeout keeps the notebook from hanging forever on a stalled connection.
        response = requests.post(url, json=query, timeout=60)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"Failed to retrieve data from API: {e}")
    else:
        print("API Response:")
        print(response.text[:500])

        dataset = pyjstat.Dataset.read(response.text)
        df_fish_exports = dataset.write('dataframe')
        print("Fish export data successfully retrieved!")

        # Clean DataFrame. rename() skips keys that are absent, so a frame that
        # already has a "Quarter" column needs no special-casing.
        value_col = "Fish_Export_Value_Million_Kr"
        exports = df_fish_exports.rename(columns={
            "time": "Year",
            "quarter": "Quarter",
            "value": value_col
        })

        if "Quarter" not in exports.columns:
            raise ValueError("Quarter column missing after renaming.")
        if exports["Quarter"].isnull().any():
            raise ValueError("Quarter column contains NaN values after renaming: " + str(exports["Quarter"].head()))

        # Fix quarters: "Quarter 1" / "quarter 1" -> "Q1"
        exports["Quarter"] = exports["Quarter"].str.replace(r"[Qq]uarter ", "Q", regex=True)
        quarter_order = ["Q1", "Q2", "Q3", "Q4"]
        # Labels outside quarter_order become NaN here and are caught just below.
        exports["Quarter"] = pd.Categorical(exports["Quarter"], categories=quarter_order, ordered=True)
        # Convert Year before sorting so the order is numeric rather than lexical.
        exports["Year"] = exports["Year"].astype(int)
        exports = exports.sort_values(by=["Year", "Quarter"]).reset_index(drop=True)
        print("After fixing quarters:")
        display(exports.head())

        if exports["Quarter"].isnull().any():
            raise ValueError("Quarter column contains NaN values after transformation: " + str(exports["Quarter"].head()))

        # Check for missing values BEFORE the integer cast; otherwise a NaN would
        # surface as an opaque casting error instead of this message.
        if exports[value_col].isnull().any():
            raise ValueError("DataFrame contains NaN values before saving to SQLite.")

        # Convert export value to million Kr and round
        # NOTE(review): assumes the API reports plain Kr -- confirm the table's unit.
        exports[value_col] = (exports[value_col] / 1e6).round(0).astype(int)
        exports = exports[["Year", "Quarter", value_col]]

        if exports.isnull().any().any():
            raise ValueError("DataFrame contains NaN values before saving to SQLite.")

        df_fish_clean = exports

        # Create table
        cursor.execute("""
            CREATE TABLE fish_exports (
                Year INTEGER,
                Quarter TEXT,
                Fish_Export_Value_Million_Kr INTEGER
            )
        """)

        # Insert data into table
        df_fish_clean.to_sql('fish_exports', conn, if_exists='append', index=False)
        conn.commit()
        print("Saved Fish Exports to SQLite table 'fish_exports'")

# Final display
print("Final Fish Exports DataFrame:")
if df_fish_clean is not None:
    display(df_fish_clean.head())
else:
    print("Error: df_fish_clean not created due to API failure.")

In [None]:
# VARIABLE 3: WEST GREENLAND SST

df_sst_west_clean = None


# Degree to ERDDAP grid index conversion
def deg_to_index_lat(lat):
    """Return the number of 0.25-degree steps between -90 and `lat`, rounded to an int."""
    steps_from_south = (lat + 90) / 0.25
    return int(round(steps_from_south))


def deg_to_index_lon(lon):
    """Return the number of 0.25-degree steps between -180 and `lon`, rounded to an int."""
    steps_from_west = (lon + 180) / 0.25
    return int(round(steps_from_west))

# West Greenland bounding box, in degrees
bbox_deg = dict(lat_min=65.0, lat_max=70.0, lon_min=-55.0, lon_max=-50.0)

# The same box as grid indices: latitude edges go through deg_to_index_lat,
# longitude edges through deg_to_index_lon (key order follows bbox_deg).
bbox_idx = {
    edge: (deg_to_index_lat if edge.startswith('lat') else deg_to_index_lon)(degrees)
    for edge, degrees in bbox_deg.items()
}
print("Bounding box indices:", bbox_idx)

# Check if sst_west table exists; otherwise build it from the NOAA OISST
# dataset on ERDDAP (one request per year, averaged to quarterly means).
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='sst_west'")
if cursor.fetchone():
    print("Loading SST West from SQLite database")
    df_sst_west_clean = pd.read_sql_query("SELECT * FROM sst_west", conn)
    # Only report the cache as valid after actually checking it.
    if df_sst_west_clean.empty or df_sst_west_clean.isnull().any().any():
        print("Warning: cached sst_west table is empty or contains NaN values.")
    else:
        print("Loaded data is valid.")
else:
    print("sst_west table not found, querying API and creating table...")
    years = list(range(2011, 2025))
    west_quarters = []
    failed_years = []

    base = "https://coastwatch.pfeg.noaa.gov/erddap/griddap/ncdcOisst21Agg_LonPM180.csv?"
    var = "sst"
    zlev = "[0:1:0]"
    # griddap treats a bound in parentheses as an axis VALUE (degrees here) and a
    # bare number as an index. Sending grid indices inside parentheses asks for
    # e.g. latitude 620, which is off the axis, so the box is sent in degrees.
    lat = f"[({bbox_deg['lat_min']}):1:({bbox_deg['lat_max']})]"
    lon = f"[({bbox_deg['lon_min']}):1:({bbox_deg['lon_max']})]"

    for year in years:
        print(f"Processing year: {year}")
        try:
            time_sel = f"[({year}-01-01T00:00:00Z):1:({year}-12-31T23:59:59Z)]"
            query = f"{var}{time_sel}{zlev}{lat}{lon}"
            full_url = base + quote(query, safe=":/[](),-")
            print("Constructed URL:", full_url)

            # A timeout keeps one stalled download from hanging the whole loop.
            response = requests.get(full_url, timeout=300)
            if response.status_code != 200:
                raise ValueError(f"HTTP {response.status_code}: {response.text}")

            # ERDDAP's .csv output carries a units row on the second line; skip it.
            df = pd.read_csv(StringIO(response.text), skiprows=[1])
            df = df.rename(columns={col: col.strip() for col in df.columns})
            df = df.dropna(subset=["sst"])

            df["time"] = pd.to_datetime(df["time"])
            # The server snaps each time bound to its nearest time step, which can
            # fall in the neighbouring year; keep only the requested year so no
            # stray Year/Quarter groups are produced.
            df = df[df["time"].dt.year == year].copy()
            if df.empty:
                raise ValueError("no SST rows returned for this year")
            df["Year"] = df["time"].dt.year.astype(int)
            df["Quarter"] = "Q" + df["time"].dt.quarter.astype(str)

            df_q = df.groupby(['Year', 'Quarter'])["sst"].mean().reset_index()
            df_q = df_q.rename(columns={"sst": "Sea_Surface_Temp_C_West"})

            # Melt flag: quarterly mean above 0.5; melt index: mean clipped to [0, 4], scaled to [0, 1].
            df_q["Melt_Active_West"] = (df_q["Sea_Surface_Temp_C_West"] > 0.5).astype(int)
            df_q["Melt_Index_West"] = df_q["Sea_Surface_Temp_C_West"].clip(lower=0, upper=4) / 4

            west_quarters.append(df_q)
            print(f"{year} processed.")
        except Exception as e:
            # Best effort per year: record the failure and carry on with the rest.
            failed_years.append(year)
            print(f"Failed for {year}: {e}")

    if west_quarters:
        df_sst_west_clean = pd.concat(west_quarters).reset_index(drop=True)
        if df_sst_west_clean.isnull().any().any():
            raise ValueError("DataFrame contains NaN values before saving to SQLite.")

        if failed_years:
            # Caching a partial series would make every later run load it as if
            # complete; keep it in memory only so the next run retries.
            print(f"Warning: no data for years {failed_years}; partial result not saved to SQLite.")
        else:
            cursor.execute("""
                CREATE TABLE sst_west (
                    Year INTEGER,
                    Quarter TEXT,
                    Sea_Surface_Temp_C_West REAL,
                    Melt_Active_West INTEGER,
                    Melt_Index_West REAL
                )
            """)
            df_sst_west_clean.to_sql('sst_west', conn, if_exists='append', index=False)
            conn.commit()
            print("Saved SST West to SQLite table 'sst_west'")
    else:
        print("No data retrieved for SST West.")

# Final display
print("Final SST West DataFrame:")
if df_sst_west_clean is not None:
    print("Final SST dataset shape:", df_sst_west_clean.shape)
    display(df_sst_west_clean.head())
else:
    print("Error: df_sst_west_clean not created due to API failure.")

In [None]:
# Variable 4: East Greenland SST, Melt

df_sst_east_clean = None

# East Greenland bounding box, in degrees
bbox_deg_east = dict(lat_min=65.0, lat_max=70.0, lon_min=-40.0, lon_max=-35.0)

# The same box as grid indices (deg_to_index_* come from the SST West cell):
# latitude edges go through deg_to_index_lat, longitude edges through deg_to_index_lon.
bbox_idx_east = {
    edge: (deg_to_index_lat if edge.startswith('lat') else deg_to_index_lon)(degrees)
    for edge, degrees in bbox_deg_east.items()
}
print("East Greenland bounding box indices:", bbox_idx_east)

# Check if sst_east table exists; otherwise build it from the NOAA OISST
# dataset on ERDDAP (one request per year, averaged to quarterly means).
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='sst_east'")
if cursor.fetchone():
    print("Loading SST East from SQLite database")
    df_sst_east_clean = pd.read_sql_query("SELECT * FROM sst_east", conn)
    # Only report the cache as valid after actually checking it.
    if df_sst_east_clean.empty or df_sst_east_clean.isnull().any().any():
        print("Warning: cached sst_east table is empty or contains NaN values.")
    else:
        print("Loaded data is valid.")
else:
    print("sst_east table not found, querying API and creating table...")
    years = list(range(2011, 2025))
    east_quarters = []
    failed_years = []

    base = "https://coastwatch.pfeg.noaa.gov/erddap/griddap/ncdcOisst21Agg_LonPM180.csv?"
    var = "sst"
    zlev = "[0:1:0]"
    # griddap treats a bound in parentheses as an axis VALUE (degrees here) and a
    # bare number as an index. Sending grid indices inside parentheses asks for
    # e.g. latitude 620, which is off the axis, so the box is sent in degrees.
    lat = f"[({bbox_deg_east['lat_min']}):1:({bbox_deg_east['lat_max']})]"
    lon = f"[({bbox_deg_east['lon_min']}):1:({bbox_deg_east['lon_max']})]"

    for year in years:
        print(f"Processing East Greenland year: {year}")
        try:
            time_sel = f"[({year}-01-01T00:00:00Z):1:({year}-12-31T23:59:59Z)]"
            query = f"{var}{time_sel}{zlev}{lat}{lon}"
            full_url = base + quote(query, safe=":/[](),-")
            print("Constructed URL:", full_url)

            # A timeout keeps one stalled download from hanging the whole loop.
            response = requests.get(full_url, timeout=300)
            if response.status_code != 200:
                raise ValueError(f"HTTP {response.status_code}: {response.text}")

            # ERDDAP's .csv output carries a units row on the second line; skip it.
            df = pd.read_csv(StringIO(response.text), skiprows=[1])
            df = df.rename(columns={col: col.strip() for col in df.columns})
            df = df.dropna(subset=["sst"])

            df["time"] = pd.to_datetime(df["time"])
            # The server snaps each time bound to its nearest time step, which can
            # fall in the neighbouring year; keep only the requested year so no
            # stray Year/Quarter groups are produced.
            df = df[df["time"].dt.year == year].copy()
            if df.empty:
                raise ValueError("no SST rows returned for this year")
            df["Year"] = df["time"].dt.year.astype(int)
            df["Quarter"] = "Q" + df["time"].dt.quarter.astype(str)

            df_q = df.groupby(['Year', 'Quarter'])["sst"].mean().reset_index()
            df_q = df_q.rename(columns={"sst": "Sea_Surface_Temp_C_East"})

            # Melt flag: quarterly mean above 0.5; melt index: mean clipped to [0, 4], scaled to [0, 1].
            df_q["Melt_Active_East"] = (df_q["Sea_Surface_Temp_C_East"] > 0.5).astype(int)
            df_q["Melt_Index_East"] = df_q["Sea_Surface_Temp_C_East"].clip(lower=0, upper=4) / 4

            east_quarters.append(df_q)
            print(f"{year} processed.")
        except Exception as e:
            # Best effort per year: record the failure and carry on with the rest.
            failed_years.append(year)
            print(f"Failed for {year}: {e}")

    if east_quarters:
        df_sst_east_clean = pd.concat(east_quarters).reset_index(drop=True)
        if df_sst_east_clean.isnull().any().any():
            raise ValueError("DataFrame contains NaN values before saving to SQLite.")

        if failed_years:
            # Caching a partial series would make every later run load it as if
            # complete; keep it in memory only so the next run retries.
            print(f"Warning: no data for years {failed_years}; partial result not saved to SQLite.")
        else:
            cursor.execute("""
                CREATE TABLE sst_east (
                    Year INTEGER,
                    Quarter TEXT,
                    Sea_Surface_Temp_C_East REAL,
                    Melt_Active_East INTEGER,
                    Melt_Index_East REAL
                )
            """)
            df_sst_east_clean.to_sql('sst_east', conn, if_exists='append', index=False)
            conn.commit()
            print("Saved SST East to SQLite table 'sst_east'")
    else:
        print("No data retrieved for SST East.")

# Final display
print("Final SST East DataFrame:")
if df_sst_east_clean is not None:
    print("Final SST East dataset shape:", df_sst_east_clean.shape)
    display(df_sst_east_clean.head())
else:
    print("Error: df_sst_east_clean not created due to API failure.")

In [None]:
# Variable 5: South Greenland SST, Melt

df_sst_south_clean = None

# South Greenland bounding box, in degrees
bbox_deg_south = dict(lat_min=60.0, lat_max=65.0, lon_min=-45.0, lon_max=-40.0)

# The same box as grid indices (deg_to_index_* come from the SST West cell):
# latitude edges go through deg_to_index_lat, longitude edges through deg_to_index_lon.
bbox_idx_south = {
    edge: (deg_to_index_lat if edge.startswith('lat') else deg_to_index_lon)(degrees)
    for edge, degrees in bbox_deg_south.items()
}
print("South Greenland bounding box indices:", bbox_idx_south)

# Check if sst_south table exists; otherwise build it from the NOAA OISST
# dataset on ERDDAP (one request per year, averaged to quarterly means).
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='sst_south'")
if cursor.fetchone():
    print("Loading SST South from SQLite database")
    df_sst_south_clean = pd.read_sql_query("SELECT * FROM sst_south", conn)
    # Only report the cache as valid after actually checking it.
    if df_sst_south_clean.empty or df_sst_south_clean.isnull().any().any():
        print("Warning: cached sst_south table is empty or contains NaN values.")
    else:
        print("Loaded data is valid.")
else:
    print("sst_south table not found, querying API and creating table...")
    years = list(range(2011, 2025))
    south_quarters = []
    failed_years = []

    base = "https://coastwatch.pfeg.noaa.gov/erddap/griddap/ncdcOisst21Agg_LonPM180.csv?"
    var = "sst"
    zlev = "[0:1:0]"
    # griddap treats a bound in parentheses as an axis VALUE (degrees here) and a
    # bare number as an index. Sending grid indices inside parentheses asks for
    # e.g. latitude 600, which is off the axis, so the box is sent in degrees.
    lat = f"[({bbox_deg_south['lat_min']}):1:({bbox_deg_south['lat_max']})]"
    lon = f"[({bbox_deg_south['lon_min']}):1:({bbox_deg_south['lon_max']})]"

    for year in years:
        print(f"Processing South Greenland year: {year}")
        try:
            time_sel = f"[({year}-01-01T00:00:00Z):1:({year}-12-31T23:59:59Z)]"
            query = f"{var}{time_sel}{zlev}{lat}{lon}"
            full_url = base + quote(query, safe=":/[](),-")
            print("Constructed URL:", full_url)

            # A timeout keeps one stalled download from hanging the whole loop.
            response = requests.get(full_url, timeout=300)
            if response.status_code != 200:
                raise ValueError(f"HTTP {response.status_code}: {response.text}")

            # ERDDAP's .csv output carries a units row on the second line; skip it.
            df = pd.read_csv(StringIO(response.text), skiprows=[1])
            df = df.rename(columns={col: col.strip() for col in df.columns})
            df = df.dropna(subset=["sst"])

            df["time"] = pd.to_datetime(df["time"])
            # The server snaps each time bound to its nearest time step, which can
            # fall in the neighbouring year; keep only the requested year so no
            # stray Year/Quarter groups are produced.
            df = df[df["time"].dt.year == year].copy()
            if df.empty:
                raise ValueError("no SST rows returned for this year")
            df["Year"] = df["time"].dt.year.astype(int)
            df["Quarter"] = "Q" + df["time"].dt.quarter.astype(str)

            df_q = df.groupby(['Year', 'Quarter'])["sst"].mean().reset_index()
            df_q = df_q.rename(columns={"sst": "Sea_Surface_Temp_C_South"})

            # Melt flag: quarterly mean above 0.5; melt index: mean clipped to [0, 4], scaled to [0, 1].
            df_q["Melt_Active_South"] = (df_q["Sea_Surface_Temp_C_South"] > 0.5).astype(int)
            df_q["Melt_Index_South"] = df_q["Sea_Surface_Temp_C_South"].clip(lower=0, upper=4) / 4

            south_quarters.append(df_q)
            print(f"{year} processed.")
        except Exception as e:
            # Best effort per year: record the failure and carry on with the rest.
            failed_years.append(year)
            print(f"Failed for {year}: {e}")

    if south_quarters:
        df_sst_south_clean = pd.concat(south_quarters).reset_index(drop=True)
        if df_sst_south_clean.isnull().any().any():
            raise ValueError("DataFrame contains NaN values before saving to SQLite.")

        if failed_years:
            # Caching a partial series would make every later run load it as if
            # complete; keep it in memory only so the next run retries.
            print(f"Warning: no data for years {failed_years}; partial result not saved to SQLite.")
        else:
            cursor.execute("""
                CREATE TABLE sst_south (
                    Year INTEGER,
                    Quarter TEXT,
                    Sea_Surface_Temp_C_South REAL,
                    Melt_Active_South INTEGER,
                    Melt_Index_South REAL
                )
            """)
            df_sst_south_clean.to_sql('sst_south', conn, if_exists='append', index=False)
            conn.commit()
            print("Saved SST South to SQLite table 'sst_south'")
    else:
        print("No data retrieved for SST South.")

# Final display
print("Final SST South DataFrame:")
if df_sst_south_clean is not None:
    print("Final SST South dataset shape:", df_sst_south_clean.shape)
    display(df_sst_south_clean.head())
else:
    print("Error: df_sst_south_clean not created due to API failure.")

In [None]:
# Variable 6: Total Catch by Foreign Vessels
#
# Serves the series from the `foreign_catch` SQLite table when it is cached;
# otherwise requests nation code "UDL" from the Statistics Greenland API,
# tidies the result and stores it. Any exception raised while fetching or
# cleaning is reported and leaves the notebook running.

df_foreign_clean = None

# Look the table up in the SQLite catalogue
cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='foreign_catch'")
foreign_table_cached = cursor.fetchone() is not None

if not foreign_table_cached:
    print("foreign_catch table not found, querying API and creating table...")
    url = "https://bank.stat.gl:443/api/v1/en/Greenland/FI/FI10/FIX008.px"
    # One "item" filter per dimension, in the order the request lists them
    selections = {
        "nation": ["UDL"],
        "unit": ["Ton"],
        "time": list(map(str, range(2011, 2025))),
        "quarter": ["1", "2", "3", "4"],
    }
    query = {
        "query": [
            {"code": code, "selection": {"filter": "item", "values": values}}
            for code, values in selections.items()
        ],
        "response": {"format": "json-stat2"},
    }
    try:
        response = requests.post(url, json=query)
        if response.status_code != 200:
            print(f"Error {response.status_code}: {response.text}")
        else:
            dataset = pyjstat.Dataset.read(response.text)
            df = dataset.write('dataframe')
            print("Foreign vessel data successfully retrieved and converted to DataFrame!")

            # Tidy the frame one step at a time, rebinding the name at each step
            df_foreign_clean = df.copy()
            df_foreign_clean = df_foreign_clean.drop(columns=["nation"])
            df_foreign_clean = df_foreign_clean.rename(columns={
                "time": "Year",
                "quarter": "Quarter",
                "unit": "Unit",
                "value": "Foreign_Catch",
            })
            quarter_labels = df_foreign_clean["Quarter"].str.replace("Quarter ", "Q")
            quarter_order = ["Q1", "Q2", "Q3", "Q4"]
            df_foreign_clean["Quarter"] = pd.Categorical(
                quarter_labels, categories=quarter_order, ordered=True
            )
            df_foreign_clean = (
                df_foreign_clean
                .sort_values(by=["Year", "Quarter"])
                .reset_index(drop=True)
            )
            df_foreign_clean = df_foreign_clean[["Year", "Quarter", "Unit", "Foreign_Catch"]]
            df_foreign_clean["Year"] = df_foreign_clean["Year"].astype(int)

            # Refuse to cache a frame with any missing value
            if df_foreign_clean.isnull().values.any():
                raise ValueError("DataFrame contains NaN values before saving to SQLite.")

            # Create the table with explicit column types, then append the rows
            cursor.execute("""
                CREATE TABLE foreign_catch (
                    Year INTEGER,
                    Quarter TEXT,
                    Unit TEXT,
                    Foreign_Catch INTEGER
                )
            """)
            df_foreign_clean.to_sql('foreign_catch', conn, if_exists='append', index=False)
            conn.commit()
            print("Saved Foreign Catch to SQLite table 'foreign_catch'")
    except Exception as e:
        print(f"Failed to retrieve data from API: {e}")
else:
    print("Loading Foreign Catch from SQLite database")
    df_foreign_clean = pd.read_sql_query("SELECT * FROM foreign_catch", conn)
    print("Loaded data is valid.")

# Final display
print("Final Foreign Catch DataFrame:")
if df_foreign_clean is None:
    print("Error: df_foreign_clean not created due to API failure.")
else:
    display(df_foreign_clean.head())