In [None]:
# Install dependencies and import libraries
!pip install requests pandas pyjstat xarray datetime
import sqlite3
import pandas as pd
import requests
from pyjstat import pyjstat
from IPython.display import display
import xarray as xr
from datetime import datetime

In [None]:
# Connect to the SQLite database.
# `db_path` is a relative path. `conn` is the connection object that the
# update cells pass to `get_latest_period` / `DataFrame.to_sql`, and that the
# final cell closes; `cursor` is created from the same connection.
db_path = 'greenland_fishery.db'
conn = sqlite3.connect(db_path)
cursor = conn.cursor()
print(f"Connected to SQLite database at {db_path}")

In [None]:
# Helper function to get the latest year and quarter from a table
def get_latest_period(table_name, conn):
    """Return the most recent ``(Year, Quarter)`` stored in ``table_name``.

    Parameters
    ----------
    table_name : str
        Name of a table that has ``Year`` and ``Quarter`` columns.
    conn : sqlite3.Connection
        Open database connection the query is executed on.

    Returns
    -------
    tuple
        ``(year, quarter)`` with ``year`` as ``int`` and ``quarter`` as ``str``
        (e.g. ``(2023, "Q2")``), or ``(None, None)`` when the table holds no
        rows with both values set.

    Notes
    -----
    Any error raised by ``pd.read_sql_query`` (for example when the table does
    not exist) is propagated unchanged.
    """
    # Identifiers cannot be bound as SQL parameters, so quote the table name
    # instead of splicing it in verbatim.
    quoted_table = '"' + str(table_name).replace('"', '""') + '"'

    # Order the rows and take the first one rather than mixing MAX(Year) with
    # a bare Quarter column: the aggregate form names its column "MAX(Year)"
    # (so result['Year'] does not exist), returns a NULL row for an empty
    # table, and does not guarantee which Quarter it reports. Quarter labels
    # "Q1".."Q4" sort correctly as text.
    query = (
        f"SELECT Year, Quarter FROM {quoted_table} "
        "WHERE Year IS NOT NULL AND Quarter IS NOT NULL "
        "ORDER BY CAST(Year AS INTEGER) DESC, Quarter DESC "
        "LIMIT 1"
    )
    result = pd.read_sql_query(query, conn)
    if result.empty:
        return None, None

    # Return plain Python types so callers can do arithmetic / formatting
    # without depending on numpy scalar behaviour.
    max_year = int(result['Year'].iloc[0])
    max_quarter = str(result['Quarter'].iloc[0])
    return max_year, max_quarter

In [None]:
# Helper function to determine quarters to fetch
def quarters_to_fetch(last_year, last_quarter):
    """Work out which year/quarters follow the last stored period.

    Parameters
    ----------
    last_year : int
        Year of the most recent stored period.
    last_quarter : str
        Quarter label of the most recent stored period ("Q1".."Q4").

    Returns
    -------
    tuple of (list, list)
        ``([year], [quarter labels])``. When the last stored quarter is Q4 the
        result is ``([last_year + 1], ["Q1"])``; otherwise it is the remaining
        quarters of ``last_year``.

    Raises
    ------
    ValueError
        If ``last_quarter`` is not one of the four known labels.
    """
    quarter_labels = ["Q1", "Q2", "Q3", "Q4"]
    position = quarter_labels.index(last_quarter)

    # Year already complete: roll over to the first quarter of the next year.
    if position == len(quarter_labels) - 1:
        return [last_year + 1], ["Q1"]

    # Otherwise request everything after the stored quarter in the same year.
    return [last_year], quarter_labels[position + 1:]

In [None]:
# Update Total Catch Data
print("Updating Total Catch Data...")

# Quarter lookups are defined unconditionally (not inside a branch) so they
# exist even when this cell exits early; other cells in this notebook read them.
quarter_mapping = {"Q1": "1", "Q2": "2", "Q3": "3", "Q4": "4"}
quarter_order = ["Q1", "Q2", "Q3", "Q4"]

last_year, last_quarter = get_latest_period("total_catch", conn)
if last_year is None:
    print("No data found in total_catch table. Run setup_dataset.ipynb first.")
else:
    # Bind the result to `quarters_needed`: assigning it to `quarters_to_fetch`
    # would overwrite the helper function and break every later call to it.
    years_to_fetch, quarters_needed = quarters_to_fetch(last_year, last_quarter)
    print(f"Last period in total_catch: {last_year} {last_quarter}")
    print(f"Fetching data for years: {years_to_fetch}, quarters: {quarters_needed}")

    # Convert quarters to API format (1, 2, 3, 4)
    api_quarters = [quarter_mapping[q] for q in quarters_needed]

    # Fetch new data
    url = "https://bank.stat.gl:443/api/v1/en/Greenland/FI/FI10/FIX008.px"
    query = {
        "query": [
            {"code": "nation", "selection": {"filter": "item", "values": ["GRL"]}},
            {"code": "unit", "selection": {"filter": "item", "values": ["Ton"]}},
            {"code": "time", "selection": {"filter": "item", "values": [str(y) for y in years_to_fetch]}},
            {"code": "quarter", "selection": {"filter": "item", "values": api_quarters}}
        ],
        "response": {"format": "json-stat2"}
    }

    # A timeout keeps the cell from hanging forever on a stalled connection;
    # transport errors are reported the same way as HTTP errors.
    response = None
    try:
        response = requests.post(url, json=query, timeout=60)
    except requests.exceptions.RequestException as e:
        print(f"Error fetching Total Catch data: {e}")

    if response is not None and response.status_code == 200:
        dataset = pyjstat.Dataset.read(response.text)
        df = dataset.write('dataframe')
        print("New Total Catch data retrieved!")

        if df.empty:
            print("No new Total Catch data available for the specified periods.")
        else:
            # Clean DataFrame (build a new frame instead of mutating in place)
            df_clean = df.drop(columns=['nation']).rename(columns={
                "time": "Year",
                "quarter": "Quarter",
                "unit": "Unit",
                "value": "Total_Catch"
            })
            # Assumes the API labels quarters as "Quarter N" - TODO confirm.
            df_clean["Quarter"] = df_clean["Quarter"].str.replace("Quarter ", "Q", regex=False)
            df_clean["Quarter"] = pd.Categorical(df_clean["Quarter"], categories=quarter_order, ordered=True)
            df_clean = df_clean[["Year", "Quarter", "Unit", "Total_Catch"]].copy()

            # Validate before casting: unknown quarter labels become NaN in the
            # categorical, and a NaN Year would make the int cast fail with a
            # less helpful message.
            if df_clean.isnull().any().any():
                raise ValueError("New Total Catch data contains NaN values: " + str(df_clean.head()))
            df_clean["Year"] = df_clean["Year"].astype(int)

            # Insert new data into table
            df_clean.to_sql('total_catch', conn, if_exists='append', index=False)
            conn.commit()
            print("Updated total_catch table with new data")
            display(df_clean.head())
    elif response is not None:
        print(f"Error fetching Total Catch data: {response.status_code} - {response.text}")

In [None]:
# Update Fish Exports Data
print("Updating Fish Exports Data...")

# Defined here so this cell does not depend on variables that another cell
# only creates on its success path.
quarter_mapping = {"Q1": "1", "Q2": "2", "Q3": "3", "Q4": "4"}
quarter_order = ["Q1", "Q2", "Q3", "Q4"]

last_year, last_quarter = get_latest_period("fish_exports", conn)
if last_year is None:
    print("No data found in fish_exports table. Run setup_dataset.ipynb first.")
else:
    # Bind the result to `quarters_needed`: assigning it to `quarters_to_fetch`
    # would overwrite the helper function and break every later call to it.
    years_to_fetch, quarters_needed = quarters_to_fetch(last_year, last_quarter)
    print(f"Last period in fish_exports: {last_year} {last_quarter}")
    print(f"Fetching data for years: {years_to_fetch}, quarters: {quarters_needed}")

    # Convert quarters to API format
    api_quarters = [quarter_mapping[q] for q in quarters_needed]

    # Fetch new data
    url = "https://bank.stat.gl:443/api/v1/en/Greenland/IE/IE10/IEX2PROD.px"
    query = {
        "query": [
            {"code": "branch", "selection": {"filter": "item", "values": ["46"]}},
            {"code": "quarter", "selection": {"filter": "item", "values": api_quarters}},
            {"code": "time", "selection": {"filter": "item", "values": [str(y) for y in years_to_fetch]}}
        ],
        "response": {"format": "json-stat2"}
    }
    try:
        # A timeout keeps the cell from hanging forever on a stalled connection.
        response = requests.post(url, json=query, timeout=60)
        response.raise_for_status()
        dataset = pyjstat.Dataset.read(response.text)
        df_fish_exports = dataset.write('dataframe')
        print("New Fish Exports data retrieved!")

        # Clean DataFrame. `rename` ignores labels that are absent, so the same
        # mapping works whether the API column is "quarter" or already "Quarter".
        column_mapping = {
            "time": "Year",
            "quarter": "Quarter",
            "value": "Fish_Export_Value_Million_Kr"
        }
        df_fish_clean = df_fish_exports.rename(columns=column_mapping)

        if "Quarter" not in df_fish_clean.columns:
            raise ValueError("Quarter column missing after renaming.")
        if df_fish_clean["Quarter"].isnull().any():
            raise ValueError("Quarter column contains NaN values: " + str(df_fish_clean["Quarter"].head()))

        df_fish_clean["Quarter"] = df_fish_clean["Quarter"].str.replace(r"[Qq]uarter ", "Q", regex=True)
        df_fish_clean["Quarter"] = pd.Categorical(df_fish_clean["Quarter"], categories=quarter_order, ordered=True)
        df_fish_clean = df_fish_clean[["Year", "Quarter", "Fish_Export_Value_Million_Kr"]].copy()

        # Validate BEFORE the integer casts below: `astype(int)` raises on NaN,
        # which would otherwise pre-empt this clearer error message.
        if df_fish_clean.isnull().any().any():
            raise ValueError("New Fish Exports data contains NaN values.")

        df_fish_clean["Year"] = df_fish_clean["Year"].astype(int)

        # Convert export value to million Kr and round
        # (assumes the API reports values in Kr - TODO confirm the source unit).
        df_fish_clean["Fish_Export_Value_Million_Kr"] = (
            (df_fish_clean["Fish_Export_Value_Million_Kr"] / 1e6).round(0).astype(int)
        )

        # Sort after Year is numeric so the ordering is chronological.
        df_fish_clean = df_fish_clean.sort_values(by=["Year", "Quarter"]).reset_index(drop=True)

        # Insert new data into table
        df_fish_clean.to_sql('fish_exports', conn, if_exists='append', index=False)
        conn.commit()
        print("Updated fish_exports table with new data")
        display(df_fish_clean.head())
    except requests.exceptions.RequestException as e:
        print(f"Error fetching Fish Exports data: {e}")
In [None]:
# Update Ice Melt and SST Data
print("Updating Ice Melt and SST Data...")
last_year, last_quarter = get_latest_period("ice_melt_sst", conn)
if last_year is None:
    print("No data found in ice_melt_sst table. Run setup_dataset.ipynb first.")
else:
    # Bind the result to `quarters_needed`: assigning it to `quarters_to_fetch`
    # would overwrite the helper function and break every later call to it.
    years_to_fetch, quarters_needed = quarters_to_fetch(last_year, last_quarter)
    print(f"Last period in ice_melt_sst: {last_year} {last_quarter}")
    print(f"Fetching data for years: {years_to_fetch}, quarters: {quarters_needed}")

    try:
        # `quarters_to_fetch` returns exactly one year: the year being updated.
        start_year = years_to_fetch[0]
        end_year = start_year + 1
        start_date = f"{start_year}-01-01"
        end_date = f"{end_year}-01-01"
        url_template = "https://psl.noaa.gov/thredds/dodsC/Datasets/ncep.reanalysis/Monthly/gaussian_grid/air.sfc.mon.mean.nc"
        west_lon_range = slice(300, 310)
        east_lon_range = slice(350, 360)
        # Descending bounds: assumes the dataset stores latitude north-to-south
        # - TODO confirm.
        greenland_lat_range = slice(85, 60)

        # NOTE(review): the variable read is 'air' from an "air.sfc" file, i.e.
        # it looks like surface air temperature that is stored under the name
        # "SST" - confirm this is the intended proxy.
        # The context manager closes the remote dataset handle once the values
        # have been materialised into DataFrames.
        with xr.open_dataset(url_template) as ds:
            air = ds['air'].sel(time=slice(start_date, end_date))
            sst_west = air.sel(lon=west_lon_range, lat=greenland_lat_range).mean(dim=['lat', 'lon'])
            sst_east = air.sel(lon=east_lon_range, lat=greenland_lat_range).mean(dim=['lat', 'lon'])
            sst_west_df = sst_west.to_dataframe().reset_index()
            sst_east_df = sst_east.to_dataframe().reset_index()

        sst_west_df['region'] = 'West'
        sst_east_df['region'] = 'East'
        sst_df = pd.concat([sst_west_df, sst_east_df], ignore_index=True)
        sst_df = sst_df.rename(columns={'time': 'Date', 'air': 'SST'})

        # Ice Melt Rate (simplified placeholder): a fixed 0.1 multiple of SST.
        df_melt_sst = sst_df[['Date', 'region', 'SST']].copy()
        df_melt_sst['Ice_Melt_Rate'] = df_melt_sst['SST'] * 0.1
        df_melt_sst['Year'] = df_melt_sst['Date'].dt.year
        df_melt_sst['Month'] = df_melt_sst['Date'].dt.month
        df_melt_sst['Quarter'] = df_melt_sst['Month'].apply(lambda m: f"Q{(m-1)//3 + 1}")

        # Quarterly means per region, plus how many distinct months fed each mean.
        quarterly = (
            df_melt_sst
            .groupby(['Year', 'Quarter', 'region'])
            .agg(
                SST=('SST', 'mean'),
                Ice_Melt_Rate=('Ice_Melt_Rate', 'mean'),
                n_months=('Month', 'nunique'),
            )
            .reset_index()
        )

        # Filter for the quarters we need. Only complete quarters (3 months)
        # are kept: a partial quarter would be appended as if it were final and
        # never refreshed, because the next run starts after the latest stored
        # period.
        quarterly = quarterly[
            (quarterly['n_months'] == 3) &
            (quarterly['Year'].isin(years_to_fetch)) &
            (quarterly['Quarter'].isin(quarters_needed))
        ]

        if quarterly.empty:
            print("No new Ice Melt/SST data available for the specified periods.")
        else:
            # `values=` is the correct keyword (`value=` raises TypeError).
            wide = quarterly.pivot(index=['Year', 'Quarter'], columns='region', values=['SST', 'Ice_Melt_Rate'])
            # Name columns from the (measure, region) pairs themselves instead
            # of assigning a hard-coded list by position, so SST and
            # Ice_Melt_Rate cannot be swapped by the pivot's column order.
            # NOTE(review): confirm setup_dataset.ipynb labels the existing
            # rows the same way.
            wide.columns = [f"{measure}_{region}" for measure, region in wide.columns]
            df_melt_sst_clean = wide.reset_index()
            df_melt_sst_clean = df_melt_sst_clean[['Year', 'Quarter', 'Ice_Melt_Rate_East', 'Ice_Melt_Rate_West', 'SST_East', 'SST_West']].copy()
            quarter_order = ["Q1", "Q2", "Q3", "Q4"]
            df_melt_sst_clean["Quarter"] = pd.Categorical(df_melt_sst_clean["Quarter"], categories=quarter_order, ordered=True)
            df_melt_sst_clean = df_melt_sst_clean.sort_values(by=["Year", "Quarter"]).reset_index(drop=True)

            # Insert new data into table
            df_melt_sst_clean.to_sql('ice_melt_sst', conn, if_exists='append', index=False)
            conn.commit()
            print("Updated ice_melt_sst table with new data")
            display(df_melt_sst_clean.head())
    except Exception as e:
        # Best-effort update: report the failure and let the notebook continue.
        print(f"Error fetching Ice Melt/SST data: {e}")

In [None]:
# Update Foreign Catch Data
print("Updating Foreign Catch Data...")

# Defined here so this cell does not depend on variables that another cell
# only creates on its success path.
quarter_mapping = {"Q1": "1", "Q2": "2", "Q3": "3", "Q4": "4"}
quarter_order = ["Q1", "Q2", "Q3", "Q4"]

last_year, last_quarter = get_latest_period("foreign_catch", conn)
if last_year is None:
    print("No data found in foreign_catch table. Run setup_dataset.ipynb first.")
else:
    # Bind the result to `quarters_needed`: assigning it to `quarters_to_fetch`
    # would overwrite the helper function and break every later call to it.
    years_to_fetch, quarters_needed = quarters_to_fetch(last_year, last_quarter)
    print(f"Last period in foreign_catch: {last_year} {last_quarter}")
    print(f"Fetching data for years: {years_to_fetch}, quarters: {quarters_needed}")

    # Convert quarters to API format
    api_quarters = [quarter_mapping[q] for q in quarters_needed]

    # Fetch new data
    url = "https://bank.stat.gl:443/api/v1/en/Greenland/FI/FI10/FIX008.px"
    query = {
        "query": [
            {"code": "nation", "selection": {"filter": "item", "values": ["FOK"]}},
            {"code": "unit", "selection": {"filter": "item", "values": ["Ton"]}},
            {"code": "time", "selection": {"filter": "item", "values": [str(y) for y in years_to_fetch]}},
            {"code": "quarter", "selection": {"filter": "item", "values": api_quarters}}
        ],
        "response": {"format": "json-stat2"}
    }

    # A timeout keeps the cell from hanging forever on a stalled connection;
    # transport errors are reported the same way as HTTP errors.
    response = None
    try:
        response = requests.post(url, json=query, timeout=60)
    except requests.exceptions.RequestException as e:
        print(f"Error fetching Foreign Catch data: {e}")

    if response is not None and response.status_code == 200:
        dataset = pyjstat.Dataset.read(response.text)
        df = dataset.write('dataframe')
        print("New Foreign Catch data retrieved!")

        if df.empty:
            print("No new Foreign Catch data available for the specified periods.")
        else:
            # Clean DataFrame (build a new frame instead of mutating in place)
            df_foreign_clean = df.drop(columns=['nation']).rename(columns={
                "time": "Year",
                "quarter": "Quarter",
                "unit": "Unit",
                "value": "Foreign_Catch"
            })
            # Assumes the API labels quarters as "Quarter N" - TODO confirm.
            df_foreign_clean["Quarter"] = df_foreign_clean["Quarter"].str.replace("Quarter ", "Q", regex=False)
            df_foreign_clean["Quarter"] = pd.Categorical(df_foreign_clean["Quarter"], categories=quarter_order, ordered=True)
            df_foreign_clean = df_foreign_clean[["Year", "Quarter", "Unit", "Foreign_Catch"]].copy()

            # Validate before casting: unknown quarter labels become NaN in the
            # categorical, and a NaN Year would make the int cast fail with a
            # less helpful message.
            if df_foreign_clean.isnull().any().any():
                raise ValueError("New Foreign Catch data contains NaN values: " + str(df_foreign_clean.head()))
            df_foreign_clean["Year"] = df_foreign_clean["Year"].astype(int)

            # Insert new data into table
            df_foreign_clean.to_sql('foreign_catch', conn, if_exists='append', index=False)
            conn.commit()
            print("Updated foreign_catch table with new data")
            display(df_foreign_clean.head())
    elif response is not None:
        print(f"Error fetching Foreign Catch data: {response.status_code} - {response.text}")

In [None]:
# Close the database connection.
# This closes the `conn` object that the update cells above pass to
# `get_latest_period` and `DataFrame.to_sql`; presumably those cells cannot be
# re-run afterwards without re-running the connection cell first.
conn.close()
print("Database connection closed.")