## Initial test plotting of sea ice fraction (ice extent) and water temperature in Northern Alaska from the MERRA-2 dataset

### data from source: https://disc.gsfc.nasa.gov/datasets/M2TMNXOCN_5.12.4/summary 

In the following code, you will see the plots evolve from initial data exploration to the final concept.

In [17]:
import xarray as xr
import plotly.graph_objects as go
import os
import pandas as pd

# Build year x month tables of the regional max/min sea ice fraction and water
# temperature, then draw one filled max/min band per year against month.

# Base directory where the NetCDF files are stored
data_dir = '/Users/casey/Desktop/blue-dot/data/raw'

# Months (1 through 12)
months = list(range(1, 13))

# Rows are years, columns are months. Float dtype so cells with no file stay NaN
# instead of holding arbitrary Python objects.
year_index = range(1980, 2024)
df_max_sea_ice = pd.DataFrame(columns=months, index=year_index, dtype=float)
df_min_sea_ice = pd.DataFrame(columns=months, index=year_index, dtype=float)
df_max_water_temp = pd.DataFrame(columns=months, index=year_index, dtype=float)
df_min_water_temp = pd.DataFrame(columns=months, index=year_index, dtype=float)

# Latitude and longitude bounds for Northern Alaska
lat_bounds = [64, 71]
lon_bounds = [-168, -141]

# Process each file in the directory
for file_name in os.listdir(data_dir):
    if not file_name.endswith('.nc4'):
        continue

    # The second-to-last dot-separated token is assumed to be 'YYYYMM'
    date_part = file_name.split('.')[-2]
    year = int(date_part[:4])
    month = int(date_part[4:6])

    # The context manager closes the file even if a lookup or selection raises
    with xr.open_dataset(os.path.join(data_dir, file_name)) as ds:
        # Extract 'FRSEAICE' and 'Var_TSKINWTR' for Northern Alaska region
        sea_ice = ds['FRSEAICE'].sel(lat=slice(*lat_bounds), lon=slice(*lon_bounds))
        # NOTE(review): in MERRA-2 monthly collections the 'Var_' prefix presumably
        # denotes the variance of the field rather than the field itself; confirm
        # whether 'TSKINWTR' was intended, since the axis below is labelled in °C.
        water_temp = ds['Var_TSKINWTR'].sel(lat=slice(*lat_bounds), lon=slice(*lon_bounds))

        # Reduce over the whole selection and store plain floats
        df_max_sea_ice.at[year, month] = float(sea_ice.max())
        df_min_sea_ice.at[year, month] = float(sea_ice.min())
        df_max_water_temp.at[year, month] = float(water_temp.max())
        df_min_water_temp.at[year, month] = float(water_temp.min())

# Create a figure for plotting
fig = go.Figure()

# Add one max/min band per year. Each row holds that year's 12 monthly values,
# so y lines up with x=months (the columns hold 44 yearly values and do not).
for year in df_max_sea_ice.index:
    fig.add_trace(go.Scatter(x=months, y=df_max_sea_ice.loc[year], fill=None, mode='lines', line_color='blue', showlegend=False))
    fig.add_trace(go.Scatter(x=months, y=df_min_sea_ice.loc[year], fill='tonexty', mode='lines', line_color='blue', fillcolor='rgba(0, 0, 255, 0.3)', showlegend=False))
    fig.add_trace(go.Scatter(x=months, y=df_max_water_temp.loc[year], fill=None, mode='lines', line_color='red', yaxis='y2', showlegend=False))
    fig.add_trace(go.Scatter(x=months, y=df_min_water_temp.loc[year], fill='tonexty', mode='lines', line_color='red', fillcolor='rgba(255, 0, 0, 0.3)', yaxis='y2', showlegend=False))

# Update layout: sea ice on the left axis, water temperature on a right-hand overlay
fig.update_layout(
    title='Max and Min Sea Ice Fraction and Water Temperature in Northern Alaska (1980-2023)',
    xaxis=dict(title='Month', tickmode='array', tickvals=months),
    yaxis=dict(title='Sea Ice Fraction'),
    yaxis2=dict(title='Water Temperature (°C)', overlaying='y', side='right'),
    template='plotly_dark'
)

fig.show()


### plotting monthly sea ice frac

In [5]:
import xarray as xr
import plotly.graph_objects as go
import os
import pandas as pd

# Plot the regional mean sea ice fraction against month, one line per year.

# Base directory where the NetCDF files are stored
data_dir = '/Users/casey/Desktop/blue-dot/data/raw'

# Months (1 through 12)
months = list(range(1, 13))

# Rows are years, columns are months; float dtype keeps missing months as NaN
df_sea_ice = pd.DataFrame(columns=months, index=range(1980, 2024), dtype=float)

# Latitude and longitude bounds for Northern Alaska
lat_bounds = [64, 71]
lon_bounds = [-168, -141]

# Process each file in the directory
for file_name in os.listdir(data_dir):
    if not file_name.endswith('.nc4'):
        continue

    # The second-to-last dot-separated token is assumed to be 'YYYYMM'
    date_part = file_name.split('.')[-2]
    year = int(date_part[:4])
    month = int(date_part[4:6])

    # The context manager closes the file even if a lookup or selection raises
    with xr.open_dataset(os.path.join(data_dir, file_name)) as ds:
        # Extract 'FRSEAICE' for Northern Alaska region
        sea_ice = ds['FRSEAICE'].sel(lat=slice(*lat_bounds), lon=slice(*lon_bounds))

        # Mean over the whole selection, stored as a plain float
        df_sea_ice.at[year, month] = float(sea_ice.mean())

# Transpose so each column is a year holding its 12 monthly values
df_sea_ice = df_sea_ice.T

# Create a figure for plotting
fig = go.Figure()

# Add lines for each year
for year in df_sea_ice.columns:
    fig.add_trace(go.Scatter(x=months, y=df_sea_ice[year], mode='lines', name=f'Year {year}'))

# Update layout
fig.update_layout(
    title='Monthly Sea Ice Fraction in Northern Alaska (1980-2023)',
    xaxis=dict(title='Month', tickmode='array', tickvals=months),
    yaxis=dict(title='Average Sea Ice Fraction'),
)

fig.show()


### Plotting all water temps with month as the x-axis

In [20]:
import xarray as xr
import plotly.graph_objects as go
import os
import pandas as pd

# Plot the regional mean of 'Var_TSKINWTR' against month, one line per year.

# Base directory where the NetCDF files are stored
data_dir = '/Users/casey/Desktop/blue-dot/data/raw'

# Months (1 through 12)
months = list(range(1, 13))

# Rows are years, columns are months; float dtype keeps missing months as NaN
df_water_temp = pd.DataFrame(columns=months, index=range(1980, 2024), dtype=float)

# Latitude and longitude bounds for Northern Alaska
lat_bounds = [64, 71]
lon_bounds = [-168, -141]

# Process each file in the directory
for file_name in os.listdir(data_dir):
    if not file_name.endswith('.nc4'):
        continue

    # The second-to-last dot-separated token is assumed to be 'YYYYMM'
    date_part = file_name.split('.')[-2]
    year = int(date_part[:4])
    month = int(date_part[4:6])

    # The context manager closes the file even if a lookup or selection raises
    with xr.open_dataset(os.path.join(data_dir, file_name)) as ds:
        # NOTE(review): in MERRA-2 monthly collections the 'Var_' prefix presumably
        # denotes the variance of the field rather than the field itself; confirm
        # whether 'TSKINWTR' was intended, since the axis below is labelled in °C.
        water_temp = ds['Var_TSKINWTR'].sel(lat=slice(*lat_bounds), lon=slice(*lon_bounds))

        # Mean over the whole selection, stored as a plain float
        df_water_temp.at[year, month] = float(water_temp.mean())

# Transpose so each column is a year holding its 12 monthly values
df_water_temp = df_water_temp.T

# Create a figure for plotting
fig = go.Figure()

# Add lines for each year
for year in df_water_temp.columns:
    fig.add_trace(go.Scatter(x=months, y=df_water_temp[year], mode='lines', name=f'Year {year}'))

# Update layout
fig.update_layout(
    title='Monthly Water Temperature in Northern Alaska (1980-2023)',
    xaxis=dict(title='Month', tickmode='array', tickvals=months),
    yaxis=dict(title='Average Water Temperature (°C)'),
)

fig.show()

## From plot above, something weird is going on in 1980

### Plotting without 1980

In [21]:
import xarray as xr
import plotly.graph_objects as go
import os
import pandas as pd

# Same monthly water-temperature plot as before, with every 1980 file skipped.

# Base directory where the NetCDF files are stored
data_dir = '/Users/casey/Desktop/blue-dot/data/raw'

# Months (1 through 12)
months = list(range(1, 13))

# Rows are years (1981 onward), columns are months; float dtype keeps gaps as NaN
df_water_temp = pd.DataFrame(columns=months, index=range(1981, 2024), dtype=float)

# Latitude and longitude bounds for Northern Alaska
lat_bounds = [64, 71]
lon_bounds = [-168, -141]

# Process each file in the directory
for file_name in os.listdir(data_dir):
    if not file_name.endswith('.nc4'):
        continue

    # The second-to-last dot-separated token is assumed to be 'YYYYMM'
    date_part = file_name.split('.')[-2]
    year = int(date_part[:4])
    month = int(date_part[4:6])

    # Skip the year 1980 before opening the file at all
    if year == 1980:
        continue

    # The context manager closes the file even if a lookup or selection raises
    with xr.open_dataset(os.path.join(data_dir, file_name)) as ds:
        # NOTE(review): in MERRA-2 monthly collections the 'Var_' prefix presumably
        # denotes the variance of the field rather than the field itself; confirm
        # whether 'TSKINWTR' was intended, since the axis below is labelled in °C.
        water_temp = ds['Var_TSKINWTR'].sel(lat=slice(*lat_bounds), lon=slice(*lon_bounds))

        # Mean over the whole selection, stored as a plain float
        df_water_temp.at[year, month] = float(water_temp.mean())

# Transpose so each column is a year holding its 12 monthly values
df_water_temp = df_water_temp.T

# Create a figure for plotting
fig = go.Figure()

# Add lines for each year (excluding 1980)
for year in df_water_temp.columns:
    fig.add_trace(go.Scatter(x=months, y=df_water_temp[year], mode='lines', name=f'Year {year}'))

# Update layout
fig.update_layout(
    title='Monthly Water Temperature in Northern Alaska (1981-2023)',
    xaxis=dict(title='Month', tickmode='array', tickvals=months),
    yaxis=dict(title='Average Water Temperature (°C)'),
)

fig.show()


### Changing viewpoint on plot, make x-axis year and lines months

In [11]:
import xarray as xr
import plotly.graph_objects as go
import os
import pandas as pd

# Plot the regional mean of 'Var_TSKINWTR' against year, one line per calendar month.

# Base directory where the NetCDF files are stored
data_dir = '/Users/casey/Desktop/blue-dot/data/raw'

# Years range from 1981 to 2023
years = list(range(1981, 2024))

# Month names for labeling
month_names = ['January', 'February', 'March', 'April', 'May', 'June', 
               'July', 'August', 'September', 'October', 'November', 'December']

# Rows are month names, columns are years; float dtype keeps gaps as NaN
df_water_temp = pd.DataFrame(index=month_names, columns=years, dtype=float)

# Latitude and longitude bounds for Northern Alaska
lat_bounds = [64, 71]
lon_bounds = [-168, -141]

# Process each file in the directory
for file_name in os.listdir(data_dir):
    if not file_name.endswith('.nc4'):
        continue

    # The second-to-last dot-separated token is assumed to be 'YYYYMM'
    date_part = file_name.split('.')[-2]
    year = int(date_part[:4])
    month = int(date_part[4:6])

    # Skip the year 1980 before opening the file at all
    if year == 1980:
        continue

    # The context manager closes the file even if a lookup or selection raises
    with xr.open_dataset(os.path.join(data_dir, file_name)) as ds:
        # NOTE(review): in MERRA-2 monthly collections the 'Var_' prefix presumably
        # denotes the variance of the field rather than the field itself; confirm
        # whether 'TSKINWTR' was intended, since the axis below is labelled in °C.
        water_temp = ds['Var_TSKINWTR'].sel(lat=slice(*lat_bounds), lon=slice(*lon_bounds))

        # Month numbers are 1-based, the name list is 0-based
        df_water_temp.at[month_names[month - 1], year] = float(water_temp.mean())

# Create a figure for plotting
fig = go.Figure()

# Add lines for each month; select the plotted years explicitly so x and y
# stay the same length even if a file outside 1981-2023 added a column
for month in df_water_temp.index:
    fig.add_trace(go.Scatter(x=years, y=df_water_temp.loc[month, years], mode='lines', name=month))

# Update layout
fig.update_layout(
    title='Yearly Water Temperature in Northern Alaska (1981-2023)',
    xaxis=dict(title='Year', tickmode='array', tickvals=years),
    yaxis=dict(title='Average Water Temperature (°C)'),
)

fig.show()


## Linear regression and visualization for temps

### Curious what a linear regression result would be with just year as an independent variable

In [13]:
import xarray as xr
import plotly.graph_objects as go
import os
import pandas as pd
import numpy as np

# Plot per-month water-temperature series against year and overlay a degree-1
# least-squares fit of the yearly mean (mean over the available months) on year.

# Base directory where the NetCDF files are stored
data_dir = '/Users/casey/Desktop/blue-dot/data/raw'

# Years range from 1981 to 2023
years = list(range(1981, 2024))

# Month names for labeling
month_names = ['January', 'February', 'March', 'April', 'May', 'June', 
               'July', 'August', 'September', 'October', 'November', 'December']

# Rows are month names, columns are years; float dtype keeps gaps as NaN
df_water_temp = pd.DataFrame(index=month_names, columns=years, dtype=float)

# Latitude and longitude bounds for Northern Alaska
lat_bounds = [64, 71]
lon_bounds = [-168, -141]

# Process each file in the directory
for file_name in os.listdir(data_dir):
    if not file_name.endswith('.nc4'):
        continue

    # The second-to-last dot-separated token is assumed to be 'YYYYMM'
    date_part = file_name.split('.')[-2]
    year = int(date_part[:4])
    month = int(date_part[4:6])

    # Skip the year 1980 before opening the file at all
    if year == 1980:
        continue

    # The context manager closes the file even if a lookup or selection raises
    with xr.open_dataset(os.path.join(data_dir, file_name)) as ds:
        # NOTE(review): in MERRA-2 monthly collections the 'Var_' prefix presumably
        # denotes the variance of the field rather than the field itself; confirm
        # whether 'TSKINWTR' was intended, since the axis below is labelled in °C.
        water_temp = ds['Var_TSKINWTR'].sel(lat=slice(*lat_bounds), lon=slice(*lon_bounds))

        # Month numbers are 1-based, the name list is 0-based
        df_water_temp.at[month_names[month - 1], year] = float(water_temp.mean())

# Yearly mean over whatever months are present. Restrict to the plotted years
# so the vector lines up with `years` even if other years were added above.
yearly_avg_temp = df_water_temp[years].mean()

# Fit only the years that have data: NaN inputs would make the whole fit NaN
has_data = yearly_avg_temp.notna().to_numpy()
years_arr = np.asarray(years, dtype=float)
fit_available = has_data.sum() >= 2
if fit_available:
    slope, intercept = np.polyfit(years_arr[has_data], yearly_avg_temp.to_numpy()[has_data], 1)
    line = np.poly1d([slope, intercept])

# Create a figure for plotting
fig = go.Figure()

# Add lines for each month
for month in df_water_temp.index:
    fig.add_trace(go.Scatter(x=years, y=df_water_temp.loc[month, years], mode='lines', name=month))

# Add line of best fit and its equation, when a fit could be computed
annotations = []
if fit_available:
    fig.add_trace(go.Scatter(x=years, y=line(years_arr), mode='lines', name='Best Fit Line', line=dict(color='black')))
    annotations.append(
        dict(
            x=years[-1],
            y=line(years[-1]),
            xanchor='left',
            text=f'Best Fit: y = {slope:.2f}x + {intercept:.2f}',
            showarrow=False
        )
    )

# Update layout
fig.update_layout(
    title='Yearly Water Temperature in Northern Alaska (1981-2023) with Best Fit Line',
    xaxis=dict(title='Year', tickmode='array', tickvals=years),
    yaxis=dict(title='Average Water Temperature (°C)'),
    annotations=annotations
)

fig.show()


## Visualizing sea ice fraction parameter by 25th and 75th percentiles

In [15]:
import xarray as xr
import plotly.graph_objects as go
import os
import pandas as pd
import numpy as np

# For each decade, plot the 25th-75th percentile band (per calendar month) of
# the regional mean sea ice fraction.

# Base directory where the NetCDF files are stored
data_dir = '/Users/casey/Desktop/blue-dot/data/raw'

# Months (1 through 12)
months = list(range(1, 13))

# Decade ranges (both ends inclusive)
decades = [(1980, 1990), (1991, 2000), (2001, 2010), (2011, 2023)]

# Latitude and longitude bounds for Northern Alaska
lat_bounds = [64, 71]
lon_bounds = [-168, -141]

# Regional mean keyed by the first day of each month; a repeated month overwrites
monthly_means = {}

# Process each file in the directory
for file_name in os.listdir(data_dir):
    if not file_name.endswith('.nc4'):
        continue

    # The second-to-last dot-separated token is assumed to be 'YYYYMM'
    date_part = file_name.split('.')[-2]
    year = int(date_part[:4])
    month = int(date_part[4:6])

    # The context manager closes the file even if a lookup or selection raises
    with xr.open_dataset(os.path.join(data_dir, file_name)) as ds:
        # Extract 'FRSEAICE' for Northern Alaska region
        sea_ice = ds['FRSEAICE'].sel(lat=slice(*lat_bounds), lon=slice(*lon_bounds))
        monthly_means[pd.Timestamp(year=year, month=month, day=1)] = float(sea_ice.mean())

# Build the frame once with an explicit DatetimeIndex so `.index.year` and
# `.index.month` work even when no files were found
df_sea_ice = pd.DataFrame(
    {'Sea Ice Extent': list(monthly_means.values())},
    index=pd.DatetimeIndex(list(monthly_means)),
    dtype=float,
).sort_index()

# Create a figure for plotting
fig = go.Figure()

# Plot for each decade
for start_year, end_year in decades:
    # Select the data for the decade
    in_decade = (df_sea_ice.index.year >= start_year) & (df_sea_ice.index.year <= end_year)
    decade_data = df_sea_ice[in_decade]

    # Calculate the 25th and 75th percentiles for each month
    by_month = decade_data.groupby(decade_data.index.month)
    lower_bound = by_month.quantile(0.25)
    upper_bound = by_month.quantile(0.75)

    # The upper trace fills down to the lower trace added just before it
    fig.add_trace(go.Scatter(x=lower_bound.index, y=lower_bound['Sea Ice Extent'], mode='lines', name=f'{start_year}-{end_year} 25th Percentile'))
    fig.add_trace(go.Scatter(x=upper_bound.index, y=upper_bound['Sea Ice Extent'], mode='lines', name=f'{start_year}-{end_year} 75th Percentile', fill='tonexty'))

# Update layout
fig.update_layout(
    title='Sea Ice Extent in Northern Alaska by Decade (1980-2023)',
    xaxis=dict(title='Month', tickmode='array', tickvals=months),
    yaxis=dict(title='Average Sea Ice Extent'),
    template='plotly_dark'
)

fig.show()


### Excluding 1980 to stay consistent with temp parameter

In [23]:
import xarray as xr
import plotly.graph_objects as go
import os
import pandas as pd
import numpy as np

# Decadal 25th-75th percentile bands of regional mean sea ice fraction, with
# the first decade starting in 1981 so 1980 falls outside every band.

# Base directory where the NetCDF files are stored
data_dir = '/Users/casey/Desktop/blue-dot/data/raw'

# Months (1 through 12)
months = list(range(1, 13))

# Decade ranges (both ends inclusive)
decades = [(1981, 1990), (1991, 2000), (2001, 2010), (2011, 2023)]

# Latitude and longitude bounds for Northern Alaska
lat_bounds = [64, 71]
lon_bounds = [-168, -141]

# Regional mean keyed by the first day of each month; a repeated month overwrites
monthly_means = {}

# Process each file in the directory
for file_name in os.listdir(data_dir):
    if not file_name.endswith('.nc4'):
        continue

    # The second-to-last dot-separated token is assumed to be 'YYYYMM'
    date_part = file_name.split('.')[-2]
    year = int(date_part[:4])
    month = int(date_part[4:6])

    # The context manager closes the file even if a lookup or selection raises
    with xr.open_dataset(os.path.join(data_dir, file_name)) as ds:
        # Extract 'FRSEAICE' for Northern Alaska region
        sea_ice = ds['FRSEAICE'].sel(lat=slice(*lat_bounds), lon=slice(*lon_bounds))
        monthly_means[pd.Timestamp(year=year, month=month, day=1)] = float(sea_ice.mean())

# Build the frame once with an explicit DatetimeIndex so `.index.year` and
# `.index.month` work even when no files were found
df_sea_ice = pd.DataFrame(
    {'Sea Ice Extent': list(monthly_means.values())},
    index=pd.DatetimeIndex(list(monthly_means)),
    dtype=float,
).sort_index()

# Create a figure for plotting
fig = go.Figure()

# Plot for each decade
for start_year, end_year in decades:
    # Select the data for the decade
    in_decade = (df_sea_ice.index.year >= start_year) & (df_sea_ice.index.year <= end_year)
    decade_data = df_sea_ice[in_decade]

    # Calculate the 25th and 75th percentiles for each month
    by_month = decade_data.groupby(decade_data.index.month)
    lower_bound = by_month.quantile(0.25)
    upper_bound = by_month.quantile(0.75)

    # The upper trace fills down to the lower trace added just before it
    fig.add_trace(go.Scatter(x=lower_bound.index, y=lower_bound['Sea Ice Extent'], mode='lines', name=f'{start_year}-{end_year} 25th Percentile'))
    fig.add_trace(go.Scatter(x=upper_bound.index, y=upper_bound['Sea Ice Extent'], mode='lines', name=f'{start_year}-{end_year} 75th Percentile', fill='tonexty'))

# Update layout
fig.update_layout(
    title='Sea Ice Extent in Northern Alaska by Decade (1981-2023)',
    xaxis=dict(title='Month', tickmode='array', tickvals=months, showgrid=False),
    yaxis=dict(title='Average Sea Ice Extent', showgrid=False),
    plot_bgcolor='rgba(0,0,0,0)',  # Transparent background
    paper_bgcolor='rgba(0,0,0,0)'
)


fig.show()

### Revamping plot with custom colors

In [24]:
import xarray as xr
import plotly.graph_objects as go
import os
import pandas as pd

# Decadal 25th-75th percentile bands of regional mean sea ice fraction with a
# custom fill colour per decade, plus the individual 2023 series on top.

# Base directory where the NetCDF files are stored
data_dir = '/Users/casey/Desktop/blue-dot/data/raw'

# Months (1 through 12)
months = list(range(1, 13))

# Decade ranges (both ends inclusive); 2023 is drawn separately below
decades = [(1981, 1990), (1991, 2000), (2001, 2010), (2011, 2022)]

# Latitude and longitude bounds for Northern Alaska
lat_bounds = [64, 71]
lon_bounds = [-168, -141]

# Regional mean keyed by the first day of each month; a repeated month overwrites
monthly_means = {}

# Process each file in the directory
for file_name in os.listdir(data_dir):
    if not file_name.endswith('.nc4'):
        continue

    # The second-to-last dot-separated token is assumed to be 'YYYYMM'
    date_part = file_name.split('.')[-2]
    year = int(date_part[:4])
    month = int(date_part[4:6])

    # The context manager closes the file even if a lookup or selection raises
    with xr.open_dataset(os.path.join(data_dir, file_name)) as ds:
        # Extract 'FRSEAICE' for Northern Alaska region
        sea_ice = ds['FRSEAICE'].sel(lat=slice(*lat_bounds), lon=slice(*lon_bounds))
        monthly_means[pd.Timestamp(year=year, month=month, day=1)] = float(sea_ice.mean())

# Build the frame once with an explicit DatetimeIndex so `.index.year` and
# `.index.month` work even when no files were found
df_sea_ice = pd.DataFrame(
    {'Sea Ice Extent': list(monthly_means.values())},
    index=pd.DatetimeIndex(list(monthly_means)),
    dtype=float,
).sort_index()

# Create a figure for plotting
fig = go.Figure()

# Custom fill colors for each decade (paired with `decades` by position)
decade_fill_colors = ['#b1ffff', '#00ebeb', '#00c4c4', '#009d9d']

# Plot for each decade
for (start_year, end_year), color in zip(decades, decade_fill_colors):
    # Select the data for the decade
    in_decade = (df_sea_ice.index.year >= start_year) & (df_sea_ice.index.year <= end_year)
    decade_data = df_sea_ice[in_decade]

    # Calculate the 25th and 75th percentiles for each month
    by_month = decade_data.groupby(decade_data.index.month)
    lower_bound = by_month.quantile(0.25)
    upper_bound = by_month.quantile(0.75)

    # The upper trace fills down to the lower trace added just before it
    fig.add_trace(go.Scatter(x=lower_bound.index, y=lower_bound['Sea Ice Extent'], mode='lines', line=dict(width=1), name=f'{start_year}-{end_year} 25th Percentile', showlegend=False))
    fig.add_trace(go.Scatter(x=upper_bound.index, y=upper_bound['Sea Ice Extent'], mode='lines', line=dict(width=1), name=f'{start_year}-{end_year} 75th Percentile', fill='tonexty', fillcolor=color, showlegend=False))

# Plot the year 2023 as a single line, in month order
data_2023 = df_sea_ice[df_sea_ice.index.year == 2023].sort_index()
if not data_2023.empty:
    fig.add_trace(go.Scatter(x=data_2023.index.month, y=data_2023['Sea Ice Extent'], mode='lines', line=dict(color='#007676', width=2), name='Year 2023'))

# Update layout
fig.update_layout(
    title='Sea Ice Extent in Northern Alaska by Decade (1981-2023)',
    xaxis=dict(title='Month', tickmode='array', tickvals=months),
    yaxis=dict(title='Average Sea Ice Extent'),
    plot_bgcolor='rgba(255,255,255,1)',  # White background
    paper_bgcolor='rgba(255,255,255,1)'  # White background
)

fig.show()


## Fix colors and add legend

In [66]:
import xarray as xr
import plotly.graph_objects as go
import os
import pandas as pd

# Decadal 25th-75th percentile bands of regional mean sea ice fraction with
# matching line/fill colours, the 2023 series, and hand-built legend entries.

# Base directory where the NetCDF files are stored
data_dir = '/Users/casey/Desktop/blue-dot/data/raw'

# Months (1 through 12) and their corresponding names
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
months = list(range(1, 13))

# Decade ranges (both ends inclusive); 2023 is drawn separately below
decades = [(1981, 1990), (1991, 2000), (2001, 2010), (2011, 2022)]

# Custom line and fill colors for each decade (paired with `decades` by position)
decade_colors = ['#b1ffff', '#00ebeb', '#00c4c4', '#009d9d']

# Latitude and longitude bounds for Northern Alaska
lat_bounds = [64, 71]
lon_bounds = [-168, -141]

# Regional mean keyed by the first day of each month; a repeated month overwrites
monthly_means = {}

# Process each file in the directory
for file_name in os.listdir(data_dir):
    if not file_name.endswith('.nc4'):
        continue

    # The second-to-last dot-separated token is assumed to be 'YYYYMM'
    date_part = file_name.split('.')[-2]
    year = int(date_part[:4])
    month = int(date_part[4:6])

    # The context manager closes the file even if a lookup or selection raises
    with xr.open_dataset(os.path.join(data_dir, file_name)) as ds:
        sea_ice = ds['FRSEAICE'].sel(lat=slice(*lat_bounds), lon=slice(*lon_bounds))
        monthly_means[pd.Timestamp(year=year, month=month, day=1)] = float(sea_ice.mean())

# Build the frame once with an explicit DatetimeIndex so `.index.year` and
# `.index.month` work even when no files were found
df_sea_ice = pd.DataFrame(
    {'Sea Ice Extent': list(monthly_means.values())},
    index=pd.DatetimeIndex(list(monthly_means)),
    dtype=float,
).sort_index()

# Create a figure for plotting
fig = go.Figure()

# Plot for each decade
for (start_year, end_year), color in zip(decades, decade_colors):
    in_decade = (df_sea_ice.index.year >= start_year) & (df_sea_ice.index.year <= end_year)
    decade_data = df_sea_ice[in_decade]
    by_month = decade_data.groupby(decade_data.index.month)
    lower_bound = by_month.quantile(0.25)
    upper_bound = by_month.quantile(0.75)

    # Band traces are hidden from the legend; stand-in entries are added further down
    fig.add_trace(go.Scatter(x=lower_bound.index, y=lower_bound['Sea Ice Extent'], mode='lines', line=dict(color=color, width=1), name=f'{start_year}-{end_year} 25th Pctl', showlegend=False))
    fig.add_trace(go.Scatter(x=upper_bound.index, y=upper_bound['Sea Ice Extent'], mode='lines', line=dict(color=color, width=1), name=f'{start_year}-{end_year} 75th Pctl', fill='tonexty', fillcolor=color, showlegend=False))

# Plot the year 2023 as a single line, in month order
data_2023 = df_sea_ice[df_sea_ice.index.year == 2023].sort_index()
if not data_2023.empty:
    fig.add_trace(go.Scatter(x=data_2023.index.month, y=data_2023['Sea Ice Extent'], mode='lines', line=dict(color='#007676', width=2), name='Year 2023'))

# Update layout with custom legend
fig.update_layout(
    title='Sea Ice Extent in Northern Alaska by Decade (1981-2023)',
    xaxis=dict(title='Month', tickmode='array', tickvals=months, ticktext=month_names, showgrid=False, tickfont=dict(family='Georgia', size=12)),
    yaxis=dict(title='Average Sea Ice Extent', showgrid=False),
    plot_bgcolor='rgba(0,0,0,0)',  # Transparent background
    paper_bgcolor='rgba(0,0,0,0)',  # Transparent background
    legend=dict(
        title='Legend',
        orientation='v',
        x=1.05,
        y=1,
        xanchor='left',
        yanchor='top'
    )
)

# Manually add custom legend entries: data-less traces with thick lines so the
# legend shows a solid colour swatch per decade
for (start_year, end_year), color in zip(decades, decade_colors):
    fig.add_trace(go.Scatter(x=[None], y=[None], mode='lines', name=f'{start_year}-{end_year} 25th Pctl', line=dict(color=color, width=10)))
    fig.add_trace(go.Scatter(x=[None], y=[None], mode='lines', name=f'{start_year}-{end_year} 75th Pctl', line=dict(color=color, width=10)))

fig.show()

## delete later

In [67]:
# Restyle the figure built in the previous cell for export: large fonts, white
# background, no title and no legend. Relies on `fig` and `months` already existing.
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Axis settings spelled out as literals, one key per line
x_axis_style = {
    'title': 'Month',
    'tickmode': 'array',
    'tickvals': months,
    'showgrid': False,
    'ticktext': month_names,
    'title_font': {'size': 27},
    'tickfont': {'family': "Arial", 'size': 27},
}
y_axis_style = {
    'title': 'Sea Ice Extent',
    'showgrid': False,
    'title_font': {'size': 27},
    'tickfont': {'family': "Arial", 'size': 27},
}

# Legend placement is kept even though the legend itself is switched off
legend_style = {
    'title': 'Legend',
    'orientation': 'v',
    'x': 1.05,
    'y': 1,
    'xanchor': 'left',
    'yanchor': 'top',
}

fig.update_layout(
    title='',
    xaxis=x_axis_style,
    yaxis=y_axis_style,
    plot_bgcolor='rgba(255,255,255,1)',  # White background
    paper_bgcolor='rgba(255,255,255,1)',
    showlegend=False,
    legend=legend_style,
)
fig.show()

In [68]:
# Write the current `fig` to an SVG file at 1250x700.
# NOTE(review): plotly's write_image presumably needs a static-export engine
# (e.g. kaleido) installed, and the target directory must already exist; confirm.
fig.write_image('/Users/casey/Desktop/images/ice_jank.svg', width = 1250, height = 700)

### Export just shape of the data

In [27]:
# Strip the figure down to the traces only: hide the legend and both axes,
# make both backgrounds transparent and clear the title. This mutates `fig`
# in place, so the axes stay hidden for anything drawn from it afterwards.
fig.update_layout(showlegend=False, xaxis_visible=False, yaxis_visible=False, plot_bgcolor='rgba(0,0,0,0)', paper_bgcolor='rgba(0,0,0,0)', title='')
fig.show()
# Write the stripped-down figure to an SVG file at 1250x700
fig.write_image('/Users/casey/Desktop/images/sea_ice_no_axes.svg', width = 1250, height = 700)

### Splitting temp data by 25th and 75th percentile

In [27]:
import xarray as xr
import plotly.graph_objects as go
import os
import pandas as pd

# For each decade, plot the 25th-75th percentile band (per calendar month) of
# the regional mean of 'Var_TSKINWTR', plus the individual 2023 series.

# Base directory where the NetCDF files are stored
data_dir = '/Users/casey/Desktop/blue-dot/data/raw'

# Months (1 through 12)
months = list(range(1, 13))

# Decade ranges, starting from 1981 (both ends inclusive); 2023 is drawn separately
decades = [(1981, 1990), (1991, 2000), (2001, 2010), (2011, 2022)]

# Latitude and longitude bounds for Northern Alaska
lat_bounds = [64, 71]
lon_bounds = [-168, -141]

# Regional mean keyed by the first day of each month; a repeated month overwrites
monthly_means = {}

# Process each file in the directory
for file_name in os.listdir(data_dir):
    if not file_name.endswith('.nc4'):
        continue

    # The second-to-last dot-separated token is assumed to be 'YYYYMM'
    date_part = file_name.split('.')[-2]
    year = int(date_part[:4])
    month = int(date_part[4:6])

    # The context manager closes the file even if a lookup or selection raises
    with xr.open_dataset(os.path.join(data_dir, file_name)) as ds:
        # NOTE(review): in MERRA-2 monthly collections the 'Var_' prefix presumably
        # denotes the variance of the field rather than the field itself; confirm
        # whether 'TSKINWTR' was intended, since the axis below is labelled in °C.
        water_temp = ds['Var_TSKINWTR'].sel(lat=slice(*lat_bounds), lon=slice(*lon_bounds))
        monthly_means[pd.Timestamp(year=year, month=month, day=1)] = float(water_temp.mean())

# Build the frame once with an explicit DatetimeIndex so `.index.year` and
# `.index.month` work even when no files were found
df_water_temp = pd.DataFrame(
    {'Water Temperature': list(monthly_means.values())},
    index=pd.DatetimeIndex(list(monthly_means)),
    dtype=float,
).sort_index()

# Create a figure for plotting
fig = go.Figure()

# Plot for each decade
for start_year, end_year in decades:
    # Select the data for the decade
    in_decade = (df_water_temp.index.year >= start_year) & (df_water_temp.index.year <= end_year)
    decade_data = df_water_temp[in_decade]

    # Calculate the 25th and 75th percentiles for each month
    by_month = decade_data.groupby(decade_data.index.month)
    lower_bound = by_month.quantile(0.25)
    upper_bound = by_month.quantile(0.75)

    # The upper trace fills down to the lower trace added just before it
    fig.add_trace(go.Scatter(x=lower_bound.index, y=lower_bound['Water Temperature'], mode='lines', line=dict(width=1), name=f'{start_year}-{end_year} 25th Percentile'))
    fig.add_trace(go.Scatter(x=upper_bound.index, y=upper_bound['Water Temperature'], mode='lines', line=dict(width=1), name=f'{start_year}-{end_year} 75th Percentile', fill='tonexty'))

# Plot the year 2023 as a single line, in month order
data_2023 = df_water_temp[df_water_temp.index.year == 2023].sort_index()
if not data_2023.empty:
    fig.add_trace(go.Scatter(x=data_2023.index.month, y=data_2023['Water Temperature'], mode='lines', line=dict(color='#ff6600', width=2), name='Year 2023'))

# Update layout
fig.update_layout(
    title='Decadal Water Temperature Percentiles in Northern Alaska (1981-2023)',
    xaxis=dict(title='Month', tickmode='array', tickvals=months, showgrid=False),
    yaxis=dict(title='Average Water Temperature (°C)', showgrid=False),
    plot_bgcolor='rgba(255,255,255,1)',  # White background
    paper_bgcolor='rgba(255,255,255,1)'
)

fig.show()


## Adding custom colors

In [41]:
import xarray as xr
import plotly.graph_objects as go
import os
import pandas as pd

# Decadal 25th-75th percentile bands of the regional mean of 'Var_TSKINWTR'
# with a custom colour per decade, plus the individual 2023 series.

# Base directory where the NetCDF files are stored
data_dir = '/Users/casey/Desktop/blue-dot/data/raw'

# Months (1 through 12)
months = list(range(1, 13))

# Decade ranges, starting from 1981 (both ends inclusive); 2023 is drawn separately
decades = [(1981, 1990), (1991, 2000), (2001, 2010), (2011, 2022)]

# Latitude and longitude bounds for Northern Alaska
lat_bounds = [64, 71]
lon_bounds = [-168, -141]

# Regional mean keyed by the first day of each month; a repeated month overwrites
monthly_means = {}

# Process each file in the directory
for file_name in os.listdir(data_dir):
    if not file_name.endswith('.nc4'):
        continue

    # The second-to-last dot-separated token is assumed to be 'YYYYMM'
    date_part = file_name.split('.')[-2]
    year = int(date_part[:4])
    month = int(date_part[4:6])

    # The context manager closes the file even if a lookup or selection raises
    with xr.open_dataset(os.path.join(data_dir, file_name)) as ds:
        # NOTE(review): in MERRA-2 monthly collections the 'Var_' prefix presumably
        # denotes the variance of the field rather than the field itself; confirm
        # whether 'TSKINWTR' was intended, since the axis below is labelled in °C.
        water_temp = ds['Var_TSKINWTR'].sel(lat=slice(*lat_bounds), lon=slice(*lon_bounds))
        monthly_means[pd.Timestamp(year=year, month=month, day=1)] = float(water_temp.mean())

# Build the frame once with an explicit DatetimeIndex so `.index.year` and
# `.index.month` work even when no files were found
df_water_temp = pd.DataFrame(
    {'Water Temperature': list(monthly_means.values())},
    index=pd.DatetimeIndex(list(monthly_means)),
    dtype=float,
).sort_index()

# Create a figure for plotting
fig = go.Figure()

# Custom line and fill colors for each decade, keyed by (start_year, end_year)
decade_colors = {
    (1981, 1990): '#ff9dff',
    (1991, 2000): '#ffd0b1',
    (2001, 2010): '#ffc4ff',
    (2011, 2022): '#ffb889'
}

# Plot for each decade (the colour mapping, not `decades`, drives this loop)
for (start_year, end_year), color in decade_colors.items():
    # Select the data for the decade
    in_decade = (df_water_temp.index.year >= start_year) & (df_water_temp.index.year <= end_year)
    decade_data = df_water_temp[in_decade]

    # Calculate the 25th and 75th percentiles for each month
    by_month = decade_data.groupby(decade_data.index.month)
    lower_bound = by_month.quantile(0.25)
    upper_bound = by_month.quantile(0.75)

    # The upper trace fills down to the lower trace added just before it
    fig.add_trace(go.Scatter(x=lower_bound.index, y=lower_bound['Water Temperature'], mode='lines', line=dict(color=color, width=1), name=f'{start_year}-{end_year} 25th Percentile', showlegend=False))
    fig.add_trace(go.Scatter(x=upper_bound.index, y=upper_bound['Water Temperature'], mode='lines', line=dict(color=color, width=1), name=f'{start_year}-{end_year} 75th Percentile', fill='tonexty', fillcolor=color, showlegend=False))

# Plot the year 2023 as a single line, in month order
data_2023 = df_water_temp[df_water_temp.index.year == 2023].sort_index()
if not data_2023.empty:
    fig.add_trace(go.Scatter(x=data_2023.index.month, y=data_2023['Water Temperature'], mode='lines', line=dict(color='#ff954e', width=2), name='Year 2023'))

# Update layout
fig.update_layout(
    title='',
    xaxis=dict(title='Month', tickmode='array', tickvals=months, showgrid=False),
    yaxis=dict(title='Average Water Temperature (°C)', showgrid=False),
    plot_bgcolor='rgba(0,0,0,0)',  # Transparent background
    paper_bgcolor='rgba(0,0,0,0)'
)

fig.show()


## Adding legend to plot

In [32]:
import xarray as xr
import plotly.graph_objects as go
import os
import pandas as pd

# Base directory where the NetCDF files are stored
data_dir = '/Users/casey/Desktop/blue-dot/data/raw'

# Months (1 through 12)
months = list(range(1, 13))

# Decade ranges, starting from 1981
decades = [(1981, 1990), (1991, 2000), (2001, 2010), (2011, 2022)]

# Latitude and longitude bounds for Northern Alaska
lat_bounds = [64, 71]
lon_bounds = [-168, -141]

# Regional mean of each monthly file, keyed by the first day of that month.
# Values are gathered in a plain dict so the DataFrame is built once, with a
# float column, instead of being enlarged cell by cell with 0-d arrays.
monthly_water_temp = {}

# Process each file in the directory
for file_name in os.listdir(data_dir):
    if not file_name.endswith('.nc4'):
        continue

    # Extract year and month from the 'YYYYMM' field just before the extension
    date_part = file_name.split('.')[-2]
    year = int(date_part[:4])
    month = int(date_part[4:6])

    file_path = os.path.join(data_dir, file_name)

    # The context manager closes the file even if the variable lookup or the
    # lat/lon selection raises.
    with xr.open_dataset(file_path) as ds:
        # Extract 'Var_TSKINWTR' for Northern Alaska region
        # NOTE(review): the 'Var_' prefix suggests this is a variance field
        # rather than the skin temperature itself, and the plots label the
        # result in °C - confirm the variable and its units against the
        # MERRA-2 file specification.
        water_temp = ds['Var_TSKINWTR'].sel(lat=slice(*lat_bounds), lon=slice(*lon_bounds))

        # Reduce to a plain Python float while the file is still open
        monthly_water_temp[pd.Timestamp(year=year, month=month, day=1)] = float(water_temp.mean().values)

# Build the frame in one go. The explicit DatetimeIndex keeps `.index.year`
# and `.index.month` usable downstream even when no files were found.
df_water_temp = pd.DataFrame(
    {'Water Temperature': list(monthly_water_temp.values())},
    index=pd.DatetimeIndex(list(monthly_water_temp.keys())),
    dtype='float64',
).sort_index()

# Start from an empty figure; the decade bands and the 2023 line are layered onto it
fig = go.Figure()

# Line and fill colour for each (first year, last year) decade span
decade_colors = {
    (1981, 1990): '#ff9dff',  # pink
    (1991, 2000): '#ffd0b1',  # light orange
    (2001, 2010): '#ffc4ff',  # medium pink
    (2011, 2022): '#ffb889',  # orange
}

# Calendar year of every row, looked up once and reused by all the masks below
row_years = df_water_temp.index.year

# One shaded 25th-75th percentile band per decade, each edge listed in the legend
for (first_year, last_year), shade in decade_colors.items():
    # Rows whose year falls inside this decade (both ends inclusive)
    decade_rows = df_water_temp[(row_years >= first_year) & (row_years <= last_year)]

    # Quartiles of the decade's values, one row per calendar month
    per_month = decade_rows.groupby(decade_rows.index.month)
    q25 = per_month.quantile(0.25)
    q75 = per_month.quantile(0.75)

    # Lower edge goes in first: the upper edge uses fill='tonexty', which
    # shades the area between it and the trace added immediately before it.
    fig.add_trace(go.Scatter(
        x=q25.index,
        y=q25['Water Temperature'],
        mode='lines',
        line={'color': shade, 'width': 1},
        name=f'{first_year}-{last_year} 25th Percentile',
        showlegend=True,
    ))
    fig.add_trace(go.Scatter(
        x=q75.index,
        y=q75['Water Temperature'],
        mode='lines',
        line={'color': shade, 'width': 1},
        name=f'{first_year}-{last_year} 75th Percentile',
        fill='tonexty',
        fillcolor=shade,
        showlegend=True,
    ))

# 2023 is drawn as a single line on top of the bands, in month order
data_2023_water_temp = df_water_temp[row_years == 2023].sort_index()
if not data_2023_water_temp.empty:
    fig.add_trace(go.Scatter(
        x=data_2023_water_temp.index.month,
        y=data_2023_water_temp['Water Temperature'],
        mode='lines',
        line={'color': '#ff954e', 'width': 2},
        name='Year 2023 Water Temp',
    ))

# Title, axes and a vertical legend anchored just outside the right edge of the plot
fig.update_layout(
    title='Decadal Water Temperature Percentiles in Northern Alaska (1981-2023)',
    xaxis={'title': 'Month', 'tickmode': 'array', 'tickvals': months, 'showgrid': False},
    yaxis={'title': 'Average Water Temperature (°C)', 'showgrid': False},
    plot_bgcolor='rgba(0,0,0,0)',  # transparent plotting area
    paper_bgcolor='rgba(0,0,0,0)',  # transparent surrounding canvas
    legend={
        'title': 'Legend',
        'orientation': 'v',
        'x': 1.05,
        'y': 1,
        'xanchor': 'left',
        'yanchor': 'top',
    },
)

fig.show()



In [46]:
# Three-letter month labels shown in place of the numeric ticks 1-12
month_names = 'Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec'.split()

# Restyle the existing figure for export: no title, large fonts, opaque white background
fig.update_layout(
    title='',
    xaxis={
        'title': 'Month',
        'tickmode': 'array',
        'tickvals': months,
        'showgrid': False,
        'ticktext': month_names,
        'title_font': {'size': 28},
        'tickfont': {'size': 28},
    },
    yaxis={
        'title': 'Average Water Temperature (°C)',
        'showgrid': False,
        'title_font': {'size': 28},
        'tickfont': {'size': 28},
    },
    plot_bgcolor='rgba(255,255,255,1)',  # white plotting area
    paper_bgcolor='rgba(255,255,255,1)',  # white surrounding canvas
    legend={
        'title': 'Legend',
        'orientation': 'v',
        'x': 1.05,
        'y': 1,
        'xanchor': 'left',
        'yanchor': 'top',
        'title_font': {'size': 28},
    },
)
fig.show()

In [47]:
# Save the current figure, axes and legend included, as a 1200x700 SVG
fig.write_image(
    '/Users/casey/Desktop/temp_axes.svg',
    width=1200,
    height=700,
)

## Exporting just the shape of the data

In [33]:
# Hide the legend, both axes and the title, and make the background
# transparent, so that only the data traces remain visible
fig.update_layout(
    showlegend=False,
    xaxis_visible=False,
    yaxis_visible=False,
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    title='',
)
fig.show()

# Save the stripped-down figure as a 1250x700 SVG
fig.write_image(
    '/Users/casey/Desktop/images/temp_no_axes.svg',
    width=1250,
    height=700,
)

### Plotting both sea ice and temperature on one set of axes (test plot)

In [61]:
import xarray as xr
import plotly.graph_objects as go
import os
import pandas as pd

# Base directory where the NetCDF files are stored
data_dir = '/Users/casey/Desktop/raw'

# Months (1 through 12) and their corresponding names
month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
months = list(range(1, 13))

# Decade ranges, starting from 1981
decades = [(1981, 1990), (1991, 2000), (2001, 2010), (2011, 2022)]

# Latitude and longitude bounds for Northern Alaska
lat_bounds = [64, 71]
lon_bounds = [-168, -141]

# Regional means of each monthly file, keyed by the first day of that month.
# Values are gathered in plain dicts so each DataFrame is built once, with a
# float column, instead of being enlarged cell by cell with 0-d arrays.
monthly_sea_ice = {}
monthly_water_temp = {}

# Process each file in the directory for sea ice and water temperature
for file_name in os.listdir(data_dir):
    if not file_name.endswith('.nc4'):
        continue

    # Extract year and month from the 'YYYYMM' field just before the extension
    date_part = file_name.split('.')[-2]
    year = int(date_part[:4])
    month = int(date_part[4:6])
    timestamp = pd.Timestamp(year=year, month=month, day=1)

    file_path = os.path.join(data_dir, file_name)

    # The context manager closes the file even if a variable lookup or the
    # lat/lon selection raises.
    with xr.open_dataset(file_path) as ds:
        # Extract 'FRSEAICE' and 'Var_TSKINWTR' for Northern Alaska region
        # NOTE(review): the 'Var_' prefix suggests Var_TSKINWTR is a variance
        # field rather than the skin temperature itself, and the plots label
        # the result in °C - confirm the variable and its units against the
        # MERRA-2 file specification.
        sea_ice = ds['FRSEAICE'].sel(lat=slice(*lat_bounds), lon=slice(*lon_bounds))
        water_temp = ds['Var_TSKINWTR'].sel(lat=slice(*lat_bounds), lon=slice(*lon_bounds))

        # Reduce to plain Python floats while the file is still open
        monthly_sea_ice[timestamp] = float(sea_ice.mean().values)
        monthly_water_temp[timestamp] = float(water_temp.mean().values)

# Build both frames in one go. The explicit DatetimeIndex keeps `.index.year`
# and `.index.month` usable downstream even when no files were found.
df_sea_ice = pd.DataFrame(
    {'Sea Ice Extent': list(monthly_sea_ice.values())},
    index=pd.DatetimeIndex(list(monthly_sea_ice.keys())),
    dtype='float64',
).sort_index()
df_water_temp = pd.DataFrame(
    {'Water Temperature': list(monthly_water_temp.values())},
    index=pd.DatetimeIndex(list(monthly_water_temp.keys())),
    dtype='float64',
).sort_index()

# Create a figure for plotting
fig = go.Figure()

# Custom line and fill colors for each decade
sea_ice_colors = ['#b1ffff', '#00ebeb', '#00c4c4', '#009d9d']
water_temp_colors = {
    (1981, 1990): '#ff9dff',
    (1991, 2000): '#ffd0b1',
    (2001, 2010): '#ffc4ff',
    (2011, 2022): '#ffb889'
}


def _add_decade_band(figure, frame, column, start_year, end_year, color, label, yaxis='y'):
    """Add the 25th-75th percentile band of one decade to *figure*.

    Args:
        figure: Plotly figure the two traces are appended to.
        frame: DataFrame with a DatetimeIndex and a *column* of monthly values.
        column: Name of the column in *frame* to summarise.
        start_year: First year of the decade (inclusive).
        end_year: Last year of the decade (inclusive).
        color: Line and fill colour of the band.
        label: Suffix appended to both trace names (e.g. 'Sea Ice').
        yaxis: Plotly y-axis id the traces are bound to ('y' or 'y2').
    """
    years = frame.index.year
    decade_data = frame[(years >= start_year) & (years <= end_year)]

    # Quartiles of the decade's values, one row per calendar month
    by_month = decade_data.groupby(decade_data.index.month)
    lower_bound = by_month.quantile(0.25)
    upper_bound = by_month.quantile(0.75)

    # Lower edge goes in first: the upper edge uses fill='tonexty', which
    # shades the area between it and the trace added immediately before it.
    figure.add_trace(go.Scatter(
        x=lower_bound.index,
        y=lower_bound[column],
        mode='lines',
        line=dict(color=color, width=1),
        name=f'{start_year}-{end_year} 25th Percentile {label}',
        yaxis=yaxis,
        showlegend=False,
    ))
    figure.add_trace(go.Scatter(
        x=upper_bound.index,
        y=upper_bound[column],
        mode='lines',
        line=dict(color=color, width=1),
        name=f'{start_year}-{end_year} 75th Percentile {label}',
        fill='tonexty',
        fillcolor=color,
        yaxis=yaxis,
        showlegend=False,
    ))


# Plot for each decade - Sea Ice Extent (left axis)
for (start_year, end_year), color in zip(decades, sea_ice_colors):
    _add_decade_band(fig, df_sea_ice, 'Sea Ice Extent', start_year, end_year, color, 'Sea Ice')

# Plot for each decade - Water Temperature (right axis)
for (start_year, end_year), color in water_temp_colors.items():
    _add_decade_band(fig, df_water_temp, 'Water Temperature', start_year, end_year, color, 'Water Temp', yaxis='y2')

# Plot the year 2023 for both data types. The traces are named, as in the
# single-variable plots, so hover labels identify them even though the
# legend is hidden.
data_2023_sea_ice = df_sea_ice[df_sea_ice.index.year == 2023].sort_index()
data_2023_water_temp = df_water_temp[df_water_temp.index.year == 2023].sort_index()
if not data_2023_sea_ice.empty:
    fig.add_trace(go.Scatter(
        x=data_2023_sea_ice.index.month,
        y=data_2023_sea_ice['Sea Ice Extent'],
        mode='lines',
        line=dict(color='#007676', width=2),
        name='Year 2023 Sea Ice',
        showlegend=False,
    ))
if not data_2023_water_temp.empty:
    fig.add_trace(go.Scatter(
        x=data_2023_water_temp.index.month,
        y=data_2023_water_temp['Water Temperature'],
        mode='lines',
        line=dict(color='#ff954e', width=2),
        name='Year 2023 Water Temp',
        yaxis='y2',
        showlegend=False,
    ))


# Update layout: sea ice on the left axis, water temperature on an
# overlaid right axis, month names as x ticks
fig.update_layout(
    title='',
    xaxis=dict(title='Month', tickmode='array', tickvals=months, ticktext=month_names, title_font=dict(size=16), tickfont=dict(size=16)),
    yaxis=dict(title='Average Sea Ice Extent', title_font=dict(size=16), tickfont=dict(size=16)),
    yaxis2=dict(title='Average Water Temperature (°C)', overlaying='y', side='right', title_font=dict(size=16), tickfont=dict(size=16)),
    plot_bgcolor='rgba(255,255,255,1)',  # White background
    paper_bgcolor='rgba(255,255,255,1)'  # White background
)

fig.show()
