# Plotting Sentinel Satellite Data Frequency and Summary Statistics over Antarctica

## Instructions
- Sentinel data is downloaded using the `download_metadata.ipynb` notebook

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import datetime
import shapely.wkt
import geopandas as gpd
import cartopy.crs as ccrs
import cartopy
import rasterio
from rasterio.features import rasterize
import collections
import json
import os
import pandas as pd
import math
import matplotlib
from shapely.strtree import STRtree
import datetime

from fiona.transform import transform_geom
from shapely.geometry import mapping, shape
from shapely.ops import polygonize_full, unary_union

import shared_functions as sf


# Settings

In [None]:
# Cut-off date handed to add_metadata as max_date (rows at or after it are dropped there)
MAX_DATE = datetime.datetime.strptime('16/06/23', '%d/%m/%y')
# number of spatial points to identify intersection of passes
N_POINTS = 500
# GeoJSON file describing the region of interest
roi_shape = 'shapefiles/50south_excl_argentina_falkand_mid.geojson'

# Functions

## Data Engineering

In [None]:
def add_metadata(df, date_col='datatakesensingstart', max_date='', CRS=3031):
    """Parse footprints and dates of a product table and return it as a GeoDataFrame.

    Columns are added to ``df`` in place before the GeoDataFrame is built:
    ``geometry`` (parsed from the WKT in ``footprint``), ``geometry_4326`` (a copy
    of the parsed geometry that is not reprojected), ``month``, ``month_name``,
    ``year`` and ``sat_id`` (text before the first underscore of ``identifier``).

    Args:
        df: table with ``footprint``, ``identifier`` and ``date_col`` columns.
        date_col (str): name of the timestamp column to parse.
        max_date: if truthy, only rows with ``date_col`` strictly before it are kept.
        CRS: target CRS passed to ``to_crs``.

    Returns:
        GeoDataFrame built with CRS EPSG:4326 and then reprojected to ``CRS``.
    """
    # footprints arrive as WKT text
    df['geometry'] = df['footprint'].apply(shapely.wkt.loads)
    df['geometry_4326'] = df['geometry'].copy()

    # cut everything after the first '.' so one fixed format parses every value
    whole_seconds = df[date_col].apply(lambda stamp: str(stamp).split('.')[0])
    df[date_col] = pd.to_datetime(whole_seconds, format="%Y-%m-%d %H:%M:%S")

    df['month'] = df[date_col].dt.month
    df['month_name'] = df[date_col].dt.month_name()
    df['year'] = df[date_col].dt.year
    df['sat_id'] = df['identifier'].apply(lambda name: name.split('_')[0])

    if max_date:
        df = df[df[date_col] < max_date]

    return gpd.GeoDataFrame(df, geometry='geometry', crs="EPSG:4326").to_crs(CRS)

def filter_results_with_geojson(df, filename, plot=False):
    """Keep only the products whose lon/lat footprint intersects a region file.

    Args:
        df: product table with a ``geometry_4326`` column of shapely geometries.
        filename: vector file readable by geopandas; only its FIRST geometry is
            used as the inclusion region.
        plot (bool): if True, draw the region on a PlateCarree map (the figure is
            created but not shown here).

    Returns:
        The rows of ``df`` whose ``geometry_4326`` intersects the region.
    """
    gdf_inclusion = gpd.read_file(filename).set_crs(4326)

    # extract the region once instead of once per product row
    region = gdf_inclusion.geometry.values[0]

    n_before = len(df)
    df = df[df['geometry_4326'].apply(lambda footprint: footprint.intersects(region))]
    print(f'{n_before - len(df)} products have been removed')

    if plot:
        plt.rcParams["figure.figsize"] = [10,8]
        ax = plt.axes(projection=ccrs.PlateCarree(), title='Product Search Area - 50deg south excl. South America and Falkland Islands')
        ax.add_feature(cartopy.feature.LAND)
        ax.add_feature(cartopy.feature.OCEAN)
        ax.add_geometries(gdf_inclusion.geometry, crs=ccrs.PlateCarree(), alpha=0.7)

    return df

def plot_results_footprint_map(df, title=''):
    """Draw the product footprints of ``df`` on a south polar stereographic map.

    Args:
        df: GeoDataFrame; its geometries are handed to cartopy as
            SouthPolarStereo coordinates.
        title (str): figure title (previously accepted but never applied).
    """
    # set_extent takes (x0, x1, y0, y1) in the CRS given as second argument
    lon_min, lon_max, lat_min, lat_max = -180, 180, -90, -50
    plt.rcParams["figure.figsize"] = [10,8]
    ax = plt.axes(projection=ccrs.SouthPolarStereo())
    ax.set_extent((lon_min, lon_max, lat_min, lat_max+1), ccrs.PlateCarree())
    ax.add_feature(cartopy.feature.LAND)
    ax.add_feature(cartopy.feature.OCEAN)
    ax.add_geometries(df.geometry, crs=ccrs.SouthPolarStereo(), alpha=0.3)
    ax.gridlines(draw_labels=True)
    plt.title(title)
    plt.show()

def wkt_to_geojson(wkt, filename):
    """Write a single WKT geometry to ``filename`` as GeoJSON tagged EPSG:4326.

    Args:
        wkt (str): geometry in WKT form.
        filename: output path handed to ``GeoDataFrame.to_file``.
    """
    geometry = shapely.wkt.loads(wkt)
    # Build the frame directly with its CRS. The earlier version called
    # set_crs() without keeping the returned frame, so that call had no effect.
    gdf = gpd.GeoDataFrame(geometry=[geometry], crs='EPSG:4326')
    gdf.to_file(filename, driver='GeoJSON')

## Summary Statistics

In [None]:
def summarise_s1(df_s1):
    """Summarise product count and data volume per sensor mode and polarisation.

    Side effect: ``sizeGB`` and ``sizeTB`` columns are added to ``df_s1``.

    Args:
        df_s1: table with ``ingestiondate``, ``size`` (text such as ``"1.5 GB"``),
            ``sensoroperationalmode`` and ``polarisationmode`` columns.

    Returns:
        DataFrame with one row per (mode, polarisation) pair plus a ``"Total"``
        row; sizes are rounded to one decimal and non-numeric cells of the
        Total row are blank strings.
    """
    print(f'Summarising Sentinel-1 ({df_s1.ingestiondate.min()} to {df_s1.ingestiondate.max()})')

    def size_to_gb(size):
        # the number is the text before the first space; the unit decides the scaling
        if 'GB' in size:
            return float(size.split(' ')[0])
        if 'MB' in size:
            return float(size.split(' ')[0])/1000
        if 'KB' in size:
            return float(size.split(' ')[0])/1E6
        if 'TB' in size:
            return float(size.split(' ')[0])*1000
        # Unknown unit: NaN keeps the column numeric. The former '?' placeholder
        # made the division below raise TypeError.
        return float('nan')

    df_s1['sizeGB'] = df_s1['size'].apply(size_to_gb)
    df_s1['sizeTB'] = df_s1['sizeGB']/1000

    # summary of products
    df_s1_sum = df_s1.groupby(['sensoroperationalmode','polarisationmode']).aggregate(
        start = ('ingestiondate','min'),
        end = ('ingestiondate','max'),
        # count on the raw size text so products with an unparsed size still count
        product_count=('size','count'),
        size_GB=('sizeGB','sum'),
        size_TB=('sizeTB','sum'),
    ).reset_index()
    df_s1_sum['start'] = pd.to_datetime(df_s1_sum['start']).dt.date
    df_s1_sum['end'] = pd.to_datetime(df_s1_sum['end']).dt.date

    # the Total row only carries the numeric columns; the rest become NaN and are blanked
    df_s1_sum.loc["Total"] = df_s1_sum.sum(numeric_only=True)
    df_s1_sum = df_s1_sum.fillna('').round(1)
    df_s1_sum['product_count'] = df_s1_sum['product_count'].astype(int)
    return df_s1_sum

def year_month_product_summary(df):
    """Build monthly and annual product count / data volume tables.

    Side effect: ``sizeGB``, ``sizeMB`` and ``sizeTB`` columns are added to ``df``.

    Args:
        df: table with ``ingestiondate``, ``year``, ``month``, ``month_name`` and
            ``size`` (text such as ``"1.5 GB"``) columns.

    Returns:
        (df_m_sum, df_y_sum): the per (year, month) table and the per year table.
        Each ends with a ``"Total"`` row whose ``year`` (and ``month``) cells are
        blank strings; sizes are rounded to one decimal.
    """
    print(f'Summarising ({df.ingestiondate.min()} to {df.ingestiondate.max()})')

    def size_to_gb(size):
        # the number is the text before the first space; the unit decides the scaling
        if 'GB' in size:
            return float(size.split(' ')[0])
        if 'MB' in size:
            return float(size.split(' ')[0])/1000
        if 'KB' in size:
            return float(size.split(' ')[0])/1E6
        if 'TB' in size:
            return float(size.split(' ')[0])*1000
        # Unknown unit: NaN keeps the column numeric. The former '?' placeholder
        # made the arithmetic below raise TypeError.
        return float('nan')

    df['sizeGB'] = df['size'].apply(size_to_gb)
    df['sizeMB'] = df['sizeGB']*1000
    df['sizeTB'] = df['sizeGB']/1000

    # monthly summary
    df_m_sum = df.groupby(['year','month']).aggregate(
        month_name=('month_name','first'),
        # count on the raw size text so products with an unparsed size still count
        product_count=('size','count'),
        size_GB=('sizeGB','sum'),
        size_TB=('sizeTB','sum'),
    ).reset_index().sort_values(['year','month'])

    # Summing year/month is meaningless, so leave them out of the Total row;
    # fillna('') then blanks them (year used to show the sum of all years).
    df_m_sum.loc["Total"] = df_m_sum.sum(numeric_only=True).drop(['year','month'])
    df_m_sum = df_m_sum.fillna('').round(1)
    df_m_sum['product_count'] = df_m_sum['product_count'].astype(int)

    # Annual summary
    df_y_sum = df.groupby(['year']).aggregate(
        product_count=('size','count'),
        size_MB=('sizeMB','sum'),
        size_GB=('sizeGB','sum'),
        size_TB=('sizeTB','sum'),
    ).reset_index().sort_values(['year'])

    # Same approach as the monthly table. This replaces writing '' into the
    # float year column afterwards, which pandas deprecates.
    df_y_sum.loc["Total"] = df_y_sum.sum(numeric_only=True).drop(['year'])
    df_y_sum = df_y_sum.fillna('').round(1)
    df_y_sum['product_count'] = df_y_sum['product_count'].astype(int)
    df_y_sum['avg_size_MB'] = df_y_sum['size_MB']/df_y_sum['product_count']

    return df_m_sum, df_y_sum
    

## Plotting - Timeseries Product Count

In [None]:

def plot_timeseries_products(df, title='',stack_col='sat_id', date_col='beginposition',count_freq='7D', plot_freq='1M'):
    """Stacked bar chart of product counts over time, split by ``stack_col``.

    Products are bucketed by rounding ``date_col`` to ``count_freq``, counted per
    bucket and ``stack_col`` value, then reduced to the maximum count within each
    ``plot_freq`` period. Side effect: a ``round_time`` column is added to ``df``.

    Args:
        df: table with ``date_col`` (datetime), ``stack_col`` and ``filename`` columns.
        title (str): plot title.
        stack_col (str): column whose values become the stacked series.
        date_col (str): datetime column used for bucketing.
        count_freq (str): rounding frequency for the counting buckets.
        plot_freq (str): resampling frequency of the plotted bars.
    """
    import seaborn as sns
    import matplotlib.ticker as ticker

    sns.set_theme()
    sns.set(rc={'figure.figsize':(10,2)})

    df['round_time'] = df[date_col].dt.round(count_freq)
    counts = (
        df[['round_time', stack_col, 'filename']]
        .groupby(['round_time', stack_col])
        .count()
        .reset_index()
        .pivot(index='round_time', columns=stack_col, values='filename')
        .fillna(0)
        .resample(plot_freq)
        .max()
    )
    ax = counts.plot(kind='bar', stacked=True, width=1)

    # only label a bar with its year where the year differs from the previous bar
    bar_times = counts.index
    ticklabels = [''] + [
        '' if later.year == earlier.year else later.year
        for earlier, later in zip(bar_times[:-1], bar_times[1:])
    ]
    ax.xaxis.set_major_formatter(ticker.FixedFormatter(ticklabels))
    ax.set_xlabel('')

    plt.xticks(rotation = 45, fontsize='10')
    plt.yticks(fontsize='10')
    plt.legend(title='')
    plt.title(title)
    sns.reset_orig()
    plt.show()

## Plotting - Product Coverage
- Functions to plot data by coverage, i.e. based on the geometries of each product. This does not account for duplicated data that can occur due to the gridding of products (e.g. overlapping MGRS tiles).

In [None]:
def get_freq_and_cloud_raster(df, shape=(1000,1000)):
    """Rasterise product footprints into a coverage count grid and a mean cloud cover grid.

    Every geometry in ``df['geometry']`` is burned as 1 onto a grid of ``shape``
    cells spanning the bounds of cartopy's SouthPolarStereo projection, and the
    grids are summed so each cell holds the number of footprints covering it.
    When the first row's ``platformname`` is "Sentinel-2" the footprint's
    ``cloudcoverpercentage`` is accumulated the same way and divided by the
    count to give an approximate mean.

    NOTE(review): assumes the geometries are already expressed in the same
    projected coordinates as the SouthPolarStereo bounds - confirm against the
    CRS used when the table was built.

    Returns:
        (freq_raster, cc_raster): float arrays of ``shape``; cells without any
        footprint are NaN in both.
    """
    bounds = ccrs.SouthPolarStereo().boundary.bounds
    transform = rasterio.transform.from_bounds(*bounds, *shape)

    freq_raster = np.zeros(shape)
    cc_raster = np.zeros(shape) # approximate cloud cover perc for sentinel 2

    # the platform of the first row decides for the whole table
    accumulate_cloud = len(df) > 0 and df['platformname'].iloc[0] == "Sentinel-2"

    for position, polygon in enumerate(df['geometry']):
        freq_raster += rasterize([(polygon, 1)], out_shape=shape, transform=transform)
        if accumulate_cloud:
            cloud_pct = df['cloudcoverpercentage'].iloc[position]
            cc_raster += rasterize([(polygon, cloud_pct)], out_shape=shape, transform=transform)

    # average the cloud cover percentage over the number of footprints
    cc_raster = cc_raster/freq_raster

    # mask out the cells that no footprint touched
    no_data = freq_raster == 0
    cc_raster[no_data] = np.nan
    freq_raster[no_data] = np.nan
    return freq_raster, cc_raster

def plot_frequency(df, title='', cbar_label='Frequency', cloud=False, shape=(1000,1000)):
    """Plot the coverage count (or mean cloud cover) raster of ``df`` on a polar map.

    Args:
        df: product table passed to get_freq_and_cloud_raster.
        title (str): plot title.
        cbar_label (str): colour bar label.
        cloud (bool): plot the cloud cover raster instead of the count raster.
        shape (tuple): raster shape passed to get_freq_and_cloud_raster.

    Returns:
        The cartopy axes; the figure is not shown here.
    """
    freq_raster, cc_raster = get_freq_and_cloud_raster(df, shape=shape)
    raster = cc_raster if cloud else freq_raster

    # the raster spans the full projection boundary (unused transform/crs locals removed)
    x_min, y_min, x_max, y_max = ccrs.SouthPolarStereo().boundary.bounds
    lon_min, lon_max, lat_min, lat_max = -180, 180, -90, -50

    plt.rcParams["figure.figsize"] = [10,8]
    ax = plt.axes(projection=ccrs.SouthPolarStereo())
    ax.set_extent((lon_min, lon_max, lat_min, lat_max+1), ccrs.PlateCarree())
    ax.add_feature(cartopy.feature.LAND)
    ax.add_feature(cartopy.feature.OCEAN)
    color = ax.imshow(raster, origin="upper", extent=(x_min, x_max, y_min, y_max), transform=ccrs.SouthPolarStereo())
    ax.add_feature(cartopy.feature.COASTLINE)
    # raster>-1 is False for NaN, so this takes the max over cells with data
    cbar_max = int(raster[raster>-1].max())
    plt.colorbar(color, ticks=np.linspace(0, cbar_max, 10, dtype=int), label=cbar_label)
    ax.gridlines(draw_labels=True)
    plt.title(title)
    return ax
    
def plot_frequency_side(raster, title='', cbar_label='Frequency', shape = (1000, 1000)):
    """Plot a precomputed raster on a polar map limited to longitudes 0 to 180.

    Args:
        raster: 2-D array spanning the SouthPolarStereo projection bounds, NaN where empty.
        title (str): plot title.
        cbar_label (str): colour bar label.
        shape (tuple): unused; kept so existing callers keep working.

    Returns:
        The cartopy axes; the figure is not shown here.
    """
    # dead locals (transform, unpacked extent constants) removed
    x_min, y_min, x_max, y_max = ccrs.SouthPolarStereo().boundary.bounds

    plt.rcParams["figure.figsize"] = [14,3]
    ax = plt.axes(projection=ccrs.SouthPolarStereo())
    ax.set_extent((0, 180, -90, -50), ccrs.PlateCarree())
    ax.add_feature(cartopy.feature.LAND)
    ax.add_feature(cartopy.feature.OCEAN)
    color = ax.imshow(raster, origin="upper", extent=(x_min, x_max, y_min, y_max), transform=ccrs.SouthPolarStereo())
    # raster>-1 is False for NaN, so this takes the max over cells with data
    cbar_max = int(raster[raster>-1].max())
    plt.colorbar(color, ticks=np.linspace(0, cbar_max, 10, dtype=int), pad=0.1, label=cbar_label)
    ax.gridlines(draw_labels=True)
    plt.title(title)
    return ax

def plot_multiple_frequency(df, group, sort_group='', title='', n_cols=2, cloud=False, cbar_label='Pass Frequency', shape=(1000,1000)):
    """Plot one coverage (or cloud cover) map per value of ``group`` on a shared colour scale.

    Args:
        df: product table; needs the ``group`` column plus whatever
            get_freq_and_cloud_raster reads.
        group (str): column whose distinct values each get their own panel.
        sort_group (str): column used to order the panels; defaults to ``group``.
        title (str): large title for the whole figure.
        n_cols (int): number of panel columns; the row count is derived from it.
        cloud (bool): plot the mean cloud cover raster instead of the count raster.
        cbar_label (str): label of the shared colour bar.
        shape (tuple): raster shape built for each panel.
    """
    sort_group = group if not sort_group else sort_group
    df = df.sort_values(sort_group)

    # one panel per distinct (non-missing) group value, in sorted order
    categories = df[group].dropna().unique()
    n = len(categories)
    n_rows = math.ceil(n/n_cols)

    # squeeze=False always returns a 2-D array of axes, so one indexing scheme
    # works for a single panel, a single row and a full grid alike
    fig, ax = plt.subplots(nrows=n_rows, ncols=n_cols,
                        subplot_kw={'projection': ccrs.SouthPolarStereo()},
                        figsize=(n_cols*6,n_rows*5.5),
                        squeeze=False)
    panels = ax.ravel()  # row-major: panel k sits at (k // n_cols, k % n_cols)

    # first build the raster of every category so the colour limits can be shared
    raster_dict = {}
    product_counts = {}
    max_freq = 0
    min_freq = 0
    for cat in categories:
        cat_data = df[df[group]==cat]
        freq_raster, cc_raster = get_freq_and_cloud_raster(cat_data, shape=shape)
        raster = cc_raster if cloud else freq_raster
        valid = raster[raster>-1]  # drops NaN cells
        # the 98th percentile rather than the max keeps outliers from flattening the scale
        max_freq = max(max_freq, int(np.percentile(valid, 98)))
        min_freq = min(min_freq, int(valid.min()))
        raster_dict[cat] = raster
        product_counts[cat] = len(cat_data)

    bounds = ccrs.SouthPolarStereo().boundary.bounds
    east, west, south, north = -180, 180, -90, -50

    print(categories)
    for panel, cat in zip(panels, categories):
        panel.set_extent((east, west, south, north), ccrs.PlateCarree())
        panel.add_feature(cartopy.feature.LAND)
        panel.add_feature(cartopy.feature.OCEAN)
        panel.title.set_text(f'{cat} ({product_counts[cat]:,} products)')
        color = panel.imshow(raster_dict[cat],
                             origin="upper",
                             extent=(bounds[0], bounds[2], bounds[1], bounds[3]),
                             transform=ccrs.SouthPolarStereo(),
                             vmin=min_freq,
                             vmax=max_freq
                             )
        gl = panel.gridlines(draw_labels=True)
        panel.add_feature(cartopy.feature.COASTLINE)
        gl.xlabel_style['rotation']= 0
        gl.xlabel_style['ha']= 'center'
        gl.xlabel_style['va']= 'center'

    # add the colorbar for all plots
    plt.tight_layout()
    cbar_ax = fig.add_axes([0.05, -0.03, 0.9, 0.02])
    fig.colorbar(color, cax=cbar_ax, label=cbar_label, orientation="horizontal")
    plt.suptitle(title, y=1.03, fontsize='x-large')

    # remove every unused subplot of the last row (there can be more than one)
    for spare in panels[n:]:
        fig.delaxes(spare)

## Plotting - Point Based Coverage
- This plotting method uses randomly generated points to determine the number of unique passes over a given location. i.e. it accounts for the duplication of data from overlapping products

In [None]:
def generate_points_in_bounds(filename, number, plot=True):
    """Lay a regular lon/lat grid of points over a region and clip it to that region.

    ``number`` points are spread along x across the bounding box of the first
    geometry in ``filename``; the number along y is scaled by the box aspect
    ratio. TODO improve points generation spatially (i.e. the southern most
    latitudes have a smaller area than higher up the earth).

    Args:
        filename: vector file readable by geopandas holding the region of interest.
        number (int): number of grid columns.
        plot (bool): if True, draw the region and the resulting points.

    Returns:
        GeoSeries in EPSG:3031 holding, for every grid point, its intersection
        with the first region geometry.
    """
    from shapely.geometry import Point
    import itertools

    # open the spatial AOI
    roi_gdf = gpd.read_file(filename)
    minx, miny, maxx, maxy = roi_gdf.geometry.bounds.values[0]

    # scale the row count so the spacing along y matches the spacing along x
    y_number = int(number * (abs(miny-maxy)/abs(minx-maxx)))
    xs = np.linspace(minx, maxx, num=number)
    ys = np.linspace(miny, maxy, num=y_number)
    points = list(itertools.product(xs, ys))
    print(len(points))

    df = pd.DataFrame()
    df['points'] = points
    df['points'] = df['points'].apply(Point)
    points_gdf = gpd.GeoDataFrame(df, geometry='points', crs="EPSG:4326")

    joined = gpd.tools.sjoin(points_gdf, roi_gdf, predicate="within", how='left')
    # intersect every point with the region geometry, then reproject
    region = roi_gdf['geometry'].values[0]
    points_in_poly = joined.intersection(region).to_crs(3031)

    # Plot result
    if plot:
        base = roi_gdf['geometry'].plot(linewidth=1, edgecolor="black")
        points_in_poly.plot(ax=base, linewidth=1, color="black", markersize=8)
        plt.show()

    return points_in_poly

def calculate_valid_intersections(df, points_df, date_col='ingestiondate', min_revisit_time=10, plot=False):
    """List every (sample point, product) pair where the product footprint covers the point.

    For each point the covering products are ordered by ``date_col`` and the time
    since the previous observation of the same point is computed. Observations
    closer than ``min_revisit_time`` minutes to the previous one are dropped.

    Args:
        df: GeoDataFrame of products (geometry, platformname, relativeorbitnumber,
            month_name, month, year, ``date_col`` and the platform specific columns
            read below).
        points_df: iterable of shapely points to test.
        date_col (str): datetime column used to order observations.
        min_revisit_time: minimum gap in minutes for an observation to be kept.
        plot (bool): if True, show a 2-D histogram of the retained observations.

    Returns:
        GeoDataFrame (geometry ``intersectionpoint``, CRS EPSG:3031) with one row
        per retained observation, sorted by point and date.
    """
    from tqdm import tqdm

    print('Finding point based intersections for plotting')
    # Build spatial index
    spatial_index = STRtree(df.geometry)
    intersections = []
    for p in tqdm(points_df):
        # the index only returns candidates; confirm each with an exact test
        for j in spatial_index.query(p):
            if not p.intersects(df.geometry.iloc[j]):
                continue
            row_data = df.iloc[j]
            if row_data.platformname == 'Sentinel-2':
                cc = row_data.cloudcoverpercentage
                sensoropmode = 'MSI'
                polarisation = 'NA'
                if df['producttype'].values[0] == 'S2MSI2A':
                    tileid = row_data.identifier.split('_')[5]
                else:
                    tileid = row_data.tileid
            else:
                cc = 0
                tileid = ''
                sensoropmode = row_data.sensoroperationalmode
                polarisation = row_data.polarisationmode
            intersections.append({
                'intersectionpoint':p,
                'intersectionpointstr':str(p),
                'intersectionpoint_x':p.x,
                'intersectionpoint_y':p.y,
                date_col: row_data[date_col],
                'cloudcoverpercentage': cc,
                'polygon': row_data.geometry,
                'tileid': tileid,
                'relativeorbitnumber': row_data.relativeorbitnumber,
                # Both fields used to be filled with relativeorbitnumber (copy/paste slip).
                # None is stored when the product table lacks the column.
                'orbitdirection': getattr(row_data, 'orbitdirection', None),
                'orbitnumber': getattr(row_data, 'orbitnumber', None),
                'month_name': row_data.month_name,
                'month': row_data.month,
                'year': row_data.year,
                'sensoroperationalmode': sensoropmode,
                'polarisationmode': polarisation,
            })

    # convert to dataframe
    intersections = pd.DataFrame.from_dict(intersections)

    # sort by point and date acquired
    intersections = intersections.sort_values(by=['intersectionpointstr',date_col])

    # calculate the time between observations at a given point
    # replace na / first datapoint with placeholder revisit time
    placeholder_dt = datetime.timedelta(days=5)
    intersections['timebetweenobs'] = (intersections[date_col] - intersections[date_col].shift(1)).fillna(placeholder_dt)

    # The first observation of each point has no earlier observation of the same
    # point to compare against; the raw difference above was taken against the
    # previous point's last row. A single .loc assignment is required here:
    # the earlier chained form frame[mask][col] = ... wrote to a temporary copy.
    first_of_point = intersections['intersectionpointstr'] != intersections['intersectionpointstr'].shift(1)
    intersections.loc[first_of_point, 'timebetweenobs'] = placeholder_dt

    intersections['daysbetweenobs'] = intersections['timebetweenobs'].dt.days.astype(float)
    intersections['minutesbetweenobs'] = intersections['timebetweenobs'].dt.total_seconds().astype(int)/60

    # keep only valid revisit times,
    # i.e. when a point is measured more than 'min_revisit_time' minutes apart
    intersections = intersections[intersections['minutesbetweenobs']>min_revisit_time]
    intersections = gpd.GeoDataFrame(intersections, geometry='intersectionpoint', crs="EPSG:3031")

    # sort by point and date acquired
    intersections = intersections.sort_values(by=['intersectionpointstr',date_col])

    if plot:
        heatmap, xedges, yedges = np.histogram2d(
            intersections.intersectionpoint_x,
            intersections.intersectionpoint_y,
            bins=100)
        extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
        print(extent)

        plt.clf()
        plt.title('Heatmap (not freq count)')
        plt.imshow(heatmap.T, extent=extent, origin='lower')
        plt.colorbar(orientation='horizontal')
        plt.show()

    return intersections

def summarise_intersections(intersections, group=''):
    """Aggregate observations per sample point, optionally also per ``group`` value.

    Args:
        intersections: table with ``intersectionpoint``, ``intersectionpointstr``,
            ``minutesbetweenobs`` and ``cloudcoverpercentage`` columns.
        group (str): optional extra column to group by.

    Returns:
        DataFrame with the grouping columns followed by ``revisit_count``, the
        mean/median of ``minutesbetweenobs`` and the mean/median of
        ``cloudcoverpercentage``.
    """
    keys = ['intersectionpoint', group] if group else ['intersectionpoint']

    aggregations = {
        'revisit_count': ('intersectionpointstr', 'count'),
        'mean_revisit_mins': ('minutesbetweenobs', 'mean'),
        'median_revisit_mins': ('minutesbetweenobs', 'median'),
        'mean_cloudcover': ('cloudcoverpercentage', 'mean'),
        'median_cloudcover': ('cloudcoverpercentage', 'median'),
    }
    return intersections.groupby(keys).agg(**aggregations).reset_index()

def plot_intersection_frequency(intersections, plot_var, title='', cbar_label='',shape=(1000,1000), force_max_val='', cmap='viridis', fixed_vals=False, repeat_orbits=False):
    """Map a per-point summary statistic of the intersections on a polar raster.

    Args:
        intersections: table accepted by summarise_intersections.
        plot_var (str): summary column to plot (e.g. ``'revisit_count'``).
        title (str): plot title.
        cbar_label (str): colour bar label (ignored when ``fixed_vals`` is set).
        shape (tuple): raster shape.
        force_max_val: if truthy, values above it are clipped to it.
        cmap: matplotlib colour map.
        fixed_vals (bool): bin the values into five fixed classes and show a
            legend instead of a colour bar.
        repeat_orbits: unused; kept so existing callers keep working.
    """
    # summarise the intersections
    intrsection_summary = summarise_intersections(intersections)

    bounds = ccrs.SouthPolarStereo().boundary.bounds
    transform = rasterio.transform.from_bounds(*bounds, *shape)

    raster = np.zeros(shape)
    for _, row in intrsection_summary.iterrows():
        point_raster = rasterize([(row.intersectionpoint, row[plot_var])], out_shape=shape, transform=transform)
        # keep the highest value seen per pixel
        higher = point_raster > raster
        raster[higher] = point_raster[higher]

    # cells that never received a value are masked from the plot
    raster[raster==0] = np.nan

    if force_max_val:
        raster[raster>force_max_val] = force_max_val

    # class value written into the raster -> legend text
    bin_labels = {5: '<5', 10: '5 to 10', 15: '10 to 20', 20: '20 to 40', 25: '>40'}
    if fixed_vals:
        # order matters: each step only touches values the earlier steps did not produce
        raster[raster<5] = 5
        raster[(raster>5) & (raster <= 10)] = 10
        raster[(raster>10) & (raster <= 20)] = 15
        raster[(raster>20) & (raster <= 40)] = 20
        raster[(raster>=40)] = 25

    east, west, south, north = -180, 180, -90, -50

    plt.rcParams["figure.figsize"] = [10,8]
    ax = plt.axes(projection=ccrs.SouthPolarStereo())
    ax.set_extent((east, west, south, north+1), ccrs.PlateCarree())
    ax.add_feature(cartopy.feature.LAND)
    ax.add_feature(cartopy.feature.OCEAN)
    color = ax.imshow(raster, origin="upper", extent=(bounds[0], bounds[2], bounds[1], bounds[3]), transform=ccrs.SouthPolarStereo(), cmap=cmap)
    ax.add_feature(cartopy.feature.COASTLINE)

    if fixed_vals:
        import matplotlib.patches as mpatches
        # Label each class by its value rather than by its position in the list:
        # positional labels were wrong whenever a class was absent from the data.
        values = np.unique(raster[~np.isnan(raster)])
        print(values)
        # create a patch (proxy artist) for every class present
        patches = [
            mpatches.Patch(color=color.cmap(color.norm(value)), label=bin_labels.get(value, str(value)))
            for value in values
        ]
        plt.legend(handles=patches, loc='upper right', title='Observations')
    else:
        # raster>-1 is False for NaN, so this takes the max over cells with data
        cbar_max = int(raster[raster>-1].max())
        plt.colorbar(color, ticks=np.linspace(0, cbar_max, 10, dtype=int), label=cbar_label)

    ax.gridlines(draw_labels=True)
    plt.title(title)
    plt.show()

def plot_multiple_intersection_frequency(intersections, plot_var, group, n_cols=2, title='', cbar_label='',shape=(1000,1000)):
    """Plot one per-point summary map per value of ``group`` on a shared colour scale.

    Args:
        intersections: table accepted by summarise_intersections, with a ``group`` column.
        plot_var (str): summary column to plot (e.g. ``'revisit_count'``).
        group (str): column whose distinct values each get their own panel.
            Month names are ordered by calendar, anything else is sorted.
        n_cols (int): number of panel columns; the row count is derived from it.
        title (str): large title for the whole figure.
        cbar_label (str): label of the shared colour bar.
        shape (tuple): raster shape built for each panel.
    """
    import matplotlib.cm as cm

    # summarise the intersections
    intrsection_summary = summarise_intersections(intersections, group=group)

    bounds = ccrs.SouthPolarStereo().boundary.bounds
    transform = rasterio.transform.from_bounds(*bounds, *shape)

    # Decide the panel order from the values actually present. The panels used to
    # be all twelve months whenever 'January' occurred, which overran the axes
    # grid if some months were missing.
    present = list(intersections[group].dropna().unique())
    month_order = ['January','February','March','April','May','June','July','August','September','October','November','December']
    if all(isinstance(value, str) and value in month_order for value in present):
        print('Months')
        groups = [month for month in month_order if month in present]
    else:
        print('Years')
        groups = sorted(present)

    # calculate the size of the figure
    n = len(groups)
    print(n)
    n_rows = math.ceil(n/n_cols)

    # squeeze=False always returns a 2-D array of axes, so one indexing scheme
    # works for a single panel, a single row and a full grid alike
    fig, ax = plt.subplots(nrows=n_rows, ncols=n_cols,
                        subplot_kw={'projection': ccrs.SouthPolarStereo()},
                        figsize=(n_cols*5,n_rows*4.5),
                        squeeze=False)
    panels = ax.ravel()  # row-major: panel k sits at (k // n_cols, k % n_cols)

    # shared colour limits across all groups
    minima = intrsection_summary[plot_var].min()
    maxima = intrsection_summary[plot_var].max()
    norm = matplotlib.colors.Normalize(vmin=minima, vmax=maxima, clip=True)
    mapper = cm.ScalarMappable(norm=norm, cmap=cm.viridis)

    east, west, south, north = -180, 180, -90, -50

    print(groups)
    for panel, g in zip(panels, groups):
        g_intrsection_summary = intrsection_summary[intrsection_summary[group]==g]
        raster = np.zeros(shape)
        for _, row in g_intrsection_summary.iterrows():
            point_raster = rasterize([(row.intersectionpoint, row[plot_var])], out_shape=shape, transform=transform)
            # keep the highest value seen per pixel
            higher = point_raster > raster
            raster[higher] = point_raster[higher]
        raster[raster==0] = np.nan

        panel.set_extent((east, west, south, north), ccrs.PlateCarree())
        panel.add_feature(cartopy.feature.LAND)
        panel.add_feature(cartopy.feature.OCEAN)
        panel.title.set_text(f'{g}')
        panel.imshow(raster,
                     origin="upper",
                     extent=(bounds[0], bounds[2], bounds[1], bounds[3]),
                     transform=ccrs.SouthPolarStereo(),
                     vmin=minima,
                     vmax=maxima
                     )
        panel.add_feature(cartopy.feature.COASTLINE)
        gl = panel.gridlines(draw_labels=True)
        gl.xlabel_style['rotation']= 0
        gl.xlabel_style['ha']= 'center'
        gl.xlabel_style['va']= 'center'

    # add the colorbar for all plots
    plt.tight_layout()
    cbar_ax = fig.add_axes([0.05, -0.05, 0.9, 0.04])
    plt.colorbar(mapper, cax=cbar_ax, label=cbar_label, orientation="horizontal")
    plt.suptitle(title, y=1.03, fontsize='x-large')

    # remove every unused subplot of the last row (there can be more than one)
    for spare in panels[n:]:
        fig.delaxes(spare)


## Plotting - MGRS Grid Based

In [None]:
def plot_mgrs_product_count(df, title='', cbar_label='Count of Products'):
    """Colour every MGRS tile by the number of products it holds.

    Side effect: for ``S2MSI2A`` tables a ``tileid`` column is written to ``df``.

    Args:
        df: product table with ``producttype``, ``filename`` and either ``tileid``
            or (for ``S2MSI2A``) ``identifier`` columns.
        title (str): plot title.
        cbar_label (str): colour bar label.
    """
    import matplotlib.cm as cm

    # read in the mgrs tile grid
    gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'
    df_mgrs = gpd.read_file('S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml', driver='KML')

    # create a tile id identifier for level2
    if df['producttype'].values[0] == 'S2MSI2A':
        # 'identifier': 'S2B_MSIL2A_20190218T155329_N0211_R025_T03CVK_20190218T211152'
        # Drop the leading 'T' of the tile token, as plot_multiple_product_count
        # does, so the id can match the grid's Name column in the merge below.
        # NOTE(review): presumes the KML names carry no 'T' prefix - confirm.
        df['tileid'] = df['identifier'].apply(lambda x : x.split('_')[5][1:])

    # product count per tile, joined to the tile geometries
    prod_count = df.groupby(['tileid'])['filename'].count().reset_index()
    tile_count = prod_count.merge(df_mgrs, right_on='Name', left_on='tileid')
    tile_count = tile_count.rename(columns={'filename':'filecount'})
    # draw order follows the sort, so the highest counts are drawn last
    tile_count = tile_count.sort_values('filecount')

    east, west, south, north = -180, 180, -90, -50

    minima = tile_count.filecount.min()
    maxima = tile_count.filecount.max()
    norm = matplotlib.colors.Normalize(vmin=minima, vmax=maxima, clip=True)
    mapper = cm.ScalarMappable(norm=norm, cmap=cm.viridis)

    plt.rcParams["figure.figsize"] = [10,8]
    ax = plt.axes(projection=ccrs.SouthPolarStereo())
    ax.set_extent((east, west, south, north+1), ccrs.PlateCarree())
    ax.add_feature(cartopy.feature.LAND)
    ax.add_feature(cartopy.feature.OCEAN)
    # each tile is a geometry collection; only its first member is drawn
    tile_shapes = [t.geoms[0] for t in tile_count.geometry.values]
    facecolors = [mapper.to_rgba(v) for v in tile_count.filecount.values]
    ax.gridlines(draw_labels=True)
    ax.add_geometries(tile_shapes, crs=ccrs.PlateCarree(), alpha=1, facecolor=facecolors)
    ax.add_feature(cartopy.feature.COASTLINE)
    plt.colorbar(mapper, label=cbar_label)
    plt.title(title)
    plt.show()

def plot_multiple_product_count(df, group, title='', n_cols=2, cbar_label='Count of Products'):
    """Plot one MGRS tile product-count map per value of ``group`` on a shared colour scale.

    Side effect: for ``S2MSI2A`` tables a ``tileid`` column is written to ``df``.

    Args:
        df: product table with ``producttype``, ``filename``, the ``group`` column
            and either ``tileid`` or (for ``S2MSI2A``) ``identifier``.
        group (str): column whose distinct values each get their own panel.
            Month names are ordered by calendar, integer-like values are sorted,
            anything else keeps its order of appearance.
        title (str): large title for the whole figure.
        n_cols (int): number of panel columns; the row count is derived from it.
        cbar_label (str): label of the shared colour bar.
    """
    import matplotlib.cm as cm

    # read in the mgrs tile grid
    gpd.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw'
    df_mgrs = gpd.read_file('S2A_OPER_GIP_TILPAR_MPC__20151209T095117_V20150622T000000_21000101T000000_B00.kml', driver='KML')

    # create a tile id identifier for level2
    if df['producttype'].values[0] == 'S2MSI2A':
        # 'identifier': 'S2B_MSIL2A_20190218T155329_N0211_R025_T03CVK_20190218T211152'
        # the tile token is the sixth field, minus its leading 'T'
        df['tileid'] = df['identifier'].apply(lambda x : x.split('_')[5][1:])

    # Decide the panel order from the values actually present. The panels used to
    # be all twelve months whenever 'January' occurred, which overran the axes
    # grid if some months were missing.
    present = list(df[group].dropna().unique())
    month_order = ['January','February','March','April','May','June','July','August','September','October','November','December']
    if all(isinstance(value, str) and value in month_order for value in present):
        print('Trying to plot')
        plot_grps = [month for month in month_order if month in present]
    else:
        try:
            # sort if numeric
            int(present[0])
        except (ValueError, TypeError):
            plot_grps = present
        else:
            plot_grps = sorted(present)

    # calculate the size of the figure
    n = len(plot_grps)
    n_rows = math.ceil(n/n_cols)

    # squeeze=False always returns a 2-D array of axes, so one indexing scheme
    # works for a single panel, a single row and a full grid alike
    fig, ax = plt.subplots(nrows=n_rows, ncols=n_cols,
                        subplot_kw={'projection': ccrs.SouthPolarStereo()},
                        figsize=(n_cols*5,n_rows*4.5),
                        squeeze=False)
    panels = ax.ravel()  # row-major: panel k sits at (k // n_cols, k % n_cols)

    # product count by group to get max and min for colorbar
    prod_counts = df.groupby([group,'tileid'])['filename'].count().reset_index()
    minima = prod_counts.filename.min()
    maxima = prod_counts.filename.max()
    norm = matplotlib.colors.Normalize(vmin=minima, vmax=maxima, clip=True)
    mapper = cm.ScalarMappable(norm=norm, cmap=cm.viridis)

    east, west, south, north = -180, 180, -90, -50

    for panel, grp in zip(panels, plot_grps):
        grp_df = df[df[group] == grp]
        tile_count = grp_df.groupby([group,'tileid'])['filename'].count().reset_index()
        tile_count = tile_count.merge(df_mgrs, right_on='Name', left_on='tileid')
        tile_count = tile_count.rename(columns={'filename':'filecount'})
        # only products whose tile matched the grid are counted in the subtitle
        n_products = tile_count['filecount'].sum()
        tile_count = tile_count.sort_values('filecount')

        panel.set_extent((east, west, south, north), ccrs.PlateCarree())
        panel.add_feature(cartopy.feature.LAND)
        panel.add_feature(cartopy.feature.OCEAN)
        subtitle = f'{grp} ({n_products:,} products)'
        panel.title.set_text(subtitle)
        print(subtitle)
        # each tile is a geometry collection; only its first member is drawn
        tile_shapes = [t.geoms[0] for t in tile_count.geometry.values]
        facecolors = [mapper.to_rgba(v) for v in tile_count.filecount.values]
        panel.add_geometries(tile_shapes, crs=ccrs.PlateCarree(), alpha=1, facecolor=facecolors)
        panel.add_feature(cartopy.feature.COASTLINE)
        gl = panel.gridlines(draw_labels=True)
        gl.xlabel_style['rotation']= 0
        gl.xlabel_style['ha']= 'center'
        gl.xlabel_style['va']= 'center'

    # add the colorbar for all plots
    print('Adding colorbar...')
    plt.tight_layout()
    cbar_ax = fig.add_axes([0.05, -0.05, 0.9, 0.04])
    plt.colorbar(mapper, cax=cbar_ax, label=cbar_label, orientation="horizontal")
    plt.suptitle(title, y=1.02, fontsize='x-large')

    # remove every unused subplot of the last row (there can be more than one)
    for spare in panels[n:]:
        fig.delaxes(spare)

# Generate Points for Mapping

In [None]:
# Overrides the N_POINTS value assigned in the Settings cell.
N_POINTS = 550
# Build the grid of sample points clipped to the region of interest; plot=True also draws it.
points_in_roi = generate_points_in_bounds(roi_shape, N_POINTS, plot=True)

# Sentinel 1 GRD

## Load and Process

In [None]:
# metadata export to analyse
filename = 'metadata/Sentinel-1_GRD_-50N_products.json'
# second underscore-separated token of the file name
product = filename.split('_')[1]

# engineering: parse footprints/dates with the MAX_DATE cut-off, then keep
# only the products that intersect the region of interest
df = filter_results_with_geojson(
    add_metadata(
        pd.read_json(filename, orient='index'),
        date_col='beginposition',
        max_date=MAX_DATE,
    ),
    roi_shape,
    plot=True,
)

In [None]:
# Product count and data volume per sensor mode / polarisation for all loaded products
summarise_s1(df)

In [None]:
# Inspect the first product record
df.iloc[0]

In [None]:
# Same summary restricted to 2022. summarise_s1 adds size columns to the frame
# it receives, so hand it an explicit copy rather than a filtered slice of df.
summarise_s1(df[df['year']==2022].copy())

In [None]:
# get the minimum latitude, read from the untransformed lon/lat footprints
# (df.geometry was reprojected by add_metadata, so its bounds are not in degrees)
df['geometry_4326'].apply(lambda footprint: footprint.bounds[1]).min()

In [None]:
# Monthly and annual product count / data volume tables
year_month_product_summary(df)
#df.groupby('sensoroperationalmode')['beginposition'].min()

In [None]:
title = 'Sentinel-1 Level 1 GRD - Weekly Products' 
plot_timeseries_products(df, title=title, stack_col='sat_id', date_col='beginposition',count_freq='7D', plot_freq='1M')

## EW (All)

### Annual

In [None]:
title = f"Sentinel-1 GRD EW (All Polarisation) Revisit Frequency by Year"
plot_multiple_frequency(
    df[df['sensoroperationalmode']=='EW'], 
    group='year', 
    title=title, 
    n_cols=4, 
    cbar_label='Revisit Count',
    shape=(1300,1300)
)
plt.show()

### Annual (2022)

In [None]:
title = f"Sentinel-1 GRD EW (All Polarisation) Revisit Frequency (2022)"
plot_frequency(
    df[(df['sensoroperationalmode']=='EW') & (df['year']==2022)], 
    title=title, 
    cbar_label='Revisit Count',
    shape=(1300,1300)
)
plt.show()

### Annual (2022) - Intersection based

In [None]:
#support of above
title = f"Sentinel-1 GRD EW (All Polarisation) Revisit Frequency (2022)"
cbar_label = 'Revisit Count'
intersections = calculate_valid_intersections(
    df[(df['sensoroperationalmode']=='EW') & (df['year']==2022)], 
    points_df=points_in_roi,
    date_col='beginposition')
plot_intersection_frequency(intersections, 
                                     'revisit_count', 
                                     title=title, 
                                     cbar_label=cbar_label,
                                     shape=(1000,1000))

### Monthly (2022)

In [None]:
title = f"Sentinel-1 GRD EW (All Polarisation) Revisit Frequency by Month (2022)"
plot_multiple_frequency(
    df[(df['sensoroperationalmode']=='EW') & (df['year']==2022)], 
    group='month_name', 
    sort_group='month',
    title=title, 
    n_cols=4,
    )
plt.show()

## IW (All Polarisation)

### Annual

In [None]:
title = f"Sentinel-1 GRD IW (All Polarisation) Revisit Frequency by Year"
plot_multiple_frequency(
    df[df['sensoroperationalmode']=='IW'], 
    group='year', 
    title=title, 
    n_cols=4, 
    cbar_label='Revisit Count'
)
plt.show()

### Monthly (2022)

In [None]:
title = f"Sentinel-1 GRD IW (All Polarisation) Revisit Frequency by Month (2022)"
plot_multiple_frequency(
    df[(df['sensoroperationalmode']=='IW') & (df['year']==2022)], 
    group='month_name', 
    sort_group='month',
    title=title, 
    n_cols=4
    )
plt.show()

## EW (HH HV Polarisation) - Polar Monitoring

### Annual

In [None]:
title = f"Sentinel-1 GRD EW (HH HV Polarisation) Revisit Frequency by Year"
plot_multiple_frequency(
    df[(df['sensoroperationalmode']=='EW') & (df['polarisationmode']=='HH HV')], 
    group='year', 
    title=title, 
    n_cols=4, 
    cbar_label='Revisit Count'
)
plt.show()

### Monthly

In [None]:
title = f"Sentinel-1 GRD EW (HH HV Polarisation) Revisit Frequency by Month (2022)"
plot_multiple_frequency(
    df[(df['sensoroperationalmode']=='EW') & (df['polarisationmode']=='HH HV') & (df['year']==2022)], 
    group='month_name', 
    sort_group='month',
    title=title, 
    n_cols=4, 
    cbar_label='Revisit Count'
)
plt.show()

## EW (HH Polarisation) - Polar Monitoring

### Annual

In [None]:
# EW products with HH-only polarisation, one revisit-frequency panel per year.
title = f"Sentinel-1 GRD EW (HH Polarisation) Revisit Frequency by Year"
plot_multiple_frequency(
    df[(df['sensoroperationalmode']=='EW') & (df['polarisationmode']=='HH')],
    group='year',
    title=title,
    n_cols=4,
    cbar_label='Revisit Count'
)
# Render explicitly, as every other frequency cell in this notebook does.
plt.show()

### Monthly

In [None]:
title = f"Sentinel-1 GRD EW (HH Polarisation) Revisit Frequency by Month (2022)"
plot_multiple_frequency(
    df[(df['sensoroperationalmode']=='EW') & (df['polarisationmode']=='HH') & (df['year']==2022)], 
    group='month_name', 
    sort_group='month',
    title=title, 
    n_cols=4, 
    cbar_label='Revisit Count'
)
plt.show()

## IW (HH Polarisation)

### Annual

In [None]:
title = f"Sentinel-1 GRD IW (HH Polarisation) Revisit Frequency by Year"
plot_multiple_frequency(
    df[(df['sensoroperationalmode']=='IW') & (df['polarisationmode']=='HH')], 
    group='year', 
    title=title, 
    n_cols=4, 
    cbar_label='Revisit Count'
)
plt.show()

### Monthly

In [None]:
title = f"Sentinel-1 GRD IW (HH Polarisation) Revisit Frequency by Month (2022)"
plot_multiple_frequency(
    df[(df['sensoroperationalmode']=='IW') & (df['polarisationmode']=='HH') & (df['year']==2022)], 
    group='month_name', 
    sort_group='month',
    title=title, 
    n_cols=4, 
    cbar_label='Revisit Count'
)
plt.show()

## IW (VV VH Polarisation)

### Annual

In [None]:
# IW products with VV VH polarisation, one revisit-frequency panel per year.
# The title names the same polarisation as the filter below ('VV VH').
title = f"Sentinel-1 GRD IW (VV VH Polarisation) Revisit Frequency by Year"
plot_multiple_frequency(
    df[(df['sensoroperationalmode']=='IW') & (df['polarisationmode']=='VV VH')],
    group='year',
    title=title,
    n_cols=4,
    cbar_label='Revisit Count'
)
plt.show()

### Monthly

In [None]:
# IW products with VV VH polarisation in 2022, one revisit-frequency panel per month.
# The title names the same polarisation as the filter below ('VV VH').
title = f"Sentinel-1 GRD IW (VV VH Polarisation) Revisit Frequency by Month (2022)"
plot_multiple_frequency(
    df[(df['sensoroperationalmode']=='IW') & (df['polarisationmode']=='VV VH') & (df['year']==2022)],
    group='month_name',
    sort_group='month',
    title=title,
    n_cols=4,
    cbar_label='Revisit Count'
)
plt.show()

## SM (All Polarisation)

### Annual

In [None]:
title = f"Sentinel-1 GRD SM (All Polarisation) Revisit Count by Year"
plot_multiple_frequency(
    df[(df['sensoroperationalmode']=='SM')], 
    group='year', 
    title=title, 
    n_cols=4, 
    cbar_label='Revisit Count'
)
plt.show()

## Product Comparison

### IW / EW / SM (2022)

In [None]:
# All 2022 GRD products, one revisit-frequency panel per sensor operational mode.
title = f"Sentinel-1 GRD Revisit Frequency by Acquisition Mode (2022)"
plot_multiple_frequency(
    df[(df['year']==2022)],
    group='sensoroperationalmode',
    title=title,
    n_cols=3,
    cbar_label='Revisit Count',
    shape=(1200,1200)
)
plt.show()

## IW Polarisation Comparison (2022)

In [None]:
title = f"Sentinel-1 GRD IW Revisit Frequency Comparison of Polarisation (2022)"
plot_multiple_frequency(
    df[(df['year']==2022) & (df['sensoroperationalmode']=='IW')], 
    group='polarisationmode', 
    title=title, 
    n_cols=3, 
    cbar_label='Revisit Count',
    shape=(1300,1300)
)
plt.show()

## EW Polarisation Comparison (2022)

In [None]:
title = f"Sentinel-1 GRD EW Revisit Frequency by Polarisation (2022)"
plot_multiple_frequency(
    df[(df['year']==2022) & (df['sensoroperationalmode']=='EW')], 
    group='polarisationmode', 
    title=title, 
    n_cols=2, 
    cbar_label='Revisit Count'
)
plt.show()

# Sentinel 1 SLC

## Load and Process

In [None]:
df = None
filename = f'metadata/Sentinel-1_SLC_-50N_products.json'
product = filename.split('_')[1]

# engineering
df = pd.read_json(filename, orient='index')
df = add_metadata(df, date_col='beginposition', max_date=MAX_DATE)
df = filter_results_with_geojson(df, roi_shape, plot=True)
#summarise_s1(df)
summarise_s1(df[df['year']==2022])

In [None]:
summarise_s1(df)

In [None]:
year_month_product_summary(df)

In [None]:
title = 'Sentinel-1 Level 1 SLC - Weekly Products' 
plot_timeseries_products(df, title=title, stack_col='sat_id', date_col='beginposition',count_freq='7D', plot_freq='1M')

## Product Comparison (2022)

### Acquisition Mode

In [None]:
# All 2022 SLC products, one intersection-based revisit panel per sensor operational mode.
title = f"Sentinel-1 SLC Revisit Frequency by Acquisition Mode (2022)"
cbar_label = 'Revisit Count'
intersections = calculate_valid_intersections(
    df[df['year']==2022],
    points_df=points_in_roi,
    date_col='beginposition')
plot_multiple_intersection_frequency(intersections,
                                     'revisit_count',
                                     group='sensoroperationalmode',
                                     n_cols=4, title=title, cbar_label=cbar_label,shape=(1100,1100))

### Polarisation (IW)

In [None]:
title = f"Sentinel-1 SLC IW Revisit Frequency by Polarisation (2022)"
cbar_label = 'Revisit Count'
intersections = calculate_valid_intersections(
    df[(df['year']==2022) & (df['sensoroperationalmode']=='IW')], 
    points_df=points_in_roi,
    date_col='beginposition')
plot_multiple_intersection_frequency(intersections, 
                                     'revisit_count', 
                                     group='polarisationmode', 
                                     n_cols=3, title=title, cbar_label=cbar_label,shape=(1100,1100))


## IW (All Polarisation)

### IW

In [None]:
title = f"Sentinel-1 SLC IW (All Polarisation) Revisit Frequency by Year"
cbar_label = 'Revisit Count'
intersections = calculate_valid_intersections(
    df[(df['sensoroperationalmode']=='IW')], 
    points_df=points_in_roi,
    date_col='beginposition')
plot_multiple_intersection_frequency(intersections, 
                                     'revisit_count', 
                                     group='year', 
                                     n_cols=4, title=title, cbar_label=cbar_label,shape=(1100,1100))

### IW (2022)

In [None]:
title = f"Sentinel-1 SLC IW (All Polarisation) Revisit Frequency (2022)"
cbar_label = 'Revisit Count'
# Recompute for 2022 only. Without this the plot reuses whatever `intersections`
# the last executed cell left behind (the all-years IW cell above), which
# contradicts the "(2022)" title.
intersections = calculate_valid_intersections(
    df[(df['sensoroperationalmode']=='IW') & (df['year']==2022)],
    points_df=points_in_roi,
    date_col='beginposition')
plot_intersection_frequency(intersections,
                            'revisit_count',
                            title=title,
                            cbar_label=cbar_label,
                            shape=(1300,1300))

# Sentinel-2 Level 1

## Load and Process

In [None]:
df = None
filename = f'metadata/Sentinel-2_S2MSI1C_-50N_products.json'
product = filename.split('_')[1]
with open(filename, 'r') as f:
     df = json.load(f)

# # engineering
df = pd.DataFrame.from_dict(df, orient='index')
df = add_metadata(df, date_col='beginposition', max_date=MAX_DATE)
print(list(df))
df = filter_results_with_geojson(df, roi_shape, plot=True)


In [None]:
m, y = year_month_product_summary(df)
y

In [None]:
plot_results_footprint_map(df.head(5000))

In [None]:
df['sizeGB'].mean()

In [None]:
# assign a cloud coverage category based on - https://landsat.usgs.gov/landsat-8-cloud-cover-assessment-validation-data
def _cloud_cover_category(cloud_pct):
    """Label a cloud-cover percentage as clear, cloudy or in between."""
    if cloud_pct < 35:
        return 'Clear (<35%)'
    if cloud_pct > 65:
        return 'Cloudy (>65%)'
    # Everything else, including exactly 35 and 65, lands in the middle band.
    return 'MidClouds (35-65%)'


df['cloud_cover_category'] = df['cloudcoverpercentage'].apply(_cloud_cover_category)

import pvlib
def add_sun_zenith(df, date_col='beginposition'):
    """Add a per-product solar zenith angle column to ``df``.

    The sun position is evaluated with pvlib at each footprint's lon/lat
    centroid (taken from ``geometry_4326``), at the time held in ``date_col``
    and at zero altitude. ``df`` is modified in place and also returned.

    Parameters
    ----------
    df : DataFrame
        Must contain a ``geometry_4326`` column of shapely geometries and a
        ``date_col`` column that ``pd.to_datetime`` can parse.
    date_col : str
        Name of the timestamp column the sun position is evaluated at.

    Returns
    -------
    DataFrame
        The same frame with ``centroid_x``, ``centroid_y`` and ``sun_zenith``
        columns added (``sun_zenith`` is pvlib's ``zenith`` output).
    """
    df[date_col] = pd.to_datetime(df[date_col])

    # Hand pvlib explicitly UTC-localised times. The column itself is left as
    # parsed so downstream comparisons against naive datetimes keep working.
    times = pd.DatetimeIndex(df[date_col])
    if times.tz is None:
        times = times.tz_localize('UTC')

    df['centroid_x'] = df['geometry_4326'].apply(lambda geom: geom.centroid.x)
    df['centroid_y'] = df['geometry_4326'].apply(lambda geom: geom.centroid.y)

    solar = pvlib.solarposition.get_solarposition(
        times,
        df['centroid_y'].to_numpy(),
        df['centroid_x'].to_numpy(),
        altitude=0,
    )

    # Assign by row position. Mapping back through a timestamp-keyed dict would
    # collapse products that share a timestamp onto a single zenith value even
    # though their centroids differ.
    df['sun_zenith'] = solar['zenith'].to_numpy()
    return df

# add sun zenith
df = add_sun_zenith(df, date_col='datatakesensingstart')

In [None]:
title = 'Sentinel-2 Level 1C - Weekly Products' 
plot_timeseries_products(df, title=title, stack_col='sat_id', date_col='beginposition',count_freq='7D', plot_freq='1M')

In [None]:
# Footprint centroid in lon/lat (geometry_4326 is the un-reprojected footprint).
df['centroid'] = df['geometry_4326'].apply(lambda x : x.centroid)
df['centroid_x'] = df['centroid'].apply(lambda x : x.x)
df['centroid_y'] = df['centroid'].apply(lambda x : x.y)

# 5-degree bands from -90 to -45; the labels drop the sign.
bins = [-90, -85, -80, -75, -70, -65, -60, -55, -50, -45]
labels = ['85-90', '80-85', '75-80', '70-75', '65-70', '60-65', '55-60', '50-55', 'less than 50']
# NOTE: despite its name, 'lon_category' bins centroid_y, i.e. the latitude of the centroid.
# include_lowest=True makes the first band closed at -90.
df['lon_category'] = pd.cut(x = df['centroid_y'], bins = bins, labels = labels, include_lowest = True)

In [None]:
df['sun_zenith_over_70'] = df['sun_zenith'] > 70
df['sun_zenith_over_88'] = df['sun_zenith'] > 88

In [None]:
zenith_by_lat = df[df['year'] ==2022].groupby('lon_category').agg(
    count=('sun_zenith_over_70','count'),
    sun_zenith_over_70=('sun_zenith_over_70','sum'),
    sun_zenith_over_88=('sun_zenith_over_88','sum'),
).reset_index()
zenith_by_lat

### Cloud Cover by Category

In [None]:
# Share of products falling in each cloud-cover category.
products_per_category = df.groupby(['cloud_cover_category'])['filename'].count()
products_per_category / products_per_category.sum()

### Sun Zenith Plot

In [None]:
# Histogram of the solar zenith angle over all products, with the 70 degree mark highlighted.
title = 'Sentinel 2 Level 1 Distribution of Product Sun Zenith Angles (2016-12 to 2023-06)'
ax = df['sun_zenith'].plot(kind='hist', title=title, bins=20, figsize=(10,5))
ax.axvline(x=70, color='red', linestyle='--')
#ax.annotate('Maximum SZA adhering to Sen2Cor\nLevel-2 Processing Assumptions (70°)', (71, 200_000), color='red')
ax.annotate('70°', (65, 250_000), color='red')
ax.set(
    xlabel='Sun Zenith Angle (SZA)',
    ylabel='Count of Products',
    xlim=[0,100],
)

# 2022 only: number of products above 70 degrees, then the total number of products.
zenith_2022 = df.loc[df['year']==2022, 'sun_zenith']
print((zenith_2022 > 70).sum())
print(len(zenith_2022))

In [None]:
df['sun_zenith_over_70'] = df['sun_zenith']>70
summary_22 = df[(df['year']==2022)].groupby(['year','month']).agg(
    count_total_products=('sun_zenith','count'),
    count_zenith_over_70=('sun_zenith_over_70','sum'),
    mean_zenith_value=('sun_zenith','mean'),
    median_zenith_value=('sun_zenith','median'),
).reset_index()
summary_22

## Product Count by MGRS Tile

### Annual

In [None]:
title = f"Sentinel-2 Level-1C by Year"
plot_multiple_product_count(df, n_cols=3, group='year', title=title, cbar_label='Count of Products')

### Annual (2022)

In [None]:
title = f"Sentinel-2 Level-1C Product Count by MGRS tile (2022)"
plot_mgrs_product_count(df[df['year']==2022], title=title, cbar_label='Count of Products')

### Monthly (2022)

In [None]:
title = f"Sentinel-2 Level-1C by Month (2022)"
plot_multiple_product_count(df[df['year']==2022], 
                            n_cols=4, 
                            group='month_name', 
                            title=title, cbar_label='Count of Products')


## Intersection based

### Annual Revisit Count (2022)

In [None]:
# plot the 'revisit_count' that calculate_valid_intersections returns for the
# 2022 products at the points_in_roi sample points
title = f'Sentinel-2 Level 1C Revisit Frequency (2022)'
cbar_label = 'Revisit Count'
# buffer(0) rebuilds every footprint; presumably here to repair invalid
# polygons before intersecting - TODO confirm
df.geometry = df.geometry.buffer(0)
intersections = calculate_valid_intersections(df[df['year']==2022], points_df=points_in_roi, date_col='beginposition')
plot_intersection_frequency(intersections, plot_var='revisit_count', title=title, cbar_label=cbar_label, shape=(1000,1000))
#plot_intersection_frequency(intersections, plot_var='revisit_count', title=title, cbar_label=cbar_label, shape=(220,220), force_max_val=100)

### Monthly (2022)

In [None]:
title = f"Sentinel-2 Level-1C Revisit Frequency by Month (2022)"
cbar_label = 'Revisit Count'
intersections = calculate_valid_intersections(
    df[(df['year']==2022)], 
    points_df=points_in_roi,
    date_col='beginposition')
plot_multiple_intersection_frequency(intersections, 
                                     'revisit_count', 
                                     group='month_name', 
                                     n_cols=4, 
                                     title=title, 
                                     cbar_label=cbar_label,shape=(1000,1000))

### Cloud Cover (2022)

In [None]:
# approximate cloud cover with overlapping products
title = f'Sentinel-2 Level 1C Approximate Cloud Cover (2022)'
cbar_label = 'Mean Observation Cloud Cover (%)'
# NOTE: the recompute below is disabled, so this plots whatever `intersections`
# the previously executed cell produced; run a full-2022 intersection cell first.
#intersections = calculate_valid_intersections(df[df['year']==2022], points_df=points_in_roi, date_col='beginposition')
plot_intersection_frequency(intersections, plot_var='mean_cloudcover', title=title, cbar_label=cbar_label, shape=(1000,1000), cmap='plasma')

### Cloudfree revisits (2022)

In [None]:
# revisit count using only 2022 products whose cloudcoverpercentage is below 35
title = f'Sentinel-2 Level 1C Predominantly Cloud Free (<35%) Observations (2022)'
cbar_label = 'Revisit Count'
intersections = calculate_valid_intersections(df[(df['year']==2022) & (df['cloudcoverpercentage']<35)], 
                                              points_df=points_in_roi, 
                                              date_col='beginposition')
plot_intersection_frequency(intersections, 
                            plot_var='revisit_count', 
                            title=title, 
                            cbar_label=cbar_label, 
                            shape=(1000,1000), 
                            cmap='plasma', 
                            fixed_vals=True)

# Sentinel-2 Level 2

## Load and Process

In [None]:
df = None
filename = f'metadata/Sentinel-2_S2MSI2A_-50N_products.json'
product = filename.split('_')[1]
with open(filename, 'r') as f:
     df = json.load(f)

# # engineering
df = pd.DataFrame.from_dict(df, orient='index')
df = add_metadata(df, date_col='beginposition', max_date=MAX_DATE)
print(list(df))
df = filter_results_with_geojson(df, roi_shape, plot=True)

In [None]:
m, y = year_month_product_summary(df)
y

In [None]:
df['sizeGB'].mean()

In [None]:
title = 'Sentinel-2 Level 2A - Weekly Products' 
plot_timeseries_products(df, title=title, stack_col='sat_id', date_col='beginposition',count_freq='7D', plot_freq='1M')

In [None]:
# assign a cloud coverage category based on - https://landsat.usgs.gov/landsat-8-cloud-cover-assessment-validation-data
def _cloud_cover_category(cloud_pct):
    """Label a cloud-cover percentage as clear, cloudy or in between."""
    if cloud_pct < 35:
        return 'Clear (<35%)'
    if cloud_pct > 65:
        return 'Cloudy (>65%)'
    # Everything else, including exactly 35 and 65, lands in the middle band.
    return 'MidClouds (35-65%)'


df['cloud_cover_category'] = df['cloudcoverpercentage'].apply(_cloud_cover_category)

In [None]:
# `df` holds the Sentinel-2 Level 2A products at this point (loaded at the top of
# this section), so the title must name that product rather than Sentinel-1 SLC.
title = 'Sentinel-2 Level 2A - Weekly Products'
plot_timeseries_products(df, title=title, stack_col='sat_id', date_col='beginposition',count_freq='7D', plot_freq='1M')

### Sun Zenith

In [None]:
# summary by year
df['illuminationzenithangle'].plot(kind='hist')
# flag products whose illumination zenith angle exceeds 70
df['illuminationzenithangle_over70'] = df['illuminationzenithangle'] > 70
summary = df.groupby('year').agg(
    count_total_products=('illuminationzenithangle','count'),
    count_zenith_over_70=('illuminationzenithangle_over70','sum'),
    mean_zenith_value=('illuminationzenithangle','mean'),
    median_zenith_value=('illuminationzenithangle','median'),
).reset_index()
# the ratio is rounded to 2 decimals before scaling, so percentages come out as whole numbers
summary['perc_products_zenith_over_70'] = 100*(summary['count_zenith_over_70']/summary['count_total_products']).round(2)
# NOTE(review): a bare expression in the middle of a cell is not displayed by the notebook
summary

# 2022 summary by month
df_2022 = df[df['year']==2022]
summary_22 = df_2022.groupby(['year','month']).agg(
    count_total_products=('illuminationzenithangle','count'),
    count_zenith_over_70=('illuminationzenithangle_over70','sum'),
    mean_zenith_value=('illuminationzenithangle','mean'),
    median_zenith_value=('illuminationzenithangle','median'),
).reset_index()

summary_22['perc_products_zenith_over_70'] = 100*(summary_22['count_zenith_over_70']/summary_22['count_total_products']).round(2)
# NOTE(review): the cell output is the annual summary restricted to 2022/2023;
# summary_22 is built but never shown here - confirm whether it was meant to be displayed
summary[summary['year'].isin([2022,2023])]

## Product Count by MGRS Tile

### Annual

In [None]:
title = f"Sentinel-2 Level-2A by Year"
plot_multiple_product_count(df, n_cols=3, group='year', title=title, cbar_label='Count of Products')

### Annual (2022)

In [None]:
title = f"Sentinel-2 Level-2A Product Count by MGRS tile (2022)"
plot_mgrs_product_count(df[df['year']==2022], title=title, cbar_label='Count of Products')

### Monthly (2022)

In [None]:
title = f"Sentinel-2 Level-2A by Month (2022)"
plot_multiple_product_count(df[df['year']==2022], 
                            n_cols=4, 
                            group='month_name', 
                            title=title, cbar_label='Count of Products')


## Intersection based

### Annual Revisit Count (2022)

In [None]:
# plot the 'revisit_count' that calculate_valid_intersections returns for the
# 2022 products at the points_in_roi sample points
title = f'Sentinel-2 Level 2A Revisit Frequency (2022)'
cbar_label = 'Revisit Count'
# buffer(0) rebuilds every footprint; presumably here to repair invalid
# polygons before intersecting - TODO confirm
df.geometry = df.geometry.buffer(0)
intersections = calculate_valid_intersections(df[df['year']==2022], points_df=points_in_roi, date_col='beginposition')
plot_intersection_frequency(intersections, plot_var='revisit_count', title=title, cbar_label=cbar_label, shape=(1000,1000))
#plot_intersection_frequency(intersections, plot_var='revisit_count', title=title, cbar_label=cbar_label, shape=(220,220), force_max_val=100)

### Cloud Cover (2022)

In [None]:
# approximate cloud cover with overlapping products
# NOTE: `intersections` is not recomputed here; this plots whatever the
# previously executed cell produced, so run the full-2022 revisit cell first.
title = f'Sentinel-2 Level 2A Approximate Cloud Cover (2022)'
cbar_label = 'Mean Observation Cloud Cover (%)'
plot_intersection_frequency(intersections, plot_var='mean_cloudcover', title=title, cbar_label=cbar_label, shape=(1000,1000), cmap='plasma')

### Cloudfree revisits (2022)

In [None]:
# revisit count using only 2022 products whose cloudcoverpercentage is below 35
title = f'Sentinel-2 Level 2A Predominantly Cloud Free (<35%) Observations (2022)'
cbar_label = 'Revisit Count'
intersections = calculate_valid_intersections(df[(df['year']==2022) & (df['cloudcoverpercentage']<35)], 
                                              points_df=points_in_roi, 
                                              date_col='beginposition')
plot_intersection_frequency(intersections, plot_var='revisit_count', title=title, cbar_label=cbar_label, shape=(1000,1000), cmap='plasma', fixed_vals=True)

# Sentinel-3 OLCI

In [None]:
df = None
filename = f'metadata/Sentinel-3_OLCI_-50N_products.json'
product = filename.split('_')[1]
with open(filename, 'r') as f:
     df = json.load(f)

# # engineering
df = pd.DataFrame.from_dict(df, orient='index')
df = add_metadata(df, date_col='beginposition', max_date=MAX_DATE, CRS=3031)
#print(list(df))
#df = filter_results_with_geojson(df, roi_shape, plot=True)

## Product Filtering


In [None]:
# both level 1 and level 2 products are in the same file
# products - https://sentinels.copernicus.eu/ca/web/sentinel/user-guides/sentinel-3-olci/processing-levels
# !! LFR/LRR seems to include coverage over water, seen in cop scihub
# revisit time - https://sentinels.copernicus.eu/ca/web/sentinel/user-guides/sentinel-3-olci/coverage
df[['productlevel','producttype']].value_counts()

In [None]:
#df = df[df['producttype']=='OL_2_LFR___'] # full resolution
df = df[df['producttype']=='OL_2_LRR___'] # reduced resolution

In [None]:
title = f'{filename} - Weekly Products' 
sf.plot_timeseries_products(df, title=title, stack_col='sat_id', date_col='beginposition',count_freq='7D', plot_freq='1M')

In [None]:
df_m_sum, df_y_sum = year_month_product_summary(df)
df_y_sum