In [None]:
import matplotlib.pyplot as plt
import datetime
import shapely.wkt
import geopandas as gpd
import cartopy.crs as ccrs
import cartopy
import rasterio
from rasterio.features import rasterize
import collections
import json
import os
import pandas as pd
import math
import matplotlib
import datetime
import numpy as np

import shared_functions as sf

from pyproj import Geod
geod = Geod(ellps="WGS84")
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

## Functions

In [None]:
def filter_results_with_geojson(df, filename, plot=False):
    """Keep only the products whose geometry intersects a region read from a file.

    Parameters
    ----------
    df : frame exposing a ``.geometry`` accessor (one footprint per product).
    filename : path passed to ``gpd.read_file``; only the FIRST feature in the
        file is used as the inclusion region for the filter.
    plot : when True, draw every geometry from the file on a PlateCarree map.

    Returns
    -------
    The filtered frame. The number of dropped products is printed.
    """
    region = gpd.read_file(filename)
    n_before = len(df)
    keep_mask = df.geometry.intersects(region.geometry.values[0])
    df = df[keep_mask]
    print(f'{n_before - len(df)} products have been removed')

    if not plot:
        return df

    # note: this changes the global default figure size for later plots too
    plt.rcParams["figure.figsize"] = [10,8]
    ax = plt.axes(
        projection=ccrs.PlateCarree(),
        title='Product Search Area - 50deg south excl. South America and Falkland Islands',
    )
    for background in (cartopy.feature.LAND, cartopy.feature.OCEAN):
        ax.add_feature(background)
    ax.add_geometries(region.geometry, crs=ccrs.PlateCarree(), alpha=0.7)
    return df

def get_land_gdf(plot=False):
    """Build a one-row GeoDataFrame holding the land inside the inclusion region.

    Reads the ``ne_10m_land`` shapefile and the 50-south inclusion GeoJSON,
    intersects the union of each, and reprojects the result from EPSG:4326 to
    EPSG:3031.

    Parameters
    ----------
    plot : when True, plot the resulting geometry (also sets the global
        default figure size to 5x4).

    Returns
    -------
    GeoDataFrame with a single ``geometry`` row in EPSG:3031.
    """
    land = gpd.read_file('shapefiles/ne_10m_land/ne_10m_land.shp')
    region = gpd.read_file('shapefiles/50south_excl_argentina_falkand_mid.geojson')

    # land restricted to the inclusion region (small islands etc. included)
    region_union = shapely.ops.unary_union(region.geometry)
    land_union = shapely.ops.unary_union(land.geometry)
    clipped_land = shapely.intersection(region_union, land_union)

    south_land = (
        gpd.GeoDataFrame({'geometry': [clipped_land]}, crs="EPSG:4326")
        .set_geometry('geometry')
        .to_crs(3031)
    )
    if plot:
        plt.rcParams["figure.figsize"] = [5,4]
        south_land.plot()
    return south_land

# Create the Dataset

In [None]:
def _size_to_mb(size_text):
    """Convert a size string such as '812.5 MB' or '1.6 GB' to megabytes (float).

    A missing or unrecognised unit leaves the number unscaled.
    """
    number, _, unit = str(size_text).strip().partition(' ')
    # decimal multiples, consistent with the /1E6 (MB -> TB) scaling used for the volume plots
    scale = {'KB': 1e-3, 'MB': 1.0, 'GB': 1e3, 'TB': 1e6}
    return float(number) * scale.get(unit.strip().upper(), 1.0)


def create_dataset(data):
    """Load per-mission product metadata JSON files into one harmonised GeoDataFrame.

    The pre-processing applied to each file is chosen from substrings of its
    filename ('Sentinel', 'landsat', 'SPOT', 'ERS', 'RADARSAT', 'JERS', 'ALOS',
    'TERRA'/'AQUA', 'SUOMI'/'JPSS').

    Parameters
    ----------
    data : iterable of str
        Paths of the JSON metadata files to load.

    Returns
    -------
    GeoDataFrame (EPSG:4326) with the harmonised columns ``id``, ``geometry``,
    ``time``, ``satellite``, ``sensoroperationalmode``, ``polarisationmode``,
    ``producttype``, ``size``, ``cloudcover``, ``uuid`` plus the derived
    ``month``, ``month_name``, ``year`` and ``mission``. Only products acquired
    before 12 June 2023 are kept. Columns a source does not provide are filled
    with ''.
    """
    # per-source renames onto the common column names
    s_rename = {'filename':'id',
                'footprint':'geometry',
                'beginposition':'time',
                'platformname': 'satellite',
                'cloudcoverpercentage' : 'cloudcover'}
    lsat_rename = {
        'landsat_product_id':'id',
        'spatial_coverage':'geometry',
        'start_time':'time',
        'data_type':'producttype',
        'cloud_cover': 'cloudcover'
    }
    spot_rename = {
        'content.properties.SceneID':'id',
        'content.properties.PolygonWKT':'geometry',
        'content.properties.DataDate':'time',
        'content.properties.PlatformName':'satellite',
        'content.properties.DataFileSize':'size',
    }
    cmr_rename = {
        'time_start': 'time',
        'size_MB' : 'size',
    }

    # final set of cols we want
    all_cols = [
        'id',
        'geometry',
        'time',
        'satellite',
        'sensoroperationalmode',
        'polarisationmode',
        'producttype',
        'size',
        'cloudcover',
        'uuid',
    ]

    def _prepare_cmr(frame, satellite, **preprocess_kwargs):
        # shared steps for the sources handled by sf.preprocess_cmr_df
        frame = sf.preprocess_cmr_df(frame, crs=4326, lon_first=True, **preprocess_kwargs)
        frame = frame.rename(cmr_rename, axis='columns')
        frame['satellite'] = satellite
        return frame

    # collect the per-file frames and concatenate once at the end, rather than
    # re-copying the growing frame on every iteration
    frames = []

    for filename in data:

        print(filename)
        with open(filename, 'r') as f:
            raw = json.load(f)
        if 'SPOT' in filename:
            df_sat = pd.json_normalize(list(raw.values()))
        else:
            df_sat = pd.DataFrame.from_dict(raw, orient='index')
        del raw  # release the parsed JSON before the next file is loaded

        if 'Sentinel' in filename:
            df_sat = df_sat.rename(s_rename, axis='columns')
            # sizes arrive as '<number> <unit>'; normalise to MB (previously GB
            # values were stored unscaled alongside MB values)
            df_sat['size'] = df_sat['size'].apply(_size_to_mb)
            df_sat['time'] = df_sat['time'].apply(lambda x : str(x).split('.')[0])
            df_sat['time'] = pd.to_datetime(df_sat['time'], format="%Y-%m-%d %H:%M:%S")
            df_sat['geometry'] = df_sat['geometry'].apply(lambda x : shapely.wkt.loads(x))
        if 'landsat' in filename:
            # label satellite
            df_sat['satellite'] = df_sat['satellite'].apply(lambda x : 'Landsat ' + str(x) if 'LAND' not in str(x) else 'Landsat ' + x.split('_')[-1])
            # fixed per-product size assigned per satellite (same unit as the other sources' size column)
            df_sat['size'] = df_sat['satellite'].map({'Landsat 8':919,
                                            'Landsat 9':919,
                                            'Landsat 7':235,
                                            'Landsat 5':150,
                                            'Landsat 4':150})

            df_sat = df_sat.rename(lsat_rename, axis='columns')
            df_sat['time'] = df_sat['time'].apply(lambda x : str(x).split('.')[0])
            df_sat['time'] = pd.to_datetime(df_sat['time'], format="%Y-%m-%d %H:%M:%S")
            df_sat['geometry'] = df_sat['geometry'].apply(lambda x : shapely.wkt.loads(x))
        if 'SPOT' in filename:
            df_sat = df_sat.rename(spot_rename, axis='columns')
            df_sat['time'] = pd.to_datetime(df_sat['time'])
            df_sat['size'] = df_sat['size']/1E6
            df_sat['geometry'] = df_sat['geometry'].apply(lambda x : shapely.wkt.loads(x))
        # NOTE(review): 'ERS' is also a substring of 'JERS', so JERS files pass
        # through this branch AND the JERS branch below. The JERS branch's
        # time_start='time' argument and duplicate-column drop appear to
        # compensate for that; the order is kept as-is until
        # sf.preprocess_cmr_df can be checked.
        if 'ERS' in filename:
            df_sat = _prepare_cmr(df_sat, 'ERS-1' if 'ERS-1' in filename else 'ERS-2')
        if 'RADARSAT' in filename:
            df_sat = _prepare_cmr(df_sat, 'RADARSAT-1')
        if 'JERS' in filename:
            df_sat = _prepare_cmr(df_sat, 'JERS-1', time_start='time')
            df_sat = df_sat.loc[:,~df_sat.columns.duplicated()].copy() # drop duplicated size column
        if 'ALOS' in filename:
            df_sat = _prepare_cmr(df_sat, 'ALOS-1')
        if (('TERRA' in filename) or ('AQUA' in filename)):
            df_sat = _prepare_cmr(df_sat, 'TERRA' if 'TERRA' in filename else 'AQUA')
        if (('SUOMI' in filename) or ('JPSS' in filename)):
            df_sat = _prepare_cmr(df_sat, 'SUOMINPP' if 'SUOMI' in filename else 'JPSS1')

        # make sure every harmonised column exists before selecting them
        for col in all_cols:
            if col not in df_sat.columns:
                df_sat[col] = ''

        frames.append(df_sat[all_cols])

    if frames:
        df = pd.concat(frames, axis=0, ignore_index=True)
    else:
        df = pd.DataFrame(columns=all_cols)

    # data engineering
    df['time'] = df['time'].apply(lambda x : x.tz_localize(None))
    MAX_DATE = datetime.datetime.strptime('12/06/23', '%d/%m/%y')
    # .copy() so the column assignments below act on an independent frame
    df = df[df['time']<MAX_DATE].copy() #filter for date
    df['size'] =  df['size'].astype(float)
    df['month'] = df['time'].dt.month
    df['month_name'] = df['time'].dt.month_name()
    df['year'] = df['time'].dt.year
    df = gpd.GeoDataFrame(df, geometry='geometry', crs="EPSG:4326")

    # add mission (satellites not listed here get NaN)
    df['mission'] = df['satellite'].map({
        'Sentinel-2' : 'Sentinel-2', 
        'Sentinel-1': 'Sentinel-1', 
        'Sentinel-1 (IW/EW)': 'Sentinel-1 (IW/EW)', 
        'Landsat 8': 'Landsat 8/9', 
        'Landsat 9': 'Landsat 8/9', 
        'Landsat 7': 'Landsat 7', 
        'Landsat 4': 'Landsat 4', 
        'Landsat 5': 'Landsat 5', 
        'SPOT1': 'SPOT 1-5',
        'SPOT2': 'SPOT 1-5',
        'SPOT3': 'SPOT 1-5',
        'SPOT4': 'SPOT 1-5',
        'SPOT5': 'SPOT 1-5',
        'ERS-1': 'ERS', 
        'ERS-2': 'ERS', 
        'RADARSAT-1': 'RADARSAT-1',
        'JERS-1':'JERS-1',
        'ALOS-1':'ALOS-1 (PALSAR)',
        'TERRA':'MODIS',
        'AQUA':'MODIS',
        'SUOMINPP':'VIIRS',
        'JPSS1':'VIIRS',
    })

    return df

In [None]:
# metadata files (one per mission / product type) used for the summary plots below
data = [
    "metadata/Sentinel-2_S2MSI1C_-50N_products.json",
    "metadata/Sentinel-1_GRD_-50N_products.json",
    "metadata/Sentinel-1_SLC_-50N_products.json",
    "metadata/landsat_ot_c2_l1_-50N_products.json",
    "metadata/landsat_etm_c2_l1_-50N_products.json",
    "metadata/landsat_tm_c2_l1_-50N_products.json",
    "metadata/SPOT_L1A.json",
    "metadata/ERS-1_LEVEL1_-50N_products.json",
    "metadata/ERS-2_LEVEL1_-50N_products.json",
    "metadata/RADARSAT-1_LEVEL1_-50N_products.json",
    "metadata/JERS-1_LEVEL1_-50N_products.json",
    "metadata/ALOS_PALSAR_LEVEL1.5_-50N_products.json",
]

# Every following cell reads `df`, so it has to be built here for the notebook
# to run from a fresh kernel (this step can be slow for large metadata files).
df = create_dataset(data)

In [None]:
# values of the 'satellite' column treated as SAR; used below to split SAR from optical products
SAR_sats = ['Sentinel-1', 'ERS-1','ERS-2','RADARSAT-1', 'Sentinel-1 (IW/EW)', 'JERS-1', 'ALOS-1']

In [None]:
# filter out the wave (WV) mode SAR products; .copy() so the assignment below
# acts on an independent frame rather than a slice of the original
df = df[df['sensoroperationalmode']!='WV'].copy()
# relabel Sentinel-1 to make explicit that only IW/EW products are considered
df.loc[df['satellite'] == 'Sentinel-1', 'satellite'] = 'Sentinel-1 (IW/EW)'

In [None]:
# keep only products intersecting the 50-degrees-south inclusion region (and plot that region)
roi_shape = 'shapefiles/50south_excl_argentina_falkand_mid.geojson'
df = filter_results_with_geojson(df, roi_shape, plot=True)
# reproject to Antarctic coordinates (EPSG:3031) - solves issues at the antimeridian
df = df.to_crs(3031)

In [None]:
# footprint area in the projected CRS, scaled by 1E6 to km2
df['area'] = df.geometry.area/1E6 #km2
# axis-aligned bounding box of each footprint; the coverage plots union these boxes
from shapely.geometry import box
df['bounds'] = df.geometry.apply(lambda x : box(*x.bounds))

In [None]:
# land inside the inclusion region (EPSG:3031); used by the land/sea split cell below
south_land = get_land_gdf(plot=True)

In [None]:
# quick visual check: plot 100 randomly chosen footprints (unseeded, so the picture changes per run)
df.sample(100).plot()

In [None]:
# test some land/sea splitting on a sample of Sentinel-2 products
#t = df[(df['satellite']=='Sentinel-1') & (df['sensoroperationalmode']=='EW')].sample(10_000)
s2_products = df[(df['satellite']=='Sentinel-2')]
# cap the sample size at the number of rows available and seed it so the figure is reproducible
t = s2_products.sample(min(5_000, len(s2_products)), random_state=0)
# intersect with the single land geometry (south_land has one row) so that
# every product yields exactly one geometry
land_geom = south_land.geometry.iloc[0]
t['land_bounds'] = t['bounds'].apply(lambda x : x.buffer(0).intersection(land_geom))
t['ocean_bounds'] = t.apply(lambda x : x['bounds'].buffer(0).difference(x['land_bounds'].buffer(0)), axis=1)
# t['ocean_bounds'] = t['bounds'].apply(lambda x : south_intsct(x,south_land.geometry))
t['land_area'] = t['land_bounds'].apply(lambda x : x.area/1E6) #km2
t['ocean_area'] = t['ocean_bounds'].apply(lambda x : x.area/1E6) #km2
ax = plt.axes(projection=ccrs.SouthPolarStereo())
west, east, south, north = -180, 180, -90, -50
ax.set_extent((west, east, south, north+1), ccrs.PlateCarree())
ax.add_feature(cartopy.feature.LAND)
ax.add_feature(cartopy.feature.OCEAN)
ax.gridlines(draw_labels=True)
ax.add_geometries(t.head(5000)['ocean_bounds'], crs=ccrs.SouthPolarStereo(), alpha=0.6, color='blue', linewidth=0.1)
ax.add_geometries(t.head(5000)['land_bounds'], crs=ccrs.SouthPolarStereo(), alpha=0.6, color='green', linewidth=0.1)
#ax.add_feature(cartopy.feature.COASTLINE)

# Summary Volume and Area Coverage Plots

In [None]:
def plot_land_coverage(df, ax=False, title='', group='satellite', area_freq='14D',):
    """Plot, per group, the land area covered by products in each time bin.

    For every (time bin, group) pair the products' ``bounds`` polygons are
    unioned, intersected with the land geometry from ``get_land_gdf()`` and the
    resulting area (divided by 1E6, labelled km2) is drawn as one line per
    group. Bins without products are plotted as 0.

    Parameters
    ----------
    df : frame with a datetime ``time`` column, a ``bounds`` column of
        polygons and the ``group`` column. It is not modified.
    ax : matplotlib Axes to draw on; a new 15x4 figure is created when omitted.
    title : axes title.
    group : column whose values each get their own line (e.g. 'satellite', 'mission').
    area_freq : pandas frequency string used to bin ``time``.
    """
    # work on a narrow copy so the caller's frame does not gain a 'round_time' column
    products = df[['time', group, 'bounds']].copy()
    products['round_time'] = products['time'].dt.round(area_freq)
    # one entry per time bin; rounding a daily range repeats each bin, hence unique()
    idx = pd.date_range(products['round_time'].min(), products['round_time'].max()).round(area_freq).unique() # fill big gaps with 0

    # single land geometry (get_land_gdf returns a one-row frame)
    land_geom = get_land_gdf().geometry.iloc[0]

    sat_coverage = (products
                    .groupby(['round_time',group]).apply(
            lambda x: shapely.ops.unary_union(x['bounds']))
            )
    sat_coverage = sat_coverage.reset_index().rename(columns={0:'polygon'})
    sat_coverage['land_poly'] = sat_coverage['polygon'].apply(lambda x : x.buffer(0).intersection(land_geom))
    sat_coverage['ocean_poly'] = sat_coverage.apply(lambda x : x['polygon'].buffer(0).difference(x['land_poly'].buffer(0)), axis=1)
    sat_coverage['all_area'] = sat_coverage['polygon'].apply(lambda x : x.area)/1E6
    sat_coverage['land_area'] = sat_coverage['land_poly'].apply(lambda x : x.area)/1E6
    sat_coverage['ocean_area'] = sat_coverage['ocean_poly'].apply(lambda x : x.area)/1E6

    if ax is None or ax is False:
        fig, ax = plt.subplots(1,1,figsize=(15,4))
    ax.axhline(y=14_200_000, color='gray', linestyle='--', alpha=0.5)
    # anchor the label at the third bin, or the last one when there are fewer than three
    label_pos = min(2, len(sat_coverage) - 1)
    ax.annotate('   Antarctica land area (14.2 million km2)', (sat_coverage.round_time.iloc[label_pos], 13_200_000), color='black', alpha=0.5, fontsize=11)
    for sat in sat_coverage[group].unique():
        pltdf = sat_coverage[sat_coverage[group]==sat].set_index('round_time')
        pltdf = pltdf.reindex(idx, fill_value=0)
        ax.plot(pltdf.index, pltdf.land_area, label=f'{sat}', linewidth=1.2, alpha=0.9)
    ax.set_ylim([0,14_200_000*1.1])
    ax.set_xlim([min(sat_coverage.round_time), max(sat_coverage.round_time)])
    leg_title = 'Satellite Mission' if group == 'mission' else group
    ax.legend(loc='upper right', title=leg_title, bbox_to_anchor=(0.13, 0.8)) #optical 1984
    #ax.legend(loc='upper right', title=leg_title, bbox_to_anchor=(0.98, 0.9)) # SAR
    ax.set_ylabel(f'area covered (km2)')
    # title the axes that were drawn on, not whichever axes happen to be current
    ax.set_title(title)

def _coverage_by_period(products, period_index):
    """Union the 'bounds' polygons of ``products`` per 'area_round_time' bin.

    Returns ``(coverage, area)``: the per-bin union geometries, and their area
    divided by 1E6 reindexed onto ``period_index`` with missing bins set to 0.
    """
    coverage = products.groupby('area_round_time').apply(
        lambda x: shapely.ops.unary_union(x['bounds']))
    area = coverage.apply(lambda geom: geom.area)/1E6
    return coverage, area.reindex(period_index, fill_value=0)


def _land_ocean_area(coverage, land_geom, period_index):
    """Split per-bin coverage geometries into land and non-land area series.

    Returns ``(land_area, ocean_area)``, both divided by 1E6 and reindexed onto
    ``period_index`` with missing bins set to 0.
    """
    parts = coverage.to_frame(name='all_area')
    parts['land_area'] = parts['all_area'].apply(lambda geom: geom.buffer(0).intersection(land_geom))
    parts['ocean_area'] = parts.apply(
        lambda row: row['all_area'].buffer(0).difference(row['land_area'].buffer(0)), axis=1)
    land_area = parts['land_area'].apply(lambda geom: geom.area)/1E6
    ocean_area = parts['ocean_area'].apply(lambda geom: geom.area)/1E6
    return (land_area.reindex(period_index, fill_value=0),
            ocean_area.reindex(period_index, fill_value=0))


def plot_timeseries_products(df, 
                             title='',
                             stack_col='satellite', 
                             date_col='time',
                             count_freq='7D', 
                             plot_freq='1M', 
                             area_freq='12D', 
                             vol=True, 
                             split_area=False,
                             split_instrument=True,
                             log_axis=False):
    """Plot area-coverage time series above a stacked bar chart of product volume or count.

    Parameters
    ----------
    df : frame with ``date_col`` (datetime), ``stack_col``, 'satellite',
        'bounds' (polygons) and 'size' (when ``vol``) or 'id' (otherwise).
        It is not modified.
    title : figure suptitle.
    stack_col : column whose values form the stacked bars.
    date_col : datetime column used for all binning.
    count_freq : bin width for summing sizes / counting products.
    plot_freq : resampling frequency of the bars (the max per period is shown).
    area_freq : bin width for the area-coverage lines.
    vol : True plots summed 'size' / 1E6 (labelled TB); False plots product counts.
    split_area : True adds a second area panel and splits coverage into land and sea & ice.
    split_instrument : with ``split_area``, True gives one panel per instrument
        type (optical, SAR); False gives one panel per surface (land, sea & ice).
    log_axis : use a log y-axis limited to [0.01, 10] for the bar chart.

    Returns
    -------
    (opt_area, c) : the optical area series (one value per area bin) and the
    resampled frame behind the bar chart.
    """
    import seaborn as sns
    import matplotlib.ticker as ticker
    sns.set_theme()

    if split_area:
        fig, ax = plt.subplots(3,1,figsize=(15,9), gridspec_kw={'height_ratios': [2, 2, 4]})
    else:
        fig, ax = plt.subplots(2,1,figsize=(15,9), gridspec_kw={'height_ratios': [2, 4]})

    # list of SAR sats
    SAR_sats = ['Sentinel-1', 'ERS-1','ERS-2','RADARSAT-1', 'Sentinel-1 (IW/EW)','JERS-1', 'ALOS-1']
    print('SAR sats')
    print(SAR_sats)

    # narrow copy: keeps the caller's frame free of the helper columns added below
    value_col = 'size' if vol else 'id'
    needed_cols = list(dict.fromkeys([date_col, stack_col, 'satellite', 'bounds', value_col]))
    df = df[needed_cols].copy()

    #product count by count freq
    df['round_time'] = df[date_col].dt.round(count_freq)
    df['area_round_time'] = df[date_col].dt.round(area_freq)
    # one entry per area bin; rounding a daily range repeats each bin, hence unique()
    area_idx = pd.date_range(df['area_round_time'].min(), df['area_round_time'].max()).round(area_freq).unique() # fill big gaps with 0
    if not vol:
        # count
        c = df[['round_time',stack_col,'id']].groupby(['round_time',stack_col]).count().reset_index()
        c = c.pivot(index='round_time', columns=[stack_col], values='id').fillna(0)
    else:
        # size sum
        c = df[['round_time',stack_col,'size']].groupby(['round_time',stack_col]).sum().reset_index()
        c = c.pivot(index='round_time', columns=[stack_col], values='size').fillna(0)/1E6 # TB
    #upscale to plot fewer
    c = c.resample(plot_freq).max()

    # area covered by optical and by SAR imagery per area bin
    is_sar = df['satellite'].isin(SAR_sats)
    opt_coverage, opt_area = _coverage_by_period(df[~is_sar], area_idx)
    sar_coverage, sar_area = _coverage_by_period(df[is_sar], area_idx)

    if split_area:
        # single land geometry, loaded once and shared by both instrument types
        land_geom = get_land_gdf().geometry.iloc[0]
        opt_land_area, opt_ocean_area = _land_ocean_area(opt_coverage, land_geom, area_idx)
        sar_land_area, sar_ocean_area = _land_ocean_area(sar_coverage, land_geom, area_idx)

    # create arrays to iterate through for area plots
    if not split_area:
        labels = [['Optical','SAR IW/EW']]
        area_dfs = [[opt_area, sar_area]] 
        colors = [['red','orange']]
        linestyles = [['solid','solid']] 
        legend_titles = ['Instrument'] 
        plt_titles = ['']
    else:
        if split_instrument:
            # split optical and sar into two different plots
            labels = [['Land', 'Sea & Ice'],['Land', 'Sea & Ice']]
            area_dfs = [[opt_land_area, opt_ocean_area], [sar_land_area, sar_ocean_area]]
            colors = [['green', 'blue'],['green', 'blue']]
            linestyles = [['solid', 'solid'],['dashed', 'dashed']]
            legend_titles = ['Area Covered','Area Covered']
            plt_titles = ['Optical satellite coverage of the Antarctic region (50 degrees south excl. South America & Falkland Islands)',
                          'SAR satellite coverage of the Antarctic region (50 degrees south excl. South America & Falkland Islands)']
        else:
            # split land and sea into two different plots
            labels = [['Optical', 'SAR (EW/IW)'],['Optical', 'SAR (EW/IW)']]
            area_dfs = [[opt_land_area, sar_land_area], [opt_ocean_area, sar_ocean_area]]
            colors = [['green', 'green'],['blue', 'blue']]
            linestyles = [['solid', 'dashed'],['solid', 'dashed']]
            legend_titles = ['Instrument','Instrument']
            plt_titles = ['Land area coverage by instrument','Sea & Ice area coverage by instrument']

    # one area panel per entry of area_dfs, each with two lines
    for i, dfs in enumerate(area_dfs):
        panel = ax[i]
        panel.axhline(y=14_200_000, color='gray', linestyle='--', alpha=0.5)
        panel.annotate('Antarctica land area (14.2 million km2)', (opt_area.index[int(len(opt_area)*0.3)], 15_200_000), color='black', alpha=0.5, fontsize=11)
        for series, label, color, style in zip(dfs, labels[i], colors[i], linestyles[i]):
            panel.plot(series.index, series.values, color=color, label=f'{label}', linewidth=1, alpha=0.9, linestyle=style)
        panel.set_ylim([0,max(dfs[0].max(),dfs[1].max(), 14_200_000)*1.2])
        panel.legend(loc='upper left', title=legend_titles[i])
        panel.set_ylabel(f'area covered (km2)')
        panel.set_xlim([min(c.index), max(c.index)])
        panel.set_title(plt_titles[i])
        panel.yaxis.set_major_formatter(ticker.FormatStrFormatter('%.1e'))

    # stacked bars go in the last panel
    plt_num = 2 if split_area else 1
    bars = ax[plt_num]
    c.plot(kind='bar', stacked=True, width=0.9, ax=bars)
    # label only the first bar of each year (the very first bar stays unlabelled)
    ticklabels = [''] + [
        '' if c.index[i].year == c.index[i-1].year else c.index[i].year
        for i in range(1, len(c.index))
    ]
    bars.xaxis.set_major_formatter(ticker.FixedFormatter(ticklabels))
    bars.set_xlabel('')
    if not vol:
        bars.set_ylabel(f'products per week')
        bars.set_title('Number of data products per week for key satellites')
        bars.yaxis.set_major_formatter(ticker.FormatStrFormatter('%.0f'))
    else:
        bars.set_ylabel(f'data volume (TB) per week')
        bars.set_title('Data volume (TB) per week by satellite')
        bars.yaxis.set_major_formatter(ticker.FormatStrFormatter('%.1f'))
    if log_axis:
        bars.set_yscale('log')
        bars.set_ylim([0.01, 10])
        bars.yaxis.set_major_formatter(ticker.FormatStrFormatter('%.2f'))
    bars.set_xticklabels(ticklabels, rotation = 45, fontsize='10')
    bars.legend(loc='upper left', title='Satellite')
    plt.suptitle(title,y=0.99)
    plt.tight_layout()
    plt.show()
    return opt_area, c

## Split Graph by Instrument

In [None]:
# Products after 1992 excluding Landsat 4: land vs sea & ice coverage per instrument
# type (60-day area bins), with data volume stacked by mission.
a = plot_timeseries_products(
    df[(df['year']>1992) & (df['satellite']!='Landsat 4')], #[~(df['satellite'].isin(['Landsat 9']))]
    stack_col = 'mission',
    count_freq='7D', 
    plot_freq='3M',
    area_freq='60D',
    title='Optical and SAR coverage for key satellites over the Antarctic region (50 degrees south excl. South America & Falkland Islands)',
    vol=True,
    split_area=True,
    split_instrument=True,
    log_axis=False
    )

## After 1984 - Split Graph by Instrument (log-scaled volume axis)

In [None]:
# Products after 1984 excluding Landsat 4: same split-by-instrument layout with
# 14-day area bins, volume stacked by satellite (default) on a log axis.
a = plot_timeseries_products(
    df[(df['year']>1984) 
    & (df['satellite']!='Landsat 4')
    #& (~(df['satellite'].isin(SAR_sats)))
    ], #[~(df['satellite'].isin(['Landsat 9']))]
    count_freq='7D', 
    plot_freq='3M',
    area_freq='14D',
    title='Optical and SAR coverage for key satellites over the Antarctic region (50 degrees south excl. South America & Falkland Islands)',
    vol=True,
    split_area=True,
    split_instrument=True,
    log_axis=True
    )

## After 1998 - Split Graph by Land/Sea

In [None]:
# Products after 1998 excluding Landsat 4: split_instrument=False gives one panel
# for land and one for sea & ice, each comparing optical against SAR.
a = plot_timeseries_products(
    df[(df['year']>1998) & (df['satellite']!='Landsat 4')], #[~(df['satellite'].isin(['Landsat 9']))]
    count_freq='7D', 
    plot_freq='3M',
    area_freq='12D',
    title='Optical and SAR coverage for key satellites over the Antarctic region (50 degrees south excl. South America & Falkland Islands)',
    vol=True,
    split_area=True,
    split_instrument=False
    )

## Full Time Coverage

In [None]:
# Whole dataset, no year or satellite filter; land / sea & ice split per instrument type.
a = plot_timeseries_products(
    df, #[~(df['satellite'].isin(['Landsat 9']))]
    count_freq='7D', 
    plot_freq='3M',
    area_freq='12D',
    title='Optical and SAR coverage for key satellites over the Antarctic region (50 degrees south excl. South America & Falkland Islands)',
    vol=True,
    split_area=True,
    split_instrument=True
    )

## Total Area Coverage no Land/Sea Split

In [None]:
# Products after 1998 excluding Landsat 4; split_area is left at its default (False),
# so a single area panel compares total optical and SAR coverage.
a = plot_timeseries_products(
    df[(df['year']>1998) & ((df['satellite']!='Landsat 4'))], #[~(df['satellite'].isin(['Landsat 9']))]
    count_freq='7D', 
    plot_freq='3M',
    area_freq='12D',
    title='Optical and SAR coverage for key satellites over the Antarctic region (50 degrees south excl. South America & Falkland Islands)',
    vol=True,
    #split_area=True
    )

# Optical Land Coverage by Satellite

In [None]:
# land coverage per satellite (default grouping) for all products after 1998
title = 'Land coverage of key satellites over the Antarctic region (50 degrees south excl. South America & Falkland Islands)'
plot_land_coverage(df[(df['year'] > 1998)],title=title)

In [None]:
# land coverage per mission for non-SAR products after 1984, excluding Landsat 4
title = 'Land coverage of key optical satellites missions over the Antarctic region (50 degrees south excl. South America & Falkland Islands)'
plot_land_coverage(
    df[
        (df['year'] > 1984)
        & (~df['satellite'].isin(SAR_sats))
        & (df['satellite']!='Landsat 4')
        #& (df['satellite']=='Sentinel-2')
        ]
    ,title=title
    , group='mission')

In [None]:
# render any figure still pending from the cell above
plt.show()

# SAR Coverage of Land

In [None]:
# land coverage per mission for SAR products after 1990, using 60-day bins
title = 'Land coverage of key SAR satellite missions over the Antarctic region (50 degrees south excl. South America & Falkland Islands)'
plot_land_coverage(
    df[
        (df['year'] > 1990)
        & (df['satellite'].isin(SAR_sats))
        #& (df['satellite']!='Landsat 4')
        #& (df['satellite']=='Sentinel-2')
        ]
    ,title=title
    ,group='mission',
    area_freq='60D')

In [None]:
# same SAR selection as the previous cell, but with 14-day bins
title = 'Land coverage of key SAR satellite missions over the Antarctic region (50 degrees south excl. South America & Falkland Islands)'
plot_land_coverage(
    df[
        (df['year'] > 1990)
        & (df['satellite'].isin(SAR_sats))
        #& (df['satellite']!='Landsat 4')
        #& (df['satellite']=='Sentinel-2')
        ]
    ,title=title
    ,group='mission',
    area_freq='14D')

In [None]:
# land coverage per satellite for the whole, unfiltered dataset
# NOTE(review): `title` is whatever the last executed cell assigned (the SAR-missions
# title in notebook order), although this call plots every satellite - confirm the intended title.
plot_land_coverage(df,title=title)

In [None]:
# calls the shared_functions version of plot_timeseries_products (not the one defined
# in this notebook) on products after 1984
sf.plot_timeseries_products(df[df['year']>1984], 
	title=f'Weekly Products' ,
	stack_col='satellite', 
	date_col='time',
	count_freq='7D', 
    plot_freq='2M')

In [None]:
# display the Sentinel-2 rows after 1984 (the SAR exclusion is redundant with the
# equality test, since 'Sentinel-2' is not in SAR_sats)
df[
        (df['year'] > 1984)
        & (~df['satellite'].isin(SAR_sats))
        & (df['satellite']=='Sentinel-2')
        ]

# Overlapping passes

In [None]:
# drop the reference to the previous dataset before loading the next one
df = None

# source metadata files behind the cached parquet file loaded below
data = [
    "metadata/AQUA_MYD01_-50N_products.json",
    "metadata/TERRA_MOD01_-50N_products.json",
    "metadata/JPSS1_VJ103IMG_-50N_products.json",
    "metadata/SUOMINPP_VNP03IMG_-50N_products.json",
    "metadata/Sentinel-2_S2MSI1C_-50N_products.json",
    "metadata/landsat_ot_c2_l1_-50N_products.json",
]

# dataset takes about 20 mins to make on mac m1, so the commented-out steps below
# were run once and their result is read back from parquet instead
# df = create_dataset(data)
# roi_shape = 'shapefiles/50south_excl_argentina_falkand_mid.geojson'
# df = filter_results_with_geojson(df, roi_shape, plot=True)
# df['cloudcover'] = df['cloudcover'].fillna(0)
# df.to_parquet('metadata/s2_lsat_modis_viirs_combined.parquet', compression='snappy')
df = gpd.read_parquet('metadata/s2_lsat_modis_viirs_combined.parquet')


In [None]:
# reproject to EPSG:3031 and show the (rows, columns) shape
df = df.to_crs(3031)
df.shape

In [None]:
# Find Landsat 8/9 and Sentinel-2 products from January 2022 that were acquired
# within `delta` seconds of each other and whose footprints intersect.

# half-width of the matching time window in seconds; 5 minutes, matching the
# "within 5 mins" wording of the overlap map title
delta = 5 * 60

# .copy() so the timestamp columns are added to independent frames, not to slices of df
df_sat1 = df[(
    (df['year']==2022) & (df['mission']=='Landsat 8/9') & (df['month']==1)
    )].copy()
df_sat2 = df[(
    (df['year']==2022) & (df['mission']=='Sentinel-2') & (df['month']==1)
    )].copy()

for df_ in [df_sat1, df_sat2]:
    # epoch seconds; casting through datetime64[s] keeps this correct whatever
    # resolution (ns, us, ...) the stored datetimes have
    epoch_s = df_.time.values.astype('datetime64[s]').astype(np.int64)
    df_['timestamp'] = epoch_s
    df_['timestamp_min'] = epoch_s - delta
    df_['timestamp_max'] = epoch_s + delta
    print(df_.shape)
df_sat1 = df_sat1[['id','timestamp','timestamp_min','timestamp_max']]
df_sat2 = df_sat2[['id','timestamp','timestamp_min','timestamp_max']]
df_sat1.columns = ['sat1_' + x for x in list(df_sat1)]
df_sat2.columns = ['sat2_' + x for x in list(df_sat2)]

from pandasql import sqldf
# the query looks up df_sat1 / df_sat2 by name in the notebook globals
pysqldf = lambda q: sqldf(q, globals())

# pair every sat1 product with every sat2 product whose time window contains it
sqlcode = '''
select *
from df_sat1
inner join df_sat2 on df_sat1.sat1_id!=df_sat2.sat2_id
where df_sat1.sat1_timestamp >= df_sat2.sat2_timestamp_min and df_sat1.sat1_timestamp <= df_sat2.sat2_timestamp_max
'''

df_join = pysqldf(sqlcode)

# add geometries back (they were dropped before the SQL join)
id_to_geom = dict(zip(df.id, df.geometry))
df_join['sat1_geometry'] = df_join['sat1_id'].map(id_to_geom)
df_join['sat2_geometry'] = df_join['sat2_id'].map(id_to_geom)
df_join['intersects'] = df_join.apply(lambda x : x.sat1_geometry.buffer(0).intersects(x.sat2_geometry.buffer(0)), axis=1)
df_join['intersect_geometry'] = df_join.apply(lambda x : x.sat1_geometry.buffer(0).intersection(x.sat2_geometry.buffer(0)), axis=1)
df_join['intersect_area'] = df_join['intersect_geometry'].apply(lambda x : x.area/1E6)
# keep only pairs that overlap spatially, largest overlap first
iscts = df_join[df_join['intersects']].sort_values('intersect_area', ascending=False)

In [None]:
# turn the overlap table into a GeoDataFrame (intersection polygons, EPSG:3031) and plot it
isct_df = gpd.GeoDataFrame(iscts, geometry='intersect_geometry', crs=f"EPSG:{3031}")
isct_df.plot(edgecolor='black', alpha=0.4)

# (shared_functions is already imported as sf in the first cell)
import shared_functions as sf
sf.plot_results_footprint_map(isct_df, bounds=(-180,180,-90,-60), title='Landsat 8/9 & Sentinel-2 Overlaps (within 5 mins) - Jan 2022')

In [None]:
# the two products (sat1 + sat2) of the pair with the second-largest overlap (row index 1)
single = df[df['id'].isin(iscts[['sat1_id','sat2_id']].iloc[1].to_list())]
#single.plot(edgecolor='black', alpha=0.3)
sf.plot_results_footprint_map(single, bounds=(-180,180,-90,-60))
single

In [None]:
# ids of the selected pair
single.id.to_list()

In [None]:
from utils.download import LandsatDownloader, S2Downloader
# read credentials from file: the value after 'login' on the 2nd line and after
# 'password' on the 3rd line, skipping one separator character in each case
with open("credentials/credentials_earthexplorer.txt", "r") as f:
     txt = str(f.read())
     uid = txt.split('\n')[1].split('login')[-1][1:]
     pswd = txt.split('\n')[2].split('password')[-1][1:]

# download one hard-coded Landsat 9 product
downloader = LandsatDownloader(uid, pswd)
downloader.download('LC09_L1GT_118107_20220102_20230503_02_T2')

# read credentials from file (parsed the same way as above)
with open(f"credentials/credentials_cophub.txt", "r") as f:
   txt = str(f.read())
   user = txt.split('\n')[1].split('login')[-1][1:]
   password = txt.split('\n')[2].split('password')[-1][1:]

# download one hard-coded Sentinel-2 product
downloader = S2Downloader(user, password)
downloader.download('S2B_MSIL1C_20220102T025609_N0301_R003_T45DXF_20220102T040624')

In [None]:
import utils.spatial as spatial
import cv2
import numpy as np
import matplotlib.pyplot as plt

# build a 3-band image from the downloaded Landsat product and display it
img, meta = spatial.make_bgr_tif('LC09_L1GT_118107_20220102_20230503_02_T2',crs=3031, satellite='landsat')
img = spatial.normalise_bands(img, n_bands=3, p_min=15, p_max=20)
# NOTE(review): the cast to uint8 happens BEFORE the *255 scaling; if normalise_bands
# returns floats in [0, 1] this truncates to 0/1 first - confirm whether
# (img*255).astype(np.uint8) was intended.
img = img.astype(np.uint8)*255
# move the band axis last (axes reordered with (1, 2, 0)) and convert BGR -> RGB for imshow
plt.imshow(cv2.cvtColor(np.transpose(img, (1, 2, 0)), cv2.COLOR_BGR2RGB))
plt.show()
meta

# Critical baseline for DEMs