# Download Matching Imagery

This notebook will use the RADARSAT-1 metadata to find images that correspond to lake ice measurements taken in a given folder. These images can then be used to analyze the lakes.

In [1]:
import boto3
import os
import traceback
from botocore.config import Config
from botocore import UNSIGNED

## 1. Finding Imagery for Measurements

This code checks the dates and coordinates of measurements and compares them against dates and coordinates in the R1 dataset. It then creates a new dataframe containing only measurements where matching imagery is found, as well as the file name of the matching imagery.

In [1]:
import pandas as pd

def get_coord_1(row):
    """Return the first coordinate of a ``...(x y)`` string, as text.

    The value is stringified, everything up to the first ``(`` is dropped,
    and the text before the first space is returned (still a string).
    """
    inside_parens = str(row).split('(')[1]
    return inside_parens.partition(' ')[0]

def get_coord_2(row):
    """Return the second coordinate of a ``...(x y)`` string, as text.

    The value is stringified, the part after the first ``(`` is split on
    spaces, and the second token is returned with anything from the first
    ``)`` onwards removed (still a string).
    """
    tokens = str(row).split('(')[1].split(' ')
    return tokens[1].partition(')')[0]

def get_year(row):
    """Return the text before the first ``-`` of a date string.

    Anything that is not exactly a ``str`` (e.g. a missing value) yields 0.
    """
    if type(row) is not str:
        return 0
    return row.partition('-')[0]
def get_month(row):
    """Return the second ``-``-separated field of a date string.

    Anything that is not exactly a ``str`` (e.g. a missing value) yields 0.
    """
    if type(row) is not str:
        return 0
    fields = row.split('-')
    return fields[1]

# Lake-ice measurements.
df = pd.read_excel("lakeice-measurements.xlsx")

# RADARSAT-1 (R1) metadata.
df_r1 = pd.read_csv('r1_data_with_aws_solved_true.csv')

# Break the scene centre into separate coordinate columns ...
df_r1['long'] = df_r1['scene-centre'].map(get_coord_1)
df_r1['lat'] = df_r1['scene-centre'].map(get_coord_2)
# ... and the start date into separate month / year columns.
df_r1['month'] = df_r1['start-date'].map(get_month)
df_r1['year'] = df_r1['start-date'].map(get_year)

# The helpers return text, so cast the new columns to numeric types.
df_r1 = df_r1.astype({
    'long': 'float64',
    'lat': 'float64',
    'month': 'int64',
    'year': 'int64',
})

def has_imagery(row, df_r1, max_offset_deg=15.0):
    """Find R1 imagery near a measurement and in the same calendar month.

    :param row: ``(index, Series)`` pair as produced by ``DataFrame.iterrows()``;
        the Series must provide ``LONG``, ``LAT`` and ``DATE``.
    :param df_r1: R1 metadata with numeric ``long``, ``lat``, ``year`` and
        ``month`` columns plus a ``download_link`` column.
    :param max_offset_deg: Half-width of the search box around the measurement,
        applied to both coordinates (defaults to the original 15.0).
    :return: The first matching ``download_link`` with the bucket URL prefix
        removed, or ``None`` when nothing matches or the date cannot be parsed.
    """
    url_prefix = 'https://s3-ca-central-1.amazonaws.com/radarsat-r1-l1-cog/'
    measurement = row[1]

    # NOTE(review): this assumes LONG/LAT use the same sign convention as the
    # R1 scene centres -- confirm against the measurement file.
    lon = float(measurement['LONG'])
    lat = float(measurement['LAT'])

    # Spatial filter: scene centre inside the box around the measurement.
    nearby = df_r1[
        (df_r1['long'] >= lon - max_offset_deg)
        & (df_r1['long'] <= lon + max_offset_deg)
        & (df_r1['lat'] >= lat - max_offset_deg)
        & (df_r1['lat'] <= lat + max_offset_deg)
    ]
    if nearby.empty:
        return None

    # Temporal filter: same year and month. A missing or malformed date
    # means "no match" rather than an exception that aborts the whole run.
    date_fields = str(measurement['DATE']).split('-')
    try:
        year, month = int(date_fields[0]), int(date_fields[1])
    except (IndexError, ValueError):
        return None

    same_month = nearby[(nearby['year'] == year) & (nearby['month'] == month)]
    if same_month.empty:
        return None

    # Only the first match is used; strip the URL prefix to get the bucket key.
    first_link = same_month['download_link'].iloc[0]
    return first_link.split(url_prefix)[1]

# Look up matching imagery for every measurement (None when there is none).
df['has_coverage'] = [has_imagery(measurement, df_r1) for measurement in df.iterrows()]

# Keep only the measurements for which imagery was found.
output_df = df.loc[df['has_coverage'].notna()]

In [3]:
# Display the measurements that have matching imagery.
output_df

Unnamed: 0,ID,DATE,TIME,NAME,LAT,LONG,ICE_COVER,has_coverage
2675,2676.0,1997-01-05,1225.0,Lac ST Jean,48.6,72.1,10.0,1997/1/RS1_M0109173_SCWB_19970124_011859_HH_SC...
2678,2679.0,1997-01-05,1225.0,Lake Simcoe,44.4,79.3,4.0,1997/1/RS1_M0109173_SCWB_19970124_011859_HH_SC...
2679,2680.0,1997-01-05,1225.0,Reserv. Caniapiscau,54.0,70.0,10.0,1997/1/RS1_M0109173_SCWB_19970124_011859_HH_SC...
2686,2687.0,1997-01-05,1225.0,Lac Bienville,55.0,73.0,10.0,1997/1/RS1_M0109173_SCWB_19970124_011859_HH_SC...
2688,2689.0,1997-01-05,1225.0,Lac Champlain,44.5,73.3,0.0,1997/1/RS1_M0109173_SCWB_19970124_011859_HH_SC...
...,...,...,...,...,...,...,...,...
74713,74714.0,2008-11-28,1800.0,Oneida Lake,43.2,75.9,0.0,2008/11/RS1_N0577401_F3_20081106_133715_HH_SGF...
74765,74766.0,2008-11-28,1800.0,Lake Rossignol,44.2,65.1,0.0,2008/11/RS1_N0577401_F3_20081106_133715_HH_SGF...
74770,74771.0,2008-11-28,1800.0,Lake Simcoe,44.4,79.3,0.0,2008/11/RS1_N0577401_F3_20081106_133715_HH_SGF...
74777,74778.0,2008-11-28,1800.0,Sebago Lake,43.9,70.6,0.0,2008/11/RS1_N0577401_F3_20081106_133715_HH_SGF...


## 2. Restricting Data to 0% or 100% Ice Coverage

We then restrict our data to either 0% ice coverage or 100% ice coverage, and then we output CSVs to make future usage of it easier.

In [4]:
# ICE_COVER == 0 is the "0%" set and ICE_COVER == 10 is the "100%" set.
# NOTE(review): presumably ICE_COVER is recorded in tenths -- confirm.
output_zeros = output_df.loc[output_df['ICE_COVER'].eq(0)].copy()
output_hundreds = output_df.loc[output_df['ICE_COVER'].eq(10)].copy()

In [5]:
# Save both subsets (all matched measurements, duplicates included).
output_zeros.to_csv(path_or_buf='zero_ice_coverage.csv')
output_hundreds.to_csv(path_or_buf='hundred_ice_coverage.csv')

In [6]:
# Keep one row per image file so each file appears only once.
output_zeros_unique = output_zeros.drop_duplicates(subset=['has_coverage']).copy()
output_hundreds_unique = output_hundreds.drop_duplicates(subset=['has_coverage']).copy()

In [7]:
# Save the de-duplicated subsets (one row per image file).
output_zeros_unique.to_csv(path_or_buf='zero_ice_coverage_unique.csv')
output_hundreds_unique.to_csv(path_or_buf='hundred_ice_coverage_unique.csv')

## 3. Download Files

Using the dataframes, we can download all the unique imagery. We put 0% and 100% lakes into separate folders to be used in the future.

In [8]:
# botocore settings shared by the S3 client and resource below.
MY_CONFIG = Config(
    region_name='ca-central-1',
    signature_version=UNSIGNED,  # requests are sent unsigned (no credentials)
    retries=dict(max_attempts=10, mode='standard'),
)

# Bucket that holds the RADARSAT-1 imagery.
BUCKET_NAME = 'radarsat-r1-l1-cog'

S3_RESOURCE = boto3.resource('s3', config=MY_CONFIG)
S3_CLIENT = boto3.client('s3', config=MY_CONFIG)

def download_file(file_name):
    """ Download a file from the RADARSAT-1 bucket.

    The object key is also used as the local path, relative to the current
    working directory. Failures are reported on stdout and do not raise, so
    a batch of downloads keeps going after a bad file.

    :param file_name: Name of the file to download
    """

    # Create the local directory if the key has one. A key without a
    # directory part has dirname '' and os.makedirs('') would raise;
    # exist_ok avoids failing when the directory is already there.
    target_dir = os.path.dirname(file_name)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    try:
        print("Downloading:", file_name)
        bucket = S3_RESOURCE.Bucket(BUCKET_NAME)
        bucket.download_file(file_name, file_name)
    except Exception as e:
        # Best effort: say which file failed and carry on.
        print("Failed to download", file_name, "-", e)

In [16]:
# Build the folder paths with os.path.join instead of a backslash literal:
# in an ordinary Python string, a backslash followed by digits is an octal
# escape (backslash-0 is NUL, backslash-100 is '@'), so the folder names were
# corrupted. os.path.join also inserts the right separator on every platform.
path_zero = os.path.join(os.getcwd(), '0Percent')
path_hundred = os.path.join(os.getcwd(), '100Percent')

if not os.path.exists(path_hundred):
    print("Creating folder...")
    os.makedirs(path_hundred)
if not os.path.exists(path_zero):
    print("Creating folder...")
    os.makedirs(path_zero)

Creating folder...


In [21]:
# download_file() writes relative to the current directory, so move into the
# 100% folder first. The path is relative: run this from the notebook's
# starting directory.
os.chdir('100Percent')

# Read the file name by column label; positional access with [-1] on a
# label-indexed Series is deprecated in pandas and depends on column order.
for file_name in output_hundreds_unique['has_coverage']:
    download_file(file_name)

In [22]:
# Switch from the 100% folder to its sibling 0% folder. The relative path
# assumes the current directory is still 100Percent from the previous cell.
os.chdir('../0Percent')

# Read the file name by column label; positional access with [-1] on a
# label-indexed Series is deprecated in pandas and depends on column order.
for file_name in output_zeros_unique['has_coverage']:
    download_file(file_name)