# Identify files in Sentinel-1 archive to create 'frame' scene stacks

Requires 'run_S1_bursts' in the GAMMA repo (S1_FRAME_CREATION dir) to be run first to generate up-to-date dataframes for each track. These dataframes contain the metadata and burst information for each zip file in the archive, and are used as input for this notebook.

If adding scenes to an existing frame list, use 'add_S1_frame_stack_list.ipynb' instead.

In [None]:
# developed 13 Mar 2019, S Lawrie, GA

import os
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
import datetime
import fiona

# root directory of the saved burst dataframes; the MASTER_BURSTS and ARCHIVE_TRACKS
# sub-directories are read from here by id_stack_scenes.subset_archive
burst_dir = "/g/data1/dg9/SENTINEL-1_BURSTS"

%matplotlib inline 
# required for correct plotting in jupyter notebook

### Track and frame details

In [None]:
## PROJECT DETAILS

# project name used in INSAR_ANALYSIS directory
project = "xxxx"

# track and frame details from master frame list (numbers only)
track = xxx
frame = xx

# orientation "Ascending" or "Descending"
orient = "xxxx"

# polarisation "VV" or "HH"
polar = "xxxx

# time period to find matching scenes 
start_period = 20140101
end_period = 201xxxxx

### Set up project processing directory

In [None]:
# Create project processing directory
insar_dir = "/g/data1/dg9/INSAR_ANALYSIS"
project_dir = os.path.join(insar_dir,project,"S1","GAMMA")
kml_dir = os.path.join(project_dir,"kmls")

# single-letter orientation code used in the archive dataframe and output file names;
# fail here with a clear message rather than with a NameError on orient2 later on
orient_codes = {"Ascending": "A", "Descending": "D"}
if orient not in orient_codes:
    raise ValueError('orient must be "Ascending" or "Descending", got %r' % (orient,))
orient2 = orient_codes[orient]

# make directories (kml_dir sits inside project_dir, so this creates both when missing)
os.makedirs(kml_dir, exist_ok=True)

### Functions to identify matching scenes, create KML file and produce text file for input into GAMMA

In [None]:
class id_stack_scenes:
    def __init__(self,burst_dir,project_dir,kml_dir,orient,orient2,track,frame,polar,start_period,end_period):
        self.burst_dir = burst_dir
        self.project_dir = project_dir
        self.kml_dir = kml_dir
        self.orient = orient
        self.orient2 = orient2
        self.track = track
        self.frame = frame
        self.polar = polar
        self.start = start_period
        self.end = end_period
        
    def subset_archive(self):
        """Load the saved dataframes, write the master frame KML and match archive bursts.

        The master dataframe is subset by orientation, track, frame and polarisation,
        the archive dataframe by polarisation and by the self.start - self.end date
        range. Every archive burst whose 'Extent' lies inside the 'Buffer' polygon of
        a master burst is recorded against that master swath and burst number.

        Sets self.df_master, self.df_archive, self.df_master_subset,
        self.df_archive_subset, self.swath_list and self.df_archive_matching.

        Returns self.df_archive_matching (one row per matching archive burst, or an
        empty dataframe without columns when nothing matches).
        """

        # load saved master burst coordinates dataframe
        input_master = '%s/MASTER_BURSTS/S1_IW_SLC_%s_Master_Bursts_Dataframe' %(self.burst_dir,self.orient)
        self.df_master = pd.read_pickle(input_master)

        # load saved archive burst coordinates dataframe
        input_archive = '%s/ARCHIVE_TRACKS/%s_track_data/S1_IW_SLC_T%s%s_Dataframe' %(self.burst_dir,self.orient,self.track,self.orient2)
        self.df_archive = pd.read_pickle(input_archive)

        # subset master burst coordinates to desired scene stack
        self.df_master_subset = self.df_master.loc[(self.df_master['Pass'] == self.orient) &
                                                   (self.df_master['RelOrbit'] == self.track) &
                                                   (self.df_master['Frame'] == self.frame) &
                                                   (self.df_master['Polar'].str.contains(self.polar))]

        # create KML file of master frame
        master_merged = self.df_master_subset.dissolve(by='Mode') # merge all polygons into one
        master_merged.reset_index(level=0, inplace=True)
        master_merged2 = master_merged.drop(columns=['Buffer']) # remove buffer column to enable correct coordinates to be used
        fiona.supported_drivers['KML'] = 'rw'
        out_kml = '%s/S1_T%s%s_F%s_%s.kml' %(self.kml_dir,self.track,self.orient2,self.frame,self.polar)
        master_merged2.to_file(out_kml, driver='KML')

        # subset archive burst coordinates to desired track by polarisation and date range
        # (the range comes from the instance, not from notebook globals; between() includes
        # both end points by default)
        df_subset = self.df_archive.loc[(self.df_archive['Polar'] == self.polar)]
        self.df_archive_subset = df_subset[df_subset['Date'].between(self.start, self.end)]

        # select archive bursts that fall within master bursts
        self.swath_list = self.df_master_subset.Swath.unique().tolist()

        columns = ['Date','Pass','Polar','RelOrbit','Frame','IPFVer',
                   'RawDate','OrgSwath','OrgBurstNum','OrgMaxBurst',
                   'MasSwath','MasBurstNum','Extent','GridDir','ZipFile']
        matches = []  # one list of values per matching archive burst, in 'columns' order

        for x in self.swath_list:
            swath_bursts = self.df_master_subset.loc[self.df_master_subset['Swath'] == x]

            for y in swath_bursts['BurstNum'].tolist():
                burst_row = swath_bursts.loc[swath_bursts['BurstNum'] == y].reset_index(drop=True)
                buffer = burst_row['Buffer'][0]
                frame = burst_row['Frame'][0]
                mas_burst_num = burst_row['BurstNum'][0]
                mas_swath = burst_row['Swath'][0]

                # iterate over archive and collect bursts lying inside the master burst buffer
                for i, row in self.df_archive_subset.iterrows():
                    poly = row['Extent']
                    if not buffer.contains(poly):
                        continue
                    # archive columns are addressed by position, as the archive dataframe
                    # layout is defined outside this notebook
                    date = row.iloc[2]
                    pass1 = row.iloc[3]
                    polar = row.iloc[4]
                    rel_orbit = row.iloc[5]
                    ipf = row.iloc[6]
                    raw_date = row.iloc[7]
                    org_burst_num = row.iloc[8]
                    org_swath = row.iloc[9]
                    grid = row.iloc[11]
                    zip_file = row.iloc[12]
                    org_max_burst = row.iloc[13]
                    matches.append([date,pass1,polar,rel_orbit,frame,ipf,raw_date,org_swath,
                                    org_burst_num,org_max_burst,mas_swath,mas_burst_num,poly,
                                    grid,zip_file])

        # build the result once instead of appending one-row dataframes inside the loop
        if matches:
            self.df_archive_matching = gpd.GeoDataFrame(matches, columns=columns, geometry='Extent')
        else:
            self.df_archive_matching = pd.DataFrame([])

        return self.df_archive_matching
        
    def dem_extent(self):
        """Work out the DEM bounding box used for auto DEM generation in GAMMA.

        The 'Buffer' polygons of the master subset are dissolved by 'Mission' and the
        bounds of the 'S1' row are turned into a rectangle. Requires subset_archive()
        to have been run (reads self.df_master_subset).

        Sets and returns self.dem_coords as (minx, miny, maxx, maxy).
        """
        sides = ('minx','miny','maxx','maxy')

        # merge burst polygons to single polygon (using burst coords)
        burst_buffers = gpd.GeoDataFrame(self.df_master_subset[['Mission','Buffer']],geometry='Buffer')
        merged = burst_buffers.dissolve(by='Mission')
        minx, miny, maxx, maxy = [merged.bounds.at['S1',side] for side in sides]

        # rectangle over the whole frame, corners in the order UL, UR, LR, LL
        rectangle = Polygon([[minx, maxy], [maxx, maxy], [maxx, miny], [minx, miny]])
        frame_extent = gpd.GeoDataFrame([[rectangle]],columns = ['Extent'],geometry='Extent')

        # add additional buffer to frame extent
        frame_extent.at[0,'Buffer'] = frame_extent.at[0,'Extent'].buffer(+0.3, cap_style=2,join_style=2)
        # NOTE(review): the coordinates below are read from frame_extent, not from frame2.
        # Whether they include the 0.3 buffer depends on which geometry column
        # frame_extent treats as active - confirm the buffered extent is what is wanted.
        frame2 =  gpd.GeoDataFrame(frame_extent,geometry='Buffer')

        # extract coords for text file
        self.dem_coords = tuple(frame_extent.bounds.at[0,side] for side in sides)

        return self.dem_coords

    def remove_duplicates(self):
        "Identifies and removes duplicates."  

        # add temporary index column for this step
        self.df_archive_matching['idx'] = self.df_archive_matching.index

        scene_date_dups = self.df_archive_matching[self.df_archive_matching.duplicated(subset=['Date','MasSwath',
                                                                                               'MasBurstNum'],
                                                                                       keep=False)]
        # 1: identify all unique dates (i.e. no duplicates)
        dup_scene_date_index = scene_date_dups.index.tolist() 
        df_uniq = self.df_archive_matching
        df_uniq.drop(df_uniq.index[dup_scene_date_index], inplace = True)

        # 2: identify remaining duplicates that have same date but diff IPF version
        ipf_max = scene_date_dups.groupby('Date').IPFVer.transform(max)
        ipf_dups = scene_date_dups.loc[scene_date_dups.IPFVer == ipf_max]
        ipf_uniq = ipf_dups.drop_duplicates(subset=['Date','IPFVer','MasSwath','MasBurstNum'], keep=False)

        # 3: identify remaining duplicates that have same date and IPF version but diff raw date
        ipf_uniq_index = ipf_uniq.index.tolist()
        raw_date_dups1 = ipf_dups[~ipf_dups['idx'].isin(ipf_uniq_index)]
        raw_date_max = raw_date_dups1.groupby('Date').RawDate.transform(max)
        raw_date_dups = raw_date_dups1.loc[raw_date_dups1.RawDate == raw_date_max]
        raw_date_uniq = raw_date_dups.drop_duplicates(subset=['Date','RawDate','MasSwath','MasBurstNum'],
                                                      keep=False)

        # 4: remove remaining duplcates (i.e. all details the same, keep one row)
        raw_date_uniq_index = raw_date_uniq.index.tolist()
        remain_date_dups1 = raw_date_dups[~raw_date_dups['idx'].isin(raw_date_uniq_index)]
        remain_date_uniq = remain_date_dups1.drop_duplicates(subset = ['Date','IPFVer','RawDate','MasSwath',
                                                                       'MasBurstNum'],keep='first')

        # 5: join all unique dataframes together to get final matching archive dataframe
        df_no_dups1 = pd.concat([df_uniq,ipf_uniq,raw_date_uniq,remain_date_uniq],axis=0,sort=False).reset_index(drop=True)
        df_no_dups = df_no_dups1.drop(columns='idx',axis=1)
       
        # 6: sort final dataframe
        self.df_archive_matching_no_dups = df_no_dups.sort_values(['Date','MasSwath','MasBurstNum'],
                                                                  ascending=[True,True,True])  
     
        return self.df_archive_matching_no_dups
        
    def first_full_scene(self):      ##### WILL NEED MODIFYING TO DEAL WITH ALL SCENES WHICH DON'T HAVE 12 BURSTS
        "Identify first scene which has 12 master bursts (use for resizing any scenes which don't have 12 bursts)"
    
        dates = self.df_archive_matching_no_dups.Date.unique()
        self.dates_list = dates.tolist()
        df_dates = pd.DataFrame([]) 
    
        for x in self.dates_list:
            iw1_b1 = self.df_archive_matching_no_dups.loc[(self.df_archive_matching_no_dups['Date'] == x) &
                                                          (self.df_archive_matching_no_dups['MasSwath'] == 'IW1') & 
                                                          (self.df_archive_matching_no_dups['MasBurstNum'] == 1)]
            iw1_b12 = self.df_archive_matching_no_dups.loc[(self.df_archive_matching_no_dups['Date'] == x) & 
                                                           (self.df_archive_matching_no_dups['MasSwath'] == 'IW1') & 
                                                           (self.df_archive_matching_no_dups['MasBurstNum'] == 12)]  

            # check if results are returned for both bursts 1 and 12, if append dates to new dataframe
            if iw1_b1.empty or iw1_b12.empty:
                pass
            else:
                df_dates = df_dates.append(iw1_b1,ignore_index=True)
                df_dates = df_dates.append(iw1_b12,ignore_index=True)
        df1 = df_dates.sort_values(by=['Date'])
        self.first_date = df1['Date'].iloc[0]

    def org_mas_bursts(self):
        "Summarise the original bursts and the equivalant master burst numbers"

        self.org_mas_bursts = pd.DataFrame([]) 

        for w in self.dates_list:
            date_rows = self.df_archive_matching_no_dups.loc[self.df_archive_matching_no_dups['Date'] == w]  
            for x in self.swath_list:
                swath_rows = date_rows.loc[date_rows['MasSwath'] == x]
                zip_files = swath_rows.ZipFile.unique()
                zip_list = zip_files.tolist() 
                for z in zip_list:
                    rows = swath_rows.loc[swath_rows['ZipFile'] == z]     
                    grid = rows.iloc[0,13]      
                    min_org_burst = rows.OrgBurstNum.min()
                    max_org_burst = rows.OrgBurstNum.max()
                    if min_org_burst == max_org_burst:
                        org_burst = ('%s' % min_org_burst)
                    else:
                        org_burst = ('%s-%s') %(min_org_burst,max_org_burst)
                    min_mas_burst = rows.MasBurstNum.min()
                    max_mas_burst = rows.MasBurstNum.max()
                    if min_mas_burst == max_mas_burst:
                        mas_burst = ('%s' % min_mas_burst)
                    else:
                        mas_burst = ('%s-%s') %(min_mas_burst,max_mas_burst)
    
                    df1 = pd.DataFrame([[w,x,org_burst,mas_burst,grid,z]],columns = ['Date','Swath','OrgBurst',
                                                                                     'MasBurst','GridDir','ZipFile'])
                    self.org_mas_bursts = self.org_mas_bursts.append(df1, ignore_index=True, sort=False)              

        return self.org_mas_bursts
            
    def max_bursts_swath(self):  
        "Determine the maximum bursts per swath for each zip file"
        
        df_max_bursts_swath1 = pd.DataFrame([]) 
        
        for w in self.dates_list:
            date_rows = self.df_archive_matching_no_dups.loc[self.df_archive_matching_no_dups['Date'] == w]     
            date_zip_files = date_rows.ZipFile.unique()
            date_zip_list = date_zip_files.tolist()

            for x in self.swath_list:
                swath_zip = self.df_archive.loc[self.df_archive['Swath'] == x]
                zip_files = swath_zip.ZipFile.unique()
                zip_list = zip_files.tolist() 

                for y in date_zip_list:
                    for z in zip_list:
                        if y == z:
                            rows = swath_zip.loc[swath_zip['ZipFile'] == y]
                            grid = rows.iloc[0,11]
                            max_burst = rows['MaxBurst'].max()
                            swath = ("MaxBurst%s" % x)
                            df1 = pd.DataFrame([[w, grid, z, max_burst]],columns = ['Date','GridDir','ZipFile', swath])
                            df_max_bursts_swath1 = df_max_bursts_swath1.append(df1,ignore_index=True,sort=False)     

            self.df_max_bursts_swath = df_max_bursts_swath1.groupby(['Date','GridDir',
                                                                         'ZipFile'])[['MaxBurstIW1',
                                                                                      'MaxBurstIW2',
                                                                                      'MaxBurstIW3'
                                                                                     ]].first().reset_index()   
        return self.df_max_bursts_swath
            
    def download_files(self):
        "Create list of files to download"
        
        self.download_files = self.df_max_bursts_swath[['Date','GridDir','ZipFile']]
           
    def tot_bursts(self):
        "Determine total number of bursts by swath for each date"

        self.df_dates_total = pd.DataFrame([]) 
    
        for w in self.dates_list:
            date_rows = self.df_max_bursts_swath.loc[self.df_max_bursts_swath['Date'] == w]
            IW1_sum = date_rows['MaxBurstIW1'].sum()
            IW2_sum = date_rows['MaxBurstIW2'].sum()
            IW3_sum = date_rows['MaxBurstIW3'].sum()
                                   
            df1 = pd.DataFrame([[w, IW1_sum, IW2_sum, IW3_sum]],columns = ['Date','TotBurstsIW1','TotBurstsIW2',
                                                                           'TotBurstsIW3'])
            self.df_dates_total = self.df_dates_total.append(df1, ignore_index=True, sort=False)          

        return self.df_dates_total            
            
    def subset(self):
        "Determine bursts to subset by after scenes are concatenated"
        
        df_subset = pd.DataFrame([]) 

        for x in self.dates_list:
            date_rows = self.org_mas_bursts.loc[self.org_mas_bursts['Date'] == x]     

            # check how many zip files required for date
            zip1 = date_rows.ZipFile.unique()
            num_zip = zip1.shape[0]            
            
            # check if identified zipfiles are required for all three swaths (they are required for concatenation, 
            # but may not be for subsetting to final frame)
            IW1_rows = date_rows.loc[date_rows['Swath'] == 'IW1']
            IW2_rows = date_rows.loc[date_rows['Swath'] == 'IW2']
            IW3_rows = date_rows.loc[date_rows['Swath'] == 'IW3']
            IW1_rows2 = IW1_rows.drop(['Date','OrgBurst','MasBurst','GridDir'], axis=1)
            IW2_rows2 = IW2_rows.drop(['Date','OrgBurst','MasBurst','GridDir'], axis=1)
            IW3_rows2 = IW3_rows.drop(['Date','OrgBurst','MasBurst','GridDir'], axis=1)
    
            # merge dataframes in two steps (can only merge two at a time)
            merge1 = pd.merge(IW1_rows2, IW2_rows2, on=['ZipFile'], how='outer')
            merge2 = pd.merge(merge1, IW3_rows2, on=['ZipFile'], how='outer')
    
            # rename columns after merging
            merge2.columns = ['SwathIW1','ZipFile','SwathIW2','SwathIW3']
            # reorder columns after merging
            zip_swath_check = (merge2[['ZipFile','SwathIW1','SwathIW2','SwathIW3']]).reset_index(drop=True)
            
            # if single zip file for date and there are missing swaths, exclude from final list
            if num_zip == 1 & zip_swath_check.isnull().values.any() == True:
                pass
            else:
                for y in self.swath_list:
                    swath_rows = date_rows.loc[date_rows['Swath'] == y]
                    # check if first identified zip file is required for selected swath (if not, need to account 
                    # for this in subset)
                    first_swath_zip = swath_rows.iloc[0]['ZipFile']

                    check = zip_swath_check.loc[zip_swath_check['ZipFile'] == first_swath_zip]
                    check_idx = check.index

                    if check_idx == 1: 
                        row = self.df_archive_subset.loc[(self.df_archive_subset['Date'] == x) & 
                                                         (self.df_archive_subset['Swath'] == y) &
                                                         (self.df_archive_subset['ZipFile'] == first_swath_zip) & 
                                                         (self.df_archive_subset['BurstNum'] == 1)]
                        missing_zip_max_burst = row.iloc[0]['MaxBurst']
                    else:    
                        missing_zip_max_burst = 0

                    mas_start_burst = swath_rows.iloc[0]['MasBurst']
                    org_start_burst = swath_rows.iloc[0]['OrgBurst']
         
                    if mas_start_burst.find('-') == -1:
                        # no hyphen, single burst
                        first_mas_num = int(mas_start_burst)  
                        first_org_num = int(org_start_burst) 
                        if first_mas_num == 1:  
                            if num_zip == 1:
                                subset = first_org_num 
                            else:
                                start_subset = first_org_num + missing_zip_max_burst
                                end_subset = start_subset + 11
                                subset = ('%s-%s') %(start_subset,end_subset)             
                        else:
                            if num_zip == 1:
                                subset = first_org_num 
                            else:    
                                start_subset = first_mas_num + missing_zip_max_burst
                                end_subset = start_subset + 11
                                subset = ('%s-%s') %(start_subset,end_subset)                     
                    else:
                        # hyphen 
                        first_mas_num = int(mas_start_burst.split('-')[0])  
                        first_org_num = int(org_start_burst.split('-')[0])  
                        # check if org burst is 1 if not, may need to adjust subset so it starts in right place
                        if first_mas_num == 1:  
                            if num_zip == 1:
                                start_subset = first_org_num 
                                end_subset = org_start_burst.split('-')[1] 
                            else:
                                start_subset = first_org_num + missing_zip_max_burst
                                end_subset = start_subset + 11
                            subset = ('%s-%s') %(start_subset,end_subset)
                        else:
                            if num_zip == 1:
                                start_subset = first_org_num 
                                end_subset = org_start_burst.split('-')[1]
                            else:    
                                start_subset = first_mas_num
                                end_subset = start_subset + 11
                            subset = ('%s-%s') %(start_subset,end_subset)                        
                    
                    swath = ("Subset%s" % y)               
                    df1 = pd.DataFrame([[x,subset]],columns = ['Date',swath])
                    df_subset = df_subset.append(df1, ignore_index=True, sort=False)          

            df_bursts = pd.merge(self.df_dates_total, df_subset, on="Date")   

        self.subset_bursts = df_bursts.groupby(['Date'])[['TotBurstsIW1','TotBurstsIW2','TotBurstsIW3',
                                                          'SubsetIW1','SubsetIW2','SubsetIW3']].first().reset_index()
        
        return self.subset_bursts                
                
    def finalise_subset(self):
        "Check maximum number of bursts and determine if scene is complete for each date, finalise subset list"
        df_check_max = pd.DataFrame([])
        
        for w in self.dates_list:
            row = self.df_dates_total.loc[self.df_dates_total['Date'] == w]
            IW1 = int(row.iloc[0]['TotBurstsIW1'])
            # check if complete frame (i.e. 12 bursts present)
            if IW1 < 12:
                complete = 'no'
            elif IW1 == 12: 
                max_burst1 = self.subset_bursts.iloc[0]['SubsetIW1'].split('-')[0]
                max_burst = int(max_burst1)
                if max_burst < 12:
                    complete = 'no'
                else:
                    complete = 'yes'
            else:
                complete = 'yes'
                       
            df1 = pd.DataFrame([[w, complete]],columns = ['Date','CompleteFrame'])
            
            df_check_max = df_check_max.append(df1, ignore_index=True, sort=False)          
    
        # merge final details
        self.subset_final = pd.merge(self.subset_bursts, df_check_max, on="Date")   
        
        return self.subset_final

    
    def centre_date(self):
        "Find centre date for use with resizing, DEM and slave coregistration"
        
        # extract dates with complete frames only
        complete = self.subset_final.loc[self.subset_final['CompleteFrame'] == 'yes']
        complete_list = list(complete['Date'])

        # convert to date format to find middle date
        dates = []
        for i in complete_list:
            date = datetime.datetime.strptime(str(i),'%Y%m%d').strftime('%d/%m/%Y')
            dates.append(date)
    
        # find middle date (copes with list with an odd numbered length)    
        mid_line = int(len(dates))//2
        mid_date = datetime.datetime.strptime(dates[mid_line],'%d/%m/%Y')
        self.centre_date = mid_date.strftime('%Y%m%d')

        return self.centre_date
    
    
    def gamma_output(self):
        "Save output to text file for input into GAMMA."
 
        now = datetime.datetime.now().strftime("%d-%b-%Y")
        pd.set_option('display.max_colwidth', -1) # stops concatenating dataframe contents when displayed
        pd.options.display.float_format = '{:,.0f}'.format # changes default display of int numbers from float to int

        # files to download
        download_string = self.download_files.to_string(index=None) 

        # subset bursts
        subset_string = self.subset_final.to_string(index=None) 

        # org versus master bursts
        org_mas_string = self.org_mas_bursts.to_string(index=None) 
        
        # zip file max bursts
        max_bursts_string = self.df_max_bursts_swath.to_string(index=None)

        # write results to text file
        temp_out_file = '%s/temp_scene_stack.txt' %(self.project_dir)
        out_file = '%s/S1_T%s%s_F%s_%s_scene_stack.txt' %(self.project_dir,self.track,self.orient2,self.frame,self.polar)

        if os.path.exists(out_file):
            os.remove(out_file)

        f = open(temp_out_file, 'w+')
        f.write("SENTINEL-1 SARA ARCHIVE RESULTS FOR GAMMA PROCESSING\n")
        f.write("List created: %s\n" % now)
        f.write("\n")
        f.write("TRACK: %s\n" % self.track)
        f.write("FRAME: %s\n" % self.frame)
        f.write("POLARISATION: %s\n" % self.polar)
        f.write("ORIENTATION: %s\n" % self.orient)
        f.write("DATE_RANGE: %s - %s\n" %(self.start,self.end))
        f.write("\n")      
        f.write("RESIZE_MASTER: %s\n" % self.first_date)
        f.write("COREG_MASTER: %s\n" % self.centre_date)
        f.write("\n")       
        f.write("DEM_COORDINATES(minX,minY,maxX,maxY): %s\n" % str(self.dem_coords))
        f.write("\n")
        f.write("FILES_TO_DOWNLOAD\n")
        f.write("%s\n" % download_string)
        f.write("\n")
        f.write("SUBSET_BURSTS\n")
        f.write("%s\n" % subset_string)
        f.write("\n")
        f.write("ORG_BURSTS_V_MASTER_BURSTS\n")
        f.write("%s\n" % org_mas_string)
        f.write("\n")      
        f.write("MAX_BURSTS\n")
        f.write("%s\n" % max_bursts_string)
        f.close()                            
        
        ## fix leading white space before dates
        temp_out = []
        with open(temp_out_file, 'r') as f:
            for line in f:
                if line.startswith(" 2"):
                    line2 = line.lstrip()
                    temp_out.append(line2)
                else:
                    temp_out.append(line)

        with open(out_file, 'w') as f:
            for line in temp_out:
                f.write(line)
        f.close()     

        os.remove(temp_out_file)
        

### Run Functions

In [None]:
# Run the whole workflow. The order matters: each step reads attributes set by earlier ones.
# A fresh object is created each time because org_mas_bursts, download_files and centre_date
# replace themselves on obj with their results and so cannot be called twice on the same object.
obj = id_stack_scenes(burst_dir,project_dir,kml_dir,orient,orient2,track,frame,polar,start_period,end_period)
obj.subset_archive()     # loads the dataframes, writes the master frame KML, matches archive bursts
obj.dem_extent()         # sets dem_coords
obj.remove_duplicates() 
obj.first_full_scene()   # sets dates_list and first_date
obj.org_mas_bursts() 
obj.max_bursts_swath() 
obj.download_files()
obj.tot_bursts() 
obj.subset() 
obj.finalise_subset()
obj.centre_date()
obj.gamma_output()       # writes the scene stack text file into project_dir

### If errors occur, need to troubleshoot. May be due to error in input archive metadata (e.g. wrong format)

Code below enables running functions step by step to isolate issue

In [None]:
# maximise column widths when displaying dataframe results
# (None means "no limit" from pandas 1.0 onwards; older releases only accept -1)
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

In [None]:
# functions
# Same sequence as the full run, but the returned value of each step is kept so it can be
# inspected. The trailing comment names the attribute each step stores its result in.

obj = id_stack_scenes(burst_dir,project_dir,kml_dir,orient,orient2,track,frame,polar,start_period,end_period)
df1 = obj.subset_archive() # self.df_archive_matching
df_coord = obj.dem_extent() # self.dem_coords
df2 = obj.remove_duplicates() # self.df_archive_matching_no_dups
obj.first_full_scene()
df3 = obj.org_mas_bursts() # self.org_mas_bursts
df4 = obj.max_bursts_swath() # self.df_max_bursts_swath
obj.download_files()
df5 = obj.tot_bursts() # self.df_dates_total 
df6 = obj.subset() # self.subset_bursts
df7 = obj.finalise_subset() # self.subset_final
df8 = obj.centre_date() #self.centre_date
#obj.gamma_output()

In [None]:
# variables below are pre-set so that if a function is extracted to be run line by line
# (copied to a new cell), no changes to the variables are required

df_archive_matching, df_archive_matching_no_dups = df1, df2
#df_dates_total = df5
#org_mas_bursts = df3

# unique master swaths and dates of the de-duplicated matches, as arrays and as plain lists
swaths = df_archive_matching_no_dups['MasSwath'].unique()
dates = df_archive_matching_no_dups['Date'].unique()
swath_list, dates_list = swaths.tolist(), dates.tolist()


In [None]:
# paste copy of function to run line by line if required