In [None]:
import numpy as np

import os
from os import listdir
from os.path import isfile, join

import shlex, subprocess

from skimage.transform import rescale
from skimage.measure import block_reduce

from datetime import datetime, date, time, timedelta
from dateutil import parser
import time
from time import process_time

from sunpy.net import Fido
from sunpy.net.vso import attrs as avso
from sunpy.time import TimeRange #parse_time,

import astropy.units as u
from astropy.io import fits

import h5py

import csv

from tqdm import tqdm_notebook as tqdm

In [None]:
def readfits(filename):
    """Read the primary HDU of a FITS file.

    Parameters
    ----------
    filename : str
        Path of the FITS file to open.

    Returns
    -------
    tuple
        ``(axis1, axis2, data, axisnum, hdr)`` where ``axis1``, ``axis2`` and
        ``axisnum`` are the ``naxis1``, ``naxis2`` and ``naxis`` header values,
        ``data`` is the primary HDU data and ``hdr`` its header.
        When reading raises ``ValueError`` the sentinel
        ``(1, 2, None, 3, None)`` is returned instead; its mismatching axes and
        ``None`` data let callers reject the file with their usual checks.
    """
    try:
        # The context manager closes the file even if a header key is missing
        # or the data cannot be decoded.
        with fits.open(filename, memmap=False) as ft:
            hdr = ft[0].header
            data = ft[0].data
            axis1 = hdr['naxis1']
            axis2 = hdr['naxis2']
            axisnum = hdr['naxis']

    except ValueError:
        axis1 = 1
        axis2 = 2
        axisnum = 3
        data = None
        # hdr must be bound on this path too, otherwise the return statement
        # below raises UnboundLocalError instead of reporting a bad file.
        hdr = None

    return axis1,axis2,data,axisnum,hdr

def writefits(filename, data, home_dir):
    """Save ``data`` as the primary HDU of ``<home_dir><filename>.fits``.

    An already existing file at that path is left untouched, so re-running the
    pipeline never overwrites earlier output.
    """
    target_path = f'{home_dir}{filename}.fits'
    if os.path.exists(target_path):
        return
    fits.PrimaryHDU(data).writeto(target_path)

def holes(filename):
    """Tell whether a downloaded image contains too many missing pixels.

    The product type is inferred from substrings of ``filename`` and decides
    which region around the reference pixel is inspected:

    * ``'efz'`` (EIT): zeros inside a disk of radius ``CRPIX1 * sqrt(2)``.
    * ``'96m'`` / ``'MDI'``: zeros inside radius ``CRPIX1`` plus NaNs inside
      radius ``0.6 * CRPIX1``.
    * ``'LASCO_C3'``: zeros inside radius ``0.8 * CRPIX1``.
    * ``'LASCO_C2'``: zeros in the annulus between 160 pixels and
      ``int(CRPIX1)``.

    If the header has no ``CRPIX1``/``CRPIX2`` the image centre
    (``naxis / 2``) is used as reference pixel.

    Parameters
    ----------
    filename : str or path-like
        Path of the FITS file; converted with ``str`` before matching.

    Returns
    -------
    bool
        ``True`` when more than 100 bad pixels are found (image not usable),
        ``False`` otherwise, including for file names matching none of the
        products above.
    """
    filename = str(filename)
    max_bad_pixels = 100

    # Context manager: the handle is released even if reading the HDU fails.
    with fits.open(filename, memmap=False) as ft:
        hdr = ft[0].header
        data = ft[0].data

    try:
        x_coord = hdr['CRPIX1']
        y_coord = hdr['CRPIX2']

    except KeyError:
        x_coord = hdr['naxis1'] / 2.
        y_coord = hdr['naxis2'] / 2.

    # astropy returns image data with shape (NAXIS2, NAXIS1), i.e. rows first,
    # so the index grid has to be built in that order to line up with `data`
    # pixel by pixel (identical to the previous behaviour for square images).
    y_ind, x_ind = np.indices((hdr['naxis2'], hdr['naxis1']))
    rsquared = ((x_ind - x_coord)**2 + (y_ind - y_coord)**2).ravel()
    pixels = data.ravel()

    def zeros_where(region):
        """Count the pixels of the boolean ``region`` mask that are exactly 0."""
        return int(np.count_nonzero(pixels[region] == 0.))

    if 'efz' in filename: #good for all EIT products
        rad = x_coord*np.sqrt(2)
        bad_pixels = zeros_where(rsquared < rad**2)

    elif any(tag in filename for tag in ('96m', 'MDI')):
        rad1 = float(x_coord)
        rad2 = 0.6*float(x_coord)
        nan_count = int(np.count_nonzero(np.isnan(pixels[rsquared < rad2**2])))
        bad_pixels = zeros_where(rsquared < rad1**2) + nan_count

    elif 'LASCO_C3' in filename:
        rad = 0.8*x_coord
        bad_pixels = zeros_where(rsquared < rad**2)

    elif 'LASCO_C2' in filename:
        rad1 = 160 #inner edge of the inspected annulus, in pixels
        rad2 = int(x_coord)
        bad_pixels = zeros_where((rad2**2 > rsquared) & (rsquared > rad1**2))

    else:
        # Unknown product: previously fell through and returned None, which
        # callers already treated as "no holes"; make that explicit.
        return False

    return bad_pixels > max_bad_pixels
        

def data_reducer(data,flag,target_dimension,axis1_shape):
    """Shrink an image by the integer factor ``axis1_shape // target_dimension``.

    Parameters
    ----------
    data : numpy.ndarray
        Image to reduce.
    flag : str
        Reduction method: ``'subsample'`` keeps every ``scale_factor``-th row
        and column, ``'interp'`` rescales with linear interpolation
        (anti-aliased, range preserving), ``'minpool'`` / ``'maxpool'`` take
        the minimum / maximum of each ``scale_factor`` x ``scale_factor`` block.
    target_dimension : int
        Desired edge length of the output.
    axis1_shape : int
        Edge length of the input, used to derive the scale factor.

    Returns
    -------
    numpy.ndarray
        The reduced image.

    Raises
    ------
    ValueError
        If ``flag`` is not one of the four supported methods, or if the input
        is smaller than ``target_dimension`` (scale factor below 1).
    """
    scale_factor = int(axis1_shape/target_dimension)
    if scale_factor < 1:
        # A zero factor would otherwise surface as "slice step cannot be zero"
        # or a ZeroDivisionError far from the actual cause.
        raise ValueError(
            f'cannot reduce an image of size {axis1_shape} to {target_dimension}')

    if flag == 'subsample':
        reduced_data = data[::scale_factor].T[::scale_factor].T #keep every scale_factor-th row and column
    elif flag == 'interp': #linear interpolation with anti_aliasing and range preserving
        reduced_data = rescale(data, (1/scale_factor), order=1, anti_aliasing=True, preserve_range=True)
    elif flag == 'minpool': #min pooling each block
        reduced_data = block_reduce(data, block_size=(scale_factor,scale_factor), func=np.min)
    elif flag == 'maxpool': #max pooling each block
        reduced_data = block_reduce(data, block_size=(scale_factor,scale_factor), func=np.max)
    else:
        # Previously an unknown flag left reduced_data unbound.
        raise ValueError(f'unknown reduction flag: {flag!r}')

    return reduced_data


def downsample_header(header, factor):
    """Return a copy of ``header`` adjusted for an image downsampled by ``factor``.

    Axis lengths (``NAXIS1``, ``NAXIS2``) are divided by ``factor`` and kept
    as integers, pixel scales (``CDELT1``, ``CDELT2``) are multiplied by it,
    and the pixel-based keys ``CRPIX1``, ``CRPIX2``, ``RSUN_OBS``, ``R_SUN``,
    ``X0``, ``Y0`` and ``CROP_RAD`` are divided by it.  Keys that are absent
    from the header are skipped, so minimal headers are accepted too.

    Parameters
    ----------
    header : mapping
        FITS header (anything offering ``copy``, ``in`` and item assignment).
    factor : int or float
        Downsampling factor applied to the image.

    Returns
    -------
    mapping
        The modified copy; ``header`` itself is not changed.
    """
    header_new = header.copy()

    # Each axis is scaled from its own length (NAXIS2 used to be derived from
    # NAXIS1, which is wrong for non-square images).
    for key in ('NAXIS1', 'NAXIS2'):
        if key in header_new:
            header_new[key] = int(header_new[key] / factor)

    for key in ('CDELT1', 'CDELT2'):
        if key in header_new:
            header_new[key] = header_new[key] * factor

    for key in ('CRPIX1', 'CRPIX2', 'RSUN_OBS', 'R_SUN', 'X0', 'Y0', 'CROP_RAD'):
        if key in header_new:
            header_new[key] = header_new[key] / factor

    return header_new

def prev_time_resumer(home_dir, base, time_range_orig, date_time_end):
    """Work out where an interrupted run should resume for product ``base``.

    The files already present in ``<home_dir><base>/`` are sorted by name and
    the timestamp embedded in the last one is extracted (third
    underscore-separated field for EIT file names, fourth otherwise).  If the
    first eight characters of that timestamp equal the start date of
    ``time_range_orig`` (as ``YYYYMMDD``), the search resumes from that exact
    time up to ``date_time_end``; otherwise the original range is kept.

    Returns
    -------
    tuple
        ``(prev_time, time_range)`` where ``prev_time`` is ``[]`` for an empty
        directory or a one-element list holding the last stored timestamp.
    """
    print('base:', base)
    product_dir = home_dir + base + '/'

    stored_files = np.sort(
        [name for name in listdir(product_dir) if isfile(join(product_dir, name))])

    # Nothing downloaded yet: start from the requested range.
    if len(stored_files) == 0:
        return [], time_range_orig

    newest_file = stored_files[-1]
    stamp_field = 2 if 'EIT' in str(newest_file) else 3
    prev_time = [newest_file.split('_')[stamp_field]]

    # 'YYYY-MM-DD hh:mm:ss' -> 'YYYYMMDD'
    requested_day = str(time_range_orig.start).split(' ')[0].replace('-', '')

    if str(prev_time[0][0:8]) == requested_day:
        time_range = TimeRange(prev_time[0], date_time_end)
    else:
        time_range = time_range_orig

    return prev_time, time_range


def date_name_maker(date_name):
    """Reformat a compact ``YYYYMMDDhhmmss`` stamp as ``YYYY-MM-DD-hh:mm:ss``.

    The string is cut into two-character pieces: the first two form the year,
    the next two are joined with ``-`` and the following three with ``:``.
    """
    pairs = [date_name[pos:pos + 2] for pos in range(0, len(date_name), 2)]

    year = ''.join(pairs[:2])
    month_day = '-'.join(pairs[2:4])
    clock = ':'.join(pairs[4:7])

    return f'{year}-{month_day}-{clock}'


def data_name_selector(home_dir, base, date_start, date_finish):
    """Pick the start/finish labels used when naming the output data cube.

    When ``<home_dir><base>/`` contains files, the timestamps embedded in the
    first and last file names (sorted by name) are reformatted with
    ``date_name_maker`` and returned.  The timestamp is the third
    underscore-separated field for EIT file names and the fourth otherwise.
    With an empty directory ``date_start`` and ``date_finish`` are returned
    unchanged.
    """
    print('base:', base)
    product_dir = home_dir + base + '/'

    stored_files = np.sort(
        [name for name in listdir(product_dir) if isfile(join(product_dir, name))])
    print('len(data_files):', len(stored_files))

    if len(stored_files) == 0:
        return date_start, date_finish

    first_file, last_file = stored_files[0], stored_files[-1]
    stamp_field = 2 if 'EIT' in str(first_file) else 3

    first_stamp = str(first_file.split('_')[stamp_field])
    last_stamp = str(last_file.split('_')[stamp_field])

    return date_name_maker(first_stamp), date_name_maker(last_stamp)


def data_cuber(home_dir, base, date_start, date_finish, flag, time_window, target_dimension):
    """Bundle all stored images of one product into a single HDF5 cube.

    Every file in ``<home_dir><base>/`` whose name contains ``'SOHO'`` is read
    (in name order, which is chronological for the pipeline's file names), its
    header is passed through ``downsample_header`` and the images are stacked
    along a new first axis.  Two gzip-compressed datasets are written to
    ``<home_dir><start>_to_<finish>_<base>_<flag>_<time_window>_<target_dimension>.h5``:

    * ``<base>_<target_dimension>``: the image stack.
    * ``<base>_<target_dimension>_header``: one FITS header per image,
      serialised with ``Header.tostring()`` as a byte string (header objects
      themselves have no HDF5 representation).

    Returns
    -------
    h5py.File or None
        The (already closed) file object, or ``None`` when the directory holds
        no readable SOHO image and therefore no cube is written.
    """
    print('base:', base)
    filepath = home_dir + base + '/'

    data_files_pre = [f for f in listdir(filepath) if isfile(join(filepath, f))]
    data_files = np.sort(data_files_pre) #chronological order, in sync with the list of individual product times
    print('len(data_files):', len(data_files))

    data_content_list = []
    header_down_list = []
    for elem in data_files:
        # Only pipeline output is cubed; skip stray files before parsing them.
        if 'SOHO' not in elem:
            continue
        axdim1,axdim2,data_content,axisnum,hdr = readfits(f'{filepath}{elem}')
        if data_content is None or hdr is None:
            # readfits signals an unreadable file this way.
            continue
        hdr_down = downsample_header(hdr,int(axdim1/target_dimension))
        data_content_list.append(data_content)
        header_down_list.append(hdr_down)

    if not data_content_list:
        # np.stack raises on an empty list; there is nothing to store.
        print(f'{base}: no SOHO images found, no data cube written')
        return None

    time_start_name_new, time_finish_name_new = data_name_selector(home_dir, base, date_start, date_finish)

    data_content_stack = np.stack(data_content_list)
    header_down_stack = np.array([hdr_down.tostring() for hdr_down in header_down_list], dtype='S')

    cube_path = f'{home_dir}{time_start_name_new}_to_{time_finish_name_new}_{base}_{flag}_{time_window}_{target_dimension}.h5'
    # The with-block closes the file even if writing a dataset fails.
    with h5py.File(cube_path, 'w') as data_cube:
        data_cube.create_dataset(f'{base}_{target_dimension}', data=data_content_stack, compression="gzip")
        data_cube.create_dataset(f'{base}_{target_dimension}_header', data=header_down_stack, compression="gzip")

    return data_cube


In [None]:
def product_search(base,time_range,date_time_start):
    """Query the SDAC provider through ``Fido`` for one SOHO product.

    Parameters
    ----------
    base : str
        Product name.  ``'EIT<wavelength>'`` (wavelength read from characters
        3-5, in Angstrom), a name containing ``'MDI'`` (line-of-sight
        magnetic field), or ``'LASCO_<detector>'``.
    time_range : sunpy.time.TimeRange
        Interval to search; passed as first argument of ``avso.Time``.
    date_time_start : datetime
        Passed as second argument of ``avso.Time``.
        NOTE(review): how ``Time`` treats this when the first argument is
        already a range depends on the sunpy version; confirm.

    Returns
    -------
    The ``Fido.search`` result.

    Raises
    ------
    ValueError
        If ``base`` is none of the supported products (previously this
        surfaced as an ``UnboundLocalError``).
    """
    if 'EIT' in base:
        wavelen = int(base[3:6])
        product_attrs = (
            avso.Source('SOHO'),
            avso.Instrument('EIT'),
            avso.Provider('SDAC'),
            avso.Wavelength(wavelen * avso.u.Angstrom, wavelen * avso.u.Angstrom),
        )

    elif 'MDI' in base:
        product_attrs = (
            avso.Source('SOHO'),
            avso.Instrument('MDI'),
            avso.Provider('SDAC'),
            avso.Physobs('LOS_MAGNETIC_FIELD'),
        )

    elif 'LASCO' in base:
        detector = base.split('_')[1]
        product_attrs = (
            avso.Provider('SDAC'),
            avso.Source('SOHO'),
            avso.Instrument('LASCO'),
            avso.Detector(detector),
        )

    else:
        raise ValueError(f'unsupported product base: {base!r}')

    product_results = Fido.search(avso.Time(time_range,date_time_start), *product_attrs)

    return product_results

In [None]:
def index_of_sizes(base,product_results):
    """Select the search results whose reported file size matches the product.

    The ``'size'`` entry of every record in ``product_results.get_response(0)``
    is compared with the sizes accepted for ``base``:

    * ``'EIT195'``: 2059 or 523
    * ``'MDI'``: 4115
    * ``'LASCO'``: sizes are first rounded up to the next multiple of 100 and
      must then equal 2100
    * ``'171'``, ``'304'``, ``'284'`` (other EIT wavelengths): 2059

    Parameters
    ----------
    base : str
        Product name.
    product_results
        Search result offering ``get_response(0)``.

    Returns
    -------
    numpy.ndarray
        Sorted positions of the matching records.

    Raises
    ------
    ValueError
        If ``base`` is none of the supported products (previously this
        surfaced as an ``UnboundLocalError``).
    """
    round_up_to_hundred = False

    # The order of these checks matters: 'EIT195' must win over the generic
    # wavelength match, exactly as before.
    if 'EIT195' in base:
        accepted_sizes = (2059, 523)
    elif 'MDI' in base:
        accepted_sizes = (4115.0,)
    elif 'LASCO' in base:
        accepted_sizes = (2100.0,)
        round_up_to_hundred = True
    elif any(wavelength in base for wavelength in ('171', '304', '284')):
        accepted_sizes = (2059,)
    else:
        raise ValueError(f'unsupported product base: {base!r}')

    records = product_results.get_response(0)[:]
    if round_up_to_hundred:
        size_list = [int(np.ceil(elem['size'] / 100.0))*100 for elem in records]
    else:
        size_list = [elem['size'] for elem in records]
    print(np.unique(size_list), len(size_list))

    sizes = np.array(size_list)
    if len(accepted_sizes) > 1:
        # Per-size counts, kept for the log output of the EIT195 case.
        for size in accepted_sizes:
            print(int(np.count_nonzero(sizes == size)))

    # np.where on a single mask already yields sorted positions, so no extra
    # sort is needed when several sizes are accepted.
    ind = np.where(np.isin(sizes, accepted_sizes))[0]
    print(len(ind))

    return ind
   

In [None]:
def fetch_indices(base,ind,product_results,time_window,look_ahead,prev_time):
    """Thin the size-filtered records so that kept times are ``time_window`` hours apart.

    The start times of the records at positions ``ind`` are collected, made
    unique and prefixed with ``prev_time`` (``[]`` on the very first pass,
    otherwise the last time kept by the previous pass).  Walking through them
    in order, a time is kept when it falls inside the current window or lies
    beyond its end; after every kept time the window is moved to the
    ``time_window``-hour interval following that time.

    Returns
    -------
    tuple
        ``(all_size_sieved_times_pre, fetch_indices_product)``: the start time
        of every record in ``ind`` (duplicates included) and the entries of
        ``ind`` belonging to the kept times.
    """
    all_size_sieved_times_pre = [
        product_results.get_response(0)[int(position)]['time']['start']
        for position in ind
    ]
    candidate_times = prev_time + list(np.unique(all_size_sieved_times_pre))

    kept_times = []
    if candidate_times:
        first_window = TimeRange(candidate_times[0], timedelta(hours=time_window))
        # With a previous time the first acceptable window is the one *after* it.
        local_time_range = first_window.next() if prev_time else first_window

        for time_value in candidate_times:
            inside_window = time_value in local_time_range
            if inside_window or parser.parse(time_value) > local_time_range.end:
                kept_times.append(time_value)
                local_time_range = TimeRange(time_value, timedelta(hours=time_window)).next()

    time_lookup = np.array(all_size_sieved_times_pre)
    new_inds = [np.where(time_lookup == kept)[0][0] for kept in kept_times]
    fetch_indices_product = ind[new_inds]

    return all_size_sieved_times_pre, fetch_indices_product
    

In [None]:
def product_retriever(base,product_results,indiv_ind,url_prefix,home_dir):
    """Download one search result with ``wget`` into ``<home_dir><base>``.

    Parameters
    ----------
    base : str
        Product name; names the download directory.
    product_results
        Search result offering ``get_response(0)``.
    indiv_ind : int
        Position of the record to download.
    url_prefix : str
        Prepended to the record's ``'fileid'`` to form the URL.
    home_dir : str
        Root output directory (with trailing separator).

    Returns
    -------
    list of str
        One-element list holding the local path of the downloaded file.

    Raises
    ------
    subprocess.CalledProcessError
        If ``wget`` fails again on the single retry made 15 minutes after a
        first failure.
    """
    fileid = product_results.get_response(0)[int(indiv_ind)]['fileid']
    item_wget =  url_prefix + fileid

    # Build the argument vector directly instead of formatting one string and
    # re-splitting it: URLs or directories containing spaces or quotes then
    # reach wget unchanged.
    args = [
        'wget',
        '--retry-connrefused',
        '--waitretry=1',
        '--read-timeout=20',
        '--timeout=15',
        '-t', '0',
        '--continue',
        item_wget,
        '-P', f'{home_dir}{base}',
    ]

    try:
        subprocess.check_output(args, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as err:
        print('Error output:\n', err.output)
        print('sleep for 15 minutes and then retry command')
        time.sleep(900)
        subprocess.check_output(args, stderr=subprocess.STDOUT)

    downloaded_fileid = fileid.split('/')[-1]
    query_result = [f'{home_dir}{base}/{downloaded_fileid}']

    return query_result

In [None]:
def product_distiller(fetch_indices_product_orig, base, all_size_sieved_times_pre, ind, product_results, look_ahead, time_window, url_prefix, flag, target_dimension, home_dir):
    """Download the scheduled records, keep the usable ones and repair the schedule.

    Each scheduled record is downloaded and checked: it must be readable,
    square and two-dimensional, and ``holes`` must not flag it.  Usable images
    are reduced with ``data_reducer`` and stored through ``writefits`` as
    ``<base>/SOHO_<base>_<time>_<target_dimension>``; the downloaded original
    is always deleted.

    When a scheduled record is rejected, the records that follow it (at most
    ``look_ahead - 1`` of them) and start within ``time_window`` hours of it
    are tried in order.  After the first usable replacement the remaining
    schedule is rebuilt so that subsequent records are again ``time_window``
    hours apart, counted from the replacement.

    Parameters
    ----------
    fetch_indices_product_orig : sequence of int
        Positions (into the search result) of the records scheduled by
        ``fetch_indices``.
    base : str
        Product name.
    all_size_sieved_times_pre : list
        Start time of every size-filtered record, aligned with ``ind``.
    ind : numpy.ndarray
        Positions of all size-filtered records in the search result.
    product_results
        Search result offering ``get_response(0)``.
    look_ahead, time_window : int
        Replacement search depth and spacing in hours.
    url_prefix, flag, target_dimension, home_dir
        Passed on to the download / reduce / write helpers.

    Returns
    -------
    tuple
        ``(kept_times, holes_product_list, unreadable_file_ids_product_list)``:
        start times of the stored images in processing order, ``(url, time)``
        pairs of images rejected for holes, and file ids of unreadable images.
    """
    holes_product_list = []
    unreadable_file_ids_product_list = []
    all_time_window_sieved_times_product_times = []

    records = product_results.get_response(0)
    times_lookup = np.array(all_size_sieved_times_pre)

    def first_index_of(time_value):
        """Position of the first size-filtered record starting at ``time_value``."""
        return np.where(times_lookup == time_value)[0][0]

    def fetch_and_keep(index):
        """Download record ``index``, store it if usable; return ``(kept, start_time)``."""
        record = records[int(index)]
        time_data = record['time']['start']
        local_path = product_retriever(base,product_results,index,url_prefix,home_dir)[0]
        # readfits returns five values; the header is not needed here.
        axis1, axis2, data, axisnum, _hdr = readfits(local_path)

        if (data is None) or (axis1 != axis2) or (axisnum != 2):
            unreadable_file_ids_product_list.append(record['fileid'])
            os.remove(local_path) #delete original downloaded file
            return False, time_data

        # holes() re-opens the file, so evaluate it only once per download.
        if holes(local_path):
            holes_product_list.append((url_prefix + record['fileid'], str(time_data)))
            os.remove(local_path)
            return False, time_data

        reduced_product_data = data_reducer(data,flag,target_dimension,axis1)
        writefits(f'{base}/SOHO_{base}_{time_data}_{target_dimension}', reduced_product_data, home_dir)
        os.remove(local_path)
        all_time_window_sieved_times_product_times.append(time_data)
        return True, time_data

    def replacement_candidates(bad_time):
        """Records after ``bad_time`` that start within ``time_window`` hours of it."""
        pickup = first_index_of(bad_time)
        zoomed_time_range = TimeRange(str(bad_time),timedelta(hours=time_window))
        # pickup + 1: the record at `pickup` itself was just rejected.
        upcoming = all_size_sieved_times_pre[pickup + 1: pickup + look_ahead]
        return [ind[first_index_of(time_val)] for time_val in upcoming if time_val in zoomed_time_range]

    def reschedule_after(time_data):
        """Records to fetch after a replacement at ``time_data``, ``time_window`` hours apart."""
        localized_time_range = TimeRange(str(time_data),timedelta(hours=time_window)).next()
        print('localized_time_range:', localized_time_range)
        rescheduled = []
        for tval in all_size_sieved_times_pre:
            if parser.parse(tval) < localized_time_range.start:
                continue
            # The first time at or beyond the window start is taken whether it
            # lies inside the window or past its end (the former separate
            # branches all selected this same record).
            rescheduled.append(ind[first_index_of(tval)])
            localized_time_range = TimeRange(str(tval),timedelta(hours=time_window)).next()
        print('indiv_ind_modified_list:', rescheduled)
        return rescheduled

    fetch_indices_product = fetch_indices_product_orig.copy()

    # The number of passes is fixed by the original schedule; the schedule
    # itself may be replaced while looping.
    for i in range(len(fetch_indices_product_orig)):
        if (i > len(fetch_indices_product)-1): #rebuilt schedule exhausted
            break

        kept, rejected_time = fetch_and_keep(fetch_indices_product[i])
        if kept:
            continue

        for index in replacement_candidates(rejected_time):
            kept, replacement_time = fetch_and_keep(index)
            if kept:
                # Zero padding keeps the already processed positions 0..i
                # aligned with the loop counter.
                fetch_indices_product = list(np.zeros(i+1)) + reschedule_after(replacement_time)
                break

    all_time_window_sieved_times_product_times_modified = all_time_window_sieved_times_product_times

    return all_time_window_sieved_times_product_times_modified, holes_product_list, unreadable_file_ids_product_list

In [None]:
"""
Keeps first track of all times of all fits files gathered.
"""
def csv_writer(base, home_dir, date_start, date_finish, flag, time_window, target_dimension, all_time_window_sieved_times_sorted):
    """Append the times of the stored images to the product's CSV log.

    The file ``<home_dir><date_start>_to_<date_finish>_<base>_times_<flag>_<time_window>_<target_dimension>.csv``
    is opened in append mode, so earlier entries are preserved, and every time
    is written on its own ``\\n``-terminated line.  An empty sequence leaves
    the file content unchanged.
    """
    csv_path = f'{home_dir}{date_start}_to_{date_finish}_{base}_times_{flag}_{time_window}_{target_dimension}.csv'
    # newline='' lets the csv module control line endings itself; one row per
    # time with an explicit '\n' terminator avoids the mixed '\n' / '\r\n'
    # endings produced by using '\n' as field delimiter.
    with open(csv_path, 'a', newline='') as f: #appending lines so not overwriting the file
        writer = csv.writer(f, lineterminator='\n')
        writer.writerows([time_value] for time_value in all_time_window_sieved_times_sorted)


In [None]:
'''def main(date_start, date_finish, target_dimension, time_increment, time_window, flag, home_dir, bases):''' 
    
# ---------------------------------------------------------------------------
# Driver script: for every requested product, search the archive in
# consecutive time chunks, download and reduce the usable images, log their
# times to CSV and finally bundle the stored images into one HDF5 cube.
# ---------------------------------------------------------------------------

# User configuration.
date_start = '1996-01-01'
date_finish = '2011-05-01'
target_dimension = 128
time_window = 6
flag = 'subsample'
home_dir = '/home/carl/Documents/'
bases = 'EIT195, MDI_96m, LASCO_C2, LASCO_C3, EIT171, EIT304, EIT284' #or a subset of these products

# Turn the dates into datetimes by appending an 'HHMM' suffix:
# start of the first day, last minute of the final day.
date_time_pre_start = date_start + '-0000'
date_time_start= parser.parse(date_time_pre_start)
print('date_time_start:', date_time_start)

date_time_pre_end = date_finish + '-2359'
date_time_end = parser.parse(date_time_pre_end)
print('date_time_end:', date_time_end)

print('target_dimension:', target_dimension)
print('flag:', flag)
print('home_dir:', home_dir)

# Length in days of each search chunk (see the timedelta(days=...) uses below).
time_increment = 60

url_prefix = 'https://seal.nascom.nasa.gov/'
print('url_prefix:', url_prefix)

# Number of following records inspected when looking for a replacement image;
# with time_window = 6 this evaluates to 36.
look_ahead = int(np.ceil(time_window*60/10.)) #should sufficiently cover all 7 products based on their cadence.
print('look_ahead:', look_ahead)

diff_start_finish_total_sec = (date_time_end - date_time_start).total_seconds()
print('diff_start_finish_total_sec:', diff_start_finish_total_sec)

total_sec = timedelta(days = time_increment).total_seconds()
print('total_sec:', total_sec)

# Number of chunks needed to cover the whole period, plus one spare pass.
num_loops = np.ceil(diff_start_finish_total_sec/total_sec) + 1 #num_loops would be equal to 94 + 1 for '19960101-0000' - '20110501-0000'; discrete number of loops so go over rather than under
print('num_loops:', num_loops)

base_list = bases.split(',')
for base in tqdm(base_list):
    
    start_process_time = process_time() #initialize clock per product type 
    
    base = base.strip(' ')
    # Per-product collections of rejected files, reported after the loop.
    holes_list = []
    unreadable_file_ids_product_list_global = []
    
    print(f'***{base}***')
    base_dir = home_dir + base
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)    

    time_range = TimeRange(date_time_start, timedelta(days = time_increment)) #time_range re-initialized here
    #print('time_range:', time_range)

    # Resume from files already on disk, if any (prev_time is [] otherwise).
    prev_time, time_range_modified = prev_time_resumer(home_dir, base, time_range, date_time_end)
    for t_value in tqdm(np.arange(num_loops)): #main workhorse loop
        print('t_value:', t_value)
        print('prev_time:', prev_time)
        
        # Never search past the requested end date.
        if time_range_modified.end > date_time_end:
            time_range_modified = TimeRange(time_range_modified.start, date_time_end)
            
        product_results = product_search(base,time_range_modified,date_time_start)
        product_results_number = product_results.file_num
        if product_results_number != 0:
            # Keep only records of the expected file size, then thin them to
            # one candidate per time_window hours.
            ind = index_of_sizes(base,product_results)
            all_size_sieved_times_pre, fetch_indices_product_orig = fetch_indices(base,ind,product_results,time_window,look_ahead,prev_time)          
            
            if len(fetch_indices_product_orig) != 0:
                
                all_time_window_sieved_times_product_times_modified, holes_product_list, unreadable_file_ids_product_list_local = product_distiller(fetch_indices_product_orig, base, all_size_sieved_times_pre, ind, product_results, look_ahead, time_window, url_prefix, flag, target_dimension, home_dir)
                
                if holes_product_list:
                    holes_list.append(holes_product_list)

                if unreadable_file_ids_product_list_local:
                    unreadable_file_ids_product_list_global.append(unreadable_file_ids_product_list_local)

                all_time_window_sieved_times_sorted = np.unique(all_time_window_sieved_times_product_times_modified)

                print(f'{base} np.unique(all_size_sieved_times_pre):', np.unique(all_size_sieved_times_pre), len(np.unique(all_size_sieved_times_pre)))
                print(f'{base} list(all_time_window_sieved_times_sorted):', list(all_time_window_sieved_times_sorted), len(all_time_window_sieved_times_sorted))

                # Carry the last stored time into the next chunk so the
                # time_window spacing continues across chunk boundaries.
                prev_time = [] #reset to empty list
                if len(all_time_window_sieved_times_sorted) != 0:
                    prev_time.append(all_time_window_sieved_times_sorted[-1])

                csv_writer(base,home_dir, date_start, date_finish, flag, time_window, target_dimension, all_time_window_sieved_times_sorted)

        
        # NOTE(review): the return value is discarded, so this relies on
        # TimeRange.next() shifting the range in place - confirm for the
        # installed sunpy version.
        time_range_modified.next() #Sunpy iterator to advance to the next chunk #also have time_range_modified.previous() to go back.    
        #print('time_range_modified next:', time_range_modified)
        
    print(f'{base} holes_list', holes_list)
    print(f'{base} unreadable_file_ids_product_list_global:', unreadable_file_ids_product_list_global)

    # Bundle everything stored for this product into one HDF5 file.
    data_cuber(home_dir, base, date_start, date_finish, flag, time_window, target_dimension)
        
    end_process_time = process_time()
    time_of_process = end_process_time - start_process_time
    print(f'{base} time of process in seconds:', time_of_process)