# Step 1 to create GOES data timeseries

- Creates files with lists of all GOES files in 1 day

- Run this before creating GOES timeseries data

- Set start date

- Set number of days to increment through (does 1 day at a time)

This primarily supports creating the historical GOES data. Each day requires accessing 288 files, and with that many requests there is a real risk of failure. Keeping a static list of file names to work through was a quick way to make sure that, when the program died, we could restart at the same place it stopped.

You can check what data is available here: [https://noaa-goes16.s3.amazonaws.com/index.html#ABI-L2-MCMIPC/](https://noaa-goes16.s3.amazonaws.com/index.html#ABI-L2-MCMIPC/)



In [2]:
# Step 1 of the GOES timeseries workflow: build per-day lists of GOES file names
from goes2go import GOES
import xarray as xr
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import warnings
# Suppress all warnings (no category or module filter is given)
warnings.filterwarnings('ignore')

# Star-import the local configuration module. The cells below read
# output_path_goes_fname and number_days_to_process, neither of which is
# defined in this notebook, so they presumably come from myconfig -- TODO confirm
from myconfig import *
# Prefix prepended to every CSV file name written by the cells below
output_path = output_path_goes_fname

def get_start_end_time(fname):
    """Return the scan start and end times encoded in a GOES file name.

    GOES-R file names look like
    ``OR_<product>_G<sat>_s<YYYYJJJHHMMSSt>_e<YYYYJJJHHMMSSt>_c<...>.nc``
    (https://geonetcast.wordpress.com/2017/04/27/goes-16-file-naming-convention/),
    where ``JJJ`` is the day of the year and ``t`` is tenths of a second.

    Parameters
    ----------
    fname : str or path-like
        File name, optionally preceded by any number of '/'-separated
        directories (e.g. the ``file`` column produced by ``GOES.df``).

    Returns
    -------
    tuple of (datetime, datetime)
        Naive ``(dt_start, dt_end)``, truncated to whole seconds.

    Raises
    ------
    ValueError
        If the name does not carry the expected ``s``/``e`` fields or the
        timestamps cannot be parsed.
    """
    # Only the last path component carries the timestamps, so the number of
    # leading directories does not matter.
    basename = str(fname).split('/')[-1]
    fields = basename.split('_')
    if len(fields) < 3:
        raise ValueError(f"not a GOES file name: {basename!r}")

    # The name always ends with the start, end and creation fields, so index
    # from the right; this keeps working whatever the product name length is.
    stamps = []
    for prefix, field in (('s', fields[-3]), ('e', fields[-2])):
        if not field.startswith(prefix):
            raise ValueError(
                f"expected a field starting with {prefix!r} in {basename!r}"
            )
        # Keep YYYYJJJHHMMSS (13 characters) and drop the tenths digit.
        # %H is the 24-hour field; the previous '%h' is not an hour directive.
        stamps.append(datetime.strptime(field[1:14], '%Y%j%H%M%S'))

    dt_start, dt_end = stamps
    return dt_start, dt_end


In [7]:
# Take a single snapshot of the current time and split it into the calendar
# fields that the cells below use as the starting point.
# NOTE(review): datetime.now() is naive local time, while GOES file names are
# presumably stamped in UTC -- confirm that local time is what is wanted here.
now = datetime.now()
start_year, start_month, start_day, start_hour = (
    now.year,
    now.month,
    now.day,
    now.hour,
)
print(start_year, start_month, start_day, start_hour)

2024 10 24 16


In [10]:
# Write one CSV of available GOES file names per day, starting with the day in
# start_year/start_month/start_day and walking backwards one day at a time for
# number_days_to_process days.

# Open the GOES-R image
# (author's note: ABI-L2-DMWVC / ABI-L2-DMWC are not available at all points)
G = GOES(satellite=16, product="ABI-L2-MCMIPC", domain='C')

# Midnight of the first day; computed once because it does not change per day.
first_day = datetime(start_year, start_month, start_day)

for incr in range(0, number_days_to_process):
    date_start = first_day - timedelta(days=incr)
    # The first day is only listed up to `now`; every earlier day spans a
    # full 24 hours.
    date_end = now if incr == 0 else date_start + timedelta(days=1)
    start_time = date_start.strftime("%Y%m%d")
    end_time = date_end.strftime("%Y%m%d %H:%M")

    print(start_time, end_time)
    # Produce a pandas DataFrame of the available files in a time range
    df = G.df(start=start_time, end=end_time)
    if df.empty:
        # An empty listing (e.g. when run just after midnight) used to crash
        # on df.file[0] and abort the remaining days; skip the day instead.
        print(0, 'no files found, no CSV written')
        continue
    print(len(df), df.file.iloc[0])
    # Save DataFrame to a CSV file
    files_fname = output_path+'goes_filenames_test_'+start_time+'-'+end_time+'.csv'
    df.to_csv(files_fname)


20241024 20241024 16:30
198 noaa-goes16/ABI-L2-MCMIPC/2024/298/00/OR_ABI-L2-MCMIPC-M6_G16_s20242980001174_e20242980003547_c20242980004062.nc
20241023 20241024 00:00
287 noaa-goes16/ABI-L2-MCMIPC/2024/297/00/OR_ABI-L2-MCMIPC-M6_G16_s20242970001174_e20242970003547_c20242970004059.nc
20241022 20241023 00:00
288 noaa-goes16/ABI-L2-MCMIPC/2024/296/00/OR_ABI-L2-MCMIPC-M6_G16_s20242960001173_e20242960003559_c20242960004069.nc
20241021 20241022 00:00
288 noaa-goes16/ABI-L2-MCMIPC/2024/295/00/OR_ABI-L2-MCMIPC-M6_G16_s20242950001173_e20242950003559_c20242950004062.nc
20241020 20241021 00:00
288 noaa-goes16/ABI-L2-MCMIPC/2024/294/00/OR_ABI-L2-MCMIPC-M6_G16_s20242940001172_e20242940003551_c20242940004074.nc
20241019 20241020 00:00
288 noaa-goes16/ABI-L2-MCMIPC/2024/293/00/OR_ABI-L2-MCMIPC-M6_G16_s20242930001172_e20242930003545_c20242930004068.nc
20241018 20241019 00:00
288 noaa-goes16/ABI-L2-MCMIPC/2024/292/00/OR_ABI-L2-MCMIPC-M6_G16_s20242920001171_e20242920003550_c20242920004066.nc
20241017 2024

# ONLY RUN THE CODE BELOW FOR 2021
Fix for 2021, where day 243, hour 16 is missing on the cloud.

In [None]:
# FIX FOR 2021, where day 243 hour 16 is missing on the cloud.
# The day is listed as two windows, one on each side of the missing hour, and
# the two listings are concatenated into a single CSV for that day.
# NOTE(review): this cell reuses `incr` left over from the loop above and ADDS
# it to the start date, whereas that loop subtracts it -- confirm that
# start_year/start_month/start_day and incr point at the intended day.
# NOTE(review): the CSV name here uses a date-only end stamp, unlike the loop
# above, which appends " %H:%M" -- confirm which form downstream code expects.

# Hour (0-23) that is absent from the bucket and must be skipped
missing_hour = 16

# Open the GOES-R image; one handle serves both queries
# (author's note: ABI-L2-DMWVC / ABI-L2-DMWC are not available at all points)
G = GOES(satellite=16, product="ABI-L2-MCMIPC", domain='C')

# Midnight of the day being repaired; every window below is derived from it.
day_start = datetime(start_year, start_month, start_day) + timedelta(days=incr)

# Window 1: midnight through the end of the hour before the gap.
date_start = day_start
date_end = day_start + timedelta(hours=missing_hour - 1)
start_time = date_start.strftime("%Y%m%d %H:00")
end_time = date_end.strftime("%Y%m%d %H:59")
print(start_time, end_time)
# Produce a pandas DataFrame of the available files in a time range
df = G.df(start=start_time, end=end_time)

# Window 2: the hour after the gap through midnight of the following day.
date_start = day_start + timedelta(hours=missing_hour + 1)
date_start2 = day_start
date_end = date_start2 + timedelta(days=1)
start_time = date_start.strftime("%Y%m%d %H:00")
end_time = date_end.strftime("%Y%m%d %H:00")
print(start_time, end_time)
# Produce a pandas DataFrame of the available files in a time range
df2 = G.df(start=start_time, end=end_time)

# Stitch both windows together with a fresh 0..n-1 index.
df3 = pd.concat([df, df2], ignore_index=True)

# Name the CSV after the whole day (date-only stamps), not after either window.
date_start = day_start
date_end = date_start + timedelta(days=1)
start_time = date_start.strftime("%Y%m%d")
end_time = date_end.strftime("%Y%m%d")

files_fname = output_path+'goes_filenames_test_'+start_time+'-'+end_time+'.csv'
df3.to_csv(files_fname)
