# Creating timeseries of the GOES data for 8 speakers

- 8 channels (need 8 timeseries)
- 12 minute piece, GOES samples every 5-minutes

The artist wanted a viewer to feel like weather was moving through the room. 

This means we must think about time and space.

Space:
So, the 8 speakers should correspond to 8 different locations around the viewer. The spatial scale of the data variablity will need to be explored so we can determine the right distance these locations should be. Maybe if they are only 1 km apart, there isn't enough of a 'difference' for the sound to feel immersive.  But if the locations are too far apart, then they won't be correlated to eachother and may just sound like 8 unconnected sounds.

Time: 
GOES CONUS data has 5-minute sampling. 
The piece is 12 minutes long - so if we went with realtime data, we would have 2 data points that could be interpolated together,
but basically two gradient tones during 12 minutes is not very dynamic, 
but that might be okay? The other issues is that I think this surface data is meant to be dominate during only the first 3 minutes,
so maybe we are only really looking at 3 minutes of gradients. If that is the case, in order to get a sense of weather, 
it might be necessary to create a compressed timeseries of data over the last hour or so.

GOES:

GOES has full disk, CONUS, mesoscale options as well as many different products.
I'm using CONUS below as it has high spatial resolution, 5-minute sampling.

## Steps in code below:
- The code looks at two different GOES datasets. We don't really want one with cloud masks (missing data). 
- Georeference the coordinate system so we can look up data around a latitude and longitude
- Calculate 8 points near a reference location (Miami)
- Create timeseries of data at those points 

In [5]:
#from goes2go import GOES
#import matplotlib.pyplot as plt
from datetime import datetime, timedelta
import warnings
import numpy as np
warnings.filterwarnings('ignore')
#import cartopy.crs as ccrs
#import cartopy.feature as cfeature
import pandas as pd
import s3fs
import xarray as xr
import os

#list of GOES products https://github.com/blaylockbk/goes2go/blob/main/goes2go/product_table.txt

# Calculate latitude and longitude from GOES ABI fixed grid projection data
#GOES ABI fixed grid projection is a map projection relative to the GOES satellite  
#Units: latitude in °N (°S < 0), longitude in °E (°W < 0)  
#See GOES-R Product User Guide (PUG) Volume 5 (L2 products) Section 4.2.8 for details & example of calculations  
#"file_id" is an ABI L1b or L2 .nc file opened using the netCDF4 library  
#code from https://www.star.nesdis.noaa.gov/atmospheric-composition-training/python_abi_lat_lon.php  
#Acknowledgement:  NOAA/NESDIS/STAR Aerosols and Atmospheric Composition Science Team  
#Their code is written for numpy arrays not xarray, so I updated it to work with xarray datasets  

#import configuration location and filepath
from myconfig import *
output_path = output_path_data

# Target latitude and longitude MIAMI
#target_lat = 19.71361111111111  # Example: Latitude of Milwaukee Trench
#target_lon = -67.31083333333333  # Example: Longitude of Milwaukee Trench

#read the dataset so we know what we have already ready or not
#gfile = xr.open_dataset("./data/goes_data_time.nc").load()
#gfile.close()

def calculate_degrees(file_id):
    # Read in GOES ABI fixed grid projection variables and constants
    x_coordinate_1d = file_id.variables['x'][:]  # E/W scanning angle in radians
    y_coordinate_1d = file_id.variables['y'][:]  # N/S elevation angle in radians
    projection_info = file_id.variables['goes_imager_projection']
    lon_origin = projection_info.attrs.get('longitude_of_projection_origin')
    H = projection_info.attrs.get('perspective_point_height')+projection_info.attrs.get('semi_major_axis')
    r_eq = projection_info.attrs.get('semi_major_axis')
    r_pol = projection_info.attrs.get('semi_minor_axis')
    
    # Create 2D coordinate matrices from 1D coordinate vectors
    x_coordinate_2d, y_coordinate_2d = np.meshgrid(x_coordinate_1d, y_coordinate_1d)
    
    # Equations to calculate latitude and longitude
    lambda_0 = (lon_origin*np.pi)/180.0  
    a_var = np.power(np.sin(x_coordinate_2d),2.0) + (np.power(np.cos(x_coordinate_2d),2.0)*(np.power(np.cos(y_coordinate_2d),2.0)+(((r_eq*r_eq)/(r_pol*r_pol))*np.power(np.sin(y_coordinate_2d),2.0))))
    b_var = -2.0*H*np.cos(x_coordinate_2d)*np.cos(y_coordinate_2d)
    c_var = (H**2.0)-(r_eq**2.0)
    r_s = (-1.0*b_var - np.sqrt((b_var**2)-(4.0*a_var*c_var)))/(2.0*a_var)
    s_x = r_s*np.cos(x_coordinate_2d)*np.cos(y_coordinate_2d)
    s_y = - r_s*np.sin(x_coordinate_2d)
    s_z = r_s*np.cos(x_coordinate_2d)*np.sin(y_coordinate_2d)
    
    # Ignore numpy errors for sqrt of negative number; occurs for GOES-16 ABI CONUS sector data
    np.seterr(all='ignore')
    
    abi_lat = (180.0/np.pi)*(np.arctan(((r_eq*r_eq)/(r_pol*r_pol))*((s_z/np.sqrt(((H-s_x)*(H-s_x))+(s_y*s_y))))))
    abi_lon = (lambda_0 - np.arctan(s_y/(H-s_x)))*(180.0/np.pi)
    
    return abi_lat, abi_lon

def forward_fill_2d(arr):
    # Loop through each column
    for i in range(arr.shape[1]):
        mask = np.isnan(arr[:, i])
        # Forward fill NaN values
        arr[mask, i] = np.interp(np.flatnonzero(mask), np.flatnonzero(~mask), arr[~mask, i])
    return arr

def find_nearest_indices(lat_arr, lon_arr, target_lat, target_lon):
    # Find the nearest latitude index
    lat_idx = (np.abs(lat_arr - target_lat)).argmin()
    # Find the nearest longitude index
    lon_idx = (np.abs(lon_arr - target_lon)).argmin()
    return lat_idx, lon_idx

def calculate_points(istep,lon_idx,lat_idx):
    #how big do we want to have the box?
    #istep is how many grid points away from the center that we want to go   
    # List of points you want to subset around point x
    #   *  *  *
    #   *  x  *
    #   *  *  *
    #north_point = [lat_idx+istep,lon_idx]
    #east_point = [lat_idx,lon_idx+istep]
    #south_point = [lat_idx-istep,lon_idx]
    #west_point = [lat_idx,lon_idx-istep]
    #northeast_point = [lat_idx+istep,lon_idx+istep]
    #northwest_point = [lat_idx+istep,lon_idx-istep]
    #southeast_point = [lat_idx-istep,lon_idx+istep]
    #southwest_point = [lat_idx-istep,lon_idx-istep]
    points = [
        {"i": int(lon_idx), "j": int(lat_idx)+istep, "name": 'N'},
        {"i": int(lon_idx)+istep, "j": int(lat_idx)+istep, "name": 'NE'},
        {"i": int(lon_idx)+istep, "j": int(lat_idx), "name": 'East'},
        {"i": int(lon_idx)+istep, "j": int(lat_idx)-istep, "name": 'SE'},
        {"i": int(lon_idx), "j": int(lat_idx)-istep, "name": 'S'},
        {"i": int(lon_idx)-istep, "j": int(lat_idx)-istep, "name": 'SW'},
        {"i": int(lon_idx)-istep, "j": int(lat_idx), "name": 'W'},
        {"i": int(lon_idx)-istep, "j": int(lat_idx)+istep, "name": 'NW'},
    ]
    return points

def get_start_end_time(fname):
    #goes filenames structure https://geonetcast.wordpress.com/2017/04/27/goes-16-file-naming-convention/
    #use filename to find start/end times for data
    tem = str(fname).split('/')
    tem2,i = tem[5],25
    dt_start = datetime.strptime(tem2[i:i+13], '%Y%j%h%M%S')
    tem2,i = tem[5],41
    dt_end = datetime.strptime(tem2[i:i+13], '%Y%j%h%M%S')
    return dt_start,dt_end

def already_read(fname,gfile):
    start_time,end_time = get_start_end_time(fname)
    isum = gfile.read_value.loc[start_time:end_time].sum().data
    if isum>0:
        return True
    else:
        return False

#def set_already_read(fname,gfile):
#    start_time,end_time = get_start_end_time(fname)
#    gfile.read_value.loc[start_time:end_time] = 1
#    gfile.to_netcdf("./data/goes_data_time.nc")
#    return gfile


# now we have some data points, we need to create a timeseries of data

goes2go downloads the data before reading it  
since we are looking at timeseries and there are like 288 files each day (5 min data)  
i don't want to download all that data  
so i'm trying to figure out if I can lazy load it  


In [6]:
# connect to S3 bucket with data
#start_time='20231206'
#end_time='20231213'
#files_fname = './../../goes_filenames_test_'+start_time+'-'+end_time+'.csv'
#df = pd.read_csv(files_fname)
#df.file[0]
fs = s3fs.S3FileSystem(anon=True) #connect to s3 bucket!
#file_location=df.file
#file_ob = [fs.open('s3://'+file) for file in file_location]        #open connection to files
#print(file_ob[0])

In [9]:
for incr in range(0,number_days_to_process):

    date_start = datetime(start_year, start_month, start_day) + timedelta(days=incr)
    date_end = date_start + timedelta(days=1)  

    start_time = date_start.strftime("%Y%m%d")
    end_time = date_end.strftime("%Y%m%d")
    files_fname = output_path+'goes_filenames_test_'+start_time+'-'+end_time+'.csv'
    df = pd.read_csv(files_fname)

    file_location=df.file
    file_ob = [fs.open('s3://'+file) for file in file_location]        #open connection to files
    print(file_ob[0])

#    if incr==3:
#        iii=209
#        ds = xr.open_dataset(".\..\goes_timeseries_20231209-20231210.nc")
#        ds.load()
#        ds.close()
#    else:
#        iii=1
    
    for i in range(1,len(file_ob)):
        fname = file_ob[i]
    #    if already_read(fname,gfile):
    #        continue    
        ds = xr.open_dataset(file_ob[i]) #note file is super messed up formatting
        #ds2 = ds#.isel(time=0)
        #calculate lat/lon
        abi_lat, abi_lon = calculate_degrees(ds)
        abi_lat = forward_fill_2d(abi_lat.copy())
        abi_lon = forward_fill_2d(abi_lon.copy())
        #note this isn't going to be perfect because used 1d so run one time, then again with closer values
        # Find nearest indices
        lat_idx, lon_idx = find_nearest_indices(abi_lat[:,0], abi_lon[0,:], target_lat, target_lon)
        lat_idx, lon_idx = find_nearest_indices(abi_lat[:,lon_idx], abi_lon[lat_idx,:], target_lat, target_lon)
        stime,etime = get_start_end_time(fname)
        ds_tem = ds.isel(y=slice(lat_idx-25,lat_idx+25),x=slice(lon_idx-25,lon_idx+25)).CMI_C10.load()
        for istep in range(1,20):
            points = calculate_points(istep,25,25) #lon_idx,lat_idx)
            point_data=ds_tem.isel(y=points[0].get('j'), x=points[0].get('i')) #, method="nearest")
            for p in range(len(points)):
                if p>0:
                    tem=ds_tem.isel(y=points[p].get('j'), x=points[p].get('i')) #, method="nearest")
                    point_data = xr.concat([point_data, tem], dim="points_index")
            if istep>1:
                step_data = xr.concat([step_data,point_data], dim="step")
            else:
                step_data = point_data
        if i>1:
            all_data = xr.concat([all_data,step_data], dim="time")
        else:
            all_data = step_data
        nc_fname = output_path+'goes_timeseries_lat'+target_lat+'_lon'+target_lon+'_time_'+start_time+'-'+end_time+'.nc'
        csv_fname = output_path+'goes_timeseries_lat'+target_lat+'_lon'+target_lon+'_time_'+start_time+'-'+end_time+'.csv'
        all_data.to_netcdf(nc_fname)
    
        all_data_df = all_data.to_dataframe()
        all_data_df.to_csv(csv_fname)
        
        #gfile = set_already_read(fname,gfile)
        print(incr,i)

<File-like object S3FileSystem, noaa-goes16/ABI-L2-MCMIPC/2023/341/00/OR_ABI-L2-MCMIPC-M6_G16_s20233410001174_e20233410003558_c20233410004065.nc>


TypeError: can only concatenate str (not "float") to str