# Histograms, empirical CDFs, and AUC for GOES-16 and GOES-17 full-disk samples in a region of interest

## Import libraries

In [55]:
import numpy as np
import scipy.stats as st
import itertools
import pandas as pd
# npPath = '/sharedData/scratch/all_npy3/'
# ncPath = '/sharedData/scratch/april_data/'
# acmPath = '/sharedData/scratch/all_npy3/'
# DATAPATH = '/sharedData/scratch/'
import requests
import re
import os
#import os.path as op
from os import path as op
import matplotlib.pyplot as plt
%matplotlib inline
import xarray as xr
import netCDF4
from pathlib import Path
from subprocess import Popen

import matplotlib.dates as mdates
from datetime import datetime

In [56]:
import logging
# Root logger (getLogger() with no name); nothing in the visible cells configures or uses it yet.
logger = logging.getLogger()

## Define paths

In [57]:
# Storage directory, given relative to the notebook's working directory.
storage = Path('../storage/')

## Functions for unfiltered cloud mask

In [58]:
def Rad2BT(rad, planck_fk1, planck_fk2, planck_bc1, planck_bc2):
    """Convert radiances to brightness temperature (black-body equation).

    Evaluates, element-wise,

        T = (planck_fk2 / ln(planck_fk1 / rad + 1) - planck_bc1) / planck_bc2

    Parameters
    ----------
    rad : scalar or array-like
        Radiance value(s). Integer input is accepted and converted to float.
    planck_fk1, planck_fk2, planck_bc1, planck_bc2 : float
        Coefficients of the conversion formula above.

    Returns
    -------
    numpy scalar or numpy.ndarray
        Brightness temperature, with the same shape as ``rad``.
    """
    radiance = np.asarray(rad)
    if not np.issubdtype(radiance.dtype, np.inexact):
        # NumPy raises ValueError when an integer array is raised to a negative
        # power, so integer radiances must be promoted to float first.
        radiance = radiance.astype(float)
    inverse_radiance = radiance ** (-1)
    log_argument = (inverse_radiance * planck_fk1) + 1.0
    temperature = (-planck_bc1 + (planck_fk2 * (np.log(log_argument) ** (-1)))) * (1 / planck_bc2)
    return temperature

In [59]:
def createUnfilteredPlotArray(ncFile, npFile, npPath, ncPath=None):
    """Build the (time, temperature) series for the unfiltered data.

    For each (netCDF, .npy) file pair the saved array is loaded, its mean is
    taken, and that mean is converted with ``Rad2BT`` using the four Planck
    coefficients stored in the matching netCDF file. Note that this is the
    temperature of the mean radiance, not the mean of per-pixel temperatures.

    Parameters
    ----------
    ncFile : iterable of str
        netCDF file names, paired position-by-position with ``npFile``.
    npFile : iterable of str
        ``.npy`` file names located in ``npPath``.
    npPath : str
        Directory containing the ``.npy`` files.
    ncPath : str, optional
        Directory containing the netCDF files. When omitted, the notebook-level
        global ``ncPath`` is used, which is what this function always did.

    Returns
    -------
    times : list of str
        Characters 31..37 of each netCDF file name.
        NOTE(review): presumably the day-of-year/time part of the GOES file
        name; confirm against the actual naming scheme.
    Tmean : list
        One ``Rad2BT`` result per file pair.

    Raises
    ------
    NameError
        If ``ncPath`` is neither passed nor defined at notebook level.
    """
    Tmean = []
    times = []
    for ncf, npf in zip(ncFile, npFile):
        if ncPath is None:
            # Resolved lazily so that empty inputs still return ([], []).
            try:
                ncPath = globals()['ncPath']
            except KeyError:
                raise NameError(
                    "name 'ncPath' is not defined; pass ncPath=... or define it at notebook level"
                ) from None

        imageBox = np.load(op.join(npPath, npf))

        # Only four scalars are needed from the netCDF file; close it right away.
        with xr.open_dataset(op.join(ncPath, ncf)) as myFile:
            planck_fk1 = float(myFile['planck_fk1'].data)
            planck_fk2 = float(myFile['planck_fk2'].data)
            planck_bc1 = float(myFile['planck_bc1'].data)
            planck_bc2 = float(myFile['planck_bc2'].data)

        Tmean.append(Rad2BT(imageBox.mean(), planck_fk1, planck_fk2, planck_bc1, planck_bc2))
        times.append(ncf[31:38])
    return times, Tmean

In [60]:
def listurls(prefix, html):
    """Return the ``href`` of every tag whose href matches ``prefix``.

    Parameters
    ----------
    prefix : str
        Regular-expression pattern; it is compiled with ``re.compile`` and used
        as the ``href`` filter, so regex metacharacters in it are significant.
    html : object with a ``.text`` attribute
        The page to scan, e.g. the object returned by ``requests.get``.

    Returns
    -------
    list of str
        Matching ``href`` values in document order.
    """
    from bs4 import BeautifulSoup

    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed and warns about the guess.
    soup = BeautifulSoup(html.text, 'html.parser')
    href_pattern = re.compile(prefix)
    return [tag['href'] for tag in soup.find_all(href=href_pattern)]

In [None]:
def create_nc_Numpy(ncFile, pathOut):
    """Resample the ``Rad`` field of a netCDF file onto a fixed region and save it as ``.npy``.

    The file is opened with xarray, its ``Rad`` variable is resampled
    (nearest neighbour) from the file's own projection onto a Mercator grid
    covering a hard-coded longitude/latitude box, and the result is written
    with ``np.save``.

    Parameters
    ----------
    ncFile : str
        Path of the netCDF file to read.
    pathOut : str
        Directory for the output. The output name is the base name of
        ``ncFile`` without a trailing ``.nc``; ``np.save`` appends ``.npy``.

    Returns
    -------
    None

    Notes
    -----
    NOTE(review): ``geometry``, ``image`` and ``ccrs`` are not imported in the
    visible part of this file (presumably ``pyresample.geometry``,
    ``pyresample.image`` and ``cartopy.crs``), and the ``.metpy`` accessor
    presumably needs ``import metpy``. Confirm they are imported before calling.
    """

    def goes_2_roi(geos_crs,
                   target_extent,
                   target_rows,  # actual length or base
                   target_cols,  # actual width or height
                   cartopy_target_proj,
                   data_key='Rad',
                   radius_of_influence=50000):
        """Resample the loaded GOES data onto a target projection and extent.

        Reads ``dat`` from the enclosing function; ``data_key`` is accepted
        but not used.
        """
        cartopy_source_extent = geos_crs.x_limits + geos_crs.y_limits
        # Reorder (x0, x1, y0, y1) into (x0, y0, x1, y1).
        pyresample_source_extent = (cartopy_source_extent[0],
                                    cartopy_source_extent[2],
                                    cartopy_source_extent[1],
                                    cartopy_source_extent[3])
        rad = dat.data
        source_area = geometry.AreaDefinition('GOES-1X', 'Full Disk', 'GOES-1X',
                                              geos_crs.proj4_params,
                                              rad.shape[1], rad.shape[0],
                                              pyresample_source_extent)
        # NOTE(review): the source area above passes (shape[1], shape[0]) in
        # these two slots, while here target_rows comes first; confirm that
        # the 401/1001 arguments are in the intended order.
        area_target_def = geometry.AreaDefinition('areaTest', 'Target Region', 'areaTest',
                                                  cartopy_target_proj.proj4_params,
                                                  target_rows, target_cols,
                                                  target_extent)
        geos_con_nn = image.ImageContainerNearest(rad,
                                                  source_area,
                                                  radius_of_influence=radius_of_influence)

        # Here we are using pyresample for the remapping
        area_proj_con_nn = geos_con_nn.resample(area_target_def)
        return area_proj_con_nn.image_data

    def cartopy_pyresample_toggle_extent(input_extent):
        """Swap the two middle entries; applying it twice restores the input."""
        return np.array(input_extent)[np.array([0, 2, 1, 3])]

    def transform_cartopy_extent(source_extent, source_proj, target_proj):
        """Transform an (x0, x1, y0, y1) extent from one projection to another."""
        target_extent = target_proj.transform_points(source_proj,
                                                     np.array(source_extent[:2]),
                                                     np.array(source_extent[2:])).ravel()
        # The transformed points are 3D (x, y, z per point); keep only x and y.
        return cartopy_pyresample_toggle_extent(np.array(target_extent)[np.array([0, 1, 3, 4])])

    pc = ccrs.PlateCarree()
    mc = ccrs.Mercator()

    # Convert extent from pc to mc (both cylindrical projections)
    extent_pc = [-109.59326, -102.40674, 8.94659, -8.94656]
    target_extent_mc_cartopy = transform_cartopy_extent(extent_pc, pc, mc)
    target_extent_mc_pyresample = cartopy_pyresample_toggle_extent(target_extent_mc_cartopy)

    myFile = xr.open_dataset(ncFile)
    try:
        dat = myFile.metpy.parse_cf('Rad')
        geos = dat.metpy.cartopy_crs
        roi_rads = goes_2_roi(geos,
                              target_extent_mc_pyresample,
                              401, 1001,
                              mc)
    finally:
        # Release the file handle even when parsing or resampling fails.
        myFile.close()

    # Use only the file's base name so that a directory in ncFile is not
    # re-applied under pathOut, and strip '.nc' only when it is really there.
    base_name = op.basename(ncFile)
    if base_name.endswith('.nc'):
        base_name = base_name[:-3]
    full_filename = op.join(pathOut, base_name)
    np.save(full_filename, roi_rads)
    return

In [None]:
def download(filename, toPath, saveName):
    """Download every URL listed in a text file with ``wget``.

    Parameters
    ----------
    filename : str
        Path of a text file containing whitespace-separated URLs.
    toPath : str
        Directory in which the download is stored (created if missing).
    saveName : str
        File name used inside ``toPath``. Every URL is written to this same
        file, so with several URLs the file ends up holding the last one.

    Returns
    -------
    None

    Raises
    ------
    FileNotFoundError
        If ``filename`` does not exist, or ``wget`` is not installed.
    """
    with open(filename, 'r') as fid:
        urls = fid.read().split()

    destination = op.join(toPath, saveName)
    # Argument lists instead of one shell string: the URLs may contain '&' and
    # '?', which a shell would interpret instead of passing through to wget.
    # The directory is folded into the -O path rather than passed with -P.
    cmdlist = [['wget', url, '-O', destination] for url in urls]
    print(len(urls), len(set(urls)), len(cmdlist))

    if urls and toPath:
        os.makedirs(toPath, exist_ok=True)

    for url, cmd in zip(urls, cmdlist):
        print(' '.join(cmd))
        # NOTE(review): this tests the URL string itself as a local path, as
        # the original code did; possibly the destination file was meant.
        if op.exists(url):
            continue
        pid = Popen(cmd)
        pid.communicate()
        if pid.returncode != 0:
            print(f'wget exited with status {pid.returncode} for {url}')
    return

# Execution

In [63]:
# Query URL of the download page, with {SS}, {yy}, {mm}, {dd} and {hr}
# replacement fields. The century is already part of the text ("20{yy}").
templateURL = (
    'http://home.chpc.utah.edu/~u0553130/Brian_Blaylock/cgi-bin/'
    'goes16_download.cgi?source=aws&satellite='
    'noaa-goes{SS}&domain=F&product=ABI-L1b-Rad&date=20{yy}-{mm}-{dd}&hour={hr}'
)

# Selection for this run: satellite number, band, and the date/hour to look at.
Sat, band = 16, 8
year, month, day, hour = 2020, 1, 1, 0

# Every combination of the values above (a single combination here).
# itertools.product returns a one-shot iterator.
search = itertools.product([Sat], [band], [year], [month], [day], [hour])

In [None]:
# Check GOES-16 and GOES-17 for every requested slot, download matching file
# pairs, convert them to numpy, and plot what is available.
for SS, bb, yy, mm, dd, hr in search:
    # Zero-pad every field so it can be dropped into URLs and file-name prefixes.
    SS, bb, yy, mm, dd, hr = (
        str(SS).zfill(2),
        str(bb).zfill(2),
        str(yy).zfill(4),
        str(mm).zfill(2),
        str(dd).zfill(2),
        str(hr).zfill(2),
    )

    # Get URLs for download. templateURL already spells out the century
    # ("20{yy}"), so only the last two digits of the year are substituted.
    urlFields = {'yy': yy[-2:], 'mm': mm, 'dd': dd, 'hr': hr}
    req16 = requests.get(templateURL.format(SS=SS, **urlFields))
    SS = '17'
    req17 = requests.get(templateURL.format(SS=SS, **urlFields))
    if yy == '2018':
        product = '3'
    else:
        product = '6'
    prefix = f"OR_ABI-L1b-RadF-M{product}C" + bb
    bandURLList16 = listurls(prefix, req16)  # list of strings using beautiful soup
    bandURLList17 = listurls(prefix, req17)
    bandURLList16.sort()
    bandURLList17.sort()

    os.makedirs('buffer', exist_ok=True)

    # Index of the first GOES-17 URL that has not been paired yet. Both lists
    # are sorted, so advancing it keeps files with similar names from being
    # paired twice and avoids rescanning earlier entries.
    cnt = 0
    # Iterate over pairs of GOES netCDF files, download, and convert to numpy.
    for FD16 in bandURLList16:  # Last string chars are of this format: c20192220009464.nc
        cYYYYDDDHHt = FD16[-18:-9]
        FD17 = None
        for j, candidate in enumerate(bandURLList17[cnt:], start=cnt):
            if candidate[-18:-9] == cYYYYDDDHHt:
                FD17 = candidate
                cnt = j + 1
                break
        if FD17 is None:
            # No GOES-17 file for this time stamp: nothing to pair.
            continue

        # download() expects a text file listing the URLs, so write one per file.
        # NOTE(review): assumes the hrefs returned by listurls are complete
        # URLs that wget can fetch; confirm against the page.
        for saveName, url in (('nc17', FD17), ('nc16', FD16)):
            urlListFile = op.join('buffer', saveName + '_url.txt')
            with open(urlListFile, 'w') as fid:
                fid.write(url + '\n')
            download(urlListFile, 'buffer', saveName)
            # create_nc_Numpy returns None; it writes its result to disk.
            create_nc_Numpy(op.join('buffer', saveName), 'buffer')

        # Create subplot figures and save.
        # plt.subplots(2, 2) returns a 2x2 array of axes, so unpack row by row.
        fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2)

        # Histograms
#         ax.set_title(f'{hh}:{mm}')
#                 ax.set_ylabel('density (unitless)',fontsize = 16)
#                 ax.set_xlabel('Temperature (K)', fontsize = 16)

#                 Tbox = Rad2BT(imageBox, planck_fk1, planck_fk2, planck_bc1, planck_bc2)
#                 ax.hist(Tbox.ravel(), bins = bins, density = True, label = 'Temp dist.')
#                 #ax.plot([T,T], [0,0.3],'r', label = 'something')
#                 ax.axvline(T, color='r',label = 'NOAA avg.')
#             axes[0].legend(loc='upper left')
#         fig.savefig('..'+ op.join(DATAPATH,f"Histograms_filtered/G_{SS}_04-{str(8+dd).zfill(2)}-2019"))

        # ECDF (not implemented yet)

        # AUC (not implemented yet)

#     times17, mean17 = getTmean('17', npPath, band, day)
#     times16, mean16 = getTmean('16', npPath, band, day)

#     ptimes17 = [datetime.strptime(f"2019{t}", "%Y%j%H%M") for t in times17]
#     ptimes16 = [datetime.strptime(f"2019{t}", "%Y%j%H%M") for t in times16]
#     fig, ax1 = plt.subplots(figsize=(15,5))#constrained_layout=False
#     _ = ax1.plot_date(ptimes17, mean17 , label="GOES-17", color='tab:red', marker='+', markersize = 16)
#     _ = ax1.plot_date(ptimes16, mean16 , label='GOES-16', color='tab:blue', marker='+', markersize = 16, zorder=-1)
#     _ = ax1.set_ylim(220,260)
#     _ = ax1.tick_params('both', labelsize=18)
#     _ = ax1.legend()
#     _ = ax1.set_title(f'Mean Temperatures (K) for a 401 x 1001 Portion\n of Band 08 on 04-{str(8+j)}-2019', fontweight ='bold', fontsize = 22)
#     _ = ax1.set_ylabel('Temperature (K)',fontsize = 16)
#     _ = ax1.set_xlabel('time (hours)',fontsize = 16)
#     _ = ax1.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))


#     _ = plt.savefig('..'+ op.join(DATAPATH,f"Mean_temp_unfiltered/04-{str(8+j).zfill(2)}-2019"))


In [64]:
# Sample file-name tail (the format quoted in the pairing loop) used to check the slice indices below.
string = 'c20192220009464.nc'

In [65]:
# Length of the sample tail; it is 18, which is why the slice below starts at -18.
len(string)

18

In [69]:
# Same slice the pairing loop compares between files; it keeps the first nine characters of the tail.
string[-18:-9]

'c20192220'

In [70]:
# A slice end beyond the string's length is clipped, so this returns the whole string.
string[:100]

'c20192220009464.nc'