# Output timestamps of radiometer spikes in the 5-min time series for the QC/cal_files

## Jacquelyn Witte NCAR/EOL

### May 2022

In [1]:
import os
import csv

import numpy as np
import pathlib
import paramiko
import pandas as pd
import re
import xarray as xr

import warnings
warnings.filterwarnings('ignore')

#pd.set_option('display.max_columns', None)
#pd.set_option('display.max_rows', None)

In [2]:
# Create the SSH client.
# Host keys already recorded in the user's known_hosts file are loaded first so
# that a known server is verified against them; AutoAddPolicy then only applies
# to hosts that have no recorded key yet.
ssh = paramiko.SSHClient()
ssh.load_system_host_keys()
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

# The account name and key path are read from the environment when the
# variables are set, so personal values do not have to be saved in the notebook.
# The placeholder strings remain as the fallback and must be edited otherwise.
ssh.connect(hostname='barolo.eol.ucar.edu',
            username=os.environ.get('SWEX_SSH_USER', 'username goes here'),
            key_filename=os.environ.get('SWEX_SSH_KEY',
                                        'path to ssh key goes here'),
            port=22)

In [3]:
remotepath = '/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/*nc'
localpath = os.path.join(pathlib.Path.home(),
                            'projects', 'swex', 'isfs', 'data')

# Make sure the download directory exists; sftp_client.get() does not create it
os.makedirs(localpath, exist_ok=True)

# Open an SFTP channel over the existing SSH connection
sftp_client = ssh.open_sftp()

# List the remote files that match the glob
stdin, stdout, stderr = ssh.exec_command('ls '+remotepath)

# Strip whitespace and trailing newlines; skip blank lines
nc_files = [line.strip() for line in stdout.readlines() if line.strip()]

# Fail loudly when nothing was listed, instead of letting the download loop
# silently do nothing; include whatever the remote `ls` wrote to stderr.
ls_errors = stderr.read().decode(errors='replace').strip()
if not nc_files:
    raise FileNotFoundError(
        f'No remote files matched {remotepath!r}. ls stderr: {ls_errors!r}')

# Print the 1st 5 files as a sanity check
nc_files[0:5]

['/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220315.nc',
 '/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220316.nc',
 '/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220317.nc',
 '/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220318.nc',
 '/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220319.nc']

In [4]:
# SWEX configuration: site labels 's1' through 's18' and the name stems of
# the radiometer variables that are scanned for spikes.
sites = [f's{site_number}' for site_number in range(1, 19)]
desired_var = [
    'Rpile_in',
    'Rpile_out',
    'Rsw_in',
    'Rsw_out',
]

In [6]:
# Accumulates one single-column DataFrame per (file, variable) pair that
# contains at least one spike; the index of each frame is the spike time.
spikes_list = []
# A sample counts as a spike when its absolute value exceeds this threshold
threshold = 5000.

# Loop over each remote file
for remotefile in nc_files:
    # Copy the file to the local directory
    localfile = os.path.join(localpath, os.path.basename(remotefile))
    sftp_client.get(remotefile, localfile, prefetch=True)

    # Open the file in a context manager so the netCDF handle is closed as soon
    # as the data has been copied into a DataFrame (otherwise one handle per
    # file stays open for the lifetime of the kernel).
    with xr.open_dataset(localfile) as data_xr:
        print(remotefile)

        # Drop everything that depends on the 'sites' dimension and convert
        # the remainder to a DataFrame indexed by the dataset coordinates.
        data_df = data_xr.drop_dims('sites').to_dataframe()

    # Find the radiation variables in the file: every column whose name
    # contains one of the desired variable stems.
    keys = data_df.columns
    rad_var = []
    for v in desired_var:
        rad_var.extend([x for x in keys if re.search(v, x)])

    # Loop over the variables and collect the spike info
    for var in rad_var:
        res = data_df[[var]][abs(data_df[var]) > threshold]
        if len(res) > 0:
            spikes_list.append(res)

/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220315.nc
/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220316.nc
/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220317.nc
/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220318.nc
/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220319.nc
/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220320.nc
/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220321.nc
/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220322.nc
/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220323.nc
/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220324.nc
/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220325.nc
/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220326.nc
/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220327.nc
/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220328.nc
/net/isf/isff/projects/SWEX/ISFS/netcdf/noqc_geo/isfs_20220329.nc
/net/isf/i

In [7]:
spikes_list

[                     Rpile_in_s1
 time                            
 2022-03-16 01:57:30 -1388589.625,
                      Rpile_in_s11
 time                             
 2022-03-17 22:57:30  -195592.8125,
                      Rpile_out_s9
 time                             
 2022-03-17 23:22:30   402598.3125,
                      Rpile_out_s18
 time                              
 2022-03-17 20:47:30    1175083.875,
                      Rpile_out_s10
 time                              
 2022-03-18 04:02:30   -98272.84375,
                      Rpile_out_s16
 time                              
 2022-03-20 10:22:30  -42394.300781,
                      Rpile_out_s18
 time                              
 2022-03-21 08:12:30 -218148.671875,
                      Rpile_in_s3
 time                            
 2022-03-22 13:12:30 -1399164.625,
                      Rpile_out_cr_s4
 time                                
 2022-03-22 03:57:30    -21537.056641,
                      Rpile_in_

In [10]:
# Export the spikes as one tidy CSV table.
# Each entry of spikes_list is a single-column DataFrame whose index holds the
# spike time. Handing that list to np.savetxt depends on how NumPy coerces a
# list of DataFrames (it can produce a 3-D array or fail on ragged shapes) and
# the conversion to an array discards the index, i.e. the timestamps.
# Instead, flatten every entry to rows of (index column(s), variable, value).
spike_rows = []
for res in spikes_list:
    flat = res.reset_index()
    # Columns that came from the index (the time stamp) identify each row
    index_cols = [col for col in flat.columns if col not in res.columns]
    spike_rows.append(flat.melt(id_vars=index_cols,
                                var_name='variable',
                                value_name='value'))

if spike_rows:
    spikes_df = pd.concat(spike_rows, ignore_index=True)
else:
    # No spikes found: still write a header-only file
    spikes_df = pd.DataFrame(columns=['time', 'variable', 'value'])

spikes_df.to_csv('swex_radiometer_spikes.csv', index=False)

In [9]:
# Shut down the remote handles: the SFTP channel first, then the SSH session
for remote_handle in (sftp_client, ssh):
    remote_handle.close()