# 4.2: Sort missed data streams
Notebook 4.1 created text files containing the reference designator and data stream pairs that were not added to the missed test CSVs. These data streams either didn't have a matching test in the Gross Range and Climatology Test lookup tables, or the data for the first available deployment could not be loaded into the workspace. 
This notebook completes the following tasks:
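    <ul><li>Sort each reference designator (refdes)-data stream pair into one of two lists, according to the issue that notebook 4.1 printed for it: either no dataset was available, or no QC lookup table entry existed</li>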
    </ul>

In [1]:
# Import libraries
import re

In [2]:
def find_no_dataset(path_to_file):
    """List the data streams whose first-deployment data did not load.

    Scans the text file line by line. For every line containing the
    message "No dataset available", the refdes-stream strings found on
    that line are collected.

    Parameters
    ----------
    path_to_file : str or path-like
        Path to the text file to scan.

    Returns
    -------
    list of list of str
        One inner list per flagged line, holding every match found on
        that line (empty when the line contains no match).
    """
    # Four hyphen-separated fixed-width fields followed by the stream name.
    pattern = re.compile(r".{8}-.{5}-.{2}-.{9}-[a-z_0-9]+")
    all_missing = []
    # The context manager guarantees the file handle is closed, even if
    # reading fails part-way through.
    with open(path_to_file, "r") as file:
        for text in file:
            if "No dataset available" in text:
                all_missing.append(pattern.findall(text))
    return all_missing

def find_no_qc_lookup(path_to_file):
    """List the data streams for which no qcConfig objects remained.

    Scans the text file line by line. For every line containing the
    message "No existing qc-lookup table", the refdes-stream strings
    found on that line are collected.

    Parameters
    ----------
    path_to_file : str or path-like
        Path to the text file to scan.

    Returns
    -------
    list of list of str
        One inner list per flagged line, holding every match found on
        that line (empty when the line contains no match).
    """
    # Four hyphen-separated fixed-width fields followed by the stream name.
    pattern = re.compile(r".{8}-.{5}-.{2}-.{9}-[a-z_0-9]+")
    all_missing = []
    # The context manager guarantees the file handle is closed, even if
    # reading fails part-way through.
    with open(path_to_file, "r") as file:
        for text in file:
            if "No existing qc-lookup table" in text:
                all_missing.append(pattern.findall(text))
    return all_missing

def filter_streams(missing_qcConfig):
    """Remove streams where an automated QC test is not expected.

    A stream is dropped when its name contains one of the keywords
    "metadata", "diagnostic" or "blank".

    Parameters
    ----------
    missing_qcConfig : list
        Entries to filter. Each entry is either a stream string or a
        list of stream strings (the shape returned by ``re.findall``).

    Returns
    -------
    list
        A new list with the entries that contain none of the keywords,
        in their original order. The input is not modified.
    """
    drop = ("metadata", "diagnostic", "blank")

    def _is_expected(entry):
        # Treat a bare string as a one-element entry so both shapes work.
        names = [entry] if isinstance(entry, str) else entry
        return not any(keyword in name for name in names for keyword in drop)

    # The previous implementation evaluated `missing_qcConfig in drop`,
    # which is a single bool, and used it as a list index; it never
    # filtered anything.
    return [entry for entry in missing_qcConfig if _is_expected(entry)]

In [3]:
# Array and site whose text output file is parsed by the cells below.
array, site = "GI", "GI05MOAS"
# Alternate input file, kept here so it can be switched in by hand:
# path = f"../data/processed/{array}_tests_completed2/{site}-output.txt"
path = f"../data/processed/{array}_tests_completed1/{site}-typescript.txt"

In [4]:
# Collect the streams flagged "No dataset available" in the file at `path`.
no_deploy = find_no_dataset(path)
# Bare expression: shows the list as the notebook cell's output.
no_deploy

[['GI05MOAS-GL365-04-CTDGVM000-ctdgv_m_glider_instrument']]

In [5]:
# Collect the streams flagged "No existing qc-lookup table" in the file at `path`.
no_qcConfig = find_no_qc_lookup(path)
# Bare expression: shows the list as the notebook cell's output.
no_qcConfig

[['GI05MOAS-PG575-06-PARADM000-parad_m_glider_instrument'],
 ['GI05MOAS-PG575-06-PARADM000-parad_m_glider_instrument'],
 ['GI05MOAS-PG575-05-NUTNRM000-nutnr_m_glider_instrument'],
 ['GI05MOAS-PG575-05-NUTNRM000-nutnr_m_glider_instrument'],
 ['GI05MOAS-PG575-04-FLORTO000-flort_o_glider_data'],
 ['GI05MOAS-PG575-04-FLORTO000-flort_o_glider_data'],
 ['GI05MOAS-PG575-03-FLORTM000-flort_m_sample'],
 ['GI05MOAS-PG575-03-FLORTM000-flort_m_sample'],
 ['GI05MOAS-PG575-02-DOSTAM000-dosta_abcdjm_glider_instrument'],
 ['GI05MOAS-PG575-02-DOSTAM000-dosta_abcdjm_glider_instrument'],
 ['GI05MOAS-PG575-01-CTDGVM000-ctdgv_m_glider_instrument'],
 ['GI05MOAS-PG575-01-CTDGVM000-ctdgv_m_glider_instrument'],
 ['GI05MOAS-PG575-00-ENG000000-glider_eng_metadata'],
 ['GI05MOAS-PG575-00-ENG000000-glider_eng_sci_telemetered'],
 ['GI05MOAS-PG575-00-ENG000000-glider_eng_telemetered'],
 ['GI05MOAS-PG575-00-ENG000000-glider_gps_position'],
 ['GI05MOAS-PG575-00-ENG000000-glider_eng_metadata'],
 ['GI05MOAS-PG575-00-ENG

In [8]:
# Filter for untested streams
def remove_streams(stream_list, phrase):
    """Return the entries of ``stream_list`` whose stream name lacks ``phrase``.

    Parameters
    ----------
    stream_list : list of list of str
        Entries whose first element is the refdes-stream string.
    phrase : str
        Substring identifying streams to remove.

    Returns
    -------
    list of list of str
        A new list, in the original order, without the entries whose
        first element contains ``phrase``. Empty entries are kept
        unchanged.
    """
    # `not x` guards against empty entries (a flagged line where the
    # regex found no match), which would otherwise raise IndexError.
    filtered_list = [x for x in stream_list if not x or phrase not in x[0]]
    return filtered_list

# Successively drop the entries whose stream name contains each phrase
# below. Every step feeds the previous step's result into remove_streams,
# and each intermediate list stays bound to its own `sans_*` name.
sans_power = remove_streams(no_qcConfig, "power")
sans_metadata = remove_streams(sans_power, "metadata")
sans_blank = remove_streams(sans_metadata, "blank")
sans_diagnostic = remove_streams(sans_blank, "diagnostic")
sans_dcleng = remove_streams(sans_diagnostic, "dcl_eng")
sans_cpmeng = remove_streams(sans_dcleng, "cpm_eng")
sans_methourly = remove_streams(sans_cpmeng, "metbk_hourly")
sans_hyd = remove_streams(sans_methourly, "hyd_o")
sans_wavfourier = remove_streams(sans_hyd, "wavss_a_dcl_fourier")
sans_wavmotion = remove_streams(sans_wavfourier, "wavss_a_dcl_motion")
sans_wavnondir = remove_streams(sans_wavmotion, "wavss_a_dcl_non_dir")
sans_mopakrate = remove_streams(sans_wavnondir, "mopak_o_dcl_rate")
sans_presfwav = remove_streams(sans_mopakrate, "wave_burst")
sans_wfpeng = remove_streams(sans_presfwav, "wfp_eng")
sans_offset = remove_streams(sans_wfpeng, "offset")
sans_sioeng = remove_streams(sans_offset, "sio_eng")
sans_gleng = remove_streams(sans_sioeng, "glider_eng")
sans_glgps = remove_streams(sans_gleng, "glider_gps")
sans_adcpcon = remove_streams(sans_glgps, "adcp_config")
sans_phctrl = remove_streams(sans_adcpcon, "imodem_control")
# The list left after the last filter is the final result.
filtered_noQC = sans_phctrl
# Bare expression: shows the list as the notebook cell's output.
filtered_noQC

[['GI05MOAS-PG575-06-PARADM000-parad_m_glider_instrument'],
 ['GI05MOAS-PG575-06-PARADM000-parad_m_glider_instrument'],
 ['GI05MOAS-PG575-05-NUTNRM000-nutnr_m_glider_instrument'],
 ['GI05MOAS-PG575-05-NUTNRM000-nutnr_m_glider_instrument'],
 ['GI05MOAS-PG575-04-FLORTO000-flort_o_glider_data'],
 ['GI05MOAS-PG575-04-FLORTO000-flort_o_glider_data'],
 ['GI05MOAS-PG575-03-FLORTM000-flort_m_sample'],
 ['GI05MOAS-PG575-03-FLORTM000-flort_m_sample'],
 ['GI05MOAS-PG575-02-DOSTAM000-dosta_abcdjm_glider_instrument'],
 ['GI05MOAS-PG575-02-DOSTAM000-dosta_abcdjm_glider_instrument'],
 ['GI05MOAS-PG575-01-CTDGVM000-ctdgv_m_glider_instrument'],
 ['GI05MOAS-PG575-01-CTDGVM000-ctdgv_m_glider_instrument'],
 ['GI05MOAS-PG566-06-PARADM000-parad_m_glider_instrument'],
 ['GI05MOAS-PG566-05-NUTNRM000-nutnr_m_glider_instrument'],
 ['GI05MOAS-PG566-04-FLORTO000-flort_o_glider_data'],
 ['GI05MOAS-PG566-03-FLORTM000-flort_m_sample'],
 ['GI05MOAS-PG566-02-DOSTAM000-dosta_abcdjm_glider_instrument'],
 ['GI05MOAS-PG56