# In [3]:
# Here I'm importing all the libraries necessary to run my analysis

import datetime
import matplotlib.dates as mdates
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.colors as clr
import netCDF4 as nc
import os
from typing import List

# The fog() function is responsible for identifying the occurrence of fog during the dawn. Basically, it returns TRUE or
# FALSE depending on the dataset. The inputs for this function are: the date and time when each measurement was made (datas),
# information about cloud occurrence (nuvens), the time range we are analyzing (hi1, hf1), the maximum cloud height
# (altmin) and the minimum number of clouds detected during the time range for us to consider that fog occurred in that dawn (Nmin)

def fog(datas, nuvens, hi1=0, hf1=9, altmin=200, Nmin=240):
    """Decide whether fog occurred inside a daily time window.

    Parameters
    ----------
    datas : iterable
        Timestamps of the measurements; each item must provide ``.time()``.
        Used to build a boolean mask over the rows of ``nuvens``.
    nuvens : 2-D array
        Cloud information, one row per timestamp. Only column 0 is read
        (presumably the cloud-base height in metres -- confirm with the
        data source).
    hi1, hf1 : int
        First and last hour of the window; both bounds are inclusive and
        taken exactly on the hour.
    altmin : number
        A row counts as fog when its column 0 is strictly below this value.
    Nmin : int
        Minimum number of fog rows required for a positive answer.

    Returns
    -------
    tuple
        ``(N >= Nmin, N)``, where ``N`` is the number of rows inside the
        window whose column 0 is below ``altmin``.
    """
    # Window limits as time-of-day objects.
    janela_inicio = datetime.time(hi1, 0, 0)
    janela_fim = datetime.time(hf1, 0, 0)

    # Time of day of every measurement, ignoring the calendar date.
    horarios = np.array([instante.time() for instante in datas])

    # True for the measurements taken inside the window.
    dentro_da_janela = (horarios >= janela_inicio) & (horarios <= janela_fim)

    # Column 0 of the rows inside the window.
    primeira_coluna = nuvens[dentro_da_janela, :][:, 0]

    # Number of rows classified as fog.
    N = np.nansum(primeira_coluna < altmin)

    return N >= Nmin, N

# The converte_real() function is responsible for converting the time values to float (otherwise the function that creates
# the histogram would not work)

def converte_real(horario):
    """Convert a time string into a decimal number of hours.

    Parameters
    ----------
    horario : str
        Time of day laid out as ``HH:MM:SS`` (fixed positions: hours in
        characters 0-1, minutes in 3-4, seconds from character 6 onwards,
        so fractional seconds such as ``"12:30:45.5"`` are accepted).
        If the seconds field is absent it is treated as zero.

    Returns
    -------
    float
        ``hours + minutes / 60 + seconds / 3600``.

    Raises
    ------
    ValueError
        If any of the fields cannot be parsed as a number.
    """
    hora = float(horario[0:2])
    minuto = float(horario[3:5])

    # Seconds start at index 6 ("HH:MM:SS"); slicing from 7 would drop the
    # tens digit of the seconds.
    resto = horario[6:]
    segundo = float(resto) if resto else 0.0

    minuto = minuto / 60
    segundo = segundo / (60 * 60)

    return hora + minuto + segundo

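# The quality_signal() function checks the 'beta_raw' signal of the netCDF files listed in estacao and returns two lists:
# the files whose signal was accepted (24 block means, none of them negative) and the files that were rejected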

def quality_signal(estacao):
    """Split netCDF files into accepted and rejected by their raw signal.

    For every file the ``beta_raw`` variable is read, each entry along its
    first axis is reduced to its mean, and those means are averaged in
    consecutive blocks of 240 values (presumably one hour of profiles --
    confirm the sampling interval). A trailing incomplete block is ignored.

    A file is accepted when it yields exactly 24 block means and none of
    them is negative; otherwise it is rejected.

    Parameters
    ----------
    estacao : iterable
        Paths (or anything ``netCDF4.Dataset`` accepts) of the files to check.

    Returns
    -------
    tuple of (list, list)
        ``(listaOK, listaERR)``: accepted files and rejected files, each in
        input order. Both lists are empty when ``estacao`` is empty.
    """
    amostras_por_bloco = 240
    blocos_esperados = 24

    listaOK = []
    listaERR = []

    for file in estacao:

        # The context manager closes the file even if reading fails; the
        # sliced data is already in memory when the dataset is closed.
        with nc.Dataset(file) as ds:
            betaraw = ds['beta_raw'][:]

        # One mean value per entry along the first axis.
        signal_hour = [signal.mean() for signal in betaraw]

        # Average every complete block of 240 values. All 240 samples of a
        # block are included (a counter-based loop would drop the last one).
        ultimo_inicio = len(signal_hour) - amostras_por_bloco
        means = []
        for inicio in range(0, ultimo_inicio + 1, amostras_por_bloco):
            bloco = signal_hour[inicio:inicio + amostras_por_bloco]
            means.append(sum(bloco) / len(bloco))

        if len(means) == blocos_esperados and min(means) >= 0:
            listaOK.append(file)
        else:
            # Wrong number of blocks, or at least one negative block mean.
            listaERR.append(file)

    # Return only after every file has been classified.
    return listaOK, listaERR