In [1]:
from pathlib import Path
import math
import pandas as pd
from neptoon.quality_assesment.quality_assesment import (
    QualityAssessmentFlagBuilder,
    FlagRangeCheck,
    FlagSpikeDetectionUniLOF,
)

from neptoon.data_management.crns_data_hub import CRNSDataHub

from neptoon.data_management.site_information import SiteInformation

from neptoon.neutron_correction.neutron_correction import (
    CorrectionType,
    CorrectionTheory,
)
from neptoon.neutron_correction.correction_classes import Correction

from neptoon.data_management.data_audit import (
    DataAuditLog,
)
from neptoon.data_ingest_and_formatting.data_ingest import (
    ManageFileCollection, 
    ParseFilesIntoDataFrame,
    FormatDataForCRNSDataHub,
)

In [2]:
DataAuditLog.create()

<neptoon.data_management.data_audit.DataAuditLog at 0x10777e170>

In [None]:
# zip_file = (
#     Path().parent / "tests" / "data_ingest_and_formatting" / 
#     "mock_data" / "CRNS-station_data-Hydroinnova-A.zip"
# )

# file_manager = ManageFileCollection(data_location=zip_file)
# file_manager.get_list_of_files() 
# file_manager.filter_files() 

In [None]:
# file_parser = ParseFilesIntoDataFrame(file_manager)
# data = file_parser.make_dataframe()

In [None]:
# data_formatter = FormatDataForCRNSDataHub(data_frame=data)
# data

In [3]:
def import_crns_dataframe_and_format(filename):
    """
    This is a pseudo function that will eventually be replaced by the
    ingest routines. For now it converts a sample dataset into a format
    (which we can update later).
    """
    cwd = Path.cwd()
    crns_df_path = cwd / "tests" / "sample_crns_data" / filename
    crns_df = pd.read_csv(crns_df_path)
    crns_df["date_time_utc"] = pd.to_datetime(
        crns_df["date_time_utc"], dayfirst=True
    )
    crns_df.set_index(crns_df["date_time_utc"], inplace=True)
    crns_df.drop(["date_time_utc"], axis=1, inplace=True)
    crns_df = crns_df.replace("noData", math.nan)
    crns_df["epithermal_neutrons"] = pd.to_numeric(
        crns_df["epithermal_neutrons"]
    )
    crns_df["thermal_neutrons"] = pd.to_numeric(crns_df["thermal_neutrons"])
    crns_df["air_temperature"] = pd.to_numeric(crns_df["air_temperature"])
    crns_df["air_relative_humidity"] = pd.to_numeric(
        crns_df["air_relative_humidity"]
    )
    crns_df["precipitation"] = pd.to_numeric(crns_df["precipitation"])
    crns_df["air_pressure"] = pd.to_numeric(crns_df["air_pressure"])

    return crns_df


crns_df = import_crns_dataframe_and_format("CUC001.csv")
crns_df

Unnamed: 0_level_0,epithermal_neutrons,thermal_neutrons,air_temperature,air_relative_humidity,air_pressure,precipitation,AirTemperature_Sensor2,AirHumidity_Relative_Sensor2,AirPressure_Sensor2,Station_Id
date_time_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-06-23 13:00:00,427.0,683.0,33.9,33.6,1004.5,0.0,36.5,29.4,1004.6,Cunnersdorf_C_001
2016-06-23 14:00:00,753.0,388.0,33.8,33.7,1004.6,4.0,36.3,29.2,1004.3,Cunnersdorf_C_001
2016-06-23 15:00:00,745.0,362.0,33.2,37.7,1004.2,0.0,35.8,27.5,1003.9,Cunnersdorf_C_001
2016-06-23 16:00:00,758.0,383.0,32.3,45.0,1003.3,0.0,35.8,26.2,1003.4,Cunnersdorf_C_001
2016-06-23 17:00:00,802.0,392.0,30.8,53.3,1003.0,0.0,35.1,25.6,1003.1,Cunnersdorf_C_001
...,...,...,...,...,...,...,...,...,...,...
2021-07-21 09:00:00,710.0,391.0,20.1,69.5,1010.8,0.0,21,63.4,1007.3,Cunnersdorf_C_001
2021-07-21 10:00:00,674.0,398.0,20.0,69.1,1010.7,0.0,21.6,62,1007.4,Cunnersdorf_C_001
2021-07-21 11:00:00,736.0,362.0,21.1,63.2,1010.7,0.0,22.4,61.7,1007.3,Cunnersdorf_C_001
2021-07-21 12:00:00,744.0,359.0,20.9,64.5,1010.6,0.0,23,60.8,1007.2,Cunnersdorf_C_001


In [4]:
site_information = SiteInformation(
    latitude=90,
    longitude=90,
    elevation=0,
    reference_incoming_neutron_value=150,
    bulk_density=1.4,
    lattice_water=0.01,
    soil_organic_carbon=0,
)

site_information.add_custom_value("n0", 1000)
site_information.add_custom_value("biomass", 1)


In [5]:
data_hub = CRNSDataHub(
    crns_data_frame=crns_df, site_information=site_information
)
data_hub.validate_dataframe(schema="initial_check")

The `attach_nmdb_data()` method has defaults. Running the below code, without selecting values, will work too. 


```python
data_hub.attach_nmdb_data()
```



In [6]:
data_hub.attach_nmdb_data(
    station = "JUNG",
    new_column_name = "incoming_neutron_intensity",
    resolution = "60",
    nmdb_table = "revori"
    )
data_hub.crns_data_frame

Unnamed: 0_level_0,epithermal_neutrons,thermal_neutrons,air_temperature,air_relative_humidity,air_pressure,precipitation,AirTemperature_Sensor2,AirHumidity_Relative_Sensor2,AirPressure_Sensor2,Station_Id,incoming_neutron_intensity
date_time_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2016-06-23 13:00:00,427.0,683.0,33.9,33.6,1004.5,0.0,36.5,29.4,1004.6,Cunnersdorf_C_001,158.404
2016-06-23 14:00:00,753.0,388.0,33.8,33.7,1004.6,4.0,36.3,29.2,1004.3,Cunnersdorf_C_001,158.879
2016-06-23 15:00:00,745.0,362.0,33.2,37.7,1004.2,0.0,35.8,27.5,1003.9,Cunnersdorf_C_001,159.019
2016-06-23 16:00:00,758.0,383.0,32.3,45.0,1003.3,0.0,35.8,26.2,1003.4,Cunnersdorf_C_001,158.953
2016-06-23 17:00:00,802.0,392.0,30.8,53.3,1003.0,0.0,35.1,25.6,1003.1,Cunnersdorf_C_001,159.195
...,...,...,...,...,...,...,...,...,...,...,...
2021-07-21 09:00:00,710.0,391.0,20.1,69.5,1010.8,0.0,21,63.4,1007.3,Cunnersdorf_C_001,164.813
2021-07-21 10:00:00,674.0,398.0,20.0,69.1,1010.7,0.0,21.6,62,1007.4,Cunnersdorf_C_001,164.641
2021-07-21 11:00:00,736.0,362.0,21.1,63.2,1010.7,0.0,22.4,61.7,1007.3,Cunnersdorf_C_001,164.258
2021-07-21 12:00:00,744.0,359.0,20.9,64.5,1010.6,0.0,23,60.8,1007.2,Cunnersdorf_C_001,164.250


In [7]:
qa_flags = QualityAssessmentFlagBuilder()
qa_flags.add_check(
    FlagRangeCheck("air_relative_humidity", min_val=0, max_val=100),
    FlagRangeCheck("precipitation", min_val=0, max_val=20),
    FlagSpikeDetectionUniLOF("epithermal_neutrons"),
    # ...
)

data_hub.apply_quality_flags(custom_flags=qa_flags)
data_hub.flags_data_frame


Unnamed: 0_level_0,epithermal_neutrons,thermal_neutrons,air_temperature,air_relative_humidity,air_pressure,precipitation,AirTemperature_Sensor2,AirHumidity_Relative_Sensor2,AirPressure_Sensor2,Station_Id,incoming_neutron_intensity
date_time_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2016-06-23 13:00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2016-06-23 14:00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2016-06-23 15:00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2016-06-23 16:00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2016-06-23 17:00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
...,...,...,...,...,...,...,...,...,...,...,...
2021-07-21 09:00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2021-07-21 10:00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2021-07-21 11:00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2021-07-21 12:00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED


In [8]:
data_hub.select_correction(
    correction_type=CorrectionType.INCOMING_INTENSITY,
    correction_theory=CorrectionTheory.ZREDA_2012,
)

data_hub.correct_neutrons()
data_hub.crns_data_frame

Unnamed: 0_level_0,epithermal_neutrons,thermal_neutrons,air_temperature,air_relative_humidity,air_pressure,precipitation,AirTemperature_Sensor2,AirHumidity_Relative_Sensor2,AirPressure_Sensor2,Station_Id,incoming_neutron_intensity,correction_for_intensity,corrected_epithermal_neutron_count
date_time_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2016-06-23 13:00:00,427.0,683.0,33.9,33.6,1004.5,0.0,36.5,29.4,1004.6,Cunnersdorf_C_001,158.404,0.946946,404.345850
2016-06-23 14:00:00,753.0,388.0,33.8,33.7,1004.6,4.0,36.3,29.2,1004.3,Cunnersdorf_C_001,158.879,0.944115,710.918372
2016-06-23 15:00:00,745.0,362.0,33.2,37.7,1004.2,0.0,35.8,27.5,1003.9,Cunnersdorf_C_001,159.019,0.943284,702.746213
2016-06-23 16:00:00,758.0,383.0,32.3,45.0,1003.3,0.0,35.8,26.2,1003.4,Cunnersdorf_C_001,158.953,0.943675,715.305782
2016-06-23 17:00:00,802.0,392.0,30.8,53.3,1003.0,0.0,35.1,25.6,1003.1,Cunnersdorf_C_001,159.195,0.942241,755.677000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-07-21 09:00:00,710.0,391.0,20.1,69.5,1010.8,0.0,21,63.4,1007.3,Cunnersdorf_C_001,164.813,0.910122,646.186891
2021-07-21 10:00:00,674.0,398.0,20.0,69.1,1010.7,0.0,21.6,62,1007.4,Cunnersdorf_C_001,164.641,0.911073,614.063326
2021-07-21 11:00:00,736.0,362.0,21.1,63.2,1010.7,0.0,22.4,61.7,1007.3,Cunnersdorf_C_001,164.258,0.913198,672.113383
2021-07-21 12:00:00,744.0,359.0,20.9,64.5,1010.6,0.0,23,60.8,1007.2,Cunnersdorf_C_001,164.250,0.913242,679.452055


In [9]:
class NewIdeaForBiomass(Correction):
    """
    My new idea to correct for biomass with humidity
    """

    def __init__(self, 
                site_information,
                correction_type = CorrectionType.CUSTOM, 
                correction_factor_column_name: str = "new_biomass_correction",
                ):
        super().__init__(correction_type=correction_type, 
                         correction_factor_column_name=correction_factor_column_name)
        self.site_information = site_information
        self.humidity_column_name = "air_relative_humidity"

    @staticmethod
    def new_func(biomass, humidity):
        return 1-((biomass / humidity) / 1000)

    def apply(self, data_frame: pd.DataFrame):

        data_frame[self.correction_factor_column_name] = data_frame.apply(
            lambda row: self.new_func(
                row[self.humidity_column_name],
                self.site_information.biomass,
            ),
            axis=1,
        )
        return data_frame

In [10]:
data_hub.correction_factory.register_custom_correction(
    correction_type=CorrectionType.ABOVE_GROUND_BIOMASS,
    theory="my_new_idea",
    correction_class=NewIdeaForBiomass,
)

data_hub.select_correction(
    correction_type=CorrectionType.ABOVE_GROUND_BIOMASS,
    correction_theory="my_new_idea",
)
data_hub.correct_neutrons()
data_hub.crns_data_frame

Unnamed: 0_level_0,epithermal_neutrons,thermal_neutrons,air_temperature,air_relative_humidity,air_pressure,precipitation,AirTemperature_Sensor2,AirHumidity_Relative_Sensor2,AirPressure_Sensor2,Station_Id,incoming_neutron_intensity,correction_for_intensity,corrected_epithermal_neutron_count,new_biomass_correction
date_time_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2016-06-23 13:00:00,427.0,683.0,33.9,33.6,1004.5,0.0,36.5,29.4,1004.6,Cunnersdorf_C_001,158.404,0.946946,390.759829,0.9664
2016-06-23 14:00:00,753.0,388.0,33.8,33.7,1004.6,4.0,36.3,29.2,1004.3,Cunnersdorf_C_001,158.879,0.944115,686.960423,0.9663
2016-06-23 15:00:00,745.0,362.0,33.2,37.7,1004.2,0.0,35.8,27.5,1003.9,Cunnersdorf_C_001,159.019,0.943284,676.252680,0.9623
2016-06-23 16:00:00,758.0,383.0,32.3,45.0,1003.3,0.0,35.8,26.2,1003.4,Cunnersdorf_C_001,158.953,0.943675,683.117022,0.9550
2016-06-23 17:00:00,802.0,392.0,30.8,53.3,1003.0,0.0,35.1,25.6,1003.1,Cunnersdorf_C_001,159.195,0.942241,715.399416,0.9467
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-07-21 09:00:00,710.0,391.0,20.1,69.5,1010.8,0.0,21,63.4,1007.3,Cunnersdorf_C_001,164.813,0.910122,601.276902,0.9305
2021-07-21 10:00:00,674.0,398.0,20.0,69.1,1010.7,0.0,21.6,62,1007.4,Cunnersdorf_C_001,164.641,0.911073,571.631550,0.9309
2021-07-21 11:00:00,736.0,362.0,21.1,63.2,1010.7,0.0,22.4,61.7,1007.3,Cunnersdorf_C_001,164.258,0.913198,629.635817,0.9368
2021-07-21 12:00:00,744.0,359.0,20.9,64.5,1010.6,0.0,23,60.8,1007.2,Cunnersdorf_C_001,164.250,0.913242,635.627397,0.9355


In [11]:
data_hub.produce_soil_moisture_estimates()
data_hub.crns_data_frame

Unnamed: 0_level_0,epithermal_neutrons,thermal_neutrons,air_temperature,air_relative_humidity,air_pressure,precipitation,AirTemperature_Sensor2,AirHumidity_Relative_Sensor2,AirPressure_Sensor2,Station_Id,incoming_neutron_intensity,correction_for_intensity,corrected_epithermal_neutron_count,new_biomass_correction,epithermal_neutrons_smoothed,soil_moisture_crns,crns_measurement_depth
date_time_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2016-06-23 13:00:00,427.0,683.0,33.9,33.6,1004.5,0.0,36.5,29.4,1004.6,Cunnersdorf_C_001,158.404,0.946946,390.759829,0.9664,,,
2016-06-23 14:00:00,753.0,388.0,33.8,33.7,1004.6,4.0,36.3,29.2,1004.3,Cunnersdorf_C_001,158.879,0.944115,686.960423,0.9663,,,
2016-06-23 15:00:00,745.0,362.0,33.2,37.7,1004.2,0.0,35.8,27.5,1003.9,Cunnersdorf_C_001,159.019,0.943284,676.252680,0.9623,,,
2016-06-23 16:00:00,758.0,383.0,32.3,45.0,1003.3,0.0,35.8,26.2,1003.4,Cunnersdorf_C_001,158.953,0.943675,683.117022,0.9550,,,
2016-06-23 17:00:00,802.0,392.0,30.8,53.3,1003.0,0.0,35.1,25.6,1003.1,Cunnersdorf_C_001,159.195,0.942241,715.399416,0.9467,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-07-21 09:00:00,710.0,391.0,20.1,69.5,1010.8,0.0,21,63.4,1007.3,Cunnersdorf_C_001,164.813,0.910122,601.276902,0.9305,603.298134,0.328066,14.717004
2021-07-21 10:00:00,674.0,398.0,20.0,69.1,1010.7,0.0,21.6,62,1007.4,Cunnersdorf_C_001,164.641,0.911073,571.631550,0.9309,600.603073,0.333831,14.585181
2021-07-21 11:00:00,736.0,362.0,21.1,63.2,1010.7,0.0,22.4,61.7,1007.3,Cunnersdorf_C_001,164.258,0.913198,629.635817,0.9368,602.670882,0.329396,14.686236
2021-07-21 12:00:00,744.0,359.0,20.9,64.5,1010.6,0.0,23,60.8,1007.2,Cunnersdorf_C_001,164.250,0.913242,635.627397,0.9355,607.314342,0.319719,14.915274


In [None]:
# data_hub.save_and_archive_data(Path/path) TODO

In [12]:
DataAuditLog.archive_and_delete_log(site_name="TestQA")