In [1]:
from pathlib import Path
import math
import pandas as pd
from neptoon.quality_assesment.quality_assesment import (
    QualityAssessmentFlagBuilder,
    FlagRangeCheck,
    FlagSpikeDetectionUniLOF,
)

from neptoon.data_management.crns_data_hub import CRNSDataHub

from neptoon.data_management.column_information import ColumnInfo
from neptoon.data_management.site_information import SiteInformation

from neptoon.neutron_correction.neutron_correction import (
    CorrectionType,
    CorrectionTheory,
)
from neptoon.neutron_correction.correction_classes import Correction

from neptoon.data_management.data_audit import (
    DataAuditLog,
)
from neptoon.data_ingest_and_formatting.data_ingest import (
    CollectAndParseRawData, 
    ParseFilesIntoDataFrame,
    FormatDataForCRNSDataHub,
)

In [2]:
# Create the data audit log before any processing step runs. The same log is
# archived and deleted by the final cell of this notebook.
DataAuditLog.create()

<neptoon.data_management.data_audit.DataAuditLog at 0x107ce1ed0>

In [8]:
# Optional prefix prepended to the configuration file path. Leave it empty to
# use the relative path as-is, or set it to the repository root (with a
# trailing slash) when running from another directory.
abspath = ""
station_yaml_path = abspath + "configuration_files/A101_station.yaml"

# Build the raw-data parser from the station YAML and create the CRNS data frame.
data_creator = CollectAndParseRawData(path_to_yaml=station_yaml_path)
crns_df = data_creator.create_data_frame()

In [9]:
# Static, site-level metadata. After `prepare_static_values()` is called on the
# data hub these values appear as constant columns in the CRNS data frame
# (see the output of that cell).
site_information = SiteInformation(
    site_name="x_some_site",
    latitude=51.37,
    longitude=12.55,
    elevation=140,
    reference_incoming_neutron_value=150,
    dry_soil_bulk_density=1.4,
    lattice_water=0.01,
    soil_organic_carbon=0,
    n0=700,
    cutoff_rigidity=2.94,
    site_biomass = 1
)

# Extra values can be attached after construction, e.g. to override n0:
# site_information.add_custom_value("n0", 1500)
# NOTE(review): `site_biomass=1` is already passed to the constructor above;
# confirm whether a separate custom "biomass" value is still required.
site_information.add_custom_value("biomass", 1)


In [10]:
# The data hub bundles the parsed CRNS data frame with the site metadata; every
# later step in this notebook (NMDB data, flags, corrections, soil moisture,
# saving) is invoked on this object.
data_hub = CRNSDataHub(
    crns_data_frame=crns_df, site_information=site_information
)
# Validate the freshly ingested frame against the "initial_check" schema before
# any further processing.
data_hub.validate_dataframe(schema="initial_check")

The `attach_nmdb_data()` method has default values for its arguments, so calling it without any arguments, as shown below, also works.


```python
data_hub.attach_nmdb_data()
```



In [11]:
# Attach incoming neutron intensity data from NMDB with explicitly chosen
# settings. The values end up in the column named by `new_column_name`
# (visible as the last column of the output below).
# NOTE(review): `resolution` is passed as the string "60" — presumably
# minutes; confirm against the `attach_nmdb_data` documentation.
data_hub.attach_nmdb_data(
    station = "JUNG",
    new_column_name = "incoming_neutron_intensity",
    resolution = "60",
    nmdb_table = "revori"
    )
# Display the data frame to check the newly attached column.
data_hub.crns_data_frame

Unnamed: 0_level_0,RecordNum,P1_mb,P3_mb,air_pressure,air_temperature,T2_C,T3_C,T4_C,T_CS215,air_relative_humidity,...,N1ET_sec,N2ET_sec,N1T_C,N1RH,N2T_C,N2RH,D1,Unnamed: 19_level_0,epithermal_neutrons_cph,incoming_neutron_intensity
Date Time(UTC),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-12-31 22:33:00+00:00,18374,1009.7,1008.70,1009.02,0.3,128.9,0.0,254.9,-1.2,83.2,...,900,900,2.3,68.4,2.3,68.2,0,,388.0,163.175
2016-12-31 22:48:00+00:00,18375,1009.5,1008.50,1008.88,0.2,128.9,-0.1,254.9,-1.2,83.4,...,900,900,2.3,68.5,2.3,68.3,0,,432.0,163.175
2016-12-31 23:03:00+00:00,18376,1009.4,1008.30,1008.73,0.2,128.9,-0.2,254.8,-1.4,83.4,...,900,900,2.3,68.5,2.2,68.3,0,,340.0,163.175
2016-12-31 23:18:00+00:00,18377,1009.2,1008.11,1008.51,0.1,128.9,-0.3,254.7,-1.3,83.3,...,900,900,2.2,68.5,2.2,68.2,0,,352.0,163.175
2016-12-31 23:33:00+00:00,18378,1009.0,1007.88,1008.33,0.0,128.9,-0.4,254.6,-1.1,83.6,...,900,900,2.1,68.6,2.1,68.4,0,,368.0,163.993
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-01-12 16:48:00+00:00,13464,1012.4,1011.33,1013.28,4.7,128.9,4.5,3.6,2.3,83.8,...,900,900,7.1,69.8,7.0,70.1,0,,420.0,164.084
2018-01-12 17:03:00+00:00,13465,1012.4,1011.41,1013.29,4.7,128.9,4.5,3.6,2.3,83.9,...,900,900,7.1,69.8,7.0,70.1,0,,332.0,164.084
2018-01-12 17:18:00+00:00,13466,1012.6,1011.48,1013.43,4.6,128.9,4.5,3.6,2.3,83.9,...,900,900,7.1,69.8,7.0,70.1,0,,400.0,164.084
2018-01-12 17:33:00+00:00,13467,1012.6,1011.47,1013.45,4.6,128.9,4.5,3.6,2.3,83.9,...,900,900,7.1,69.8,7.0,70.1,0,,332.0,164.779


In [12]:
# Copy the site-level values (latitude, longitude, elevation, n0, ...) from
# `site_information` into the data frame as columns, as shown in the output.
data_hub.prepare_static_values()
# Display the data frame to check the added columns.
data_hub.crns_data_frame

Unnamed: 0_level_0,RecordNum,P1_mb,P3_mb,air_pressure,air_temperature,T2_C,T3_C,T4_C,T_CS215,air_relative_humidity,...,latitude,longitude,elevation,reference_incoming_neutron_value,dry_soil_bulk_density,lattice_water,soil_organic_carbon,cutoff_rigidity,site_biomass,n0
Date Time(UTC),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-12-31 22:33:00+00:00,18374,1009.7,1008.70,1009.02,0.3,128.9,0.0,254.9,-1.2,83.2,...,51.37,12.55,140,150,1.4,0.01,0,2.94,1,700
2016-12-31 22:48:00+00:00,18375,1009.5,1008.50,1008.88,0.2,128.9,-0.1,254.9,-1.2,83.4,...,51.37,12.55,140,150,1.4,0.01,0,2.94,1,700
2016-12-31 23:03:00+00:00,18376,1009.4,1008.30,1008.73,0.2,128.9,-0.2,254.8,-1.4,83.4,...,51.37,12.55,140,150,1.4,0.01,0,2.94,1,700
2016-12-31 23:18:00+00:00,18377,1009.2,1008.11,1008.51,0.1,128.9,-0.3,254.7,-1.3,83.3,...,51.37,12.55,140,150,1.4,0.01,0,2.94,1,700
2016-12-31 23:33:00+00:00,18378,1009.0,1007.88,1008.33,0.0,128.9,-0.4,254.6,-1.1,83.6,...,51.37,12.55,140,150,1.4,0.01,0,2.94,1,700
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-01-12 16:48:00+00:00,13464,1012.4,1011.33,1013.28,4.7,128.9,4.5,3.6,2.3,83.8,...,51.37,12.55,140,150,1.4,0.01,0,2.94,1,700
2018-01-12 17:03:00+00:00,13465,1012.4,1011.41,1013.29,4.7,128.9,4.5,3.6,2.3,83.9,...,51.37,12.55,140,150,1.4,0.01,0,2.94,1,700
2018-01-12 17:18:00+00:00,13466,1012.6,1011.48,1013.43,4.6,128.9,4.5,3.6,2.3,83.9,...,51.37,12.55,140,150,1.4,0.01,0,2.94,1,700
2018-01-12 17:33:00+00:00,13467,1012.6,1011.47,1013.45,4.6,128.9,4.5,3.6,2.3,83.9,...,51.37,12.55,140,150,1.4,0.01,0,2.94,1,700


In [13]:
# Collect the quality checks in a builder, then hand the builder to the hub.
qa_flags = QualityAssessmentFlagBuilder()

# Relative humidity must lie within 0-100.
humidity_range_check = FlagRangeCheck(
    "air_relative_humidity", min_val=0, max_val=100
)
# Spike detection on the raw epithermal neutron counts.
neutron_spike_check = FlagSpikeDetectionUniLOF("epithermal_neutrons_cph")
# Further checks can be registered the same way, for example:
# FlagRangeCheck("precipitation", min_val=0, max_val=20)

qa_flags.add_check(humidity_range_check, neutron_spike_check)

# Register the checks on the hub, apply them, and display the resulting flags.
data_hub.add_quality_flags(custom_flags=qa_flags)
data_hub.apply_quality_flags()
data_hub.flags_data_frame


Unnamed: 0_level_0,RecordNum,P1_mb,P3_mb,air_pressure,air_temperature,T2_C,T3_C,T4_C,T_CS215,air_relative_humidity,...,latitude,longitude,elevation,reference_incoming_neutron_value,dry_soil_bulk_density,lattice_water,soil_organic_carbon,cutoff_rigidity,site_biomass,n0
Date Time(UTC),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-12-31 22:33:00+00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,...,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2016-12-31 22:48:00+00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,...,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2016-12-31 23:03:00+00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,...,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2016-12-31 23:18:00+00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,...,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2016-12-31 23:33:00+00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,...,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-01-12 16:48:00+00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,...,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2018-01-12 17:03:00+00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,...,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2018-01-12 17:18:00+00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,...,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2018-01-12 17:33:00+00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,...,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED


In [14]:
# Corrections to apply, in selection order. Each entry holds the keyword
# arguments for one `select_correction` call; the pressure correction is
# selected without naming a theory.
selected_corrections = [
    {
        "correction_type": CorrectionType.INCOMING_INTENSITY,
        "correction_theory": CorrectionTheory.ZREDA_2012,
    },
    {
        "correction_type": CorrectionType.HUMIDITY,
        "correction_theory": CorrectionTheory.ROSOLEM_2013,
    },
    {"correction_type": CorrectionType.PRESSURE},
    # Not selected in this example:
    # {"correction_type": CorrectionType.ABOVE_GROUND_BIOMASS},
]

for correction_kwargs in selected_corrections:
    data_hub.select_correction(**correction_kwargs)

# Apply the selected corrections and display the corrected data frame.
data_hub.correct_neutrons()
data_hub.crns_data_frame

Unnamed: 0_level_0,RecordNum,P1_mb,P3_mb,air_pressure,air_temperature,T2_C,T3_C,T4_C,T_CS215,air_relative_humidity,...,actual_vapour_pressure,absolute_humidity,humidity_correction,mean_pressure,beta_coefficient,atmospheric_pressure_correction,corrected_epithermal_neutrons,corrected_epithermal_neutrons_uncertainty,corrected_epithermal_neutrons_upper_count,corrected_epithermal_neutrons_lower_count
Date Time(UTC),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-12-31 22:33:00+00:00,18374,1009.7,1008.70,1009.02,0.3,128.9,0.0,254.9,-1.2,83.2,...,5.196963,4.118130,1.022238,996.544228,0.007328,1.095732,399.508088,40.563900,440.071988,358.944187
2016-12-31 22:48:00+00:00,18375,1009.5,1008.50,1008.88,0.2,128.9,-0.1,254.9,-1.2,83.4,...,5.171866,4.099742,1.022139,996.544228,0.007328,1.094608,444.313858,42.754121,487.067979,401.559737
2016-12-31 23:03:00+00:00,18376,1009.4,1008.30,1008.73,0.2,128.9,-0.2,254.8,-1.4,83.4,...,5.171866,4.099742,1.022139,996.544228,0.007328,1.093406,349.307293,37.887696,387.194989,311.419598
2016-12-31 23:18:00+00:00,18377,1009.2,1008.11,1008.51,0.1,128.9,-0.3,254.7,-1.3,83.3,...,5.128361,4.066743,1.021960,996.544228,0.007328,1.091644,360.990297,38.481695,399.471992,322.508602
2016-12-31 23:33:00+00:00,18378,1009.0,1007.88,1008.33,0.0,128.9,-0.4,254.6,-1.1,83.6,...,5.109632,4.053374,1.021888,996.544228,0.007328,1.090205,374.994989,39.095930,414.090919,335.899059
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-01-12 16:48:00+00:00,13464,1012.4,1011.33,1013.28,4.7,128.9,4.5,3.6,2.3,83.8,...,7.157223,5.581648,1.030141,996.544228,0.007328,1.130477,447.128815,43.635304,490.764119,403.493511
2018-01-12 17:03:00+00:00,13465,1012.4,1011.41,1013.29,4.7,128.9,4.5,3.6,2.3,83.9,...,7.165764,5.588309,1.030177,996.544228,0.007328,1.130560,353.482925,38.799792,392.282717,314.683133
2018-01-12 17:18:00+00:00,13466,1012.6,1011.48,1013.43,4.6,128.9,4.5,3.6,2.3,83.9,...,7.115869,5.551396,1.029978,996.544228,0.007328,1.131720,426.237699,42.623770,468.861469,383.613929
2018-01-12 17:33:00+00:00,13467,1012.6,1011.47,1013.45,4.6,128.9,4.5,3.6,2.3,83.9,...,7.115869,5.551396,1.029978,996.544228,0.007328,1.131886,352.336773,38.673985,391.010759,313.662788


In [None]:
class NewIdeaForBiomass(Correction):
    """
    Example of a user-defined correction: biomass scaled by humidity.

    For every row the correction factor is
    ``1 - (biomass / humidity) / 1000``, where ``biomass`` is the
    ``site_biomass`` attribute of ``site_information`` and ``humidity``
    is read from the ``air_relative_humidity`` column. The factor is
    written to the column named by ``correction_factor_column_name``.
    """

    def __init__(
        self,
        site_information,
        correction_type=CorrectionType.CUSTOM,
        correction_factor_column_name: str = "new_biomass_correction",
    ):
        """
        Parameters
        ----------
        site_information
            Object exposing a ``site_biomass`` attribute.
        correction_type
            Correction type forwarded to the ``Correction`` base class.
        correction_factor_column_name : str
            Name of the column that receives the correction factor.
        """
        super().__init__(
            correction_type=correction_type,
            correction_factor_column_name=correction_factor_column_name,
        )
        self.site_information = site_information
        self.humidity_column_name = "air_relative_humidity"

    @staticmethod
    def new_func(biomass, humidity):
        """
        Compute the correction factor for a single observation.

        Parameters
        ----------
        biomass
            Site biomass value.
        humidity
            Humidity value of the observation.

        Returns
        -------
        float
            ``1`` when ``biomass`` is zero (no correction), ``nan`` when
            ``humidity`` is zero (the ratio is undefined), otherwise
            ``1 - (biomass / humidity) / 1000``.
        """
        if biomass == 0:
            return 1
        if humidity == 0:
            # biomass / 0 is undefined; mark the factor as missing instead
            # of raising or producing an infinite correction factor.
            return float("nan")
        return 1 - ((biomass / humidity) / 1000)

    def apply(self, data_frame: pd.DataFrame):
        """
        Add the correction factor column to ``data_frame``.

        Parameters
        ----------
        data_frame : pd.DataFrame
            Frame containing the ``air_relative_humidity`` column.

        Returns
        -------
        pd.DataFrame
            The same frame with the correction factor column added.
        """
        # Site-level constant, read once from site_information.
        biomass = self.site_information.site_biomass
        # Pass by keyword so the values cannot be swapped:
        # new_func expects (biomass, humidity).
        data_frame[self.correction_factor_column_name] = data_frame[
            self.humidity_column_name
        ].apply(
            lambda humidity: self.new_func(biomass=biomass, humidity=humidity)
        )
        return data_frame

In [None]:
# Register the user-defined correction class with the hub's correction factory
# under the CUSTOM correction type and the theory name "my_new_idea".
data_hub.correction_factory.register_custom_correction(
    correction_type=CorrectionType.CUSTOM,
    theory="my_new_idea",
    correction_class=NewIdeaForBiomass,
)

# Select the newly registered correction using the same type/theory pair.
data_hub.select_correction(
    correction_type=CorrectionType.CUSTOM,
    correction_theory="my_new_idea",
)

# NOTE(review): `prepare_static_values()` was already called in an earlier
# cell; confirm whether it has to be repeated before `correct_neutrons()`.
data_hub.prepare_static_values()
# Re-run the neutron correction, now including the custom correction, and
# display the resulting data frame.
data_hub.correct_neutrons()
data_hub.crns_data_frame

In [15]:
# Convert the corrected neutron counts into soil moisture. The output below
# shows the added columns: soil_moisture, its upper/lower uncertainty and
# crns_measurement_depth.
data_hub.produce_soil_moisture_estimates()
# Display the data frame to check the added columns.
data_hub.crns_data_frame

Unnamed: 0_level_0,RecordNum,P1_mb,P3_mb,air_pressure,air_temperature,T2_C,T3_C,T4_C,T_CS215,air_relative_humidity,...,beta_coefficient,atmospheric_pressure_correction,corrected_epithermal_neutrons,corrected_epithermal_neutrons_uncertainty,corrected_epithermal_neutrons_upper_count,corrected_epithermal_neutrons_lower_count,soil_moisture,soil_moisture_uncertainty_upper,soil_moisture_uncertainty_lower,crns_measurement_depth
Date Time(UTC),Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2016-12-31 22:33:00+00:00,18374,1009.7,1008.70,1009.02,0.3,128.9,0.0,254.9,-1.2,83.2,...,0.007328,1.095732,399.508088,40.563900,440.071988,358.944187,0.394226,0.628538,0.265714,13.413346
2016-12-31 22:48:00+00:00,18375,1009.5,1008.50,1008.88,0.2,128.9,-0.1,254.9,-1.2,83.4,...,0.007328,1.094608,444.313858,42.754121,487.067979,401.559737,0.255549,0.385953,0.174339,16.809846
2016-12-31 23:03:00+00:00,18376,1009.4,1008.30,1008.73,0.2,128.9,-0.2,254.8,-1.4,83.4,...,0.007328,1.093406,349.307293,37.887696,387.194989,311.419598,0.715636,1.377031,0.449504,10.316070
2016-12-31 23:18:00+00:00,18377,1009.2,1008.11,1008.51,0.1,128.9,-0.3,254.7,-1.3,83.3,...,0.007328,1.091644,360.990297,38.481695,399.471992,322.508602,0.612193,1.099928,0.394374,10.981227
2016-12-31 23:33:00+00:00,18378,1009.0,1007.88,1008.33,0.0,128.9,-0.4,254.6,-1.1,83.6,...,0.007328,1.090205,374.994989,39.095930,414.090919,335.899059,0.515990,0.873808,0.340216,11.820811
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-01-12 16:48:00+00:00,13464,1012.4,1011.33,1013.28,4.7,128.9,4.5,3.6,2.3,83.8,...,0.007328,1.130477,447.128815,43.635304,490.764119,403.493511,0.249059,0.378372,0.168734,17.047856
2018-01-12 17:03:00+00:00,13465,1012.4,1011.41,1013.29,4.7,128.9,4.5,3.6,2.3,83.9,...,0.007328,1.130560,353.482925,38.799792,392.282717,314.683133,0.675682,1.283722,0.425412,10.550279
2018-01-12 17:18:00+00:00,13466,1012.6,1011.48,1013.43,4.6,128.9,4.5,3.6,2.3,83.9,...,0.007328,1.131720,426.237699,42.623770,468.861469,383.613929,0.302479,0.467655,0.204850,15.355110
2018-01-12 17:33:00+00:00,13467,1012.6,1011.47,1013.45,4.6,128.9,4.5,3.6,2.3,83.9,...,0.007328,1.131886,352.336773,38.673985,391.010759,313.662788,0.686288,1.311666,0.431259,10.485608


In [11]:
# Keep a handle on the final data frame for interactive inspection.
# NOTE(review): `df` is not used again in the visible notebook, and this cell's
# execution count is out of sequence — re-run the notebook top to bottom
# before sharing it.
df = data_hub.crns_data_frame

# Write the results to disk.
# NOTE(review): the flag name suggests a hash of the YAML configuration is
# appended to the output folder name — confirm against `save_data`.
data_hub.save_data(append_yaml_hash_to_folder_name=True)

In [16]:
# Archive the audit log created at the top of the notebook, then delete it.
# NOTE(review): `site_name='test'` differs from the site name used in
# `site_information` ("x_some_site") — confirm which name is intended.
DataAuditLog.archive_and_delete_log(site_name='test')