In [1]:
from pathlib import Path

import pandas as pd
from neptoon.quality_control import (
    QualityAssessmentFlagBuilder,
    QualityCheck,
    QATarget,
    QAMethod,
    WhatParamsDoINeed,
)
from neptoon.corrections import (
    CorrectionType,
    CorrectionTheory,
)
from neptoon.calibration import CalibrationConfiguration
from neptoon.data_audit import (
    DataAuditLog,
)
from neptoon.io.read import (
    ConfigurationManager
)
from neptoon.workflow import ProcessWithYaml

In [2]:
# Create the DataAuditLog before any processing starts. The call returns the
# DataAuditLog instance, which is what the cell displays as its output.
# NOTE(review): presumably later processing steps record into this log -
# confirm against the neptoon documentation.
DataAuditLog.create()

<neptoon.data_audit.data_audit.DataAuditLog at 0x105bbd0c0>

In [3]:
# Load and validate the station and processing YAML files, then build the
# data hub from them.
config = ConfigurationManager()

# Both YAML files live in the same folder, resolved relative to the current
# working directory.
config_dir = Path.cwd() / "configuration_files"
station_config_path = config_dir / "FSC001_station.yaml"
processing_config_path = config_dir / "v1_processing_method.yaml"

config.load_and_validate_configuration(
    name="station",
    file_path=station_config_path,
)
config.load_and_validate_configuration(
    name="processing",
    file_path=processing_config_path,
)

# DataAuditLog.create() is deliberately NOT called again here: the log was
# already created in the previous cell, and creating it twice is redundant.
yaml_processor = ProcessWithYaml(configuration_object=config)

# Build the data hub from the validated configuration.
data_hub = yaml_processor.create_data_hub()

In [4]:
# Read the calibration samples from the example data folder (resolved
# relative to the current working directory) and hand them to the data hub.
calibration_csv_path = Path.cwd().joinpath(
    "example_data", "FSCD001_calibration.csv"
)
calib_df = pd.read_csv(calibration_csv_path)
data_hub.calibration_samples_data = calib_df

The `attach_nmdb_data()` method has default values for its arguments, so running the code below, without selecting any values, will work too.


```python
data_hub.attach_nmdb_data()
```



In [5]:
# Explicit NMDB settings for this example; they are passed to
# attach_nmdb_data() as keyword arguments.
nmdb_settings = {
    "station": "JUNG",
    "new_column_name": "incoming_neutron_intensity",
    "resolution": "60",
    "nmdb_table": "revori",
}
data_hub.attach_nmdb_data(**nmdb_settings)

# Display the CRNS data frame; it now includes the
# "incoming_neutron_intensity" column.
data_hub.crns_data_frame

Unnamed: 0_level_0,epithermal_neutrons_raw,NeutronCount_Slow_Cum1h,AirTemperature,AirHumidity_Relative,air_pressure,Precipitation,air_temperature,air_relative_humidity,epithermal_neutrons_cph,incoming_neutron_intensity
DateTime_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-09 11:00:00+00:00,655.0,,,,996.2,,8.9,93.9,655.0,155.777
2014-01-09 12:00:00+00:00,715.0,431.0,,,994.3,,10.1,84.4,715.0,155.713
2014-01-09 13:00:00+00:00,741.0,473.0,,,993.6,,10.6,80.1,741.0,156.260
2014-01-09 14:00:00+00:00,713.0,488.0,,,992.6,,11.1,77.1,713.0,156.291
2014-01-09 15:00:00+00:00,734.0,506.0,,,991.3,,11.6,75.5,734.0,155.724
...,...,...,...,...,...,...,...,...,...,...
2020-06-09 19:00:00+00:00,1034.0,521.0,,,1010.0,,25.6,41.3,1034.0,168.061
2020-06-09 20:00:00+00:00,952.0,524.0,,,1010.2,,23.4,40.6,952.0,168.053
2020-06-09 21:00:00+00:00,924.0,473.0,,,1010.4,,21.8,40.6,924.0,167.782
2020-06-09 22:00:00+00:00,956.0,555.0,,,1010.4,,20.5,40.7,956.0,168.014


In [6]:
# Add the static site values to the CRNS data frame. In the displayed output
# the frame gains the columns site_name, latitude, longitude, elevation,
# reference_incoming_neutron_value, site_cutoff_rigidity and n0, each holding
# the same value on every row.
# NOTE(review): these values presumably come from the station YAML - confirm.
data_hub.prepare_static_values()
data_hub.crns_data_frame

Unnamed: 0_level_0,epithermal_neutrons_raw,NeutronCount_Slow_Cum1h,AirTemperature,AirHumidity_Relative,air_pressure,Precipitation,air_temperature,air_relative_humidity,epithermal_neutrons_cph,incoming_neutron_intensity,site_name,latitude,longitude,elevation,reference_incoming_neutron_value,site_cutoff_rigidity,n0
DateTime_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2014-01-09 11:00:00+00:00,655.0,,,,996.2,,8.9,93.9,655.0,155.777,Fuerstensee,53.319,13.122,66,152,2.55,1111
2014-01-09 12:00:00+00:00,715.0,431.0,,,994.3,,10.1,84.4,715.0,155.713,Fuerstensee,53.319,13.122,66,152,2.55,1111
2014-01-09 13:00:00+00:00,741.0,473.0,,,993.6,,10.6,80.1,741.0,156.260,Fuerstensee,53.319,13.122,66,152,2.55,1111
2014-01-09 14:00:00+00:00,713.0,488.0,,,992.6,,11.1,77.1,713.0,156.291,Fuerstensee,53.319,13.122,66,152,2.55,1111
2014-01-09 15:00:00+00:00,734.0,506.0,,,991.3,,11.6,75.5,734.0,155.724,Fuerstensee,53.319,13.122,66,152,2.55,1111
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-06-09 19:00:00+00:00,1034.0,521.0,,,1010.0,,25.6,41.3,1034.0,168.061,Fuerstensee,53.319,13.122,66,152,2.55,1111
2020-06-09 20:00:00+00:00,952.0,524.0,,,1010.2,,23.4,40.6,952.0,168.053,Fuerstensee,53.319,13.122,66,152,2.55,1111
2020-06-09 21:00:00+00:00,924.0,473.0,,,1010.4,,21.8,40.6,924.0,167.782,Fuerstensee,53.319,13.122,66,152,2.55,1111
2020-06-09 22:00:00+00:00,956.0,555.0,,,1010.4,,20.5,40.7,956.0,168.014,Fuerstensee,53.319,13.122,66,152,2.55,1111


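You can use your IDE (e.g., VSCode) to check which methods are available on `QAMethod` using autocompletion. To see which parameters a particular method accepts, use the `WhatParamsDoINeed` helper: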

In [7]:
# Print the required and optional parameters of the UniLOF spike check.
# `WhatParamsDoINeed` is imported in the import cell at the top of the
# notebook rather than here, so all dependencies are declared in one place.
# The trailing semicolon hides the uninformative default object repr that
# would otherwise be displayed after the printed parameter listing.
WhatParamsDoINeed(QAMethod.SPIKE_UNILOF);


Required parameters for QAMethod.SPIKE_UNILOF:
--------------------------------------------------

Optional parameters for QAMethod.SPIKE_UNILOF:
--------------------------------------------------
algorithm - [Literal] (default: ball_tree):
    Algorithm used for calculating the n-nearest neighbors needed for LOF calculation.
    ['ball_tree', 'kd_tree', 'brute', 'auto']
periods_in_calculation - [time steps] (default: 20):
    Number of periods to be included into the LOF calculation
threshold - [decimal] (default: 1.5):
    Threshold for flagging

Further information about QAMethod.SPIKE_UNILOF:
--------------------------------------------------
https://rdm-software.pages.ufz.de/saqc/_api/saqc.SaQC.html#saqc.SaQC.flagUniLOF


<neptoon.quality_control.saqc_methods_and_params.WhatParamsDoINeed at 0x174fa4d90>

All of these parameters are optional and have default values, so I'll stick with the defaults.

In [8]:
qa_flags = QualityAssessmentFlagBuilder()

# Range check on relative humidity with bounds 0 and 100.
humidity_range_check = QualityCheck(
    target=QATarget.RELATIVE_HUMIDITY,
    method=QAMethod.RANGE_CHECK,
    parameters={"min": 0, "max": 100},
)

# UniLOF spike check on the raw epithermal neutron counts; no parameters are
# passed, so the method's defaults apply.
neutron_spike_check = QualityCheck(
    target=QATarget.RAW_EPI_NEUTRONS,
    method=QAMethod.SPIKE_UNILOF,
)

qa_flags.add_check(humidity_range_check, neutron_spike_check)

# Register the custom checks with the data hub, apply them, and display the
# resulting flags frame.
data_hub.add_quality_flags(custom_flags=qa_flags)
data_hub.apply_quality_flags()
data_hub.flags_data_frame


Unnamed: 0_level_0,epithermal_neutrons_raw,NeutronCount_Slow_Cum1h,AirTemperature,AirHumidity_Relative,air_pressure,Precipitation,air_temperature,air_relative_humidity,epithermal_neutrons_cph,incoming_neutron_intensity,site_name,latitude,longitude,elevation,reference_incoming_neutron_value,site_cutoff_rigidity,n0
DateTime_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2014-01-09 11:00:00+00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2014-01-09 12:00:00+00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2014-01-09 13:00:00+00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2014-01-09 14:00:00+00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2014-01-09 15:00:00+00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-06-09 19:00:00+00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2020-06-09 20:00:00+00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2020-06-09 21:00:00+00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED
2020-06-09 22:00:00+00:00,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED,UNFLAGGED


In [9]:
# Corrections to apply, in selection order. Each entry holds the keyword
# arguments for one select_correction() call; the pressure correction is
# selected without an explicit theory, so the library default is used.
# The above-ground biomass correction (CorrectionType.ABOVE_GROUND_BIOMASS)
# is left disabled in this example.
selected_corrections = [
    {
        "correction_type": CorrectionType.INCOMING_INTENSITY,
        "correction_theory": CorrectionTheory.ZREDA_2012,
    },
    {
        "correction_type": CorrectionType.HUMIDITY,
        "correction_theory": CorrectionTheory.ROSOLEM_2013,
    },
    {
        "correction_type": CorrectionType.PRESSURE,
    },
]
for correction_kwargs in selected_corrections:
    data_hub.select_correction(**correction_kwargs)

# Apply the selected corrections and display the updated CRNS data frame.
data_hub.correct_neutrons()
data_hub.crns_data_frame

Unnamed: 0_level_0,epithermal_neutrons_raw,NeutronCount_Slow_Cum1h,AirTemperature,AirHumidity_Relative,air_pressure,Precipitation,air_temperature,air_relative_humidity,epithermal_neutrons_cph,incoming_neutron_intensity,...,actual_vapour_pressure,absolute_humidity,humidity_correction,mean_pressure,beta_coefficient,atmospheric_pressure_correction,corrected_epithermal_neutrons,corrected_epithermal_neutrons_uncertainty,corrected_epithermal_neutrons_upper_count,corrected_epithermal_neutrons_lower_count
DateTime_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-09 11:00:00+00:00,655.0,,,,996.2,,8.9,93.9,655.0,155.777,...,10.701509,8.221427,1.044396,1005.3464,0.007315,0.935287,624.297662,24.393328,648.690990,599.904334
2014-01-09 12:00:00+00:00,715.0,431.0,,,994.3,,10.1,84.4,715.0,155.713,...,10.426863,7.976494,1.043073,1005.3464,0.007315,0.922379,671.504506,25.112845,696.617351,646.391662
2014-01-09 13:00:00+00:00,741.0,473.0,,,993.6,,10.6,80.1,741.0,156.260,...,10.231579,7.813311,1.042192,1005.3464,0.007315,0.917668,689.362168,25.324352,714.686520,664.037816
2014-01-09 14:00:00+00:00,713.0,488.0,,,992.6,,11.1,77.1,713.0,156.291,...,10.181377,7.761298,1.041911,1005.3464,0.007315,0.910981,658.171251,24.648707,682.819958,633.522544
2014-01-09 15:00:00+00:00,734.0,506.0,,,991.3,,11.6,75.5,734.0,155.724,...,10.305865,7.842401,1.042349,1005.3464,0.007315,0.902359,673.870933,24.873030,698.743963,648.997903
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-06-09 19:00:00+00:00,1034.0,521.0,,,1010.0,,25.6,41.3,1034.0,168.061,...,13.557331,9.833193,1.053099,1005.3464,0.007315,1.034625,1018.941729,31.687580,1050.629310,987.254149
2020-06-09 20:00:00+00:00,952.0,524.0,,,1010.2,,23.4,40.6,952.0,168.053,...,11.681880,8.535778,1.046093,1005.3464,0.007315,1.036139,933.303383,30.248536,963.551920,903.054847
2020-06-09 21:00:00+00:00,924.0,473.0,,,1010.4,,21.8,40.6,924.0,167.782,...,10.599595,7.786982,1.042050,1005.3464,0.007315,1.037656,905.132482,29.776673,934.909154,875.355809
2020-06-09 22:00:00+00:00,956.0,555.0,,,1010.4,,20.5,40.7,956.0,168.014,...,9.810117,7.238899,1.039090,1005.3464,0.007315,1.037656,932.529804,30.160169,962.689973,902.369635


We've pulled some calibration data that doesn't exactly follow the expected naming convention in neptoon. We can change this in the `CalibrationConfiguration` by updating the expected column names for key columns. 

In [10]:
# Map each CalibrationConfiguration column argument to the column name
# actually used in the calibration samples file.
calibration_column_names = {
    "date_time_column_name": "DateTime_utc",
    "profile_id_column": "Profile_ID",
    "distance_column": "Distance_to_CRNS_m",
    "sample_depth_column": "Profile_Depth_cm",
    "soil_moisture_gravimetric_column": "SoilMoisture_g_g",
    "bulk_density_of_sample_column": "DryBulkDensity_g_cm3",
    "soil_organic_carbon_column": "SoilOrganicCarbon_g_g",
    "lattice_water_column": "LatticeWater_g_g",
}
calibration_config = CalibrationConfiguration(**calibration_column_names)

# Run the calibration with this configuration and display the per-day
# calibration results.
data_hub.calibrate_station(config=calibration_config)
data_hub.calibrator.return_calibration_results_data_frame()

N0 number is 1111.0


  pd.to_datetime(


Unnamed: 0,calibration_day,field_average_soil_moisture_volumetric,field_average_soil_moisture_gravimetric,horizontal_footprint_radius_in_meters,optimal_N0,absolute_error
0,2014-01-17 12:00:00+00:00,0.178084,0.17972,139.721328,1031.0,0.000307
1,2014-11-17 12:00:00+00:00,0.143864,0.16831,149.961242,1191.0,0.00016


In [11]:
# Produce the soil moisture estimates. In the displayed output the CRNS data
# frame gains the columns soil_moisture, soil_moisture_uncertainty_upper,
# soil_moisture_uncertainty_lower and crns_measurement_depth.
data_hub.produce_soil_moisture_estimates()
data_hub.crns_data_frame

Unnamed: 0_level_0,epithermal_neutrons_raw,NeutronCount_Slow_Cum1h,AirTemperature,AirHumidity_Relative,air_pressure,Precipitation,air_temperature,air_relative_humidity,epithermal_neutrons_cph,incoming_neutron_intensity,...,beta_coefficient,atmospheric_pressure_correction,corrected_epithermal_neutrons,corrected_epithermal_neutrons_uncertainty,corrected_epithermal_neutrons_upper_count,corrected_epithermal_neutrons_lower_count,soil_moisture,soil_moisture_uncertainty_upper,soil_moisture_uncertainty_lower,crns_measurement_depth
DateTime_utc,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-01-09 11:00:00+00:00,655.0,,,,996.2,,8.9,93.9,655.0,155.777,...,0.007315,0.935287,624.297662,24.393328,648.690990,599.904334,0.440815,0.519783,0.378213,12.530317
2014-01-09 12:00:00+00:00,715.0,431.0,,,994.3,,10.1,84.4,715.0,155.713,...,0.007315,0.922379,671.504506,25.112845,696.617351,646.391662,0.330370,0.383555,0.286613,14.457307
2014-01-09 13:00:00+00:00,741.0,473.0,,,993.6,,10.6,80.1,741.0,156.260,...,0.007315,0.917668,689.362168,25.324352,714.686520,664.037816,0.298437,0.345070,0.259640,15.246858
2014-01-09 14:00:00+00:00,713.0,488.0,,,992.6,,11.1,77.1,713.0,156.291,...,0.007315,0.910981,658.171251,24.648707,682.819958,633.522544,0.357249,0.415510,0.309644,13.890389
2014-01-09 15:00:00+00:00,734.0,506.0,,,991.3,,11.6,75.5,734.0,155.724,...,0.007315,0.902359,673.870933,24.873030,698.743963,648.997903,0.325886,0.377508,0.283261,14.559905
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-06-09 19:00:00+00:00,1034.0,521.0,,,1010.0,,25.6,41.3,1034.0,168.061,...,0.007315,1.034625,1018.941729,31.687580,1050.629310,987.254149,0.047171,0.058791,0.036707,40.992857
2020-06-09 20:00:00+00:00,952.0,524.0,,,1010.2,,23.4,40.6,952.0,168.053,...,0.007315,1.036139,933.303383,30.248536,963.551920,903.054847,0.081833,0.096972,0.068357,31.273724
2020-06-09 21:00:00+00:00,924.0,473.0,,,1010.4,,21.8,40.6,924.0,167.782,...,0.007315,1.037656,905.132482,29.776673,934.909154,875.355809,0.095873,0.112575,0.081078,28.718448
2020-06-09 22:00:00+00:00,956.0,555.0,,,1010.4,,20.5,40.7,956.0,168.014,...,0.007315,1.037656,932.529804,30.160169,962.689973,902.369635,0.082198,0.097337,0.068721,31.199992


In [12]:
# Save the processed data, with append_yaml_hash_to_folder_name enabled.
# NOTE(review): the recorded output of this cell shows a logged error
# ("too many values to unpack (expected 2)") followed by
# "Could not close DataAuditLog, presumed not created". The cause is not
# visible in this notebook - investigate before relying on the saved output.
data_hub.save_data(append_yaml_hash_to_folder_name=True)

ERROR:global_logger:Error: too many values to unpack (expected 2) 
Could not close DataAuditLog, presumed not created
