In [None]:
from pathlib import Path
import math
import pandas as pd
from neptoon.quality_assesment.quality_assesment import (
    QualityAssessmentFlagBuilder,
    FlagRangeCheck,
    FlagSpikeDetectionUniLOF,
)

from neptoon.data_management.crns_data_hub import CRNSDataHub

from neptoon.data_management.column_information import ColumnInfo
from neptoon.data_management.site_information import SiteInformation

from neptoon.neutron_correction.neutron_correction import (
    CorrectionType,
    CorrectionTheory,
)
from neptoon.neutron_correction.correction_classes import Correction

from neptoon.data_management.data_audit import (
    DataAuditLog,
)
from neptoon.data_ingest_and_formatting.data_ingest import (
    CollectAndParseRawData, 
    ParseFilesIntoDataFrame,
    FormatDataForCRNSDataHub,
)

In [None]:
# Create the data audit log before any processing is done.
# NOTE(review): presumably the steps run below are recorded in this log -
# confirm against the DataAuditLog implementation. The log is archived and
# deleted again by the final cell of this notebook.
DataAuditLog.create()

In [None]:
# zip_file = (
#     Path().parent / "tests" / "data_ingest_and_formatting" / 
#     "mock_data" / "CRNS-station_data-Hydroinnova-A.zip"
# )

# file_manager = ManageFileCollection(data_location=zip_file)
# file_manager.get_list_of_files() 
# file_manager.filter_files() 

In [None]:
# file_parser = ParseFilesIntoDataFrame(file_manager)
# data = file_parser.make_dataframe()

In [None]:
# data_formatter = FormatDataForCRNSDataHub(data_frame=data)
# data

In [None]:
# Locate the YAML file that describes the raw input data format.
# Prefer the copy inside the current working directory's
# `configuration_files` folder so the notebook is not tied to one machine;
# fall back to the original developer-specific absolute path when that copy
# does not exist.
input_format_yaml = (
    Path.cwd() / "configuration_files" / "input_data_format.yaml"
)
if not input_format_yaml.exists():
    input_format_yaml = Path(
        "/Users/power/Documents/code/neptoon/configuration_files/input_data_format.yaml"
    )

# The path is passed as a string, exactly as the original call did.
data_creator = CollectAndParseRawData(path_to_yaml=str(input_format_yaml))
crns_df = data_creator.create_data_frame()

In [None]:
# def import_crns_dataframe_and_format(filename):
#     """
#     This is a pseudo function that will eventually be replaced by the
#     ingest routines. For now it converts a sample dataset into a format
#     (which we can update later).
#     """
#     cwd = Path.cwd()
#     crns_df_path = cwd / "tests" / "sample_crns_data" / filename
#     crns_df = pd.read_csv(crns_df_path)
#     crns_df["date_time_utc"] = pd.to_datetime(
#         crns_df["date_time_utc"], dayfirst=True
#     )
#     crns_df.set_index(crns_df["date_time_utc"], inplace=True)
#     crns_df.drop(["date_time_utc"], axis=1, inplace=True)
#     crns_df = crns_df.replace("noData", math.nan)
#     crns_df["epithermal_neutrons"] = pd.to_numeric(
#         crns_df["epithermal_neutrons"]
#     )
#     crns_df["thermal_neutrons"] = pd.to_numeric(crns_df["thermal_neutrons"])
#     crns_df["air_temperature"] = pd.to_numeric(crns_df["air_temperature"])
#     crns_df["air_relative_humidity"] = pd.to_numeric(
#         crns_df["air_relative_humidity"]
#     )
#     crns_df["precipitation"] = pd.to_numeric(crns_df["precipitation"])
#     crns_df["air_pressure"] = pd.to_numeric(crns_df["air_pressure"])

#     return crns_df


# crns_df = import_crns_dataframe_and_format("CUC001.csv")
# crns_df

In [None]:
# Site-level values passed to SiteInformation as keyword arguments.
site_parameters = {
    "latitude": 51.37,
    "longitude": 12.55,
    "elevation": 140,
    "reference_incoming_neutron_value": 150,
    "bulk_density": 1.4,
    "lattice_water": 0.01,
    "soil_organic_carbon": 0,
    # "mean_pressure": 900,
    "cutoff_rigidity": 2.94,
    "site_biomass": 1,
}
site_information = SiteInformation(**site_parameters)

# Extra, user-defined values attached to the site after construction.
for custom_name, custom_value in (("n0", 1000), ("biomass", 1)):
    site_information.add_custom_value(custom_name, custom_value)


In [None]:
# Bundle the parsed data frame and the site information into one hub object;
# every later processing step in this notebook is called on `data_hub`.
data_hub = CRNSDataHub(
    crns_data_frame=crns_df, site_information=site_information
)
# Validate the freshly ingested frame against the "initial_check" schema
# before anything else is attached or computed.
data_hub.validate_dataframe(schema="initial_check")

The `attach_nmdb_data()` method has default values for its arguments, so calling it without any arguments, as shown below, also works:


```python
data_hub.attach_nmdb_data()
```



In [None]:
# Explicit settings for the NMDB attachment; the fetched values are stored
# under the column name given by `new_column_name`.
nmdb_settings = {
    "station": "JUNG",
    "new_column_name": "incoming_neutron_intensity",
    "resolution": "60",
    "nmdb_table": "revori",
}
data_hub.attach_nmdb_data(**nmdb_settings)

# Show the data frame as the cell output.
data_hub.crns_data_frame

In [None]:
# NOTE(review): presumably copies the static site values from
# `site_information` into the data frame so later steps can use them -
# confirm against CRNSDataHub. It is called again further down, before the
# custom correction is applied.
data_hub.prepare_static_values()
# Show the data frame as the cell output.
data_hub.crns_data_frame

In [None]:
qa_flags = QualityAssessmentFlagBuilder()

# Quality checks to run; each entry names the column it inspects.
quality_checks = [
    FlagRangeCheck("air_relative_humidity", min_val=0, max_val=100),
    # FlagRangeCheck("precipitation", min_val=0, max_val=20),
    FlagSpikeDetectionUniLOF("epithermal_neutrons"),
    # ...
]
qa_flags.add_check(*quality_checks)

data_hub.apply_quality_flags(custom_flags=qa_flags)

# Show the resulting flags as the cell output.
data_hub.flags_data_frame


In [None]:
# Keyword arguments for each correction to select, in selection order.
selected_corrections = [
    {
        "correction_type": CorrectionType.INCOMING_INTENSITY,
        "correction_theory": CorrectionTheory.ZREDA_2012,
    },
    {
        "correction_type": CorrectionType.HUMIDITY,
        "correction_theory": CorrectionTheory.ROSOLEM_2013,
    },
    # No correction_theory is passed for the pressure correction.
    {"correction_type": CorrectionType.PRESSURE},
    # {"correction_type": CorrectionType.ABOVE_GROUND_BIOMASS},
]
for correction_kwargs in selected_corrections:
    data_hub.select_correction(**correction_kwargs)

data_hub.correct_neutrons()

# Show the data frame as the cell output.
data_hub.crns_data_frame

In [None]:
class NewIdeaForBiomass(Correction):
    """
    Example of a user-defined correction: a biomass correction factor that
    depends on air relative humidity.

    The factor is ``1 - (biomass / humidity) / 1000`` where ``biomass`` is
    read from ``site_information.site_biomass`` and ``humidity`` from the
    ``air_relative_humidity`` column of the data frame.

    Parameters
    ----------
    site_information
        Object exposing a ``site_biomass`` attribute.
    correction_type
        Correction type passed on to the ``Correction`` base class.
    correction_factor_column_name : str
        Name of the column that ``apply`` writes the factor into.
    """

    def __init__(
        self,
        site_information,
        correction_type=CorrectionType.CUSTOM,
        correction_factor_column_name: str = "new_biomass_correction",
    ):
        super().__init__(
            correction_type=correction_type,
            correction_factor_column_name=correction_factor_column_name,
        )
        self.site_information = site_information
        self.humidity_column_name = "air_relative_humidity"

    @staticmethod
    def new_func(biomass, humidity):
        """
        Return the correction factor ``1 - (biomass / humidity) / 1000``.

        Works on scalars as well as on pandas Series, because it only uses
        arithmetic operators.
        """
        return 1 - ((biomass / humidity) / 1000)

    def apply(self, data_frame: pd.DataFrame):
        """
        Write the correction factor column into ``data_frame`` and return it.

        Parameters
        ----------
        data_frame : pd.DataFrame
            Frame containing the humidity column named by
            ``self.humidity_column_name``. It is modified in place.

        Returns
        -------
        pd.DataFrame
            The same frame, with the correction factor column added.
        """
        # Keyword arguments make the mapping explicit: the original passed
        # (humidity, biomass) positionally into new_func(biomass, humidity),
        # which silently swapped the two quantities.
        # The expression is evaluated on the whole column at once instead of
        # row by row; a humidity of 0 gives a non-finite factor.
        data_frame[self.correction_factor_column_name] = self.new_func(
            biomass=self.site_information.site_biomass,  # value from site_information
            humidity=data_frame[self.humidity_column_name],
        )
        return data_frame

In [None]:
# The custom correction is registered and then selected under the same
# type/theory pair, so both values are bound once and reused.
custom_correction_type = CorrectionType.CUSTOM
custom_theory_name = "my_new_idea"

data_hub.correction_factory.register_custom_correction(
    correction_type=custom_correction_type,
    theory=custom_theory_name,
    correction_class=NewIdeaForBiomass,
)
data_hub.select_correction(
    correction_type=custom_correction_type,
    correction_theory=custom_theory_name,
)

# Re-run the preparation and correction steps with the custom correction
# now selected.
data_hub.prepare_static_values()
data_hub.correct_neutrons()

# Show the data frame as the cell output.
data_hub.crns_data_frame

In [None]:
# Final processing step, run after the neutron corrections above.
# NOTE(review): presumably adds soil moisture columns to the data frame -
# confirm against CRNSDataHub.
data_hub.produce_soil_moisture_estimates()
# Show the data frame as the cell output.
data_hub.crns_data_frame

In [None]:
# data_hub.save_and_archive_data(Path/path) TODO

In [None]:
# Close out the audit log that was created at the top of the notebook.
# NOTE(review): presumably the archive is stored under the given site name
# ("TestQA") - confirm against DataAuditLog.
DataAuditLog.archive_and_delete_log(site_name="TestQA")