# Bühlot data preprocessing

The purpose of this code is to read in all the collected data, sort it by variable and then save it in the correct folder.
Running this code processes ALL the collected data, not just the new data, so all previously sorted data will be overwritten. The sorted data is saved in a folder named "data_export".

This is a list of all the variables:
- discharge [m3/s]
- river water level [m]
- precipitation [mm]
- air temperature [°C]
- relative humidity [%]
- solar irradiance [W/m2]
- wind speed [m/s]
- snow water equivalent [mm]
- evapotranspiration [mm/d]
- volumetric water content [cm3/cm3]
- ground water level [m]

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime
from glob import glob
from tqdm import tqdm

In [10]:
def preprocessing(filename, variable):
    """
    Read one raw data file and return the table for a single variable.

    Every supported variable is described by one entry in the layout table
    below: which pandas reader to use, the reader options for that file
    layout, the column positions to keep and the column names to assign.
    In all cases the text 'Logged' is read as a missing value.

    Parameters
    ----------
    filename : path of the raw data file
    variable : one of the keys of the layout table, e.g. 'precipitation',
        'Table1_VWC', 'ground water level csv' or 'river water level 4'

    Returns
    -------
    pandas.DataFrame whose first column is named 'date_str'. The river water
    level files keep all three of their columns ('date_str', 'time' and the
    level); every other variable yields 'date_str' plus one value column.

    Raises
    ------
    ValueError if `variable` is not one of the supported names. The file is
    not opened in that case.
    """
    # reader options per raw file layout
    one_title_row = {'skiprows': 1}
    table_header = {'skiprows': [1, 2, 3, 4]}
    xlsx_header = {'skiprows': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]}
    plain = {}
    semicolon_no_header = {'sep': ';', 'header': None}

    # column names shared by several variables
    vwc = 'volumetric water content [m^3/m^3; %]'
    ec = 'electric conductivity [dS/m]'
    height = 'water height [mm]'
    water_temp = 'water temperature [°C]'
    logger_temp = 'logger temperature [°C]'

    # variable -> (reader, reader options, positions to keep, new column names)
    # positions is None when the file is used as it is (no slicing, no copy)
    layouts = {
        'precipitation': ('read_csv', one_title_row, [1, 3], ['date_str', 'precipitation']),
        'air temperature': ('read_csv', one_title_row, [1, 2], ['date_str', 'air temperature']),
        'Table1_VWC': ('read_csv', table_header, [0, 2], ['date_str', vwc]),
        'Table1_EC': ('read_csv', table_header, [0, 3], ['date_str', ec]),
        'Table2_VWC': ('read_csv', table_header, [0, 3], ['date_str', vwc]),
        'Table2_EC': ('read_csv', table_header, [0, 4], ['date_str', ec]),
        'ground water level': ('read_excel', xlsx_header, [1, 4], ['date_str', height]),
        'water temperature': ('read_excel', xlsx_header, [1, 2], ['date_str', water_temp]),
        'logger temperature': ('read_excel', xlsx_header, [1, 3], ['date_str', logger_temp]),
        'ground water level csv': ('read_csv', plain, [1, 4], ['date_str', height]),
        'water temperature csv': ('read_csv', plain, [1, 2], ['date_str', water_temp]),
        'logger temperature csv': ('read_csv', plain, [1, 3], ['date_str', logger_temp]),
        'river water level 1': ('read_csv', semicolon_no_header, None,
                                ['date_str', 'time', 'river water level 1 []']),
        'river water level 2': ('read_csv', semicolon_no_header, None,
                                ['date_str', 'time', 'river water level 2 []']),
        'river water level 4': ('read_csv', semicolon_no_header, None,
                                ['date_str', 'time', 'river water level 4 []']),
    }

    layout = layouts.get(variable) if isinstance(variable, str) else None
    if layout is None:
        # the list of valid names is taken from the table so it cannot drift
        raise ValueError(f"Variable is '{variable}', must be in {list(layouts)}")

    reader_name, reader_options, positions, labels = layout

    # read in raw data
    table = getattr(pd, reader_name)(filename, na_values='Logged', **reader_options)

    # slice down to relevant columns
    if positions is not None:
        table = table.iloc[:, positions].copy()

    # rename columns
    table.columns = labels

    # return preprocessed dataframe
    return table

In [11]:
def merge(variable, frames=None, source_name=None):
    """
    Concatenate the preprocessed tables of one station and write them to the
    matching folder below "data_export".

    The rows are written in the order in which the tables appear in the list.
    NOTE: no chronological sorting takes place. The former
    `sort_index(inplace=False)` call threw its result away, and the index is a
    fresh 0..n-1 range after `ignore_index=True` anyway. A real sort would
    first have to parse 'date_str', whose format differs between the raw files.

    "Table1" and "Table2" are names from the data file volumetric water content.
    Each station has two sensors ("Table1" and "Table2"). While the sensor from
    "Table1" is placed in a depth of 20 cm below the top edge of the ground, the
    other sensor "Table2" is placed in a depth of 50 cm below the top edge of
    the ground.

    The abbreviations used in the list names ('all_data_<abbreviation>') are:
    AT = air temperature
    P = precipitation
    VWC_1 = volumetric water content of "Table1"
    EC_1 = electric conductivity of "Table1"
    VWC_2 = volumetric water content of "Table2"
    EC_2 = electric conductivity of "Table2"
    GWL = ground water level as a .xlsx file
    WT = water temperature as a .xlsx file
    LT = logger temperature as a .xlsx file
    GWL_csv = ground water level as a .csv file
    WT_csv = water temperature as a .csv file
    LT_csv = logger temperature as a .csv file
    RWL_1 = river water level from the first sensor
    RWL_2 = river water level from the second sensor
    RWL_4 = river water level from the third sensor - sensor is named with number 4

    Parameters
    ----------
    variable : name of the list to merge, e.g. 'all_data_AT'
    frames : optional list of dataframes to merge. If omitted, the
        module-level list called `variable` is used (previous behaviour).
    source_name : optional name of the raw file, e.g. 'Hundseck.csv'. If
        omitted, the module-level loop variable `filename` is used (previous
        behaviour).

    Raises
    ------
    ValueError if `variable` is not a known list name (pandas also raises
    ValueError when the list of tables is empty).
    NameError if a fallback to a module-level name is needed but that name
    does not exist.
    """
    import os  # local import so this cell does not depend on an edited import cell

    # list name -> (export sub-folder, extension of the raw file name,
    #               suffix that replaces this extension in the export name)
    exports = {
        'all_data_AT': ('air_temperature', '.csv', '_air_temperature.csv'),
        'all_data_P': ('precipitation', '.csv', '_precipitation.csv'),
        'all_data_VWC_1': ('volumetric_water_content', '.dat', '_VWC.dat'),
        'all_data_EC_1': ('electric_conductivity', '.dat', '_EC.dat'),
        'all_data_VWC_2': ('volumetric_water_content', '.dat', '_VWC.dat'),
        'all_data_EC_2': ('electric_conductivity', '.dat', '_EC.dat'),
        'all_data_GWL': ('ground_water_level', '.xlsx', '_GWL.xlsx'),
        'all_data_WT': ('water_temperature', '.xlsx', '_WT.xlsx'),
        'all_data_LT': ('logger_temperature', '.xlsx', '_LT.xlsx'),
        'all_data_GWL_csv': ('ground_water_level', '.csv', '_GWL.csv'),
        'all_data_WT_csv': ('water_temperature', '.csv', '_WT.csv'),
        'all_data_LT_csv': ('logger_temperature', '.csv', '_LT.csv'),
        'all_data_RWL_1': ('river_water_level_1', '.csv', '_RWL_1.csv'),
        'all_data_RWL_2': ('river_water_level_2', '.csv', '_RWL_2.csv'),
        'all_data_RWL_4': ('river_water_level_4', '.csv', '_RWL_4.csv'),
    }

    if not isinstance(variable, str) or variable not in exports:
        # the valid names come from the table, so the message always matches
        # what is really accepted
        raise ValueError(f"Variable is '{variable}', must be in {list(exports)}")
    folder, raw_extension, export_suffix = exports[variable]

    # fall back to the names the notebook loops set at module level
    if frames is None:
        try:
            frames = globals()[variable]
        except KeyError:
            raise NameError(f"name '{variable}' is not defined") from None
    if source_name is None:
        source_name = filename

    # merge all_data
    merged = pd.concat(frames, ignore_index=True)

    # replace filename
    export_name = source_name.replace(raw_extension, export_suffix)

    # make sure the export folder exists, otherwise writing fails on a fresh checkout
    os.makedirs(os.path.join('data_export', folder), exist_ok=True)

    # save file in the right folder; the xlsx sources are exported as xlsx,
    # everything else (including .dat) is written as comma separated text
    target = f'data_export/{folder}/{export_name}'
    if raw_extension == '.xlsx':
        merged.to_excel(target, index=False)
    else:
        merged.to_csv(target, index=False)
    

In [14]:
# Raw file names per data source. Each name is looked up in the dated
# sub-folders by the processing cells further down.

# precipitation and air temperature: one csv file per station
FILENAMES = [f'{station}.csv' for station in (
    'Butschenberg', 'Grundigklinik', 'Hundseck', 'Schafhof', 'Schönbrunn',
    'Sportplatz', 'Sternenberg-Schlammfang', 'Schwabenquelle', 'Winterberg',
)]

# soil moisture: one "Table1" and one "Table2" file per logger
FILENAMES_DAT_1 = [f'Schafhof{logger}_Table1.dat' for logger in (1, 5)]
FILENAMES_DAT_2 = [f'Schafhof{logger}_Table2.dat' for logger in (1, 5)]

# ground water level: the same stations exist as xlsx files and,
# with the ending "_alt", as csv files
_GWL_STATIONS = (
    'Schafhof_Tensiometer',
    'Sprengquellen_Tensiometer_unten_nord',
    'Sprengquellen_Tensiometer_unten_sued',
    'Sprengquellen_Tensiometer_oben_nord',
    'Sprengquellen_Tensiometer_oben_sued',
)
FILENAMES_GWL = [f'{station}.xlsx' for station in _GWL_STATIONS]
FILENAMES_GWL_csv = [f'{station}_alt.csv' for station in _GWL_STATIONS]

# river water level: sensors 1, 2 and 4 at each of the three gauges (csv files)
_RWL_GAUGES = ('Bühlot', 'Schwabenbrünnele', 'Büchelbach')
FILENAMES_RWL_1 = [f'Pegel1_{gauge}.csv' for gauge in _RWL_GAUGES]
FILENAMES_RWL_2 = [f'Pegel2_{gauge}.csv' for gauge in _RWL_GAUGES]
FILENAMES_RWL_4 = [f'Pegel4_{gauge}.csv' for gauge in _RWL_GAUGES]

In [50]:
# Air temperature and precipitation come from the same station files:
# every file is read once per variable, then one export per station is written.
for filename in FILENAMES:
    # merge() reads these two lists and `filename` from the notebook namespace
    all_data_AT, all_data_P = [], []

    # all copies of this station file, two folder levels below the notebook
    filenames = glob(f"*/*/{filename}", recursive=False)
    for datafile in tqdm(filenames):
        all_data_AT.append(preprocessing(datafile, 'air temperature'))
        all_data_P.append(preprocessing(datafile, 'precipitation'))

    merge('all_data_AT')
    merge('all_data_P')

100%|██████████████████████████████████████████████████████████████████████████████████| 62/62 [00:02<00:00, 26.26it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 66/66 [00:02<00:00, 27.24it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 36/36 [00:01<00:00, 22.29it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 67/67 [00:02<00:00, 24.93it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 49/49 [00:01<00:00, 27.41it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 69/69 [00:02<00:00, 25.53it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 42/42 [00:01<00:00, 27.32it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 59/59 [00:02<00:00, 24.88it/s]
100%|███████████████████████████████████

In [15]:
# Volumetric water content and electric conductivity.
# The "Table1" and "Table2" files are handled in two separate passes because
# merge() expects a separate pair of lists for each of them.

# pass 1: "Table1" files
for filename in FILENAMES_DAT_1:
    # merge() reads these two lists and `filename` from the notebook namespace
    all_data_VWC_1, all_data_EC_1 = [], []

    filenames = glob(f"*/*/{filename}", recursive=False)
    for datafile in tqdm(filenames):
        # one read for the water content, one for the conductivity
        all_data_VWC_1.append(preprocessing(datafile, 'Table1_VWC'))
        all_data_EC_1.append(preprocessing(datafile, 'Table1_EC'))

    merge('all_data_VWC_1')
    merge('all_data_EC_1')

# pass 2: "Table2" files
for filename in FILENAMES_DAT_2:
    all_data_VWC_2, all_data_EC_2 = [], []

    filenames = glob(f"*/*/{filename}", recursive=False)
    for datafile in tqdm(filenames):
        # one read for the water content, one for the conductivity
        all_data_VWC_2.append(preprocessing(datafile, 'Table2_VWC'))
        all_data_EC_2.append(preprocessing(datafile, 'Table2_EC'))

    merge('all_data_VWC_2')
    merge('all_data_EC_2')

100%|██████████████████████████████████████████████████████████████████████████████████| 47/47 [00:02<00:00, 22.31it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 71/71 [00:02<00:00, 27.67it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 47/47 [00:02<00:00, 21.09it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 70/70 [00:02<00:00, 25.85it/s]


In [27]:
# Ground water level, water temperature and logger temperature.
# The xlsx files and the csv files are processed in two separate passes.

# pass 1: xlsx files
for filename in FILENAMES_GWL:
    # merge() reads these three lists and `filename` from the notebook namespace
    all_data_GWL, all_data_WT, all_data_LT = [], [], []

    filenames = glob(f"*/*/{filename}", recursive=False)
    for datafile in tqdm(filenames):
        # each file is read three times, once per variable
        all_data_GWL.append(preprocessing(datafile, 'ground water level'))
        all_data_WT.append(preprocessing(datafile, 'water temperature'))
        all_data_LT.append(preprocessing(datafile, 'logger temperature'))

    merge('all_data_GWL')
    merge('all_data_WT')
    merge('all_data_LT')

# pass 2: csv files
for filename in FILENAMES_GWL_csv:
    all_data_GWL_csv, all_data_WT_csv, all_data_LT_csv = [], [], []

    filenames = glob(f"*/*/{filename}", recursive=False)
    for datafile in tqdm(filenames):
        # each file is read three times, once per variable
        all_data_GWL_csv.append(preprocessing(datafile, 'ground water level csv'))
        all_data_WT_csv.append(preprocessing(datafile, 'water temperature csv'))
        all_data_LT_csv.append(preprocessing(datafile, 'logger temperature csv'))

    merge('all_data_GWL_csv')
    merge('all_data_WT_csv')
    merge('all_data_LT_csv')

100%|██████████████████████████████████████████████████████████████████████████████████| 39/39 [01:39<00:00,  2.55s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:55<00:00,  2.78s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:18<00:00,  1.82s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [01:30<00:00,  3.03s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 33/33 [01:38<00:00,  3.00s/it]
100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 21.90it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 19.82it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:00<00:00, 19.45it/s]
100%|███████████████████████████████████

In [15]:
# River water level (csv files): one pass per sensor, because merge()
# expects a separate list for each sensor.

# sensor 1
for filename in FILENAMES_RWL_1:
    # merge() reads this list and `filename` from the notebook namespace
    all_data_RWL_1 = []

    filenames = glob(f"*/*/{filename}", recursive=False)
    for datafile in tqdm(filenames):
        all_data_RWL_1.append(preprocessing(datafile, 'river water level 1'))

    merge('all_data_RWL_1')

# sensor 2
for filename in FILENAMES_RWL_2:
    all_data_RWL_2 = []

    filenames = glob(f"*/*/{filename}", recursive=False)
    for datafile in tqdm(filenames):
        all_data_RWL_2.append(preprocessing(datafile, 'river water level 2'))

    merge('all_data_RWL_2')

# sensor 4
for filename in FILENAMES_RWL_4:
    all_data_RWL_4 = []

    filenames = glob(f"*/*/{filename}", recursive=False)
    for datafile in tqdm(filenames):
        all_data_RWL_4.append(preprocessing(datafile, 'river water level 4'))

    merge('all_data_RWL_4')

100%|██████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 54.33it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 34/34 [00:00<00:00, 76.55it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 52.52it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 58.68it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 50.61it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 23/23 [00:00<00:00, 46.76it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 12/12 [00:00<00:00, 48.38it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 30/30 [00:00<00:00, 48.84it/s]
100%|███████████████████████████████████

In [None]:
"""
Die nächsten Zeilen habe ich noch nicht gelöscht, da ich diese noch dem Alex zeigen möchte.
Ich habe aus den Tabellen wirklich nur den volumetrischen Wassergehalt rausgeholt, da die anderen Variablen nicht in der Liste stehen
die er mir mal per Mail geschickt hat.
Ist das richtig so?

"""

In [58]:
# Preview of one "Table1" file: load it, keep the timestamp column plus
# columns 2 to 4 and give them readable names.
df = pd.read_csv(
    'Daten-2023/11_19_23/Schafhof5_Table1.dat',
    skiprows=[1, 2, 3, 4],
    na_values='Logged',
).iloc[:, [0, 2, 3, 4]].copy()

df.columns = [
    'date_str',
    'volumetric water content [m^3/m^3; %]',
    'electric conductivity [dS/m]',
    'temperature [°C]',
]

# show the table
df

Unnamed: 0,date_str,volumetric water content [m^3/m^3; %],electric conductivity [dS/m],temperature [°C]
0,2023-10-05 01:00:00,0.2493,0.0058,15.5418
1,2023-10-05 01:05:00,0.2492,0.0061,15.5336
2,2023-10-05 01:10:00,0.2492,0.0060,15.5143
3,2023-10-05 01:15:00,0.2492,0.0059,15.5143
4,2023-10-05 01:20:00,0.2492,0.0060,15.4867
...,...,...,...,...
13066,2023-11-19 09:50:00,0.3830,0.0072,8.8431
13067,2023-11-19 09:55:00,0.3830,0.0073,8.8633
13068,2023-11-19 10:00:00,0.3828,0.0074,8.8691
13069,2023-11-19 10:05:00,0.3828,0.0073,8.8980


In [59]:
# Preview of one "Table2" file: load it, keep the timestamp column plus
# columns 2 to 5 and give them readable names.
df = pd.read_csv(
    'Daten-2023/11_19_23/Schafhof5_Table2.dat',
    skiprows=[1, 2, 3, 4],
    na_values='Logged',
).iloc[:, [0, 2, 3, 4, 5]].copy()

df.columns = [
    'date_str',
    'battery voltage [volts]',
    'volumetric water content [m^3/m^3; %]',
    'electric conductivity [dS/m]',
    'temperature [°C]',
]

# show the table
df

Unnamed: 0,date_str,battery voltage [volts],volumetric water content [m^3/m^3; %],electric conductivity [dS/m],temperature [°C]
0,2023-10-05 01:20:00,12.91225,0.2603,0.0011,16.1330
1,2023-10-05 01:25:00,12.91309,0.2603,0.0011,16.1138
2,2023-10-05 01:30:00,12.91309,0.2603,0.0011,16.0973
3,2023-10-05 01:35:00,12.89798,0.2603,0.0012,16.0973
4,2023-10-05 01:40:00,12.91393,0.2603,0.0011,16.1111
...,...,...,...,...,...
13062,2023-11-19 09:50:00,12.89294,0.3321,0.0014,10.0701
13063,2023-11-19 09:55:00,12.88958,0.3321,0.0013,10.0815
13064,2023-11-19 10:00:00,12.89546,0.3318,0.0016,10.1101
13065,2023-11-19 10:05:00,12.89378,0.3318,0.0014,10.0587


In [6]:
"""
Hier werde ich die Tabellen für die Pegel anzeigen lassen.
"""

'\nHier werde ich die Tabellen für die Tensiometer anzeigen lassen.\n'

In [35]:
# Preview of one river gauge file. It has no header row, so pandas labels
# the columns 0 (date), 1 (time) and 2 (value).
df = pd.read_csv(
    'Daten-2016/Daten-03-29/Pegel4_Büchelbach.csv',
    na_values='Logged',
    sep=';',
    header=None,
)

# join date and time, parse the result as a datetime and
# keep only the timestamp and the value column
df = df.assign(
    tstamp=pd.to_datetime(df[0] + ' ' + df[1], format='%d.%m.%Y %H:%M')
)[['tstamp', 2]]

# show the table
df

Unnamed: 0,tstamp,2
0,2016-02-29 00:05:00,0.08
1,2016-02-29 00:10:00,0.08
2,2016-02-29 00:15:00,0.08
3,2016-02-29 00:20:00,0.08
4,2016-02-29 00:25:00,0.08
...,...,...
8635,2016-03-29 23:40:00,---
8636,2016-03-29 23:45:00,---
8637,2016-03-29 23:50:00,---
8638,2016-03-29 23:55:00,---


In [None]:
# date_str to tstamp