## Example Usage



## database.py

* Create an empty database

In [66]:
import importlib
# see: database.py
import database as pyDB
importlib.reload(pyDB)

import os
import shutil
import numpy as np
import pandas as pd

# Path of the scratch "test" database folder, placed inside the current working directory
dbpth = os.path.join(os.getcwd(), 'test')

# Start from a clean slate: wipe the "test" folder if it already exists
if os.path.isdir(dbpth):
    shutil.rmtree(dbpth)

# TODO: fix initialization redundancy

# Create an empty database at that path.
# Arguments commented out in this example (presumably optional -- confirm against database.py):
#   siteID=['SCL']
#   Years=[str(y) for y in range(2024,2026)]
db = pyDB.database(projectPath=dbpth)


Initializing empty database
Creating folder:  c:\Users\jskeeter\gsc-permafrost\EC_dataPipeline\test\_metadata.yml
Creating file:  c:\Users\jskeeter\gsc-permafrost\EC_dataPipeline\test\_logfile.txt


## Add Data

Create and add some arbitrary data
<!-- 2) Add some arbitrary data
3) Add new arbitrary data
    * Example is done without overwriting existing values, option exists to overwrite values if needed -->

In [51]:
# The import takes a pandas DataFrame and a dict of metadata as its key inputs.
# In normal use both are parsed from a file by other scripts (see rawDataFile.py);
# the values below are a dummy example for illustration purposes only.

import rawDataFile
importlib.reload(rawDataFile)
importlib.reload(pyDB)

db = pyDB.database(projectPath=dbpth)

# Dummy dataset: two numeric traces plus one text column
testData = pd.DataFrame(
    {
        'TA': [-1, -2, -3, -4],
        'T*': [-1.0, -2.00002, -3.0, -4.2],
        'Garb': ['some', 'text', 'to', 'ignore'],
    },
    index=pd.DatetimeIndex([
        '2024-12-31 23:30',
        '2025-01-01 00:00',
        '2025-01-01 00:30',
        '2025-01-01 01:00',
    ]),
)

# genericLoggerFile() bundles the data and metadata of one file in a standardized form;
# it is typically called from a raw file parser function (e.g., hoboCSV)
testData = rawDataFile.genericLoggerFile(
    Data=testData,
    siteID='Test',
    subSiteID='MetStation',
)

# Run the import: standardized data first, its metadata second
db.rawDatabaseImport(testData.Data, testData.Metadata)

Standardizing and documenting traces
Re-named:  T*  to:  T_
raw file Metadata: 

 siteID: Test
fileType: null
loggerName: loggerName
subSiteID: MetStation
siteDescription: null
replicateID: 1
frequency: 30min
timeZone: UTC
lat: 0.0
lon: 0.0
Variables:
  TA:
    ignore: false
    name_in: TA
    unit_in: null
    safe_name: TA
    dtype: <i8
    variableDescription: null
  T_:
    ignore: false
    name_in: T*
    unit_in: null
    safe_name: T_
    dtype: <f8
    variableDescription: null
  Garb:
    ignore: true
    name_in: Garb
    unit_in: null
    safe_name: Garb
    dtype: '|O'
    variableDescription: null
 

Dropping non-numeric data
Writing:  c:\Users\jskeeter\gsc-permafrost\EC_dataPipeline\test\2024\Test\raw\MetStation\POSIX_timestamp
Writing:  c:\Users\jskeeter\gsc-permafrost\EC_dataPipeline\test\2024\Test\raw\MetStation\TA
Writing:  c:\Users\jskeeter\gsc-permafrost\EC_dataPipeline\test\2024\Test\raw\MetStation\T_
Writing:  c:\Users\jskeeter\gsc-permafrost\EC_dataPipeline\te

In [None]:
# Re-open the existing database, this time with verbose=False
db = pyDB.database(projectPath=dbpth, verbose=False)

# A dummy dataset with the same two traces, used to update the database.
# It spans the time period of the original dataset (but with an erroneous null value
# at 01:00: -9999 in TA, NaN in T*) and then adds one new timestamp (01:30).
testUpdate = pd.DataFrame(
    index=pd.DatetimeIndex(['2024-12-31 23:30', '2025-01-01 00:00', '2025-01-01 00:30','2025-01-01 01:00','2025-01-01 01:30']),
    data={'TA':[-1,-2,-3,-9999,-5],
          'T*':[-1.0,-2.00002,-3.0,np.nan,-5.1],}
)

# Format the data (same site/sub-site as the original import; reuse the database's verbosity)
testUpdate = rawDataFile.genericLoggerFile(siteID='Test',subSiteID='MetStation',Data=testUpdate,verbose=db.verbose)

# Call the import with the update dataset (testUpdate), not the original testData.
# Will by default, only fill nan-values in the original database trace
# The null values from the incoming array won't have any impact on the existing data
# but the timestamps which don't exist in the database will be written
db.rawDatabaseImport(testUpdate.Data,testUpdate.Metadata)



In [60]:
# Show the database object's internal `_map` attribute as the cell output
# (NOTE(review): `_map` is a private attribute; what it is expected to hold is not shown here -- confirm against database.py)
db._map

{}