![logo](../../images/logo_diive1_128px.png)

<span style='font-size:32px; display:block;'>
<b>
    Load and save Parquet file
</b>
</span>

---
**Notebook version**: `2` (24 Oct 2023)  
**Author**: Lukas Hörtnagl (holukas@ethz.ch)  

<br>

# **Background**

> Apache Parquet is an open source, column-oriented data file format designed for efficient data storage and retrieval. It provides efficient data compression and encoding schemes with enhanced performance to handle complex data in bulk. Parquet is available in multiple languages including Java, C++, Python, etc...

source: https://parquet.apache.org/

- Parquet files are typically much smaller than, e.g., CSV files and are faster to load and save

<br>

# **Imports**

In [1]:
import importlib.metadata
import os
from datetime import datetime
from pathlib import Path

from diive.core.io.filereader import MultiDataFileReader
from diive.core.io.files import save_parquet, load_parquet

# Look up the installed diive release and print it, so the outputs in this
# notebook can be matched to the library version that produced them.
version_diive = importlib.metadata.version("diive")
print(f"diive version: v{version_diive}")

diive version: v0.80.0


<br>

# **Docstring** of `save_parquet`

In [2]:
# Print the signature and docstring of save_parquet as a reference for the calls below.
help(save_parquet)

Help on function save_parquet in module diive.core.io.files:

save_parquet(filename: str, data: pandas.core.frame.DataFrame, outpath: str = None) -> str
    Save pandas Series or DataFrame as parquet file
    
    Args:
        filename: str
            Name of the generated parquet file.
        data: pandas Series or DataFrame
        outpath: str or None
            If *None*, file is saved to system default folder. When used within
            a notebook, the file is saved in the same location as the notebook.
    
    Returns:
        str, filepath to parquet file



<br>

# **Docstring** of `load_parquet`

In [3]:
# Print the signature and docstring of load_parquet as a reference for the calls below.
help(load_parquet)

Help on function load_parquet in module diive.core.io.files:

load_parquet(filepath: str, output_middle_timestamp: bool = True) -> pandas.core.frame.DataFrame
    Load data from Parquet file to pandas DataFrame
    
    Args:
        filepath: str
            filepath to parquet file
        output_middle_timestamp: Converts the timestamp to show the middle
            of the averaging interval.
    
    Returns:
        pandas DataFrame, data from Parquet file as pandas DataFrame



<br>

# **Input data**

## Source and output folder

In [4]:
# Folder that is searched for the input CSV files and that also receives the
# parquet file written later in this notebook.
# NOTE: machine-specific absolute path -- adjust it to your own setup before running.
DIR = Path(r'F:\TMP\example')

## Search files in folder

In [5]:
# Collect the EddyPro FLUXNET CSV files found directly in DIR.
# - The 'fluxnet' check is applied to the file name only, so a folder whose
#   path happens to contain 'fluxnet' cannot let unrelated CSV files through.
# - os.listdir() returns entries in arbitrary order; sorting makes the list
#   (and the order in which files are read) reproducible between runs.
filepaths = sorted(
    DIR / name
    for name in os.listdir(DIR)
    if name.endswith(".csv") and 'fluxnet' in name
)

# Plain loop instead of a list comprehension: printing is a side effect, and
# the comprehension made the cell display a meaningless list of None values.
for found_file in filepaths:
    print(found_file)

F:\TMP\example\2018_3_IRGA_eddypro_CH-CHA_FR-20240730-112310_fluxnet_2024-07-30T132817_adv.csv
F:\TMP\example\2018_4_IRGA_eddypro_CH-CHA_FR-20240730-112301_fluxnet_2024-07-30T144534_adv.csv


[None, None]

## Load and merge data from files
- Read datafiles in `filepaths` using `diive`
- All datafiles are in the `EDDYPRO-FLUXNET-CSV-30MIN` format

In [6]:
# Read all files listed in `filepaths` with the EddyPro FLUXNET 30MIN CSV
# filetype and keep the combined data as one dataframe.
# output_middle_timestamp=False: the timestamp is not converted to the middle
# of the averaging interval.
loaddatafile = MultiDataFileReader(
    filepaths=filepaths,
    filetype='EDDYPRO-FLUXNET-CSV-30MIN',
    output_middle_timestamp=False,
)
df = loaddatafile.data_df

Reading file 2018_3_IRGA_eddypro_CH-CHA_FR-20240730-112310_fluxnet_2024-07-30T132817_adv.csv ...
Reading file 2018_4_IRGA_eddypro_CH-CHA_FR-20240730-112301_fluxnet_2024-07-30T144534_adv.csv ...


## Check data

In [7]:
# Display the dataframe read from the CSV files (the rendered view is truncated).
df

Unnamed: 0_level_0,AIR_MV,AIR_DENSITY,AIR_RHO_CP,AIR_CP,AOA_METHOD,AXES_ROTATION_METHOD,BOWEN,BURBA_METHOD,BADM_LOCATION_LAT,BADM_LOCATION_LONG,BADM_LOCATION_ELEV,BADM_HEIGHTC,BADM_INST_SAMPLING_INT,BADM_INST_AVERAGING_INT,BADM_INST_MODEL_SA,BADM_INST_HEIGHT_SA,BADM_INST_SA_WIND_FORMAT,BADM_INST_SA_GILL_ALIGN,BADM_SA_OFFSET_NORTH,BADM_INST_MODEL_GA_CO2,BADM_INSTPAIR_NORTHWARD_SEP_GA_CO2,BADM_INSTPAIR_EASTWARD_SEP_GA_CO2,BADM_INSTPAIR_HEIGHT_SEP_GA_CO2,BADM_INST_GA_CP_TUBE_LENGTH_GA_CO2,BADM_INST_GA_CP_TUBE_IN_DIAM_GA_CO2,...,W_NONE_MEAS_COV,W_T_SONIC_COV_IBROM,W_T_SONIC_COV_IBROM_N1626,W_T_SONIC_COV_IBROM_N0614,W_T_SONIC_COV_IBROM_N0277,W_T_SONIC_COV_IBROM_N0133,W_T_SONIC_COV_IBROM_N0065,W_T_SONIC_COV_IBROM_N0032,W_T_SONIC_COV_IBROM_N0016,W_T_SONIC_COV_IBROM_N0008,W_T_SONIC_COV_IBROM_N0004,W_NUM_SPIKES,WD_FILTER_NREX,W_SPIKE_NREX,W_ABSLIM_NREX,W_VM97_TEST,W_LGD,W_KID,W_ZCD,W_ITC,W_ITC_TEST,WBOOST_APPLIED,WPL_APPLIED,ZL,ZL_UNCORR
TIMESTAMP_END,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
2018-11-26 15:30:00,0.023997,1.20316,1214.94,1009.79,0.0,1.0,-0.125683,0.0,47.2102,8.41064,393.0,0.5,20.0,30.0,,2.41,,,7.0,,14.0,31.0,1.0,,,...,,-0.009210,-0.009217,-0.009239,-0.009280,-0.009345,-0.009437,-0.009532,-0.009573,-0.009445,-0.008911,2.0,0.0,2.0,0.0,800000011.0,0.0,89.79680,810.0,33.0,3.0,0.0,1.0,0.244814,0.133210
2018-11-26 16:00:00,0.023993,1.20333,1215.14,1009.81,0.0,1.0,-0.627610,0.0,47.2102,8.41064,393.0,0.5,20.0,30.0,,2.41,,,7.0,,14.0,31.0,1.0,,,...,,-0.001973,-0.001972,-0.001971,-0.001970,-0.001973,-0.001980,-0.001982,-0.001948,-0.001830,-0.001610,1.0,0.0,1.0,0.0,800000011.0,0.0,7.98638,2271.0,1.0,1.0,0.0,1.0,0.229719,0.207025
2018-11-26 16:30:00,0.023974,1.20432,1216.09,1009.77,0.0,1.0,-1.629300,0.0,47.2102,8.41064,393.0,0.5,20.0,30.0,,2.41,,,7.0,,14.0,31.0,1.0,,,...,,-0.004287,-0.004284,-0.004277,-0.004267,-0.004257,-0.004246,-0.004208,-0.004110,-0.003899,-0.003418,1.0,0.0,2.0,0.0,800000011.0,0.0,7.33939,402.0,31.0,3.0,0.0,1.0,0.100793,0.096663
2018-11-26 17:00:00,0.023960,1.20506,1216.79,1009.73,0.0,1.0,-0.563474,0.0,47.2102,8.41064,393.0,0.5,20.0,30.0,,2.41,,,7.0,,14.0,31.0,1.0,,,...,,-0.001602,-0.001595,-0.001579,-0.001548,-0.001497,-0.001421,-0.001327,-0.001226,-0.001115,-0.000991,1.0,0.0,3.0,0.0,800000011.0,0.0,10.06640,808.0,3.0,1.0,0.0,1.0,0.050302,0.044926
2018-11-26 17:30:00,0.023957,1.20519,1216.94,1009.75,0.0,1.0,0.442857,0.0,47.2102,8.41064,393.0,0.5,20.0,30.0,,2.41,,,7.0,,14.0,31.0,1.0,,,...,,0.000445,0.000443,0.000440,0.000434,0.000424,0.000411,0.000401,0.000384,0.000322,0.000215,0.0,0.0,0.0,0.0,800000111.0,0.0,11.64380,6135.0,17.0,2.0,0.0,1.0,-0.073620,-0.084776
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-31 23:00:00,0.023359,1.23453,1234.85,1000.26,0.0,1.0,-4.233310,0.0,47.2102,8.41064,393.0,0.5,20.0,30.0,,2.41,,,7.0,,14.0,31.0,1.0,,,...,,-0.001062,-0.001053,-0.001032,-0.000993,-0.000937,-0.000868,-0.000787,-0.000686,-0.000544,-0.000363,1.0,0.0,2.0,0.0,800000111.0,0.0,6.77310,2341.0,10.0,1.0,0.0,1.0,0.605573,0.588933
2018-12-31 23:30:00,0.023339,1.23558,1236.00,1000.34,0.0,1.0,-3.348450,0.0,47.2102,8.41064,393.0,0.5,20.0,30.0,,2.41,,,7.0,,14.0,31.0,1.0,,,...,,-0.002138,-0.002140,-0.002138,-0.002130,-0.002113,-0.002080,-0.002024,-0.001936,-0.001803,-0.001591,2.0,0.0,4.0,0.0,800000011.0,0.0,6.82442,3567.0,1.0,1.0,0.0,1.0,1.949490,1.863090
2019-01-01 00:00:00,0.023364,1.23429,1234.62,1000.27,0.0,1.0,-2.231940,0.0,47.2102,8.41064,393.0,0.5,20.0,30.0,,2.41,,,7.0,,14.0,31.0,1.0,,,...,,-0.001200,-0.001189,-0.001166,-0.001132,-0.001085,-0.001024,-0.000956,-0.000890,-0.000819,-0.000710,2.0,0.0,3.0,0.0,801000011.0,0.0,4.77487,1750.0,41.0,3.0,0.0,1.0,0.527833,0.508103
2019-01-01 00:30:00,0.023388,1.23297,1233.13,1000.13,0.0,1.0,-1.398660,0.0,47.2102,8.41064,393.0,0.5,20.0,30.0,,2.41,,,7.0,,14.0,31.0,1.0,,,...,,0.000250,0.000248,0.000248,0.000251,0.000257,0.000269,0.000290,0.000326,0.000366,0.000386,3.0,0.0,7.0,0.0,801000011.0,0.0,6.44980,7908.0,37.0,3.0,0.0,1.0,-0.123867,-0.119881


<br>

# **Save dataframe as parquet file**

## (1) Save to specific folder

In [8]:
# Write `df` as a parquet file named "output_file" into DIR; save_parquet
# returns the path of the file it wrote.
filepath = save_parquet(
    filename="output_file",
    data=df,
    outpath=DIR,
)

Saved file F:\TMP\example\output_file.parquet (0.054 seconds).


## (2) Save to same folder as notebook

In [9]:
# Without `outpath`, save_parquet stores the file in the same location as this
# notebook. Left commented out so that `filepath` keeps pointing to the file
# saved in (1); uncomment to try it.
# filepath = save_parquet(filename="output_file", data=df)

## Filepath

In [10]:
# Path of the saved parquet file, as returned by save_parquet.
filepath

'F:\\TMP\\example\\output_file.parquet'

<br>

# **Load data from parquet file**

In [13]:
# Read the parquet file written above back into a dataframe.
# output_middle_timestamp=False: the timestamp is not converted to the middle
# of the averaging interval.
data_from_parquet = load_parquet(
    filepath=filepath,
    output_middle_timestamp=False,
)

Loaded .parquet file F:\TMP\example\output_file.parquet (0.023 seconds). Detected time resolution of <30 * Minutes> / 30min 


In [14]:
# Display the data loaded from the parquet file; compare with the dataframe shown before saving.
data_from_parquet

Unnamed: 0_level_0,AIR_MV,AIR_DENSITY,AIR_RHO_CP,AIR_CP,AOA_METHOD,AXES_ROTATION_METHOD,BOWEN,BURBA_METHOD,BADM_LOCATION_LAT,BADM_LOCATION_LONG,BADM_LOCATION_ELEV,BADM_HEIGHTC,BADM_INST_SAMPLING_INT,BADM_INST_AVERAGING_INT,BADM_INST_MODEL_SA,BADM_INST_HEIGHT_SA,BADM_INST_SA_WIND_FORMAT,BADM_INST_SA_GILL_ALIGN,BADM_SA_OFFSET_NORTH,BADM_INST_MODEL_GA_CO2,BADM_INSTPAIR_NORTHWARD_SEP_GA_CO2,BADM_INSTPAIR_EASTWARD_SEP_GA_CO2,BADM_INSTPAIR_HEIGHT_SEP_GA_CO2,BADM_INST_GA_CP_TUBE_LENGTH_GA_CO2,BADM_INST_GA_CP_TUBE_IN_DIAM_GA_CO2,...,W_NONE_MEAS_COV,W_T_SONIC_COV_IBROM,W_T_SONIC_COV_IBROM_N1626,W_T_SONIC_COV_IBROM_N0614,W_T_SONIC_COV_IBROM_N0277,W_T_SONIC_COV_IBROM_N0133,W_T_SONIC_COV_IBROM_N0065,W_T_SONIC_COV_IBROM_N0032,W_T_SONIC_COV_IBROM_N0016,W_T_SONIC_COV_IBROM_N0008,W_T_SONIC_COV_IBROM_N0004,W_NUM_SPIKES,WD_FILTER_NREX,W_SPIKE_NREX,W_ABSLIM_NREX,W_VM97_TEST,W_LGD,W_KID,W_ZCD,W_ITC,W_ITC_TEST,WBOOST_APPLIED,WPL_APPLIED,ZL,ZL_UNCORR
TIMESTAMP_END,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
2018-11-26 15:30:00,0.023997,1.20316,1214.94,1009.79,0.0,1.0,-0.125683,0.0,47.2102,8.41064,393.0,0.5,20.0,30.0,,2.41,,,7.0,,14.0,31.0,1.0,,,...,,-0.009210,-0.009217,-0.009239,-0.009280,-0.009345,-0.009437,-0.009532,-0.009573,-0.009445,-0.008911,2.0,0.0,2.0,0.0,800000011.0,0.0,89.79680,810.0,33.0,3.0,0.0,1.0,0.244814,0.133210
2018-11-26 16:00:00,0.023993,1.20333,1215.14,1009.81,0.0,1.0,-0.627610,0.0,47.2102,8.41064,393.0,0.5,20.0,30.0,,2.41,,,7.0,,14.0,31.0,1.0,,,...,,-0.001973,-0.001972,-0.001971,-0.001970,-0.001973,-0.001980,-0.001982,-0.001948,-0.001830,-0.001610,1.0,0.0,1.0,0.0,800000011.0,0.0,7.98638,2271.0,1.0,1.0,0.0,1.0,0.229719,0.207025
2018-11-26 16:30:00,0.023974,1.20432,1216.09,1009.77,0.0,1.0,-1.629300,0.0,47.2102,8.41064,393.0,0.5,20.0,30.0,,2.41,,,7.0,,14.0,31.0,1.0,,,...,,-0.004287,-0.004284,-0.004277,-0.004267,-0.004257,-0.004246,-0.004208,-0.004110,-0.003899,-0.003418,1.0,0.0,2.0,0.0,800000011.0,0.0,7.33939,402.0,31.0,3.0,0.0,1.0,0.100793,0.096663
2018-11-26 17:00:00,0.023960,1.20506,1216.79,1009.73,0.0,1.0,-0.563474,0.0,47.2102,8.41064,393.0,0.5,20.0,30.0,,2.41,,,7.0,,14.0,31.0,1.0,,,...,,-0.001602,-0.001595,-0.001579,-0.001548,-0.001497,-0.001421,-0.001327,-0.001226,-0.001115,-0.000991,1.0,0.0,3.0,0.0,800000011.0,0.0,10.06640,808.0,3.0,1.0,0.0,1.0,0.050302,0.044926
2018-11-26 17:30:00,0.023957,1.20519,1216.94,1009.75,0.0,1.0,0.442857,0.0,47.2102,8.41064,393.0,0.5,20.0,30.0,,2.41,,,7.0,,14.0,31.0,1.0,,,...,,0.000445,0.000443,0.000440,0.000434,0.000424,0.000411,0.000401,0.000384,0.000322,0.000215,0.0,0.0,0.0,0.0,800000111.0,0.0,11.64380,6135.0,17.0,2.0,0.0,1.0,-0.073620,-0.084776
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-12-31 23:00:00,0.023359,1.23453,1234.85,1000.26,0.0,1.0,-4.233310,0.0,47.2102,8.41064,393.0,0.5,20.0,30.0,,2.41,,,7.0,,14.0,31.0,1.0,,,...,,-0.001062,-0.001053,-0.001032,-0.000993,-0.000937,-0.000868,-0.000787,-0.000686,-0.000544,-0.000363,1.0,0.0,2.0,0.0,800000111.0,0.0,6.77310,2341.0,10.0,1.0,0.0,1.0,0.605573,0.588933
2018-12-31 23:30:00,0.023339,1.23558,1236.00,1000.34,0.0,1.0,-3.348450,0.0,47.2102,8.41064,393.0,0.5,20.0,30.0,,2.41,,,7.0,,14.0,31.0,1.0,,,...,,-0.002138,-0.002140,-0.002138,-0.002130,-0.002113,-0.002080,-0.002024,-0.001936,-0.001803,-0.001591,2.0,0.0,4.0,0.0,800000011.0,0.0,6.82442,3567.0,1.0,1.0,0.0,1.0,1.949490,1.863090
2019-01-01 00:00:00,0.023364,1.23429,1234.62,1000.27,0.0,1.0,-2.231940,0.0,47.2102,8.41064,393.0,0.5,20.0,30.0,,2.41,,,7.0,,14.0,31.0,1.0,,,...,,-0.001200,-0.001189,-0.001166,-0.001132,-0.001085,-0.001024,-0.000956,-0.000890,-0.000819,-0.000710,2.0,0.0,3.0,0.0,801000011.0,0.0,4.77487,1750.0,41.0,3.0,0.0,1.0,0.527833,0.508103
2019-01-01 00:30:00,0.023388,1.23297,1233.13,1000.13,0.0,1.0,-1.398660,0.0,47.2102,8.41064,393.0,0.5,20.0,30.0,,2.41,,,7.0,,14.0,31.0,1.0,,,...,,0.000250,0.000248,0.000248,0.000251,0.000257,0.000269,0.000290,0.000326,0.000366,0.000386,3.0,0.0,7.0,0.0,801000011.0,0.0,6.44980,7908.0,37.0,3.0,0.0,1.0,-0.123867,-0.119881


<br>

# **Finish**

In [15]:
# Record when the notebook run completed (current time, formatted to the second).
dt_string = format(datetime.now(), "%Y-%m-%d %H:%M:%S")
print(f"Finished {dt_string}")

Finished 2024-08-28 10:37:31
