# SJ_FS_FLUX_processing.ipynb

```
H Thompson
UQAM
Oct 2021
```

## Imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import os
import fnmatch as fn

## Constants

In [2]:
yr = 2021
mnth = 4

In [3]:
prev_mnth = int(mnth)-1
if prev_mnth == 12:
    prev_yr = yr-1
else:
    prev_yr = yr
    
next_mnth = int(mnth)+1
if next_mnth == 13:
    next_yr = yr+1
    next_mnth = 1
else:
    next_yr = yr

In [4]:
days = pd.Period(f'{yr}-{mnth}-01').days_in_month
ind = pd.date_range(f'{yr}-{mnth:02d}-01 00:00:00',f'{yr}-{mnth:02d}-{days} 23:59:00',freq='30min')

## Files

In [5]:
path = f'../FixedStation/flux/{yr}_{mnth:02d}/raw/'

AM_list = [f for f in os.listdir(path) if fn.fnmatch(f, '7049_*_AmeriFluxFormat_*.dat')]
CS_list = [f for f in os.listdir(path) if fn.fnmatch(f, 'TOA5_*_CSFormat_*.dat')]
FN_list = [f for f in os.listdir(path) if fn.fnmatch(f, 'TOA5_*_Notes_*.dat')]

In [6]:
if os.path.exists(f'../FixedStation/flux/{yr}_{prev_mnth:02d}/raw/'):
    prev_path = f'../FixedStation/flux/{yr}_{prev_mnth:02d}/raw/'
    
    prev_AM_list = [f for f in os.listdir(prev_path) if fn.fnmatch(f, '7049_*_AmeriFluxFormat_*.dat')]
    prev_CS_list = [f for f in os.listdir(prev_path) if fn.fnmatch(f, 'TOA5_*_CSFormat_*.dat')]
    prev_FN_list = [f for f in os.listdir(prev_path) if fn.fnmatch(f, 'TOA5_*_Notes_*.dat')]

In [7]:
if os.path.exists(f'../FixedStation/flux/{yr}_{next_mnth:02d}/raw/'):
    next_path = f'../FixedStation/flux/{yr}_{next_mnth:02d}/raw/'

    next_AM_list = [f for f in os.listdir(next_path) if fn.fnmatch(f, '7049_*_AmeriFluxFormat_*.dat')]
    next_CS_list = [f for f in os.listdir(next_path) if fn.fnmatch(f, 'TOA5_*_CSFormat_*.dat')]
    next_FN_list = [f for f in os.listdir(next_path) if fn.fnmatch(f, 'TOA5_*_Notes_*.dat')]

In [8]:
frames = []
prev_frames = []
next_frames = []

for f in AM_list:
    print(f'Started working on {f}')
    frame = pd.read_csv(path + f'{f}', header=0, skiprows=(1,2))

    frame['Timestamp'] = pd.to_datetime(frame.TIMESTAMP_START, format='%Y%m%d%H%M')

    frame.set_index(frame.Timestamp, inplace=True)
    
    frame.sort_index(inplace=True)
    
    frames.append(frame)
    
if os.path.exists(f'../FixedStation/flux/{yr}_{prev_mnth:02d}/'):
    for f in prev_AM_list:
        print(f'Started working on {f}')
        prev_frame = pd.read_csv(prev_path + f'{f}', header=0, skiprows=(1,2))

        prev_frame['Timestamp'] = pd.to_datetime(prev_frame.TIMESTAMP_START, format='%Y%m%d%H%M')

        prev_frame.set_index(prev_frame.Timestamp, inplace=True)

        prev_frame.sort_index(inplace=True)

        prev_frames.append(prev_frame)

frames.extend(prev_frames)

if os.path.exists(f'../FixedStation/flux/{yr}_{next_mnth:02d}/'):
    for f in next_AM_list:
        print(f'Started working on {f}')
        next_frame = pd.read_csv(next_path + f'{f}', header=0, skiprows=(1,2))

        next_frame['Timestamp'] = pd.to_datetime(next_frame.TIMESTAMP_START, format='%Y%m%d%H%M')

        next_frame.set_index(next_frame.Timestamp, inplace=True)

        next_frame.sort_index(inplace=True)

        next_frames.append(next_frame)
        
frames.extend(next_frames)

AM_df = pd.concat(frames)

AM_df.sort_index(inplace=True)

del(AM_df['Timestamp'])

sub_df = AM_df[AM_df.index.month==int(mnth)]
sub_df = sub_df[~sub_df.index.duplicated()]
reindxd_df = sub_df.reindex(index=ind)
reindxd_df.index.name='Timestamp'

out_path = f'../FixedStation/flux/{yr}_{mnth:02d}/master/'
reindxd_df.to_csv(out_path + f'SJ_FS_FLUX_AM_{yr}{mnth:02d}.txt')
print(f'Created: ' + out_path + f'SJ_FS_FLUX_AM_{yr}{mnth:02d}.txt')

Started working on 7049_Flux_AmeriFluxFormat_5.dat
Started working on 7049_Flux_AmeriFluxFormat_4.dat
Started working on 7049_Flux_AmeriFluxFormat_3.dat
Started working on 7049_Flux_AmeriFluxFormat_0.dat
Started working on 7049_Flux_AmeriFluxFormat_1.dat
Started working on 7049_Flux_AmeriFluxFormat_2.dat
Created: ../FixedStation/flux/2021_04/master/SJ_FS_FLUX_AM_202104.txt


In [9]:
frames = []
prev_frames = []
next_frames = []

for f in CS_list:
    print(f'Started working on {f}')
    frame = pd.read_csv(path + f'r{f}', header=1, skiprows=(2,3))

    frame.TIMESTAMP = pd.to_datetime(frame.TIMESTAMP, infer_datetime_format=True)

    frame.set_index(frame.TIMESTAMP, inplace=True)
    
    frame.sort_index(inplace=True)
    
    frames.append(frame)
    
if os.path.exists(f'../FixedStation/flux/{yr}_{prev_mnth:02d}/'):
    for f in prev_CS_list:
        print(f'Started working on {f}')
        prev_frame = pd.read_csv(prev_path + f'{f}', header=0, skiprows=(1,2))

        prev_frame['Timestamp'] = pd.to_datetime(prev_frame.TIMESTAMP_START, format='%Y%m%d%H%M')

        prev_frame.set_index(prev_frame.Timestamp, inplace=True)

        prev_frame.sort_index(inplace=True)

        prev_frames.append(prev_frame)

frames.extend(prev_frames)

if os.path.exists(f'../FixedStation/flux/{yr}_{next_mnth:02d}/'):
    for f in next_CS_list:
        print(f'Started working on {f}')
        next_frame = pd.read_csv(next_path + f'{f}', header=0, skiprows=(1,2))

        next_frame['Timestamp'] = pd.to_datetime(next_frame.TIMESTAMP_START, format='%Y%m%d%H%M')

        next_frame.set_index(next_frame.Timestamp, inplace=True)

        next_frame.sort_index(inplace=True)

        next_frames.append(next_frame)
        
frames.extend(next_frames)

CS_df = pd.concat(frames)

CS_df.sort_index(inplace=True)

sub_df = CS_df[CS_df.index.month==int(mnth)]
sub_df = sub_df[~sub_df.index.duplicated()]
reindxd_df = sub_df.reindex(index=ind)
reindxd_df.index.name='Timestamp'

del(reindxd_df['TIMESTAMP'])

out_path = f'../FixedStation/flux/{yr}_{mnth:02d}/master/'
reindxd_df.to_csv(out_path + f'SJ_FS_FLUX_CS_{yr}{mnth:02d}.txt')

Started working on TOA5_7049_Flux_CSFormat_23.dat


FileNotFoundError: [Errno 2] No such file or directory: '../FixedStation/flux/2021_04/raw/rTOA5_7049_Flux_CSFormat_23.dat'

In [None]:
frames = []
prev_frames = []
next_frames = []

for f in FN_list:
    print(f'Started working on {f}')
    frame = pd.read_csv(path + f'raw/{f}', header=1, skiprows=(2,3))

    frame.TIMESTAMP = pd.to_datetime(frame.TIMESTAMP, infer_datetime_format=True)

    frame.set_index(frame.TIMESTAMP, inplace=True)
    
    frame.sort_index(inplace=True)
    
    frames.append(frame)
    
if os.path.exists(f'../FixedStation/flux/{yr}_{prev_mnth:02d}/'):
    for f in prev_FN_list:
        print(f'Started working on {f}')
        prev_frame = pd.read_csv(prev_path + f'{f}', header=0, skiprows=(1,2))

        prev_frame['Timestamp'] = pd.to_datetime(prev_frame.TIMESTAMP_START, format='%Y%m%d%H%M')

        prev_frame.set_index(prev_frame.Timestamp, inplace=True)

        prev_frame.sort_index(inplace=True)

        prev_frames.append(prev_frame)

frames.extend(prev_frames)

if os.path.exists(f'../FixedStation/flux/{yr}_{next_mnth:02d}/'):
    for f in next_FN_list:
        print(f'Started working on {f}')
        next_frame = pd.read_csv(next_path + f'{f}', header=0, skiprows=(1,2))

        next_frame['Timestamp'] = pd.to_datetime(next_frame.TIMESTAMP_START, format='%Y%m%d%H%M')

        next_frame.set_index(next_frame.Timestamp, inplace=True)

        next_frame.sort_index(inplace=True)

        next_frames.append(next_frame)
        
frames.extend(next_frames)

FN_df = pd.concat(frames)

FN_df.sort_index(inplace=True)

sub_df = FN_df[FN_df.index.month==int(mnth)]
sub_df = sub_df[~sub_df.index.duplicated()]
reindxd_df = sub_df.reindex(index=ind)
reindxd_df.index.name='Timestamp'

del(reindxd_df['TIMESTAMP'])

out_path = f'../FixedStation/flux/{yr}_{mnth:02d}/master/'
reindxd_df.to_csv(out_path + f'SJ_FS_FLUX_FN_{yr}{mnth:02d}.txt')

In [None]:
print('Done ;-)')