In [1]:
import os
import pickle

import numpy as np
import pandas as pd

In [2]:
# Load the lakemask table (CSV) into a dataframe and preview its first rows.
folder = r"C:\Users\frede\OneDrive\Dokumenter\DTU\bachelor\GIS"
file = r"\lakemask.csv"
# `file` already starts with a backslash, so plain concatenation yields the full path.
path = f"{folder}{file}"
lakemask_df = pd.read_csv(path, engine='python')
lakemask_df.head()

Unnamed: 0,lake_id,basin_id,names,grand_id,ref_area,ref_wse,date_t0,ds_t0,pass_full,pass_part,...,ref_area_u,ref_wse_u,storage,ice_clim_f,ice_dyn_fl,lon,lat,reach_id_l,layer,path
0,1140000602,114,LAKE INGIA,4490,5770.17,-9999.0,-9999,255.0,,443;568,...,-100000000000.0,-100000000000.0,-100000000000.0,-999,-999,39.366547,7.128828,,af_20,/home/karina/SWOT_lake_simple_20/af_20.shp
1,1140002942,114,LACH DERA,0,9243.49,-9999.0,-9999,255.0,109;540,,...,-100000000000.0,-100000000000.0,-100000000000.0,-999,-999,42.429474,0.045416,,af_20,/home/karina/SWOT_lake_simple_20/af_20.shp
2,1150000822,115,LAKE ELMENTEITA,0,2109.89,-9999.0,-9999,255.0,318;471,,...,-100000000000.0,-100000000000.0,-100000000000.0,-999,-999,36.240613,-0.440324,,af_20,/home/karina/SWOT_lake_simple_20/af_20.shp
3,1150001092,115,LAKE NAKURU,0,5393.69,-9999.0,-9999,255.0,471,318,...,-100000000000.0,-100000000000.0,-100000000000.0,-999,-999,36.091035,-0.360965,,af_20,/home/karina/SWOT_lake_simple_20/af_20.shp
4,1150001122,115,LAKE ILPOLOSAT;OL?BOLLOSAT,0,2304.33,-9999.0,-9999,255.0,318,471,...,-100000000000.0,-100000000000.0,-100000000000.0,-999,-999,36.437673,-0.137849,,af_20,/home/karina/SWOT_lake_simple_20/af_20.shp


In [3]:
# Build a one-column dataframe ('lake_id') holding every lake ID from the list file.
folder = r"C:\Users\frede\OneDrive\Dokumenter\DTU\bachelor\data\ts_lakes"
file = r"\ts_lakes_list.txt"
path = f"{folder}{file}"

# NOTE(review): the row with label 5140 is removed by a hard-coded index before the
# numeric conversion -- presumably a non-numeric entry in the list; confirm, since
# this breaks as soon as the list changes length.
lake_id_df = (
    pd.read_csv(path, engine='python', header=None)
    .drop(labels=5140, axis=0)
    .rename(columns={0: 'lake_id'})
)
lake_id_df['lake_id'] = pd.to_numeric(lake_id_df['lake_id'])
lake_id_df

Unnamed: 0,lake_id
0,1140000602
1,1150000822
2,1150001092
3,1150002502
4,1150002582
...,...
5135,9120294102
5136,9130036692
5137,9130036722
5138,9130036772


In [4]:
# Every lake folder contains a 'lake_dir.txt' file listing the files in that folder.
# Example: read the listing for the first lake in lake_id_df.
lake = lake_id_df.lake_id.iloc[0]  # first lake by position, independent of index labels
# Backslashes are escaped explicitly: "\l" is not a valid escape sequence and
# triggers a SyntaxWarning on recent Python versions.
lake_path = folder + "\\" + str(lake) + "\\lake_dir.txt"
lake_info_df = pd.read_csv(lake_path,
                           engine='python',
                           header=None)
lake_info_df

Unnamed: 0,0
0,1140000602_wlts_S3A_141_imth.pdf
1,1140000602_wlts_S3A_141_ocea.pdf
2,1140000602_wlts_S3A_141_ocog.pdf
3,conv_S3A_141_imth.dat
4,conv_S3A_141_ocea.dat
5,conv_S3A_141_ocog.dat
6,extr_S3A_141_imth.dat
7,extr_S3A_141_ocea.dat
8,extr_S3A_141_ocog.dat
9,lake_dir.txt


In [5]:
# Build a dictionary with the data of every lake and pickle it.
#   data[lake_id] = {'name': <name from the lakemask table, None if not listed>,
#                    'sats': [...], 'tracks': [...],
#                    'wlts': [DataFrame, ...], 'extr': [DataFrame, ...],
#                    'conv': [DataFrame, ...]}
lakes = list(lake_id_df.lake_id)
n = len(lakes)


def _read_space_separated(filepath, column_names):
    """Read a single-space separated table, skipping its first line.

    The leading positional columns are renamed to `column_names` (in order);
    any additional columns keep their integer labels.
    """
    table = pd.read_csv(filepath,
                        skiprows=1,
                        sep=r' ',
                        engine='python',
                        header=None)
    return table.rename(columns=dict(enumerate(column_names)))


def _sat_and_track(filename):
    """Return the two '_'-separated tokens that precede the last token of `filename`.

    For 'wlts_S3A_141_ocog.dat' this gives ('S3A', '141'), the same values as the
    fixed slices [5:8] and [9:12]. Unlike fixed slices it also works when the name
    carries a prefix of another length (e.g. '<lake_id>_wlts_S3A_141_ocog.dat').
    Falls back to the fixed slices when the name has fewer than three tokens.
    """
    tokens = filename.split('_')
    if len(tokens) >= 3:
        return tokens[-3], tokens[-2]
    return filename[5:8], filename[9:12]


data = dict()

# Lake names come from the lakemask table. Index it once by lake_id instead of
# filtering the whole table for every lake; keeping the first row per lake_id
# matches taking the first match of a boolean filter.
name_by_id = (lakemask_df
              .drop_duplicates(subset='lake_id')
              .set_index('lake_id')['names'])
for lake in lakes:
    # .get() yields None for a lake that is absent from the lakemask table.
    data[lake] = {'name': name_by_id.get(lake),
                  'sats': [], 'tracks': [], 'wlts': [], 'extr': [], 'conv': []}

# Go through all lake folders and store the relevant tables in the dictionary.
for lake_index, lake_id in enumerate(lakes):
    if lake_index % 500 == 0:
        print(f"lake {lake_index} out of {n}")
    # The folder listing is stored in 'lake_dir.txt' inside the lake folder.
    folderpath = os.path.join(folder, str(lake_id))
    lake_path = os.path.join(folderpath, "lake_dir.txt")
    lake_info_df = pd.read_csv(lake_path,
                               engine='python',
                               header=None)
    # Column 0 of the listing holds one file name per row.
    for filename in lake_info_df[0]:
        # Only the ocog data files are wanted, not the pdfs.
        if 'ocog.dat' not in filename:
            continue
        filepath = os.path.join(folderpath, filename)
        # NOTE(review): the example folder listing shown earlier in this notebook
        # contains 'wlts' files only as .pdf. If that holds for every lake, this
        # branch never runs and 'wlts', 'sats' and 'tracks' stay empty -- confirm.
        if 'wlts' in filename:
            wlts_df = _read_space_separated(filepath, ['time', 'wl', 'wlsd'])
            sat, track = _sat_and_track(filename)
            data[lake_id]['wlts'].append(wlts_df)
            data[lake_id]['sats'].append(sat)
            data[lake_id]['tracks'].append(track)
        if 'extr' in filename:
            extr_df = _read_space_separated(filepath,
                                            ['timeu', 'cycle', 'lat', 'long'])
            data[lake_id]['extr'].append(extr_df)
        if 'conv' in filename:
            # conv files are read with the default separator and header handling.
            conv_df = pd.read_csv(filepath, engine='python')
            data[lake_id]['conv'].append(conv_df)

# Save to file; create the output folder first so the write cannot fail
# on a missing directory after the whole loop has already run.
os.makedirs('saves', exist_ok=True)
with open('saves/lake_data_dicts.pkl', 'wb') as f:
    pickle.dump(data, f)

# To open the file again (only unpickle files you created yourself):
#with open('saves/lake_data_dicts.pkl', 'rb') as f:
#    loaded_dict = pickle.load(f)

print('Done')

lake 0 out of 5140
lake 500 out of 5140
lake 1000 out of 5140
lake 1500 out of 5140
lake 2000 out of 5140
lake 2500 out of 5140
lake 3000 out of 5140
lake 3500 out of 5140
lake 4000 out of 5140
lake 4500 out of 5140
lake 5000 out of 5140
Done
