# SP3 Data Wrangler

This notebook takes the data in the `sp3` folder inside the `RawData` folder, processes it, and writes the result to an output folder for analysis.

In [2]:
import os
import sp3
import shutil
import gzip
import pickle

In [3]:
# --- Configuration -----------------------------------------------------------
output_folder = 'sp3_test'  # name of the folder that receives the processed output
grace_satellite = 'L64' # 'L65' or 'L64'
sp3_raw_data_folder = os.path.join(os.getcwd(), 'RawData', 'sp3', f'{grace_satellite}')
# Folder numbers to process: every number listed here is matched against the
# integer value of a sub-folder name (so 1 matches '001'). Use [-1] for all folders.
specific_folder_numbers = [-1]


def _folder_number(name):
    """Return the folder name converted to an int, or None if it is not numeric."""
    try:
        return int(name)
    except ValueError:
        return None


# first check that the folder exists
if not os.path.isdir(sp3_raw_data_folder):
    raise FileNotFoundError(f'Folder not found: {sp3_raw_data_folder}')

# List the sub-folders once. os.listdir returns entries in arbitrary order, so
# sort them to make the processing order reproducible between runs and machines.
all_folders = sorted(
    f for f in os.listdir(sp3_raw_data_folder)
    if os.path.isdir(os.path.join(sp3_raw_data_folder, f))
)

if specific_folder_numbers == [-1]:
    # get all folders
    folders = all_folders
    print('There are {} folders'.format(len(folders)))
    print('Extracting SP3 data from all folders...')
else:
    # Keep only the folders whose number is listed in specific_folder_numbers.
    # Non-numeric folder names (e.g. '.ipynb_checkpoints') are skipped instead of
    # crashing the int() conversion.
    folders = [f for f in all_folders if _folder_number(f) in specific_folder_numbers]
    print('There are {} folders'.format(len(folders)))
    # Report the folders that were actually matched; this also stays valid when
    # specific_folder_numbers is empty or nothing matches.
    print('Extracting SP3 data from folders {}...'.format(', '.join(folders)))

There are 50 folders
Extracting SP3 data from all folders...


In [4]:
# Each file in the selected folders is gzip-compressed, so decompress any '.gz'
# file that does not yet have an uncompressed copy next to it.
for folder in folders:
    folder_path = os.path.join(sp3_raw_data_folder, folder)
    # Iterate through all files in the current folder
    for filename in os.listdir(folder_path):
        # Only .gz files need unzipping
        if not filename.endswith('.gz'):
            continue

        gz_file_path = os.path.join(folder_path, filename)
        output_file_path = os.path.join(folder_path, filename[:-3])  # Remove '.gz'

        # Skip files that have already been uncompressed
        if os.path.exists(output_file_path):
            print(f"File already unzipped: {filename[:-3]}")
            continue

        # Decompress into a temporary file and rename it only once the copy has
        # finished. If the copy is interrupted or the archive is corrupt, no
        # truncated output file is left behind to be mistaken for a finished one.
        partial_file_path = output_file_path + '.part'
        try:
            with gzip.open(gz_file_path, 'rb') as f_in:
                with open(partial_file_path, 'wb') as f_out:
                    shutil.copyfileobj(f_in, f_out)
            os.replace(partial_file_path, output_file_path)
        finally:
            # After a successful rename the temporary file no longer exists;
            # after a failure this removes the incomplete copy.
            if os.path.exists(partial_file_path):
                os.remove(partial_file_path)
        print(f"Unzipped {filename} in {folder}")

File already unzipped: GFZOP_RSO_L64_G_20221231_220000_20230101_120000_v02.sp3
File already unzipped: GFZOP_RSO_L64_G_20230101_100000_20230102_000000_v02.sp3
File already unzipped: GFZOP_RSO_L64_G_20230101_220000_20230102_120000_v02.sp3
File already unzipped: GFZOP_RSO_L64_G_20230102_100000_20230103_000000_v02.sp3
File already unzipped: GFZOP_RSO_L64_G_20230102_220000_20230103_120000_v02.sp3
File already unzipped: GFZOP_RSO_L64_G_20230103_100000_20230104_000000_v02.sp3
File already unzipped: GFZOP_RSO_L64_G_20230103_220000_20230104_120000_v02.sp3
File already unzipped: GFZOP_RSO_L64_G_20230104_100000_20230105_000000_v02.sp3
File already unzipped: GFZOP_RSO_L64_G_20230104_220000_20230105_120000_v02.sp3
File already unzipped: GFZOP_RSO_L64_G_20230105_100000_20230106_000000_v02.sp3
File already unzipped: GFZOP_RSO_L64_G_20230105_220000_20230106_120000_v02.sp3
File already unzipped: GFZOP_RSO_L64_G_20230106_100000_20230107_000000_v02.sp3
File already unzipped: GFZOP_RSO_L64_G_20230106_2200

In [5]:
# Extract the records from every .sp3 file in the selected folders.
# Records are appended straight into one flat list, so raw_sp3_data holds the
# same kind of content throughout the cell.
raw_sp3_data = []
for folder in folders:
    print('Extracting SP3 data from folder {}...'.format(folder))
    folder_path = os.path.join(sp3_raw_data_folder, folder)
    # os.listdir returns entries in arbitrary order; sorting the file names makes
    # the order of the collected records reproducible.
    for filename in sorted(os.listdir(folder_path)):
        if filename.endswith('.sp3'):
            sp3_file_path = os.path.join(folder_path, filename)
            temp_sp3_data = sp3.Product.from_file(sp3_file_path)
            # Only the first satellite of each product is used.
            # NOTE(review): the file names in the recorded output suggest that
            # consecutive files cover overlapping time spans, so the combined
            # list may contain repeated epochs - confirm whether downstream
            # code de-duplicates.
            raw_sp3_data.extend(temp_sp3_data.satellites[0].records)

print(len(raw_sp3_data))



Extracting SP3 data from folder 001...
Extracting SP3 data from folder 002...
Extracting SP3 data from folder 003...
Extracting SP3 data from folder 004...
Extracting SP3 data from folder 005...
Extracting SP3 data from folder 006...
Extracting SP3 data from folder 007...
Extracting SP3 data from folder 008...
Extracting SP3 data from folder 009...
Extracting SP3 data from folder 010...
Extracting SP3 data from folder 011...
Extracting SP3 data from folder 012...
Extracting SP3 data from folder 013...
Extracting SP3 data from folder 014...
Extracting SP3 data from folder 015...
Extracting SP3 data from folder 016...
Extracting SP3 data from folder 017...
Extracting SP3 data from folder 018...
Extracting SP3 data from folder 019...
Extracting SP3 data from folder 020...
Extracting SP3 data from folder 021...
Extracting SP3 data from folder 022...
Extracting SP3 data from folder 023...
Extracting SP3 data from folder 024...
Extracting SP3 data from folder 025...
Extracting SP3 data from 

In [10]:
# Every extracted record is kept; no subsampling is applied here.
# NOTE(review): an earlier comment said this "would get hourly intervals", but
# the list is passed through unchanged - confirm whether a stride was intended.
output = raw_sp3_data

print(len(output))

# Create the folder that will store the data. It must be the same path that is
# used when saving (<cwd>/RawData/<output_folder>); exist_ok makes the cell safe
# to re-run.
os.makedirs(os.path.join(os.getcwd(), 'RawData', output_folder), exist_ok=True)

168200


In [11]:
# Serialise the collected records to
# <cwd>/RawData/<output_folder>/<grace_satellite>_sp3_data.pkl
pickle_name = f'{grace_satellite}_sp3_data.pkl'
output_file_path = os.path.join(os.getcwd(), 'RawData', output_folder, pickle_name)

with open(output_file_path, 'wb') as pickle_file:
    pickle.dump(output, pickle_file)

# Report where the file was written
print(f'Saved SP3 data to {output_file_path}')

Saved SP3 data to c:\Users\IT\Documents\UCL\CatAnalysis\RawData\sp3_test\L64_sp3_data.pkl
