
# Getting data from CDAWeb

Using SunPy to download data from CDAWeb via its Fido interface. To look up the dataset IDs that are available, use the form at https://cdaweb.gsfc.nasa.gov/index.html
Tick the name of the spacecraft you want, hit submit, and you'll be given a list of all the data products for that spacecraft. Copy and paste the dataset name into the `a.cdaweb.Dataset("")` call below, choose your time range `trange`, and you're away!

In [8]:
import numpy as np
import pandas as pd
import math as m
import pickle
from matplotlib import pyplot as plt
from matplotlib import gridspec
import matplotlib.dates as mdates
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sunpy.timeseries import TimeSeries
from matplotlib.dates import DateFormatter

import sys
import os
import glob

sys.path.append(os.path.abspath(".."))
# So that I can read in the src files while working here in the notebooks/ folder
# NB: does not affect working directory, so still need ../data e.g. for reading data

import src.utils as utils
import src.params as params
import src.sf_funcs as sf

# Draw axis tick marks pointing into the plot area on both axes
plt.rcParams.update({"xtick.direction": "in", "ytick.direction": "in"})

In [48]:
def print_metadata_table(data=None, max_width=80):
    """
    Print key/value metadata as a two-column text table with wrapped values.

    ``data`` may be any of:

    1. a mapping (e.g. a ``dict``) of keys to values;
    2. a string holding a Python dict literal, e.g. ``"{'key': ['v1', 'v2']}"``;
    3. a string of tuple-like entries, e.g. ``"('key': '[v1, v2]')"`` or
       ``"('key': [v1, v2])"``.

    List values are printed one item at a time, each wrapped to ``max_width``
    characters; every line after the first is indented so that it lines up
    with the value column. A key whose value is an empty list is still
    printed, with an empty value.

    Args:
        data (str or Mapping): The metadata to print.
        max_width (int, optional): Maximum width of the value column.
            Default is 80.

    Raises:
        TypeError: If ``data`` is neither a string nor a mapping
            (this includes the default ``None``).
        ValueError: If ``max_width`` is smaller than 1, or if no key/value
            entries can be found in ``data``.
    """
    import ast
    import re
    import textwrap
    from collections.abc import Mapping

    if max_width < 1:
        raise ValueError(f"max_width must be a positive integer, got {max_width!r}")

    if isinstance(data, Mapping):
        entries = list(data.items())
    elif isinstance(data, str):
        entries = []
        # Try to parse as a Python dictionary literal first
        try:
            parsed = ast.literal_eval(data)
        except (SyntaxError, ValueError, TypeError):
            parsed = None
        if isinstance(parsed, dict):
            entries = list(parsed.items())
        if not entries:
            # Fall back to the tuple-like formats: first with the value
            # wrapped in quotes, then without the extra quotes
            for pattern in (
                r"\('([^']+)': '(\[[^\]]+\])'\)",
                r"\('([^']+)': (\[[^\]]+\])\)",
            ):
                entries = re.findall(pattern, data)
                if entries:
                    break
    else:
        raise TypeError(
            f"data must be a string or a mapping, got {type(data).__name__}"
        )

    if not entries:
        raise ValueError("No valid data found in the provided input")

    def split_items(value):
        """Return the list of display strings for one metadata value."""
        if isinstance(value, list):
            return [str(item) for item in value]
        text = str(value)
        if not (text.startswith("[") and text.endswith("]")):
            return [text]
        try:
            parsed_value = ast.literal_eval(text)
        except (SyntaxError, ValueError, TypeError):
            # Not a valid Python list literal (e.g. unquoted words):
            # split manually on the quote-comma-quote separator
            return text.strip("[]").split("', '")
        if isinstance(parsed_value, list):
            return [str(item) for item in parsed_value]
        return [text.strip("[]")]

    rows = [(str(key), split_items(value)) for key, value in entries]

    # The key column is at least as wide as its "KEY" header
    key_width = max(len("KEY"), max(len(key) for key, _ in rows))
    separator = "-" * key_width + "-+-" + "-" * max_width
    continuation_indent = " " * (key_width + 3)
    # Wrap the value text on its own and add the indent afterwards, so every
    # line of a value gets the full max_width regardless of the key length
    wrapper = textwrap.TextWrapper(width=max_width)

    print(f"{'KEY'.ljust(key_width)} | VALUE")
    print(separator)

    for key, items in rows:
        lines = []
        for item in items:
            # Drop surrounding whitespace and stray quote characters
            lines.extend(wrapper.wrap(item.strip().strip("'\"")))
        if not lines:
            # Keep the key visible even when its value is empty
            lines = [""]

        print(f"{key.ljust(key_width)} | {lines[0]}")
        for line in lines[1:]:
            print(continuation_indent + line)

        # Separator line between entries
        print(separator)


# Example usage (pass the metadata as a string):
# print_metadata_table(str(metadata_dict), max_width=70)

## Download data

In [2]:
# "WI_H2_MFI"
# "PSP_FLD_L2_MAG_RTN"
# "VOYAGER1_48S_MAG-VIM"

In [3]:
# Uncomment to search CDAWeb (requires `from sunpy.net import Fido, attrs as a`):
# trange = a.Time("2018-01-01 00:00", "2024-01-01 00:00")
# dataset = a.cdaweb.Dataset("VOYAGER2_48S_MAG-VIM")
# result = Fido.search(trange, dataset)
# print(result)

In [4]:
# downloaded_files = Fido.fetch(result[0], path="../data/raw/voyager/")
# print(downloaded_files)

## Read data

In [26]:
import warnings
from sunpy.util.exceptions import SunpyUserWarning

# Globally ignore every warning whose category is SunpyUserWarning
warnings.filterwarnings("ignore", category=SunpyUserWarning)
# Disabled: would additionally ignore all FutureWarning messages
# (originally added for the pandas df.sum() warning)
# warnings.simplefilter(action="ignore", category=FutureWarning)

In [51]:
# Read the Voyager 1 48-second magnetometer CDF file into a sunpy TimeSeries.
# The path is relative to the notebooks/ folder this notebook runs from.
data = TimeSeries(
    "../data/raw/voyager/voyager1_48s_mag-vim_20180101_v01.cdf",
    # NOTE(review): only one file is passed, so concatenate=True presumably
    # has no effect here — confirm before adding more files
    concatenate=True,
)

# Optional ways to inspect the loaded series (uncomment as needed):
# data.columns
# data.units
# data.meta
# data.quicklook()  # Fun!
# data.truncate("2023/02/01 00:00", "2023/02/07 00:00").peek(columns=data.columns[0:3])

In [58]:
# The default display of data.meta cuts the values off, so instead take the
# first metadata entry returned by data.meta.find(), convert it to a string,
# and print it as a wrapped table.
full_metadata = data.meta.find().metas[0]
metadata_to_print = str(full_metadata)

print_metadata_table(metadata_to_print)

KEY                        | VALUE
---------------------------+---------------------------------------------------------------------------------
title                      | VOYAGER1
---------------------------+---------------------------------------------------------------------------------
project                    | VOYAGER1
---------------------------+---------------------------------------------------------------------------------
discipline                 | Space Physics>Interplanetary Studies
                             Solar Physics>Heliospheric Physics
---------------------------+---------------------------------------------------------------------------------
source_name                | VOYAGER1> Voyager 1
---------------------------+---------------------------------------------------------------------------------
data_type                  | 48SEC>48 second Time Resolution
---------------------------+-----------------------------------------------------------------------