In [None]:
import json
import logging
import os
from typing import Union, List, Dict, Any

import numpy as np
import pandas as pd
from pandas import Timestamp
import matplotlib.pyplot as plt
import itertools
import pprint

In [None]:
# Root configuration: DEBUG threshold and a timestamped message format.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)

# Notebook-level logger, limited to INFO and above.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

In [None]:
# Location of the normalized frames file, relative to the working directory.
satellite_json: str = "../data/CubeBel-1/normalized_frames.json"

# Pass the encoding explicitly so the read does not depend on the platform's
# locale default.
with open(satellite_json, 'r', encoding='utf-8') as file:
    json_stream: str = file.read()

json_data: dict = json.loads(json_stream)

if not json_data:
    logger.error("Json data is empty")
    # exit() is an interactive-shell helper; raising stops this cell (and a
    # "Run All") with a clear error instead.
    raise ValueError("Json data is empty")


class Satellite:
    """Plain record describing the satellite under analysis.

    Attributes:
        norad_id: Identifier passed in as ``norad_id`` (presumably the NORAD
            catalogue number).
        name: Name of the satellite.
        observations_amount: Number of observations passed in by the caller.
    """

    def __init__(self, norad_id: int, name: str, observations_amount: int):
        # Store the constructor arguments unchanged.
        self.norad_id = norad_id
        self.name = name
        self.observations_amount = observations_amount


# Metadata section of the loaded JSON document.
metadata: dict = json_data['metadata']

# Satellite record built from the metadata entries.
satellite: Satellite = Satellite(
    norad_id=metadata['satellite_norad'],
    name=metadata['satellite_name'],
    observations_amount=metadata['total_frames'],
)

# In the samples below, '...' stands for further entries of the same shape.

# column_tags maps every column name to a tag, e.g.:
# "column_tags": {
#               "some_variable": "variable",
#               "some_constant": "constant",
#               ...
#               "some_status": "status"
#             }
solar_and_satellite_parameters_names: Dict[str, str] = (
    metadata['analysis']['column_tags']
)

# feature_columns lists the solar parameter names, e.g.:
# "feature_columns": [
#                 "Fredericksburg A",
#                 "Fredericksburg K 0-3"
#                 ]
solar_parameters_names: List[str] = metadata['analysis']['feature_columns']

# Every tagged column that is not a solar parameter counts as a satellite one.
satellite_parameters_names: List[str] = [
    name
    for name in solar_and_satellite_parameters_names
    if name not in solar_parameters_names
]

# frames look like (fields might contain both satellite and solar parameters):
# "frames": [
#         {
#             "time": "2019-07-02 18:15:08",
#             "measurement": "",
#             "tags": {
#                 "satellite": "",
#                 "decoder": "Lightsail2",
#                 "station": "",
#                 "observer": "",
#                 "source": "",
#                 "version": "1.66.0"
#             },
#             "fields": {
#                 "dest_callsign": {
#                     "value": "N6CP  ",
#                     "unit": null
#                 },
#                 "Fredericksburg A": {
#                     "value": 3.3125,
#                     "unit": "V"
#                 },
#                 ...
#             }
#         },
#         ...
#     ]

# Per the sample above, 'frames' is a list with one dict per observation.
frames: List[Dict[str, Any]] = json_data['frames']

# NOTE(review): not referenced again in the code shown here.
allowed_types: Union = Union[int, float, bool, str]

# One flat record per frame (the parsed time plus the field values); filled below.
observation_data: List[Dict[str, Any]] = []

# NOTE(review): this note originally called 'time' a Unix timestamp
# (converter - https://www.epochconverter.com/), but the sample above shows a
# 'YYYY-MM-DD HH:MM:SS' string - confirm the format against the data file.
for frame in frames:
    observation_time = pd.to_datetime(frame['time'])
    # Keep only the 'value' of each field; field entries that are not dicts are skipped.
    fields: Dict[Any, Any] = {key: value['value'] for key, value in
                              frame['fields'].items() if
                              isinstance(value, dict)}
    observation_data.append({'time': observation_time, **fields})

# Index by time, drop rows that contain any missing value, then keep numeric columns.
# NOTE(review): dropna() runs before select_dtypes(), so a missing value in a
# non-numeric column also removes the row - confirm this is intended.
observation_dataframe: pd.DataFrame = pd.DataFrame(observation_data).set_index(
    'time').dropna().select_dtypes(include=np.number)

observation_dataframe = observation_dataframe.loc[:,
                        observation_dataframe.nunique() > 1]  # Select columns with more than 1 unique value

In [None]:
# Keep only the parameter names that are still columns of observation_dataframe.
solar_parameters_names = [
    name
    for name in solar_parameters_names
    if name in observation_dataframe.columns
]

satellite_parameters_names = [
    name
    for name in satellite_parameters_names
    if name in observation_dataframe.columns
]

In [None]:
def calculate_correlations(df: pd.DataFrame, list1_columns: List[str],
                           list2_columns: List[str]) -> Dict[str, float]:
    """
    Calculates Pearson correlation coefficients between every column of the
    first list and every column of the second list.

    Args:
        df (pd.DataFrame): DataFrame containing observations with potential missing values.
        list1_columns (list): List of column names from the first list.
        list2_columns (list): List of column names from the second list.

    Returns:
        dict: Maps "<col1> vs. <col2>" to the correlation coefficient (a plain
              float) for each pair taken from the two lists. Pairs in which
              both names are the same column are skipped.
    """
    # De-duplicate while preserving order. A column named in both lists would
    # otherwise be selected twice, and the duplicate label makes
    # corr_df.loc[col1, col2] return a Series instead of a scalar.
    selected_columns = list(dict.fromkeys([*list1_columns, *list2_columns]))
    corr_df: pd.DataFrame = df[selected_columns].corr()

    # float() turns the NumPy scalar into a built-in float so the values match
    # the declared return type and print cleanly.
    return {
        f"{col1} vs. {col2}": float(corr_df.loc[col1, col2])
        for col1 in list1_columns
        for col2 in list2_columns
        if col1 != col2
    }


# Correlate each satellite parameter with each solar parameter.
correlations: Dict[str, float] = calculate_correlations(
    df=observation_dataframe,
    list1_columns=satellite_parameters_names,
    list2_columns=solar_parameters_names,
)

# Order the pairs by absolute correlation, largest first.
sorted_correlations: Dict[str, float] = dict(
    sorted(
        correlations.items(),
        key=lambda pair: abs(pair[1]),
        reverse=True,
    )
)

# sort_dicts=False keeps the ranking order in the printed output.
pprint.pprint(sorted_correlations, sort_dicts=False)

In [None]:
def plot_sat_vs_solar(df: pd.DataFrame, sat_param_name: str,
                      solar_params_names: List[str],
                      figure_size=(12, 8), cmap='viridis',
                      legend_title='Data Comparison',
                      grid_style='--', grid_width=0.5, label_size=10):
    """
    Draws one scatter plot of a satellite parameter against each solar parameter.

    All solar parameters share one set of axes: each series uses the satellite
    parameter as y values and one solar parameter as x values, and gets its own
    marker and legend entry. The figure is shown and then closed.

    Args:
        df (pd.DataFrame): DataFrame containing the columns to plot.
        sat_param_name (str): Name of the satellite parameter (y-axis).
        solar_params_names (list): List of solar parameter names (x-axes).
        figure_size (tuple, optional): Desired figure size. Defaults to (12, 8).
        cmap (str, optional): Accepted for interface compatibility; the function
            does not currently apply it. Defaults to 'viridis'.
        legend_title (str, optional): Title for the legend. Defaults to 'Data Comparison'.
        grid_style (str, optional): Linestyle for the grid (e.g., '-', '--'). Defaults to '--'.
        grid_width (float, optional): Width of the grid lines. Defaults to 0.5.
        label_size (int, optional): Font size for labels and ticks. Defaults to 10.
    """
    figure, axis = plt.subplots(figsize=figure_size)

    # Marker styles; cycled so any number of series can be drawn.
    markers = ['o', 's', '^', 'P', 'D', 'x', '+', 'v', '<', '>', '1',
               '2', '3', '4']

    y = df[sat_param_name].values
    for solar_param, marker in zip(solar_params_names,
                                   itertools.cycle(markers)):
        axis.scatter(df[solar_param].values, y, alpha=0.8, label=solar_param,
                     marker=marker)

    # Labels, title and grid.
    axis.set_xlabel(f'{solar_params_names}', fontsize=label_size)
    axis.set_ylabel(sat_param_name, fontsize=label_size)
    axis.set_title(f"{sat_param_name} vs. Solar Parameters",
                   fontsize=label_size)
    axis.grid(True, which='both', linestyle=grid_style, linewidth=grid_width)

    # Only build a legend when at least one series was drawn.
    handles, _ = axis.get_legend_handles_labels()
    if handles:
        axis.legend(loc='upper left', fontsize=label_size, title=legend_title,
                    bbox_to_anchor=(1, 1))

    axis.tick_params(bottom=True, top=True, left=True, right=True, which='both',
                     labelsize=label_size)

    figure.tight_layout()
    plt.show()
    # Close this figure explicitly: clf() only clears the current figure (and
    # creates a new empty one if none is open), so repeated calls would
    # accumulate figures.
    plt.close(figure)


# One figure per satellite parameter, each plotted against all solar parameters.
for sat_param in satellite_parameters_names:
    logger.info(f"Plotting {sat_param} vs. Solar Parameters")
    plot_sat_vs_solar(
        df=observation_dataframe,
        sat_param_name=sat_param,
        # Hand over a fresh list so the shared name list is never passed directly.
        solar_params_names=list(solar_parameters_names),
    )

In [None]:
# NOTE(review): these imports sit in the middle of the notebook; consider moving
# them to the first import cell so all dependencies are visible up front.
import holoviews as hv
from holoviews import opts
from bokeh.embed import file_html
from bokeh.resources import CDN

# Select Bokeh as the HoloViews plotting extension.
hv.extension('bokeh')


def create_scatter_plot(df, x_columns, y_columns):
    """
    Builds an interactive HoloViews scatter plot whose axes can be switched
    between the given columns.

    Args:
        df (pd.DataFrame): Pandas DataFrame containing the data.
        x_columns (list): Column names offered for the x-axis.
        y_columns (list): Column names offered for the y-axis.

    Returns:
        HoloViews DynamicMap object: Interactive scatter plot.
    """

    def plot_func(x, y):
        # Called with the chosen column names; returns one styled scatter.
        scatter = hv.Scatter(df, x, y)
        style = opts.Scatter(tools=['hover'], size=8, color='blue')
        return scatter.opts(style)

    dynamic_map = hv.DynamicMap(plot_func, kdims=['x', 'y'])
    # Restrict both key dimensions to the supplied column names.
    return dynamic_map.redim.values(x=x_columns, y=y_columns)


# Solar parameters are offered for the x-axis, satellite parameters for the y-axis.
plot = create_scatter_plot(
    df=observation_dataframe,
    x_columns=solar_parameters_names,
    y_columns=satellite_parameters_names,
)

# Bare expression so the notebook renders the plot as the cell output.
plot

In [None]:
def export_plot_to_html(plot, filename="../out/plot.html"):
    """
    Exports a HoloViews plot to an HTML file using Bokeh.

    The target directory is created if it does not exist yet, and the file is
    written as UTF-8.

    Args:
        plot (HoloViews object): The HoloViews plot to export.
        filename (str, optional): Path of the HTML file to create.
            Defaults to "../out/plot.html".
    """
    # Render the plot using the Bokeh backend
    bokeh_plot = hv.render(plot, backend='bokeh')

    # Build the HTML document, with Bokeh resources taken from the CDN
    html = file_html(bokeh_plot, CDN, title="HoloViews Plot")

    # Make sure the output directory exists; a bare file name has no directory
    # part, and os.makedirs('') would raise.
    output_dir = os.path.dirname(filename)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Explicit encoding so the result does not depend on the platform default.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(html)


# Export the interactive plot to the function's default output path.
export_plot_to_html(plot)