In [None]:
import json
import logging
import os
from typing import Union, List, Dict, Any

import numpy as np
import pandas as pd
from pandas import Timestamp
import matplotlib.pyplot as plt
import itertools
import pprint

In [None]:
# Root logger: WARNING threshold, records laid out as time - name - level - message
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.WARNING,
)

# The notebook's own logger is set to INFO, below the root threshold above
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

In [None]:
# Path of the normalized frames file, relative to the working directory
satellite_json: str = "../data/CubeBel-1/normalized_frames.json"

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default text encoding
with open(satellite_json, 'r', encoding='utf-8') as file:
    json_stream: str = file.read()

json_data: dict = json.loads(json_stream)

if not json_data:
    logger.error("Json data is empty")
    # Raise rather than call exit(1): the failure then stops this cell (and a
    # "Run All") with a traceback instead of terminating the interpreter session
    raise ValueError("Json data is empty")


class Satellite:
    """Plain record holding the identifying details of one satellite.

    Attributes:
        norad_id: Value given as ``norad_id`` (the notebook passes
            ``metadata['satellite_norad']``).
        name: Value given as ``name`` (the notebook passes
            ``metadata['satellite_name']``).
        observations_amount: Value given as ``observations_amount`` (the
            notebook passes ``metadata['total_frames']``).
    """

    def __init__(self, norad_id: int, name: str, observations_amount: int):
        # Store the three constructor arguments unchanged
        self.norad_id, self.name, self.observations_amount = (
            norad_id, name, observations_amount)


metadata: dict = json_data['metadata']

# Identity of the satellite as recorded in the file's metadata section
satellite: Satellite = Satellite(
    norad_id=metadata['satellite_norad'],
    name=metadata['satellite_name'],
    observations_amount=metadata['total_frames'],
)

# 'column_tags' is iterated below for its keys, which are treated as parameter names
solar_and_satellite_parameters_names: Dict[str, str] = (
    metadata['analysis']['column_tags'])

solar_parameters_names: List[str] = metadata['analysis']['feature_columns']

# Every tagged name that is not listed among the feature columns is treated
# as a satellite parameter
satellite_parameters_names: List[str] = [
    tagged_name
    for tagged_name in solar_and_satellite_parameters_names
    if tagged_name not in solar_parameters_names
]

frames: dict = json_data['frames']

allowed_types: Union = Union[int, float, bool, str]

# One record per frame: the parsed 'time' plus the 'value' entry of each field
observation_data: List[Dict[str, Any]] = []

# 'time' is unix timestamps, converter - https://www.epochconverter.com/
# NOTE(review): pd.to_datetime is called without a `unit`; if 'time' really
# holds integer epoch seconds, confirm the resulting timestamps are correct.
for frame in frames:
    observation_time = pd.to_datetime(frame['time'])
    # Only dict-shaped fields are kept; each contributes its 'value' entry
    fields: Dict[Any, Any] = {key: value['value'] for key, value in
                              frame['fields'].items() if
                              isinstance(value, dict)}
    observation_data.append({'time': observation_time, **fields})

# Index by time, drop rows containing any missing value, keep numeric columns.
# NOTE(review): dropna() runs before select_dtypes(), so a gap in a
# non-numeric column also discards the row - confirm this is intended.
observation_dataframe: pd.DataFrame = pd.DataFrame(observation_data).set_index(
    'time').dropna().select_dtypes(include=np.number)

observation_dataframe = observation_dataframe.loc[:,
                        observation_dataframe.nunique() > 1]  # Select columns with more than 1 unique value

In [None]:
# Keep only the names that are still columns of observation_dataframe
solar_parameters_names = list(
    filter(lambda name: name in observation_dataframe.columns,
           solar_parameters_names))

satellite_parameters_names = list(
    filter(lambda name: name in observation_dataframe.columns,
           satellite_parameters_names))

In [None]:
def calculate_correlations(df: pd.DataFrame, list1_columns: List[str], list2_columns: List[str]) -> Dict[tuple, float]:
    """Correlate every column in ``list1_columns`` with every column in ``list2_columns``.

    Args:
        df: Frame containing all referenced columns.
        list1_columns: Column names used as the first element of each pair.
        list2_columns: Column names used as the second element of each pair.

    Returns:
        Mapping ``(list1_name, list2_name) -> coefficient`` taken from
        ``DataFrame.corr()``. A column paired with itself is omitted, and so
        is any pair whose coefficient is NaN.

    Raises:
        KeyError: If a requested column is not present in ``df``.
    """
    # dict.fromkeys de-duplicates while keeping order, so a name present in
    # both lists does not produce duplicate labels in the correlation matrix
    row_names = list(dict.fromkeys(list1_columns))
    column_names = list(dict.fromkeys(list2_columns))
    corr_matrix = df[list(dict.fromkeys(row_names + column_names))].corr()

    correlations: Dict[tuple, float] = {}
    for row_name in row_names:
        for column_name in column_names:
            # Skip only the self-pair; other pairs involving this name are kept
            if row_name == column_name:
                continue
            coefficient = corr_matrix.at[row_name, column_name]
            # Drop undefined coefficients explicitly so the result does not
            # depend on how a given pandas version's stack() treats NaN
            if pd.isna(coefficient):
                continue
            correlations[(row_name, column_name)] = float(coefficient)
    return correlations


# Coefficient for every (satellite parameter, solar parameter) pair
correlations: Dict[str, float] = calculate_correlations(
    df=observation_dataframe,
    list1_columns=satellite_parameters_names,
    list2_columns=solar_parameters_names,
)

# Strongest relationships first, regardless of sign
sorted_correlations: Dict[str, float] = {
    pair: coefficient
    for pair, coefficient in sorted(correlations.items(),
                                    key=lambda entry: abs(entry[1]),
                                    reverse=True)
}

pprint.pprint(sorted_correlations, sort_dicts=False)

In [None]:
def plot_sat_vs_solar(df: pd.DataFrame, sat_param_name: str,
                      solar_params_names: List[str],
                      figure_size=(12, 8), cmap='viridis',
                      legend_title='Data Comparison',
                      grid_style='--', grid_width=0.5, label_size=10):
    """
    Scatter one satellite parameter against one or more solar parameters.

    All series share a single pair of axes: the satellite parameter is on the
    y-axis and each solar parameter is drawn as its own series with a distinct
    marker and a colour sampled from ``cmap``. The figure is shown and then
    closed; nothing is returned.

    Args:
        df (pd.DataFrame): DataFrame containing the columns to plot.
        sat_param_name (str): Name of the satellite parameter (y-axis).
        solar_params_names (list): Names of the solar parameters (x values, one series each).
        figure_size (tuple, optional): Figure size passed to ``plt.subplots``. Defaults to (12, 8).
        cmap (str, optional): Colormap the series colours are sampled from. Defaults to 'viridis'.
        legend_title (str, optional): Title for the legend. Defaults to 'Data Comparison'.
        grid_style (str, optional): Linestyle for the grid (e.g., '-', '--'). Defaults to '--'.
        grid_width (float, optional): Width of the grid lines. Defaults to 0.5.
        label_size (int, optional): Font size for labels, title, legend and ticks. Defaults to 10.
    """
    solar_params_names = list(solar_params_names)
    figure, axis = plt.subplots(figsize=figure_size)

    markers = ['o', 's', '^', 'P', 'D', 'x', '+', 'v', '<', '>', '1',
               '2', '3', '4']
    marker_cycle = itertools.cycle(markers)

    # Spread the series evenly over the colormap; a single series maps to 0.0
    colormap = plt.get_cmap(cmap)
    last_index = max(len(solar_params_names) - 1, 1)

    y = df[sat_param_name].values
    for index, solar_param in enumerate(solar_params_names):
        axis.scatter(df[solar_param].values, y, alpha=0.8, label=solar_param,
                     marker=next(marker_cycle),
                     color=colormap(index / last_index))

    # Readable comma-separated label instead of the repr of a Python list
    axis.set_xlabel(', '.join(map(str, solar_params_names)),
                    fontsize=label_size)
    axis.set_ylabel(sat_param_name, fontsize=label_size)
    axis.set_title(f"{sat_param_name} vs. Solar Parameters",
                   fontsize=label_size)
    axis.grid(True, which='both', linestyle=grid_style, linewidth=grid_width)
    if solar_params_names:
        # A legend is only meaningful when at least one series was drawn
        axis.legend(loc='upper left', fontsize=label_size, title=legend_title,
                    bbox_to_anchor=(1, 1))
    axis.tick_params(bottom=True, top=True, left=True, right=True,
                     which='both', labelsize=label_size)

    figure.tight_layout()
    plt.show()
    # Close this specific figure; clearing "the current figure" after show()
    # may act on a different (or freshly created) figure and not free this one
    plt.close(figure)

In [None]:
import ipywidgets as widgets

# Satellite parameter: caption plus a single-choice dropdown
sat_param_label = widgets.Label(
    value="Select Satellite Parameter:",
)
sat_param_dropdown = widgets.Dropdown(
    options=satellite_parameters_names,
)

# Solar parameters: caption plus a multi-choice selector
solar_params_label = widgets.Label(
    value="Select Solar Parameters (multiple choices):",
)
solar_params_selector = widgets.SelectMultiple(
    options=solar_parameters_names,
)

# Button that triggers plotting
plot_button = widgets.Button(
    description="Generate Plot",
)

# Area that receives the plot and the printed summary
output = widgets.Output()


# Function to update the plot when the button is clicked
def on_button_clicked(button):
    """Redraw the plot and the correlation summary for the current selection.

    Reads the chosen satellite parameter and solar parameters from the
    widgets, then renders the scatter plot and prints the row count and the
    pairwise correlations into ``output``.

    Args:
        button: Argument supplied by the button's ``on_click`` callback; unused.
    """
    sat_param = sat_param_dropdown.value
    selected_solar_params = list(solar_params_selector.value)

    with output:
        output.clear_output(wait=True)

        # Nothing to plot or correlate until both sides have been chosen
        if sat_param is None or not selected_solar_params:
            print("Select a satellite parameter and at least one solar parameter.")
            return

        # Calculate correlations
        correlations = calculate_correlations(observation_dataframe, [sat_param], selected_solar_params)

        # Data amount information
        data_amount = len(observation_dataframe)

        # Generate plot; plot_sat_vs_solar shows the figure itself, so no
        # additional plt.show() is issued here
        plot_sat_vs_solar(observation_dataframe, sat_param, selected_solar_params)

        # Display data information
        print(f"Data amount analyzed: {data_amount}")
        print("Correlations:")
        for param_pair, corr_value in correlations.items():
            print(f"\t{param_pair}: {corr_value:.4f}")


# Run on_button_clicked whenever the button is pressed
plot_button.on_click(on_button_clicked)

# Controls stacked vertically on the left, output area to their right
controls = widgets.VBox([
    sat_param_label,
    sat_param_dropdown,
    solar_params_label,
    solar_params_selector,
    plot_button,
])
display(widgets.HBox([controls, output]))