In [46]:
import json
import os
from typing import List, Dict, Any

import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from scipy.stats import spearmanr, kendalltau

In [47]:
# Define the list of years to analyze
years_to_analyze: List[int] = [2023, 2022, 2021, 2020, 2019, 2018, 2017,
                               2016, 2015, 2014]

In [48]:
def load_json_data(file_path: str) -> pd.DataFrame:
    """Load data from JSON file."""
    if not os.path.isfile(file_path):
        raise FileNotFoundError(f"The file {file_path} does not exist.")

    with open(file_path, 'r') as f:
        data = json.load(f)

    # Assuming that all items in the JSON file have the same structure
    keys: List[str] = list(data[0].keys())
    filtered_data: List[Dict[str, Any]] = [{key: item[key] for key in keys} for
                                           item in data]

    df: pd.DataFrame = pd.DataFrame(filtered_data)

    df.rename(columns={'time-tag': 'time'}, inplace=True)

    target: str = 'time'

    if target in keys:
        df[target] = pd.to_datetime(df[target], errors='coerce')
        df[target] = df[target].dt.to_period('M')

    return df


observed_solar_cycle_data_path: os.path = '../data/observed_solar_cycle_indices.json'
df_solar = load_json_data(observed_solar_cycle_data_path)

In [49]:
class DataAnalyzer:
    """
    A class used to analyze and visualize data from CSV files.
    """
    solar_property: str
    sat_property_col: str
    time_col: str
    years_filter: List[int]
    file_path: str

    def __init__(self, file_path: str, years_filter: List[int],
                 time_col: str,
                 sat_property_col: str, solar_property: str):
        """
        Constructs all the necessary attributes for the DataAnalyzer object.

        :param file_path: Path to the csv file
        :param years_filter: List of years to filter data
        :param time_col: Name of the time column in the DataFrame
        :param sat_property_col: Name of the satellite property(such as resets etc.) column in the DataFrame
        :param solar_property: Name of the solar_property column in the DataFrame
        """
        self.file_path = file_path
        self.years_filter = years_filter
        self.time_col = time_col
        self.sat_property_col = sat_property_col
        self.solar_property = solar_property
        self.df_sat_property = None
        self.median_sat_property = None
        self.merged_df = None
        self.correlation = None
        self.corr_matrix = None
        self.spearman_corr = None
        self.kendall_corr = None

    def load_filtered_data(self):
        """
        Loads the data from the csv file and filters it based on the specified years.
        """
        if not os.path.isfile(self.file_path):
            raise FileNotFoundError(
                f"The file {self.file_path} does not exist.")

        result: pd.DataFrame = pd.read_csv(self.file_path)
        result[self.time_col] = pd.to_datetime(result[self.time_col])
        result[self.time_col] = result[self.time_col].dt.normalize()

        self.df_sat_property = result[
            result[self.time_col].dt.year.isin(self.years_filter)]

    def group_dataframe_by_year_and_month(self):
        """
        Groups the dataframe by year and month, and calculates the median of resets.
        """
        self.df_sat_property['Month'] = self.df_sat_property[
            self.time_col].dt.to_period(
            'M')
        self.median_sat_property = self.df_sat_property.groupby('Month')[
            self.sat_property_col].median().reset_index()

    def merge_dataframes(self, df_solar: pd.DataFrame, left_on: str,
                         right_on: str):
        """
        Merges the median dataframe with another dataframe.
    
        :param df_solar: The other dataframe to merge with
        :param left_on: The column name in the median dataframe to merge on
        :param right_on: The column name in the other dataframe to merge on
        """
        self.merged_df = pd.merge(self.median_sat_property, df_solar,
                                  left_on=left_on,
                                  right_on=right_on)
        self.correlation = self.merged_df[self.sat_property_col].corr(
            self.merged_df[self.solar_property])
        self.corr_matrix = self.merged_df[
            [self.sat_property_col, self.solar_property]].corr()

        self.spearman_corr, _ = spearmanr(self.merged_df[self.sat_property_col],
                                          self.merged_df[self.solar_property])

        self.kendall_corr, _ = kendalltau(self.merged_df[self.sat_property_col],
                                          self.merged_df[self.solar_property])

    def create_scatter_plot(self):
        """
        Creates a scatter plot showing the correlation between resets and ssn.
        """
        plt.figure(figsize=(10, 6))
        sns.scatterplot(data=self.merged_df, x=self.sat_property_col,
                        y=self.solar_property)
        plt.title('Correlation between {} and {}'.format(self.sat_property_col,
                                                         self.solar_property))
        plt.show()

    def run_analysis(self, df_solar: pd.DataFrame, left_on: str, right_on: str):
        """
        Runs the full analysis process.
    
        :param df_solar: The other dataframe to merge with
        :param left_on: The column name in the median dataframe to merge on
        :param right_on: The column name in the other dataframe to merge on
        """
        self.load_filtered_data()
        self.group_dataframe_by_year_and_month()
        self.merge_dataframes(df_solar, left_on, right_on)
        self.create_scatter_plot()


In [50]:
griffex_base_path: os.path = '../data/GRIFFEX/'
satellite_data_time_column_name: str = 'Time'
left_on: str = 'Month'
right_on: str = 'time'


def analyze_griffex(file_name: str, target_col: str) -> None:
    data: os.path = os.path.join(griffex_base_path, file_name)

    sun_variables = list(df_solar)
    sun_variables.remove('time')

    for sun_variable in sun_variables:
        analyzer: DataAnalyzer = DataAnalyzer(data,
                                              years_to_analyze,
                                              satellite_data_time_column_name,
                                              target_col, sun_variable)
        analyzer.run_analysis(df_solar, left_on, right_on)

        print("Analysis results:")
        print(f"Spearman's rank correlation: {analyzer.spearman_corr}")
        print(f"Kendall's tau: {analyzer.kendall_corr}")
        print(f"Correlation:  {analyzer.correlation}")
        print(analyzer.corr_matrix)

In [51]:
analyze_griffex('resets.csv', 'resets')

ValueError: You are trying to merge on period[M] and object columns. If you wish to proceed you should use pd.concat

In [None]:
analyze_griffex('data_mnt_usage.csv', '40379.median')

In [None]:
analyze_griffex('battery.csv', 'Battery')

In [None]:
analyze_griffex('memory_median.csv', '40379.median')

In [None]:
analyze_griffex('memory_free_median.csv', '40379.freemem')

In [None]:
analyze_griffex('bus_current.csv', 'Battery Bus')

In [None]:
analyze_griffex('bus_voltage.csv', 'Battery Bus')

In [None]:
analyze_griffex('rssi.csv', '40379.min')