In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from copy import copy

In [82]:
class LabelOrganizer:
    """Hold editable x/y label lists alongside reference copies for resetting.

    ``x_label_ref`` / ``y_label_ref`` keep the labels given at construction;
    ``x_label`` / ``y_label`` are working copies that ``remove_label`` edits
    and ``reset_label`` restores from the references.
    """

    def __init__(self, x_label, y_label, label_check=True):
        """Store the reference labels and initialise the working copies.

        Args:
            x_label: Sequence of x labels (a list, since labels are removed
                with ``list.remove``).
            y_label: Sequence of y labels.
            label_check: When truthy, print both working lists once set up.
        """
        # Snapshot the inputs so that later mutation of the caller's own lists
        # cannot change what reset_label() restores.
        self.x_label_ref = copy(x_label)
        self.y_label_ref = copy(y_label)
        self.reset_label("both")
        if label_check:
            self.check_labels()

    def check_labels(self):
        """Print the current working x and y label lists."""
        print(f"x-label -> {self.x_label}")
        print(f"y-label -> {self.y_label}")

    def select_label(self, label_name):
        """Return the live working list selected by ``label_name``.

        Args:
            label_name: ``"x-label"`` or ``"y-label"``.

        Returns:
            The working list itself (not a copy), so edits affect this object.

        Raises:
            ValueError: If ``label_name`` is neither of the accepted names.
        """
        if label_name == "x-label":
            return self.x_label
        if label_name == "y-label":
            return self.y_label
        # Previously this branch only printed and then crashed with an
        # UnboundLocalError on the return statement; fail explicitly instead.
        raise ValueError(
            f"LabelOrganizer.select_label: {label_name!r} is not defined "
            "(expected 'x-label' or 'y-label')."
        )

    def remove_label(self, label_name, drop_label):
        """Remove one label or several labels from a working list.

        Args:
            label_name: ``"x-label"`` or ``"y-label"``.
            drop_label: A single label given as ``str``, or an iterable of
                labels.

        Returns:
            The live working list after removal.

        Raises:
            ValueError: If ``label_name`` is unknown, or if a requested label
                is not present. In the latter case the working list is left
                unchanged (no partial removal).
        """
        label_list = self.select_label(label_name)
        # A bare string is one label, not an iterable of characters.
        to_drop = [drop_label] if isinstance(drop_label, str) else list(drop_label)

        # Work on a scratch copy so a missing label cannot leave the list
        # half-edited.
        remaining = list(label_list)
        for label in to_drop:
            if label not in remaining:
                raise ValueError(
                    f"LabelOrganizer.remove_label: {label!r} is not in {label_name}."
                )
            remaining.remove(label)

        # Update in place so the object returned is still the live list.
        label_list[:] = remaining
        return label_list

    def reset_label(self, label_name):
        """Restore working labels from the stored references.

        Args:
            label_name: ``"x-label"``, ``"y-label"`` or ``"both"``. Any other
                value prints an error message and changes nothing.
        """
        if label_name not in ("x-label", "y-label", "both"):
            print(f"Error: CorrelationAnalyzer.LabelOrganizer.reset_label: {label_name} is not defined.")
            return
        if label_name in ("x-label", "both"):
            self.x_label = copy(self.x_label_ref)
        if label_name in ("y-label", "both"):
            self.y_label = copy(self.y_label_ref)

    def get_labels(self):
        """Return shallow copies of the working ``(x_label, y_label)`` lists."""
        return copy(self.x_label), copy(self.y_label)
        

class CorrelationAnalyzer:
    """Load paired x/y CSV datasets and compute x-vs-y correlation tables.

    For every name in ``data_list`` the files ``{data_dir}/{name}_x.csv`` and
    ``{data_dir}/{name}_y.csv`` are read. The columns of the first dataset
    define the available x and y labels, which can then be narrowed through
    the label wrapper methods before calling ``create_result``.
    """

    def __init__(self, data_list, data_dir):
        """Read all datasets and set up the label organizer.

        Args:
            data_list: Dataset base names (file names without ``_x.csv`` /
                ``_y.csv``).
            data_dir: Directory containing the CSV files.
        """
        self.data_list = data_list
        self.data_dir = data_dir

        # Populated by create_result(): one entry per dataset.
        self.dataframe = []
        self.df_corr = []

        self.dataframe_x = []
        self.dataframe_y = []
        for data_name in self.data_list:
            # The first CSV column is used as the row index.
            data_x = pd.read_csv(f"{self.data_dir}/{data_name}_x.csv", index_col=0)
            data_y = pd.read_csv(f"{self.data_dir}/{data_name}_y.csv", index_col=0)
            self.dataframe_x.append(data_x)
            self.dataframe_y.append(data_y)

        # Labels are taken from the first dataset only.
        x_label = list(self.dataframe_x[0].columns)
        y_label = list(self.dataframe_y[0].columns)

        self.label_organizer = LabelOrganizer(x_label, y_label)

    def create_result(self):
        """Rebuild the joined frames and correlation tables for every dataset.

        Uses the currently selected labels (which are also printed). After the
        call, ``self.dataframe[i]`` is the selected x columns joined with the
        selected y columns of dataset ``i``, and ``self.df_corr[i]`` is the
        correlation table with y labels as rows and x labels as columns.

        Results are rebuilt from scratch on every call, so re-running after a
        label change replaces earlier results instead of appending to them.
        """
        x_label, y_label = self.label_organizer.get_labels()
        self.check_labels()

        joined_frames = []
        corr_frames = []
        for frame_x, frame_y in zip(self.dataframe_x, self.dataframe_y):
            joined = frame_x[x_label].join(frame_y[y_label])
            # Select the y-rows / x-columns block by label rather than by
            # position in the full correlation matrix.
            corr_frames.append(joined.corr().loc[y_label, x_label])
            joined_frames.append(joined)

        self.dataframe = joined_frames
        self.df_corr = corr_frames

    #  wrapper method of "Label Organizer"
    def check_labels(self):
        """Print the currently selected x and y labels."""
        self.label_organizer.check_labels()

    def remove_xlabel(self, drop_label, label_check=False):
        """Remove one label (str) or several (iterable) from the x labels.

        When ``label_check`` is truthy the labels are printed afterwards.
        """
        self.label_organizer.remove_label("x-label", drop_label)
        if label_check:
            self.label_organizer.check_labels()

    def remove_ylabel(self, drop_label, label_check=False):
        """Remove one label (str) or several (iterable) from the y labels.

        When ``label_check`` is truthy the labels are printed afterwards.
        """
        self.label_organizer.remove_label("y-label", drop_label)
        if label_check:
            self.label_organizer.check_labels()

    def reset_xlabel(self, label_check=False):
        """Restore the x labels to their initial set; optionally print labels."""
        self.label_organizer.reset_label("x-label")
        if label_check:
            self.label_organizer.check_labels()

    def reset_ylabel(self, label_check=False):
        """Restore the y labels to their initial set; optionally print labels."""
        self.label_organizer.reset_label("y-label")
        if label_check:
            self.label_organizer.check_labels()

    def reset_labels(self, label_check=False):
        """Restore both x and y labels to their initial sets; optionally print."""
        self.label_organizer.reset_label("both")
        if label_check:
            self.label_organizer.check_labels()
    
    
        

In [83]:
# Dataset base names; each one is loaded from "<data_dir>/<name>_x.csv" and
# "<data_dir>/<name>_y.csv" by CorrelationAnalyzer.
data_list = ["ms1a", "ms2a", "free"]
data_dir = "data"
corr_analyzer = CorrelationAnalyzer(data_list=data_list, data_dir=data_dir)

x-label -> ['x1[k]', 'x2[k]', 'x3[k]', 'x4[k]', 'd/dt x3[k]', 'd/dt x4[k]', 'voltage']
y-label -> ['x1[k+1]', 'x2[k+1]', 'x3[k+1]', 'x4[k+1]']


In [84]:
# Drop the "voltage" column from the x labels and print the remaining labels.
corr_analyzer.remove_xlabel("voltage", label_check=True)

x-label -> ['x1[k]', 'x2[k]', 'x3[k]', 'x4[k]', 'd/dt x3[k]', 'd/dt x4[k]']
y-label -> ['x1[k+1]', 'x2[k+1]', 'x3[k+1]', 'x4[k+1]']


In [85]:
# Build the joined frames and the x-vs-y correlation tables for every dataset
# using the currently selected labels (the labels are printed as a side effect).
corr_analyzer.create_result()

x-label -> ['x1[k]', 'x2[k]', 'x3[k]', 'x4[k]', 'd/dt x3[k]', 'd/dt x4[k]']
y-label -> ['x1[k+1]', 'x2[k+1]', 'x3[k+1]', 'x4[k+1]']


In [97]:
# Scratch list of generic names x0..x5; not referenced by the other visible cells.
label = ["x" + str(idx) for idx in range(6)]
label

['x0', 'x1', 'x2', 'x3', 'x4', 'x5']

In [99]:
# First row of the first dataset's correlation table, keeping only
# coefficients >= 0.7 (everything else is shown as NaN).
first_row = corr_analyzer.df_corr[0][0:1]
first_row.where(first_row >= 0.7)

Unnamed: 0,x1[k],x2[k],x3[k],x4[k],d/dt x3[k],d/dt x4[k]
x1[k+1],1.0,,,,,
