In [1]:
import pandas as pd

In [27]:
class MOACalculator:
    """Score the separation between two samples of the same variable.

    The score returned by :meth:`calc` is the difference of the two sample
    means divided by the standard deviation of both samples taken together.

    Attributes:
        label_series: values belonging to the class of interest.
        versus_series: values belonging to the class it is compared against.
    """

    def __init__(self, label_series: pd.Series, versus_series: pd.Series):
        self.label_series = label_series
        self.versus_series = versus_series

    @property
    def label_mean(self):
        """Mean of ``label_series``."""
        labelled = self.label_series
        return labelled.mean()

    @property
    def versus_mean(self):
        """Mean of ``versus_series``."""
        opposing = self.versus_series
        return opposing.mean()

    @property
    def population_std(self):
        """Standard deviation of both series concatenated into one sample.

        Uses ``Series.std`` with its default arguments.
        """
        pooled = pd.concat([self.label_series, self.versus_series])
        return pooled.std()

    def calc(self):
        """Return ``(label_mean - versus_mean) / population_std``."""
        mean_gap = self.label_mean - self.versus_mean
        spread = self.population_std
        return mean_gap / spread


In [28]:
# Read the raw samples CSV (path is relative to the notebook's working directory).
df = pd.read_csv("./data/raw/samples-124.csv")
# Bare expression so the notebook renders the loaded frame as the cell output.
df

Unnamed: 0,system:index,Brightness,Brightness_1,Brightness_2,Elevation,GaussianCurvature,Greenness,Greenness_1,Greenness_2,HH,...,cos_3_coeff,isTraining,phase_1,phase_2,phase_3,sin_1_coeff,sin_2_coeff,sin_3_coeff,t_coeff,.geo
0,0_0,2219.2740,3644.3118,2983.1853,183.128712,-1.111947e-07,1383.5968,2389.0840,1532.5927,5351.000000,...,0.508938,1,-0.569459,2.017689,1.817476,0.406582,0.457142,0.483894,0.501836,"{""type"":""MultiPoint"",""coordinates"":[]}"
1,1_0,326.8218,783.0737,752.4232,114.561850,-3.356085e-08,9.3434,333.9811,271.9726,2170.222222,...,0.505032,1,-0.949492,0.071245,2.007185,0.360472,0.524289,0.460664,0.493707,"{""type"":""MultiPoint"",""coordinates"":[]}"
2,2_0,3032.6624,2831.0404,2846.3911,87.621972,-1.036272e-07,1501.6973,1629.3110,1607.4224,6217.555556,...,0.505744,1,-0.572481,1.427724,1.871911,0.450827,0.496408,0.486329,0.501026,"{""type"":""MultiPoint"",""coordinates"":[]}"
3,3_0,3698.9892,3375.2343,3135.9292,93.100778,-1.970626e-07,2570.6233,2576.7657,2421.1097,5447.444444,...,0.500680,1,-0.763917,1.238851,2.053983,0.445311,0.503122,0.479773,0.499924,"{""type"":""MultiPoint"",""coordinates"":[]}"
4,4_0,329.4975,339.3223,241.6455,160.380712,1.447277e-07,-23.9396,56.7423,66.7478,1600.111111,...,0.503306,1,-1.011212,-1.001987,1.982560,0.352237,0.428258,0.481460,0.503251,"{""type"":""MultiPoint"",""coordinates"":[]}"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5591,5643_0,3817.1554,3448.5995,3201.4238,177.161920,6.706069e-09,2252.9583,2604.5884,2298.4458,5090.888889,...,0.498468,0,-0.604765,0.797488,-0.879256,0.404026,0.509490,0.496198,0.503228,"{""type"":""MultiPoint"",""coordinates"":[]}"
5592,5644_0,2987.3919,2913.7741,2855.2119,100.167942,9.204048e-09,1705.2172,1983.9280,1847.6640,7395.666667,...,0.504025,0,-0.619140,0.928427,1.921965,0.439265,0.518652,0.486880,0.502250,"{""type"":""MultiPoint"",""coordinates"":[]}"
5593,5645_0,3737.0335,3203.0217,3176.7917,153.829762,-2.672850e-07,719.2948,1292.1885,976.3423,7850.333333,...,0.509546,0,-0.728614,-0.232904,1.933536,0.428897,0.500848,0.466104,0.503478,"{""type"":""MultiPoint"",""coordinates"":[]}"
5594,5646_0,3401.2935,3259.6997,2886.5738,47.552816,3.426760e-07,2347.6762,2396.6767,1922.7331,6598.444444,...,0.506532,0,-0.604742,1.195143,1.759720,0.437927,0.504844,0.490891,0.499392,"{""type"":""MultiPoint"",""coordinates"":[]}"


In [36]:
# 'VV' values split by class: rows with class_name == 1 form the label sample,
# rows with class_name == 2 form the sample it is compared against.
label_series = df.loc[df["class_name"] == 1, 'VV']
versus_series = df.loc[df["class_name"] == 2, 'VV']

In [30]:
# Calculator for the two 'VV' samples selected above; call .calc() for the score.
moa_calculator = MOACalculator(
    label_series=label_series,
    versus_series=versus_series,
)

In [36]:
from itertools import combinations
class MOATable:
    """Holds a table of MOA results and ranks its rows by score.

    Attributes:
        table: DataFrame with (at least) a ``"moa"`` column. After
            :meth:`rank_scores` it is sorted by ``"moa"`` descending, has a
            fresh 0..n-1 index and a 1-based ``"rank"`` column.
    """

    def __init__(self, df: pd.DataFrame):
        """Store the results table.

        Args:
            df: a DataFrame, or anything the ``pd.DataFrame`` constructor
                accepts (for example a dict of equal-length column lists).
        """
        # A real DataFrame is kept as-is; other inputs are converted so that
        # the DataFrame methods used by rank_scores are always available.
        self.table = df if isinstance(df, pd.DataFrame) else pd.DataFrame(df)

    def rank_scores(self):
        """Sort the table by ``"moa"`` (highest first) and number the rows.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            KeyError: if the table has no ``"moa"`` column.
        """
        # sort_values returns a new frame, so the result must be re-bound;
        # sorting a temporary and discarding it would leave the ranks attached
        # to rows in their original, unsorted order.
        self.table = (
            self.table
            .sort_values(by="moa", ascending=False)
            .reset_index(drop=True)
        )
        self.table['rank'] = list(range(1, len(self.table) + 1))
        return self
    

class MOAScoresCreator:
    """Placeholder class: construction takes no arguments and sets no state."""

    def __init__(self):
        # Intentionally empty; nothing is implemented here yet.
        pass



# Alias used in annotations to mark arguments that carry a DataFrame column name.
ColumnName = str


class CalculateMOA:
    """Compute an MOA score for every numeric predictor of a two-class table.

    For each numeric column other than the label column, the rows are split by
    the two classes found in the label column and the two samples are scored
    with ``MOACalculator``. The results are returned as a ranked table.

    Attributes:
        input_df: the table holding the label column and the predictors.
        label_col: name of the column whose two distinct values define the classes.
    """

    class MOATable(pd.DataFrame):
        """DataFrame of MOA results that can rank its own rows by score."""

        def __init__(self, df: pd.DataFrame):
            """Build the table from ``df`` (anything ``pd.DataFrame`` accepts)."""
            # The table *is* the frame, so no extra reference to ``df`` is kept:
            # assigning a frame to a new attribute of a DataFrame subclass makes
            # pandas emit its "columns via attribute" warning.
            super().__init__(df)

        def rank_scores(self):
            """Sort in place by ``"moa"`` (highest first) and add a 1-based ``"rank"``.

            Returns:
                ``self``, now sorted, re-indexed 0..n-1 and carrying ``"rank"``.
            """
            # The sort has to be applied to this object itself; sorting a copy
            # and discarding it would attach the ranks to unsorted rows.
            self.sort_values(by="moa", ascending=False, inplace=True, ignore_index=True)
            self['rank'] = list(range(1, len(self) + 1))
            return self

    def __init__(self, input_df: pd.DataFrame, label_col: ColumnName):
        """ Calculates the MOA for each predictor in the input dataframe against the label column """
        self.input_df = input_df
        self.label_col = label_col
        self._data = {"label": [], "versus": [], "predictor": [], "moa": []}

    @staticmethod
    def _validate_input_df(input_df: pd.DataFrame, label_col):
        """Check that ``input_df`` can be scored against ``label_col``.

        Raises:
            TypeError: ``input_df`` is not a DataFrame.
            ValueError: ``input_df`` is empty, or ``label_col`` does not hold
                exactly two distinct values (missing values are ignored).
            KeyError: ``label_col`` is not a column of ``input_df``.
        """
        if not isinstance(input_df, pd.DataFrame):
            raise TypeError("input_df must be a pandas DataFrame")
        if input_df.empty:
            raise ValueError("input_df must not be empty")
        if label_col not in input_df.columns:
            raise KeyError(f"label_col {label_col!r} is not a column of input_df")

        # Rows with a missing label belong to neither class, so they are not
        # counted as a class of their own.
        if len(input_df[label_col].dropna().unique()) != 2:
            raise ValueError("label_col must have exactly two unique values")

    def run_calcs(self):
        """Score every numeric predictor and return the ranked results.

        The first class value to appear in the label column is reported as
        ``"label"`` and the other as ``"versus"``; each score is computed as
        label sample versus the other sample. Non-numeric columns are skipped
        because a mean cannot be taken of them.

        Returns:
            A ``CalculateMOA.MOATable`` with columns ``label``, ``versus``,
            ``predictor``, ``moa`` and ``rank``, sorted by ``moa`` descending.

        Raises:
            TypeError, ValueError, KeyError: see ``_validate_input_df``.
        """
        self._validate_input_df(self.input_df, self.label_col)

        class_values = self.input_df[self.label_col]
        # Validation guarantees exactly two non-missing classes.
        label, versus = class_values.dropna().unique()
        label_rows = self.input_df[class_values == label]
        versus_rows = self.input_df[class_values == versus]

        # Start from empty accumulators so that calling run_calcs again does
        # not append a second copy of every row.
        self._data = {"label": [], "versus": [], "predictor": [], "moa": []}

        for predictor in self.input_df.columns:
            if predictor == self.label_col:
                continue
            if not pd.api.types.is_numeric_dtype(self.input_df[predictor]):
                continue
            moa_calculator = MOACalculator(label_rows[predictor], versus_rows[predictor])
            self._data["label"].append(label)
            self._data["versus"].append(versus)
            self._data["predictor"].append(predictor)
            self._data["moa"].append(moa_calculator.calc())

        moa_table = self.MOATable(pd.DataFrame(self._data))
        return moa_table.rank_scores()
    
            
