In [1]:
import fcalc
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

## DATAFRAME CONSTRUCTION

In [2]:
# First transcription of the table. Each entry is one whitespace-separated
# row and the first entry is the header. Several values here have no decimal
# point ('28', '19', '17', '44', '48'), while `lines2` below has 2.8, 1.9,
# 1.7, 4.4 and 4.8 for the same rows.
lines = ['No  System  mount  price  con  snow  ice  dur  Accegrade',
 '1 SK F 206 1.9 1.4 1.8  2.7 F',
 '2 SRK For R  520  2.1 0.8 3.8  2.3 F',
 '3 SK F 160  1.7 1.9 1.6  3.7 F',
 '4 SK F 213  1.7 2.0  2.4  3.4 F',
 '5 SMS For R  598 1.6 2.4 7  28 F',
 '6 SK F 109  2.0 19  2.4  3.7 F',
 '7 SRK For R  325  2.0 2.1 3.2  2.8 F',
 '8 SMS For R  498 1.5 3.3 3.5  2.0 T',
 '9 SRK For R  396  2.8 2.1 3.1  2.5 T',
 '10 SRK For R  325  2.2 2.2  4.6  3.2 T',
 '11 SRK For R  389  2.0 2.2 3.3  4.3 T',
 '12 SRK F 298  2.5 2.3 3.3  2.8 T',
 '13 SK F 149 1.9 2.5 4.0  3.8 T',
 '14 SMS For R  684  17 3.3 44  2.2 T',
 '15 SK F 99 2.8 2.2  2.5  4.0 T',
 '16 SK F 140  2.6 2.3 3.3  3.4 T',
 '17 SK F 215  2.3 3.8  48  2.3 T']

# Second transcription: decimal points are present, fields are separated by
# single spaces, and some rows sit at different positions (rows 1/13 and 3/10
# are exchanged relative to `lines`).
# NOTE(review): row 3 repeats row 11, and the `lines` row
# '325 2.2 2.2 4.6 3.2 T' does not appear here -- confirm this is intended.
lines2 = ['No System mount price con snow ice dur Accegrade',
'1 SK F 149 1.9 2.5 4.0 3.8 T',
'2 SRK For R 520 2.1 0.8 3.8 2.3 F',
'3 SRK For R 389 2.0 2.2 3.3 4.3 T',
'4 SK F 213 1.7 2.0 2.4 3.4 F',
'5 SMS For R 598 1.6 2.4 7 2.8 F',
'6 SK F 109 2.0 1.9 2.4 3.7 F',
'7 SRK For R 325 2.0 2.1 3.2 2.8 F',
'8 SMS For R 498 1.5 3.3 3.5 2.0 T',
'9 SRK For R 396 2.8 2.1 3.1 2.5 T',
'10 SK F 160 1.7 1.9 1.6 3.7 F',
'11 SRK For R 389 2.0 2.2 3.3 4.3 T',
'12 SRK F 298 2.5 2.3 3.3 2.8 T',
'13 SK F 206 1.9 1.4 1.8 2.7 F',
'14 SMS For R 684 1.7 3.3 4.4 2.2 T',
'15 SK F 99 2.8 2.2 2.5 4.0 T',
'16 SK F 140 2.6 2.3 3.3 3.4 T',
'17 SK F 215 2.3 3.8 4.8 2.3 T']

In [3]:
def dataframe_const(lines):
    """Parse whitespace-separated table rows into a DataFrame.

    Parameters
    ----------
    lines : list of str
        Table rows as text. The first entry is treated as a header and
        skipped; blank entries are ignored.

    Returns
    -------
    pandas.DataFrame
        One row per parsed line with the columns ``No, System, mount, price,
        con, snow, ice, dur, Accgrade``. All values are kept as strings.

    Raises
    ------
    IndexError
        If a non-blank row has too few fields.

    Notes
    -----
    * The mount value ``For R`` is split into two tokens by ``str.split`` and
      is merged back into the single value ``'F or R'``.
    * If the third field is not a mount value (``'F'`` or ``'F or R'``), the
      mount defaults to ``'F'`` and the five numeric fields are read one
      position earlier.
    * ``Accgrade`` is always the last field of the row.
    """
    columns = ["No", "System", "mount", "price", "con", "snow", "ice", "dur", "Accgrade"]
    mount_values = ('F', 'F or R')
    numeric_fields = ("price", "con", "snow", "ice", "dur")

    records = []
    for line in lines[1:]:
        tokens = line.split()
        if not tokens:
            # A blank line carries no row; skip it instead of failing on tokens[0].
            continue

        # Re-join the two-token mount value 'For' 'R' into 'F or R'.
        merged = []
        i = 0
        while i < len(tokens):
            if tokens[i] == 'For' and i + 1 < len(tokens) and tokens[i + 1] == 'R':
                merged.append('F or R')
                i += 2
            else:
                merged.append(tokens[i])
                i += 1

        has_mount = merged[2] in mount_values
        # Numeric fields start right after the mount field when it is present,
        # otherwise they start where the mount field would have been.
        first_numeric = 3 if has_mount else 2

        record = {
            "No": merged[0],
            "System": merged[1],
            "mount": merged[2] if has_mount else 'F',
        }
        for position, field in enumerate(numeric_fields):
            record[field] = merged[first_numeric + position]
        record["Accgrade"] = merged[-1]
        records.append(record)

    # Build the frame once: DataFrame.append was removed in pandas 2.0 and
    # growing a frame row by row copies it on every iteration.
    return pd.DataFrame(records, columns=columns)

In [None]:
# Parse both transcriptions of the table with the same parser.
df, df2 = (dataframe_const(raw_rows) for raw_rows in (lines, lines2))

In [5]:
# The row number is not used as a feature, so remove it from both frames.
# errors='ignore' keeps this cell re-runnable: without it a second execution
# raises KeyError because 'No' has already been dropped.
df = df.drop(columns='No', errors='ignore')
df2 = df2.drop(columns='No', errors='ignore')

In [7]:
# These columns were parsed as text; convert them to numbers in both frames.
num = ['price', 'con', 'snow', 'ice', 'dur']
for frame in (df, df2):
    frame[num] = frame[num].apply(pd.to_numeric)

## PROCESSING ALGORITHM

In [8]:
def split(df, train_size=14, test_size=3):
    """Split a frame positionally into train and test features and labels.

    The first ``train_size`` rows form the training set and the following
    ``test_size`` rows form the test set. Labels are ``True`` where the
    ``Accgrade`` column equals ``'T'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an ``Accgrade`` column.
    train_size : int, default 14
        Number of leading rows used for training.
    test_size : int, default 3
        Number of rows, directly after the training rows, used for testing.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)``; the ``X`` parts are frames
        without ``Accgrade`` and the ``y`` parts are boolean Series.

    Raises
    ------
    IndexError
        If ``df`` has fewer than ``train_size + test_size`` rows.
    """
    needed = train_size + test_size
    if len(df) < needed:
        # Slices would silently truncate; keep the failure explicit.
        raise IndexError(
            f"split needs at least {needed} rows, got {len(df)}"
        )

    train = df.iloc[:train_size]
    test = df.iloc[train_size:needed]

    y_train = train['Accgrade'] == 'T'
    y_test = test['Accgrade'] == 'T'
    X_train = train.drop('Accgrade', axis=1)
    X_test = test.drop('Accgrade', axis=1)

    return X_train, y_train, X_test, y_test

In [9]:
def est(df):
    """Fit the pattern classifier on the fixed split of ``df`` and print its accuracy.

    The frame is divided by ``split``; a ``PatternBinaryClassifier`` with
    ``method="standard-support"`` is built from the training part and asked
    to predict the test part. The accuracy, rounded to four decimals, is
    printed. Nothing is returned.
    """
    features_train, labels_train, features_test, labels_test = split(df)

    # Feature columns 0 and 1 are declared categorical
    # (presumably System and mount -- verify against the frame's column order).
    classifier = fcalc.classifier.PatternBinaryClassifier(
        features_train.values,
        labels_train.to_numpy(),
        categorical=np.array([0, 1]),
        method="standard-support",
    )
    # predict() is called for its side effect; results are read from .predictions.
    classifier.predict(features_test.values)

    score = accuracy_score(labels_test, classifier.predictions)
    print("accuracy:", round(score, 4))

In [11]:
# Evaluate the fixed hold-out split on both transcriptions of the table.
for dataset in (df, df2):
    est(dataset)

accuracy: 0.0
accuracy: 0.0


### Results
For first dataset with _method = "standard-support"_ accuracy is 0.333

For second dataset with _method = "standard-support"_ accuracy is 1.0

In [12]:
def cross_est(df, n_folds=5, fold_size=3):
    """Estimate accuracy over consecutive, equally sized test folds.

    Fold ``i`` uses rows ``i*fold_size .. (i+1)*fold_size - 1`` (by position)
    as the test set and every other row of ``df`` as the training set. A
    ``PatternBinaryClassifier`` with ``method="standard-support"`` is fitted
    per fold; the per-fold accuracies (rounded to four decimals) are printed
    and their mean is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing an ``Accgrade`` column; labels are ``Accgrade == 'T'``.
    n_folds : int, default 5
        Number of folds.
    fold_size : int, default 3
        Number of test rows per fold.

    Returns
    -------
    float
        Mean of the rounded per-fold accuracies.

    Raises
    ------
    ValueError
        If ``n_folds`` or ``fold_size`` is not positive.
    IndexError
        If ``df`` has fewer than ``n_folds * fold_size`` rows.

    Notes
    -----
    Only the first ``n_folds * fold_size`` rows are ever used as test rows;
    with the defaults and a 17-row frame the last two rows are train-only.
    """
    if n_folds < 1 or fold_size < 1:
        raise ValueError("n_folds and fold_size must be positive")
    if n_folds * fold_size > len(df):
        raise IndexError(
            f"cross_est needs at least {n_folds * fold_size} rows, got {len(df)}"
        )

    acc = []
    for fold in range(n_folds):
        # Select test and train rows with one positional mask. The previous
        # mix of iloc (positions) and drop (labels) only agreed when the
        # frame had a default 0..n-1 index.
        in_test = np.zeros(len(df), dtype=bool)
        in_test[fold * fold_size:(fold + 1) * fold_size] = True
        test = df.iloc[in_test]
        train = df.iloc[~in_test]

        y_train = train['Accgrade'] == 'T'
        y_test = test['Accgrade'] == 'T'
        X_train = train.drop('Accgrade', axis=1)
        X_test = test.drop('Accgrade', axis=1)

        # Feature columns 0 and 1 are declared categorical
        # (presumably System and mount -- verify against the frame's column order).
        pat_cls = fcalc.classifier.PatternBinaryClassifier(
            X_train.values,
            y_train.to_numpy(),
            categorical=np.array([0, 1]),
            method="standard-support",
        )
        # predict() is called for its side effect; results are read from .predictions.
        pat_cls.predict(X_test.values)
        acc.append(round(accuracy_score(y_test, pat_cls.predictions), 4))

    print("res: ", acc)
    return sum(acc) / len(acc)

In [14]:
# Cross-validated accuracy on the second transcription; cross_est prints the
# per-fold accuracies itself and returns their mean.
acc_df2 = cross_est(df2)
print(acc_df2)

res:  [0.3333, 0.0, 0.3333, 0.3333, 0.0]
0.19998


### Results with cross-validation on second dataset
Here I changed the `IntervalPattern` class in `patterns.py` in the following ways:
```python
class IntervalPattern:
    def __init__(self, test, train) -> None:
        self.low = np.minimum(test, train)
        self.high = np.maximum(test, train)
```

For the standard interval pattern

```python
class IntervalPattern:
    def __init__(self, test, train) -> None:
        self.low = np.minimum(test, train)
        self.high = np.full((self.low.size), 10**6)
```
        
For _(min, inf)_ interval pattern
        
        
```python
class IntervalPattern:
    def __init__(self, test, train) -> None:
        self.low = np.maximum(test, train)
        self.high = np.full((self.low.size), 10**6)
```
        
For _(max, inf)_ interval pattern


With the standard interval pattern accuracy is 0.66

With _(min, inf)_ interval pattern accuracy is 0.73

With _(max, inf)_ interval pattern accuracy is 0.19

As we can see, the _(min, inf)_ interval pattern is the most suitable. In my opinion, this happens because such an interval captures more data for review, resulting in improved performance.