### Random forest feature selection

In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Read your data
df = pd.read_csv('./data/processed_mrna_zscore.csv')
X = df.iloc[:, :-2]
y = df.iloc[:, -1]


def evaluate_features(X, y, selected_features, classifier, k_folds=10):
    skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

    weighted_accuracies = []

    for i, (train_index, test_index) in enumerate(skf.split(X, y)):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Train the classifier on the selected features
        classifier.fit(X_train[selected_features], y_train)

        # Make predictions on the test set
        y_pred = classifier.predict(X_test[selected_features])

        # Calculate and store the weighted accuracy
        weighted_accuracy = accuracy_score(y_test, y_pred, sample_weight=None)
        weighted_accuracies.append(weighted_accuracy)

        # Print class-wise accuracies for each fold
        class_report = classification_report(y_test, y_pred, output_dict=True)
        print(f"\nFold {i + 1} - Class-wise Accuracies:")
        print(class_report)

        print(f"Fold {i + 1} : {weighted_accuracy}")

    # Calculate the average weighted accuracy
    average_weighted_accuracy = np.mean(weighted_accuracies)
    print("Average Weighted Accuracy:", average_weighted_accuracy)

    return average_weighted_accuracy

def feature_selection_random_forest(X, y, n_estimators=100, top_n=15, k_folds=10):
    skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

    # Initialize RandomForestClassifier
    rf_classifier = RandomForestClassifier(n_estimators=n_estimators, n_jobs=-1)

    feature_importances = np.zeros(X.shape[1])

    for train_index, test_index in skf.split(X, y):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Fit the Random Forest model
        rf_classifier.fit(X_train, y_train)

        # Accumulate feature importances
        feature_importances += rf_classifier.feature_importances_

    # Average feature importances across folds
    feature_importances /= k_folds

    # Get indices of top N features
    top_n_indices = np.argsort(feature_importances)[::-1][:top_n]

#     # Convert top_features to a DataFrame
#     top_features = pd.DataFrame(X.columns[top_n_indices], columns=['Top Features'])

    # Print and return the top features
    top_features = X.columns[top_n_indices]
    print("Top features selected by Random Forest:")
    print(top_features)
    
#     # Save top features to a text file
#     top_features.to_csv('top_features_rf.txt', index=False, header=False)
     # Save top features to a text file
    np.savetxt(f'top_features_rf_{top_n}.txt', X.columns[top_n_indices], fmt='%s')


    return top_features

# Select top features using Random Forest
selected_features_rf = feature_selection_random_forest(X, y, n_estimators=100, top_n=100, k_folds=10)

# Evaluate the selected features using RandomForestClassifier
# accuracy_rf = evaluate_features(X, y, selected_features_rf, RandomForestClassifier(n_estimators=100, n_jobs=-1))
# print(f"Random Forest Accuracy with selected features: {accuracy_rf}")


Top features selected by Random Forest:
Index(['RAMP3', 'SLC35C1', 'DUSP13', 'CCDC170', 'KRT24', 'MELK', 'KRTAP5-3',
       'ADGRG1', 'TNS4', 'DYDC1', 'NOX5', 'GSDMB', 'DUOX2', 'RBM28', 'RORB',
       'WDR19', 'CYP4F3', 'PREX1', 'RPL6', 'INAVA', 'FAM187B', 'B4GALT5',
       'EPHX2', 'KCNE2', 'AADAC', 'C10orf129', 'FSIP1', 'IRF4', 'C12orf45',
       'FAM207A', 'ZNF646', 'MRC1L1', 'SOX11', 'ESR1', 'STXBP5', 'ZNF670',
       'GRHPR', 'RERG', 'FUT10', 'PGR', 'SKP2', 'SMARCE1', 'ADGRG6', 'PIGV',
       'FAM83D', 'TMEM144', 'OR2L2', 'C5', 'KRT75', 'NOLC1', 'SYT2', 'CA12',
       'PNPLA4', 'GOLGA8CP', 'PPP1R1B', 'ARSG', 'ACE2', 'ZNF224', 'SMYD5',
       'LONRF2', 'TMOD3', 'HGC6.3', 'BCL2', 'FBL', 'KLHDC2', 'ARIH1',
       'NR2F2-AS1', 'SPACA7', 'ANP32AP1', 'OR7A10', 'C3orf36', 'OR8G2',
       'NUP155', 'SUSD3', 'PLXDC2', 'AGR3', 'WFDC3', 'ACADSB', 'APTX',
       'ZNF736', 'CDCA7', 'OR52N1', 'DEGS2', 'CCT5', 'RIPK4', 'CDCA8', 'TEX2',
       'C11orf91', 'PDCD6IP', 'SMAD7', 'CENPA', 'CRABP1', 'Y

### Select K Feature Selection

In [8]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.metrics import accuracy_score, classification_report

# Read your data
df = pd.read_csv('processed_mrna_zscore.csv')
X = df.iloc[:, :-2]
y = df.iloc[:, -1]


def evaluate_features(X, y, selected_features, classifier, k_folds=10):
    skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

    weighted_accuracies = []

    for i, (train_index, test_index) in enumerate(skf.split(X, y)):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Train the classifier on the selected features
        classifier.fit(X_train[selected_features], y_train)

        # Make predictions on the test set
        y_pred = classifier.predict(X_test[selected_features])

        # Calculate and store the weighted accuracy
        weighted_accuracy = accuracy_score(y_test, y_pred, sample_weight=None)
        weighted_accuracies.append(weighted_accuracy)

        # Print class-wise accuracies for each fold
        class_report = classification_report(y_test, y_pred, output_dict=True)
        print(f"\nFold {i + 1} - Class-wise Accuracies:")
        print(class_report)

        print(f"Fold {i + 1} : {weighted_accuracy}")

    # Calculate the average weighted accuracy
    average_weighted_accuracy = np.mean(weighted_accuracies)
    print("Average Weighted Accuracy:", average_weighted_accuracy)

    return average_weighted_accuracy

def feature_selection_select_k_best(X, y, k=15, k_folds=10):
    skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

    # Initialize SelectKBest with f_classif
    k_best = SelectKBest(score_func=f_classif, k=k)

    selected_features = []

    for train_index, test_index in skf.split(X, y):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Fit SelectKBest on the training data
        k_best.fit(X_train, y_train)

        # Get the indices of the selected features
        selected_indices = np.where(k_best.get_support())[0]

        # Accumulate selected features across folds
        selected_features.extend(selected_indices)

    # Get the unique selected features
    selected_features = np.unique(selected_features)

    # Print and return the top features
    top_features = X.columns[selected_features]
    print(f"Top {k} features selected by SelectKBest:")
    print(top_features)

    np.savetxt('top_features_SelK.txt', X.columns[selected_features], fmt='%s')


    return top_features

# Select top features using SelectKBest
selected_features_k_best = feature_selection_select_k_best(X, y, k=15, k_folds=10)

# Evaluate the selected features using RandomForestClassifier
accuracy_k_best = evaluate_features(X, y, selected_features_k_best, RandomForestClassifier(n_estimators=100, n_jobs=-1))
print(f"SelectKBest Accuracy with selected features: {accuracy_k_best}")


Top 15 features selected by SelectKBest:
Index(['FAM83D', 'MELK', 'RABEP1', 'ECE2', 'CENPW', 'KCNG1', 'ZNF670', 'SUSD3',
       'MID1', 'CDCA7', 'MAPT', 'PDSS1', 'WDR43', 'SOX11', 'ESR1', 'PREX1',
       'CCNE1', 'IL6ST', 'CENPL', 'BCL2', 'PLK1', 'FGD3', 'AGR3', 'RIPK4',
       'GATA3', 'FBP1', 'CA9', 'WDR19', 'INAVA'],
      dtype='object')


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Fold 1 - Class-wise Accuracies:
{'long': {'precision': 0.868020304568528, 'recall': 1.0, 'f1-score': 0.9293478260869565, 'support': 171}, 'medium': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 21}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}, 'accuracy': 0.868020304568528, 'macro avg': {'precision': 0.2893401015228427, 'recall': 0.3333333333333333, 'f1-score': 0.30978260869565216, 'support': 197}, 'weighted avg': {'precision': 0.75345924914324, 'recall': 0.868020304568528, 'f1-score': 0.8066927830500993, 'support': 197}}
Fold 1 : 0.868020304568528


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Fold 2 - Class-wise Accuracies:
{'long': {'precision': 0.8717948717948718, 'recall': 0.9941520467836257, 'f1-score': 0.9289617486338798, 'support': 171}, 'medium': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 21}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}, 'accuracy': 0.8629441624365483, 'macro avg': {'precision': 0.2905982905982906, 'recall': 0.3313840155945419, 'f1-score': 0.3096539162112933, 'support': 197}, 'weighted avg': {'precision': 0.7567356501366653, 'recall': 0.8629441624365483, 'f1-score': 0.8063576599816926, 'support': 197}}
Fold 2 : 0.8629441624365483


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Fold 3 - Class-wise Accuracies:
{'long': {'precision': 0.868020304568528, 'recall': 1.0, 'f1-score': 0.9293478260869565, 'support': 171}, 'medium': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 21}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}, 'accuracy': 0.868020304568528, 'macro avg': {'precision': 0.2893401015228427, 'recall': 0.3333333333333333, 'f1-score': 0.30978260869565216, 'support': 197}, 'weighted avg': {'precision': 0.75345924914324, 'recall': 0.868020304568528, 'f1-score': 0.8066927830500993, 'support': 197}}
Fold 3 : 0.868020304568528


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Fold 4 - Class-wise Accuracies:
{'long': {'precision': 0.8629441624365483, 'recall': 1.0, 'f1-score': 0.9264305177111716, 'support': 170}, 'medium': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}, 'accuracy': 0.8629441624365483, 'macro avg': {'precision': 0.2876480541455161, 'recall': 0.3333333333333333, 'f1-score': 0.3088101725703905, 'support': 197}, 'weighted avg': {'precision': 0.7446726274833158, 'recall': 0.8629441624365483, 'f1-score': 0.7994578071619247, 'support': 197}}
Fold 4 : 0.8629441624365483


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Fold 5 - Class-wise Accuracies:
{'long': {'precision': 0.865979381443299, 'recall': 0.9882352941176471, 'f1-score': 0.9230769230769231, 'support': 170}, 'medium': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4}, 'accuracy': 0.8571428571428571, 'macro avg': {'precision': 0.288659793814433, 'recall': 0.32941176470588235, 'f1-score': 0.3076923076923077, 'support': 196}, 'weighted avg': {'precision': 0.7511045655375553, 'recall': 0.8571428571428571, 'f1-score': 0.8006279434850864, 'support': 196}}
Fold 5 : 0.8571428571428571


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Fold 6 - Class-wise Accuracies:
{'long': {'precision': 0.8673469387755102, 'recall': 1.0, 'f1-score': 0.9289617486338798, 'support': 170}, 'medium': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4}, 'accuracy': 0.8673469387755102, 'macro avg': {'precision': 0.2891156462585034, 'recall': 0.3333333333333333, 'f1-score': 0.3096539162112933, 'support': 196}, 'weighted avg': {'precision': 0.7522907122032486, 'recall': 0.8673469387755102, 'f1-score': 0.8057321289171406, 'support': 196}}
Fold 6 : 0.8673469387755102


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Fold 7 - Class-wise Accuracies:
{'long': {'precision': 0.8673469387755102, 'recall': 1.0, 'f1-score': 0.9289617486338798, 'support': 170}, 'medium': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4}, 'accuracy': 0.8673469387755102, 'macro avg': {'precision': 0.2891156462585034, 'recall': 0.3333333333333333, 'f1-score': 0.3096539162112933, 'support': 196}, 'weighted avg': {'precision': 0.7522907122032486, 'recall': 0.8673469387755102, 'f1-score': 0.8057321289171406, 'support': 196}}
Fold 7 : 0.8673469387755102


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Fold 8 - Class-wise Accuracies:
{'long': {'precision': 0.8673469387755102, 'recall': 1.0, 'f1-score': 0.9289617486338798, 'support': 170}, 'medium': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4}, 'accuracy': 0.8673469387755102, 'macro avg': {'precision': 0.2891156462585034, 'recall': 0.3333333333333333, 'f1-score': 0.3096539162112933, 'support': 196}, 'weighted avg': {'precision': 0.7522907122032486, 'recall': 0.8673469387755102, 'f1-score': 0.8057321289171406, 'support': 196}}
Fold 8 : 0.8673469387755102


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Fold 9 - Class-wise Accuracies:
{'long': {'precision': 0.8762886597938144, 'recall': 1.0, 'f1-score': 0.9340659340659341, 'support': 170}, 'medium': {'precision': 0.5, 'recall': 0.045454545454545456, 'f1-score': 0.08333333333333334, 'support': 22}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4}, 'accuracy': 0.8724489795918368, 'macro avg': {'precision': 0.4587628865979381, 'recall': 0.34848484848484845, 'f1-score': 0.3391330891330891, 'support': 196}, 'weighted avg': {'precision': 0.8161687355354512, 'recall': 0.8724489795918368, 'f1-score': 0.8195129700231741, 'support': 196}}
Fold 9 : 0.8724489795918368

Fold 10 - Class-wise Accuracies:
{'long': {'precision': 0.8673469387755102, 'recall': 1.0, 'f1-score': 0.9289617486338798, 'support': 170}, 'medium': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 21}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}, 'accuracy': 0.8673469387755102, 'macro avg': {'precision': 0.28911

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Feature Agglomeration

In [10]:
import pandas as pd
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.cluster import FeatureAgglomeration

# Read your data
df = pd.read_csv('processed_mrna_zscore.csv')
X = df.iloc[:, :-2]
y = df.iloc[:, -1]

def evaluate_features(X, y, selected_features, classifier, k_folds=10):
    skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

    weighted_accuracies = []

    for i, (train_index, test_index) in enumerate(skf.split(X, y)):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Train the classifier on the selected features
        classifier.fit(X_train[selected_features], y_train)

        # Make predictions on the test set
        y_pred = classifier.predict(X_test[selected_features])

        # Calculate and store the weighted accuracy
        weighted_accuracy = accuracy_score(y_test, y_pred, sample_weight=None)
        weighted_accuracies.append(weighted_accuracy)

        # Print class-wise accuracies for each fold
        class_report = classification_report(y_test, y_pred, output_dict=True)
        print(f"\nFold {i + 1} - Class-wise Accuracies:")
        print(class_report)

        print(f"Fold {i + 1} : {weighted_accuracy}")

    # Calculate the average weighted accuracy
    average_weighted_accuracy = np.mean(weighted_accuracies)
    print("Average Weighted Accuracy:", average_weighted_accuracy)

    return average_weighted_accuracy

def feature_selection_feature_agglomeration(X, y, n_clusters=10, top_n=10, k_folds=10):
    skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

    # Initialize FeatureAgglomeration
    agglo = FeatureAgglomeration(n_clusters=n_clusters)

    selected_features = []

    for train_index, test_index in skf.split(X, y):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = y.iloc[train_index], y.iloc[test_index]

        # Fit FeatureAgglomeration on the training data
        X_train_transformed = agglo.fit_transform(X_train)
        X_test_transformed = agglo.transform(X_test)

        # Get the indices of the top N features
        top_n_indices = np.argsort(np.var(X_train_transformed, axis=0))[-top_n:]

        # Accumulate selected features across folds
        selected_features.extend(top_n_indices)

    # Get the unique selected features
    selected_features = np.unique(selected_features)

    
    # Print and return the top features
    top_features = X.columns[selected_features]
    print(f"Top {top_n} features selected by Feature Agglomeration:")
    print(top_features)

    np.savetxt('top_features_aggl.txt', X.columns[selected_features], fmt='%s')

    return top_features

# Select top features using Feature Agglomeration
selected_features_agglo = feature_selection_feature_agglomeration(X, y, n_clusters=10, top_n=10, k_folds=10)

# Evaluate the selected features using RandomForestClassifier
accuracy_agglo = evaluate_features(X, y, selected_features_agglo, RandomForestClassifier(n_estimators=100, n_jobs=-1))
print(f"Feature Agglomeration Accuracy with selected features: {accuracy_agglo}")


Top 100 features selected by Feature Agglomeration:
Index(['RERE', 'RNF165', 'PHF7', 'CIDEA', 'TENT2', 'SLC17A3', 'SDS',
       'ATP6V1C2', 'F3', 'FAM71C'],
      dtype='object')


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Fold 1 - Class-wise Accuracies:
{'long': {'precision': 0.868020304568528, 'recall': 1.0, 'f1-score': 0.9293478260869565, 'support': 171}, 'medium': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 21}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}, 'accuracy': 0.868020304568528, 'macro avg': {'precision': 0.2893401015228427, 'recall': 0.3333333333333333, 'f1-score': 0.30978260869565216, 'support': 197}, 'weighted avg': {'precision': 0.75345924914324, 'recall': 0.868020304568528, 'f1-score': 0.8066927830500993, 'support': 197}}
Fold 1 : 0.868020304568528


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Fold 2 - Class-wise Accuracies:
{'long': {'precision': 0.868020304568528, 'recall': 1.0, 'f1-score': 0.9293478260869565, 'support': 171}, 'medium': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 21}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}, 'accuracy': 0.868020304568528, 'macro avg': {'precision': 0.2893401015228427, 'recall': 0.3333333333333333, 'f1-score': 0.30978260869565216, 'support': 197}, 'weighted avg': {'precision': 0.75345924914324, 'recall': 0.868020304568528, 'f1-score': 0.8066927830500993, 'support': 197}}
Fold 2 : 0.868020304568528

Fold 3 - Class-wise Accuracies:
{'long': {'precision': 0.868020304568528, 'recall': 1.0, 'f1-score': 0.9293478260869565, 'support': 171}, 'medium': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 21}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}, 'accuracy': 0.868020304568528, 'macro avg': {'precision': 0.2893401015228427, 'recall': 0.3333333333333333,

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Fold 4 - Class-wise Accuracies:
{'long': {'precision': 0.8629441624365483, 'recall': 1.0, 'f1-score': 0.9264305177111716, 'support': 170}, 'medium': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}, 'accuracy': 0.8629441624365483, 'macro avg': {'precision': 0.2876480541455161, 'recall': 0.3333333333333333, 'f1-score': 0.3088101725703905, 'support': 197}, 'weighted avg': {'precision': 0.7446726274833158, 'recall': 0.8629441624365483, 'f1-score': 0.7994578071619247, 'support': 197}}
Fold 4 : 0.8629441624365483

Fold 5 - Class-wise Accuracies:
{'long': {'precision': 0.8673469387755102, 'recall': 1.0, 'f1-score': 0.9289617486338798, 'support': 170}, 'medium': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4}, 'accuracy': 0.8673469387755102, 'macro avg': {'precision': 0.2891156462585034, 'recall': 0.3333333333

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Fold 6 - Class-wise Accuracies:
{'long': {'precision': 0.8666666666666667, 'recall': 0.9941176470588236, 'f1-score': 0.9260273972602739, 'support': 170}, 'medium': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4}, 'accuracy': 0.8622448979591837, 'macro avg': {'precision': 0.2888888888888889, 'recall': 0.33137254901960783, 'f1-score': 0.30867579908675796, 'support': 196}, 'weighted avg': {'precision': 0.7517006802721089, 'recall': 0.8622448979591837, 'f1-score': 0.8031870282359519, 'support': 196}}
Fold 6 : 0.8622448979591837


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Fold 7 - Class-wise Accuracies:
{'long': {'precision': 0.8673469387755102, 'recall': 1.0, 'f1-score': 0.9289617486338798, 'support': 170}, 'medium': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4}, 'accuracy': 0.8673469387755102, 'macro avg': {'precision': 0.2891156462585034, 'recall': 0.3333333333333333, 'f1-score': 0.3096539162112933, 'support': 196}, 'weighted avg': {'precision': 0.7522907122032486, 'recall': 0.8673469387755102, 'f1-score': 0.8057321289171406, 'support': 196}}
Fold 7 : 0.8673469387755102


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Fold 8 - Class-wise Accuracies:
{'long': {'precision': 0.8673469387755102, 'recall': 1.0, 'f1-score': 0.9289617486338798, 'support': 170}, 'medium': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4}, 'accuracy': 0.8673469387755102, 'macro avg': {'precision': 0.2891156462585034, 'recall': 0.3333333333333333, 'f1-score': 0.3096539162112933, 'support': 196}, 'weighted avg': {'precision': 0.7522907122032486, 'recall': 0.8673469387755102, 'f1-score': 0.8057321289171406, 'support': 196}}
Fold 8 : 0.8673469387755102


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))



Fold 9 - Class-wise Accuracies:
{'long': {'precision': 0.8673469387755102, 'recall': 1.0, 'f1-score': 0.9289617486338798, 'support': 170}, 'medium': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 22}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 4}, 'accuracy': 0.8673469387755102, 'macro avg': {'precision': 0.2891156462585034, 'recall': 0.3333333333333333, 'f1-score': 0.3096539162112933, 'support': 196}, 'weighted avg': {'precision': 0.7522907122032486, 'recall': 0.8673469387755102, 'f1-score': 0.8057321289171406, 'support': 196}}
Fold 9 : 0.8673469387755102

Fold 10 - Class-wise Accuracies:
{'long': {'precision': 0.8711340206185567, 'recall': 0.9941176470588236, 'f1-score': 0.9285714285714286, 'support': 170}, 'medium': {'precision': 0.5, 'recall': 0.047619047619047616, 'f1-score': 0.08695652173913042, 'support': 21}, 'short': {'precision': 0.0, 'recall': 0.0, 'f1-score': 0.0, 'support': 5}, 'accuracy': 0.8673469387755102, 'macro avg': {'preci

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
