In [1]:
%run ../../import_src.py

import lymedata
from lymedata import *
import constants
from constants import *

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import train_test_split

from sklearn.svm import SVC
from sklearn.metrics import classification_report

In [6]:
for defn in [DEF_OWD, DEF_CNS1, DEF_CNS2, DEF_CNS3, DEF_PNS1, DEF_PNS2, DEF_PNS3]:
    print(f'Definition: {defn}')
    data = LymeData({CHRONIC, NEURO, MUSCULO},{ADDL_CIR},{NEURO, MUSCULO, BOTH, NEITHER}, individual_cols={'Bio Sex','Bed Days',
       'Mental Health Days', 'Physical Health Days', 'Disability'}, defn=defn, drop_99=True)
    print(data.df.shape)
    print(data.df.columns)
    
    X, Y = data.get_data_and_labels()
    
    scaler = StandardScaler()
    # X_scaled = scaler.fit_transform(X
    X_scaled = X
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X_scaled)
    
    print(f"Original feature space: {X.shape}")
    print(f"Reduced feature space: {X_pca.shape}")
    
    X_train, X_test, Y_train, Y_test = train_test_split(X_pca, Y, test_size=0.2, random_state=42)
    
    # classifier = OneVsRestClassifier(SVC(kernel="linear", probability=True))
    classifier = OneVsRestClassifier(SVC(kernel="rbf", probability=True))
    classifier.fit(X_train, Y_train)
    train_accuracy = classifier.score(X_train, Y_train)
    print(f'{defn} Train: {train_accuracy}')
    
    Y_pred = classifier.predict(X_test)
    print(classification_report(Y_test, Y_pred))
    
    train_accuracy = classifier.score(X_test, Y_test)
    print(f'{defn} Test: {train_accuracy}')


Definition: OWD


  df = pd.read_csv(csv_file)


Both Neuro and Mus: 1562
Only Neuro: 632
Only Mus: 963
Neither Neuro nor Mus: 806
Dropping 99
(1619, 9)
Index(['Bed Days', 'Physical Health Days', 'Mental Health Days', 'Bio Sex',
       'Disability', 'both', 'neither', 'musculo', 'neuro'],
      dtype='object')
Original feature space: (1619, 5)
Reduced feature space: (1619, 2)
OWD Train: 0.24864864864864866
              precision    recall  f1-score   support

           0       0.59      0.62      0.60       130
           1       0.00      0.00      0.00        48
           2       0.00      0.00      0.00        90
           3       0.00      0.00      0.00        56

   micro avg       0.59      0.25      0.35       324
   macro avg       0.15      0.16      0.15       324
weighted avg       0.24      0.25      0.24       324
 samples avg       0.25      0.25      0.25       324

OWD Test: 0.25
Definition: CNS1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  df = pd.read_csv(csv_file)


Both Neuro and Mus: 2259
Only Neuro: 310
Only Mus: 762
Neither Neuro nor Mus: 632
Dropping 99
(1619, 9)
Index(['Bed Days', 'Physical Health Days', 'Mental Health Days', 'Bio Sex',
       'Disability', 'both', 'neither', 'musculo', 'neuro'],
      dtype='object')
Original feature space: (1619, 5)
Reduced feature space: (1619, 2)
CNS1 Train: 0.4972972972972973
              precision    recall  f1-score   support

           0       0.73      0.88      0.80       199
           1       0.00      0.00      0.00        29
           2       0.00      0.00      0.00        71
           3       0.00      0.00      0.00        25

   micro avg       0.73      0.54      0.62       324
   macro avg       0.18      0.22      0.20       324
weighted avg       0.45      0.54      0.49       324
 samples avg       0.54      0.54      0.54       324

CNS1 Test: 0.5401234567901234
Definition: CNS2


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  df = pd.read_csv(csv_file)


Both Neuro and Mus: 1862
Only Neuro: 707
Only Mus: 762
Neither Neuro nor Mus: 632
Dropping 99
(1619, 9)
Index(['Bed Days', 'Physical Health Days', 'Mental Health Days', 'Bio Sex',
       'Disability', 'both', 'neither', 'musculo', 'neuro'],
      dtype='object')
Original feature space: (1619, 5)
Reduced feature space: (1619, 2)
CNS2 Train: 0.32123552123552124
              precision    recall  f1-score   support

           0       0.67      0.71      0.69       157
           1       0.00      0.00      0.00        29
           2       0.00      0.00      0.00        71
           3       0.00      0.00      0.00        67

   micro avg       0.67      0.35      0.46       324
   macro avg       0.17      0.18      0.17       324
weighted avg       0.32      0.35      0.33       324
 samples avg       0.35      0.35      0.35       324

CNS2 Test: 0.345679012345679
Definition: CNS3


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  df = pd.read_csv(csv_file)


Both Neuro and Mus: 1862
Only Neuro: 707
Only Mus: 528
Neither Neuro nor Mus: 866
Dropping 99
(1619, 9)
Index(['Bed Days', 'Physical Health Days', 'Mental Health Days', 'Bio Sex',
       'Disability', 'both', 'neither', 'musculo', 'neuro'],
      dtype='object')
Original feature space: (1619, 5)
Reduced feature space: (1619, 2)
CNS3 Train: 0.32123552123552124
              precision    recall  f1-score   support

           0       0.67      0.71      0.69       157
           1       0.00      0.00      0.00        57
           2       0.00      0.00      0.00        43
           3       0.00      0.00      0.00        67

   micro avg       0.67      0.35      0.46       324
   macro avg       0.17      0.18      0.17       324
weighted avg       0.32      0.35      0.33       324
 samples avg       0.35      0.35      0.35       324

CNS3 Test: 0.345679012345679
Definition: PNS1


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  df = pd.read_csv(csv_file)


Both Neuro and Mus: 2227
Only Neuro: 248
Only Mus: 794
Neither Neuro nor Mus: 694
Dropping 99
(1619, 9)
Index(['Bed Days', 'Physical Health Days', 'Mental Health Days', 'Bio Sex',
       'Disability', 'both', 'neither', 'musculo', 'neuro'],
      dtype='object')
Original feature space: (1619, 5)
Reduced feature space: (1619, 2)
PNS1 Train: 0.46795366795366794
              precision    recall  f1-score   support

           0       0.65      0.84      0.73       185
           1       0.00      0.00      0.00        35
           2       0.00      0.00      0.00        85
           3       0.00      0.00      0.00        19

   micro avg       0.65      0.48      0.55       324
   macro avg       0.16      0.21      0.18       324
weighted avg       0.37      0.48      0.42       324
 samples avg       0.48      0.48      0.48       324

PNS1 Test: 0.4783950617283951
Definition: PNS2


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  df = pd.read_csv(csv_file)


Both Neuro and Mus: 1898
Only Neuro: 577
Only Mus: 794
Neither Neuro nor Mus: 694
Dropping 99
(1619, 9)
Index(['Bed Days', 'Physical Health Days', 'Mental Health Days', 'Bio Sex',
       'Disability', 'both', 'neither', 'musculo', 'neuro'],
      dtype='object')
Original feature space: (1619, 5)
Reduced feature space: (1619, 2)
PNS2 Train: 0.3250965250965251
              precision    recall  f1-score   support

           0       0.60      0.69      0.64       152
           1       0.00      0.00      0.00        35
           2       0.00      0.00      0.00        85
           3       0.00      0.00      0.00        52

   micro avg       0.60      0.32      0.42       324
   macro avg       0.15      0.17      0.16       324
weighted avg       0.28      0.32      0.30       324
 samples avg       0.32      0.32      0.32       324

PNS2 Test: 0.32407407407407407
Definition: PNS3


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  df = pd.read_csv(csv_file)


Both Neuro and Mus: 1898
Only Neuro: 577
Only Mus: 492
Neither Neuro nor Mus: 996
Dropping 99
(1619, 9)
Index(['Bed Days', 'Physical Health Days', 'Mental Health Days', 'Bio Sex',
       'Disability', 'both', 'neither', 'musculo', 'neuro'],
      dtype='object')
Original feature space: (1619, 5)
Reduced feature space: (1619, 2)
PNS3 Train: 0.3250965250965251
              precision    recall  f1-score   support

           0       0.60      0.69      0.64       152
           1       0.00      0.00      0.00        72
           2       0.00      0.00      0.00        48
           3       0.00      0.00      0.00        52

   micro avg       0.60      0.32      0.42       324
   macro avg       0.15      0.17      0.16       324
weighted avg       0.28      0.32      0.30       324
 samples avg       0.32      0.32      0.32       324

PNS3 Test: 0.32407407407407407


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
