In [1]:
%run ../../import_src.py

import lymedata
from lymedata import *
import constants
from constants import *

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import train_test_split

from sklearn.svm import SVC
from sklearn.metrics import classification_report

In [3]:
for defn in [DEF_OWD, DEF_CNS1, DEF_CNS2, DEF_CNS3, DEF_PNS1, DEF_PNS2, DEF_PNS3]:
    print(f'Definition: {defn}')
    data = LymeData({CHRONIC, NEURO, MUSCULO},{SYMPTOMS},{NEURO, MUSCULO, BOTH, NEITHER}, defn=defn, drop_99=True)
    print(data.df.shape)
    print(data.df.columns)
    
    X, Y = data.get_data_and_labels()
    
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    pca = PCA(n_components=2)
    X_pca = pca.fit_transform(X_scaled)
    
    print(f"Original feature space: {X.shape}")
    print(f"Reduced feature space: {X_pca.shape}")
    
    X_train, X_test, Y_train, Y_test = train_test_split(X_pca, Y, test_size=0.2, random_state=42)
    
    # classifier = OneVsRestClassifier(SVC(kernel="linear", probability=True))
    classifier = OneVsRestClassifier(SVC(kernel="rbf", probability=True))
    classifier.fit(X_train, Y_train)
    train_accuracy = classifier.score(X_train, Y_train)
    print(f'{defn} Train: {train_accuracy}')
    
    Y_pred = classifier.predict(X_test)
    print(classification_report(Y_test, Y_pred))
    
    train_accuracy = classifier.score(X_test, Y_test)
    print(f'{defn} Test: {train_accuracy}')


Definition: OWD


  df = pd.read_csv(csv_file)


OWD
Both Neuro and Mus: 1562
Only Neuro: 632
Only Mus: 963
Neither Neuro nor Mus: 806
Dropping 99
(3598, 16)
Index(['Fatigue', 'Headache', 'Joint Pain', 'Muscle aches', 'Neuropathy',
       'Twitching', 'Memory Loss', 'Cognitive Impairment', 'Sleep Impairment',
       'Psychiatric', 'Heart related', 'Gastrointestinal', 'neuro', 'musculo',
       'both', 'neither'],
      dtype='object')
Original feature space: (3598, 12)
Reduced feature space: (3598, 2)
OWD Train: 0.6063238359972203
              precision    recall  f1-score   support

           0       0.55      0.40      0.46       108
           1       0.75      0.51      0.60       205
           2       0.84      0.85      0.84       301
           3       0.82      0.17      0.28       106

   micro avg       0.77      0.58      0.66       720
   macro avg       0.74      0.48      0.55       720
weighted avg       0.77      0.58      0.63       720
 samples avg       0.58      0.58      0.58       720



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


OWD Test: 0.5833333333333334
Definition: CNS1


  df = pd.read_csv(csv_file)


Both Neuro and Mus: 2259
Only Neuro: 310
Only Mus: 762
Neither Neuro nor Mus: 632
Dropping 99
(3598, 16)
Index(['Fatigue', 'Headache', 'Joint Pain', 'Muscle aches', 'Neuropathy',
       'Twitching', 'Memory Loss', 'Cognitive Impairment', 'Sleep Impairment',
       'Psychiatric', 'Heart related', 'Gastrointestinal', 'neuro', 'musculo',
       'both', 'neither'],
      dtype='object')
Original feature space: (3598, 12)
Reduced feature space: (3598, 2)
CNS1 Train: 0.7734537873523281
              precision    recall  f1-score   support

           0       0.71      0.56      0.62        54
           1       0.78      0.69      0.74       160
           2       0.90      0.89      0.89       448
           3       0.80      0.48      0.60        58

   micro avg       0.86      0.79      0.82       720
   macro avg       0.80      0.65      0.71       720
weighted avg       0.85      0.79      0.81       720
 samples avg       0.78      0.79      0.78       720

CNS1 Test: 0.7819444444444

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  df = pd.read_csv(csv_file)


Both Neuro and Mus: 1862
Only Neuro: 707
Only Mus: 762
Neither Neuro nor Mus: 632
Dropping 99
(3598, 16)
Index(['Fatigue', 'Headache', 'Joint Pain', 'Muscle aches', 'Neuropathy',
       'Twitching', 'Memory Loss', 'Cognitive Impairment', 'Sleep Impairment',
       'Psychiatric', 'Heart related', 'Gastrointestinal', 'neuro', 'musculo',
       'both', 'neither'],
      dtype='object')
Original feature space: (3598, 12)
Reduced feature space: (3598, 2)
CNS2 Train: 0.7453092425295343
              precision    recall  f1-score   support

           0       0.70      0.55      0.62       137
           1       0.78      0.69      0.74       160
           2       0.85      0.84      0.85       365
           3       0.80      0.48      0.60        58

   micro avg       0.81      0.72      0.76       720
   macro avg       0.78      0.64      0.70       720
weighted avg       0.80      0.72      0.76       720
 samples avg       0.72      0.72      0.72       720

CNS2 Test: 0.725
Definitio

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  df = pd.read_csv(csv_file)


Both Neuro and Mus: 1862
Only Neuro: 707
Only Mus: 528
Neither Neuro nor Mus: 866
Dropping 99
(3598, 16)
Index(['Fatigue', 'Headache', 'Joint Pain', 'Muscle aches', 'Neuropathy',
       'Twitching', 'Memory Loss', 'Cognitive Impairment', 'Sleep Impairment',
       'Psychiatric', 'Heart related', 'Gastrointestinal', 'neuro', 'musculo',
       'both', 'neither'],
      dtype='object')
Original feature space: (3598, 12)
Reduced feature space: (3598, 2)
CNS3 Train: 0.7268936761640028
              precision    recall  f1-score   support

           0       0.70      0.55      0.62       137
           1       0.81      0.59      0.68       115
           2       0.85      0.84      0.85       365
           3       0.78      0.57      0.66       103

   micro avg       0.81      0.71      0.76       720
   macro avg       0.79      0.64      0.70       720
weighted avg       0.81      0.71      0.75       720
 samples avg       0.71      0.71      0.71       720

CNS3 Test: 0.7083333333333

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  df = pd.read_csv(csv_file)


Both Neuro and Mus: 2227
Only Neuro: 248
Only Mus: 794
Neither Neuro nor Mus: 694
Dropping 99
(3598, 16)
Index(['Fatigue', 'Headache', 'Joint Pain', 'Muscle aches', 'Neuropathy',
       'Twitching', 'Memory Loss', 'Cognitive Impairment', 'Sleep Impairment',
       'Psychiatric', 'Heart related', 'Gastrointestinal', 'neuro', 'musculo',
       'both', 'neither'],
      dtype='object')
Original feature space: (3598, 12)
Reduced feature space: (3598, 2)
PNS1 Train: 0.6341209173036831
              precision    recall  f1-score   support

           0       1.00      0.03      0.05        36
           1       0.56      0.13      0.21       175
           2       0.82      0.91      0.86       433
           3       0.81      0.45      0.58        76

   micro avg       0.80      0.63      0.70       720
   macro avg       0.80      0.38      0.43       720
weighted avg       0.77      0.63      0.63       720
 samples avg       0.63      0.63      0.63       720

PNS1 Test: 0.6277777777777

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  df = pd.read_csv(csv_file)


Both Neuro and Mus: 1898
Only Neuro: 577
Only Mus: 794
Neither Neuro nor Mus: 694
Dropping 99
(3598, 16)
Index(['Fatigue', 'Headache', 'Joint Pain', 'Muscle aches', 'Neuropathy',
       'Twitching', 'Memory Loss', 'Cognitive Impairment', 'Sleep Impairment',
       'Psychiatric', 'Heart related', 'Gastrointestinal', 'neuro', 'musculo',
       'both', 'neither'],
      dtype='object')
Original feature space: (3598, 12)
Reduced feature space: (3598, 2)
PNS2 Train: 0.5576789437109103
              precision    recall  f1-score   support

           0       0.69      0.12      0.20        93
           1       0.56      0.13      0.21       175
           2       0.86      0.89      0.88       376
           3       0.81      0.45      0.58        76

   micro avg       0.82      0.56      0.67       720
   macro avg       0.73      0.40      0.47       720
weighted avg       0.76      0.56      0.60       720
 samples avg       0.56      0.56      0.56       720



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


PNS2 Test: 0.5611111111111111
Definition: PNS3


  df = pd.read_csv(csv_file)


Both Neuro and Mus: 1898
Only Neuro: 577
Only Mus: 492
Neither Neuro nor Mus: 996
Dropping 99
(3598, 16)
Index(['Fatigue', 'Headache', 'Joint Pain', 'Muscle aches', 'Neuropathy',
       'Twitching', 'Memory Loss', 'Cognitive Impairment', 'Sleep Impairment',
       'Psychiatric', 'Heart related', 'Gastrointestinal', 'neuro', 'musculo',
       'both', 'neither'],
      dtype='object')
Original feature space: (3598, 12)
Reduced feature space: (3598, 2)
PNS3 Train: 0.5816539263377345
              precision    recall  f1-score   support

           0       0.69      0.12      0.20        93
           1       0.00      0.00      0.00       104
           2       0.86      0.89      0.88       376
           3       0.77      0.54      0.64       147

   micro avg       0.84      0.59      0.69       720
   macro avg       0.58      0.39      0.43       720
weighted avg       0.69      0.59      0.61       720
 samples avg       0.59      0.59      0.59       720

PNS3 Test: 0.5930555555555

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
