<a href="https://colab.research.google.com/github/asattiraju13/datahackathon/blob/main/svm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Imports

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import train_test_split

from sklearn.model_selection import GridSearchCV

from sklearn.utils import class_weight

from sklearn.metrics import precision_recall_fscore_support

from sklearn.svm import SVC

import matplotlib.pyplot as plt

from sklearn.inspection import permutation_importance

### Input Data

In [None]:
# Load the feature table and the label table from the working directory.
df = pd.read_csv('covid-selected-data.csv')
labels = pd.read_csv('covid-selected-data-labels.csv')

# Join features and labels on the shared 'Unnamed: 0' column
# (presumably the row index written out by an earlier to_csv -- TODO confirm).
data = pd.merge(df,labels,on='Unnamed: 0')

# Encode the class column as integers. Series.map yields NaN for any value
# that is not a key of the mapping, so fail loudly instead of letting NaN
# labels flow into the splitter and the classifier.
type_codes = {'Normal':0,'Mild':1,'Severe':2}
encoded_type = data['type'].map(type_codes)
if encoded_type.isna().any():
    unexpected = sorted(set(data.loc[encoded_type.isna(), 'type'].astype(str)))
    raise ValueError('Unexpected values in "type" column: ' + ', '.join(unexpected))
data['type'] = encoded_type

# Feature matrix (everything except the label and the join key) and label vector.
X = data.drop(['type','Unnamed: 0'],axis=1)
y = data['type'].values

### Upsampling Function

In [None]:
def random_upsample(DataZ,Labels,rand_seed):
    """Balance classes by randomly duplicating rows of the smaller classes.

    Every class is grown to the size of the largest class. A class that has
    fewer than half that many rows is first doubled repeatedly, then topped
    up with a random subset of its (possibly doubled) rows.

    Parameters
    ----------
    DataZ : array-like
        Samples, indexed along the first axis.
    Labels : array-like
        One numeric class label per row of ``DataZ``; may be 1-D or a
        column vector. Labels need not be contiguous or start at zero.
    rand_seed : int
        Seed for the random row selection. The generator is re-seeded with
        this value for every class, and NumPy's global random state is left
        untouched.

    Returns
    -------
    tuple
        ``(DataZ2, Labels2)``. ``DataZ2`` is an ndarray with the classes
        stacked in sorted label order, each contributing the same number of
        rows. ``Labels2`` is a list holding one length-1 float array per row
        of ``DataZ2``; flatten it (e.g. with ``np.ravel``) if a 1-D label
        vector is needed.

    Raises
    ------
    ValueError
        If ``Labels`` is empty.
    """
    DataZ = np.asarray(DataZ)
    flat_labels = np.asarray(Labels).ravel()
    if flat_labels.size == 0:
        raise ValueError('random_upsample requires at least one labelled sample')

    # Find Unique Labels and Number of Samples Per Class
    Groups,NC = np.unique(flat_labels, return_counts=True)
    # Find Maximum Number of Samples Per Class (the size every class is grown to)
    MaxC = np.max(NC)

    data_parts = []
    label_parts = []
    # Counts are looked up by position in Groups, not by label value, so
    # labels such as [1, 5] work as well as [0, 1, 2].
    for idx, i in enumerate(Groups):
        # Local generator seeded like np.random.seed(rand_seed): same stream,
        # but the caller's global random state is not reset.
        rng = np.random.RandomState(rand_seed)
        n_rows = NC[idx]
        randpermvals = rng.permutation(n_rows)

        # Select All Samples for a Certain Class
        DG = DataZ[flat_labels == i]

        # Double the class until at least half the target size is reached,
        # so that a subset of the current rows is enough to fill the rest.
        while DG.shape[0] < MaxC / 2:
            DG = np.concatenate((DG, DG), axis=0)
            n_rows = DG.shape[0]
            randpermvals = rng.permutation(n_rows)

        # Randomly pick exactly MaxC - n_rows rows: the permutation holds each
        # of 0..n_rows-1 once, so the comparison is True that many times.
        DG = np.concatenate((DG, DG[randpermvals < MaxC - n_rows]), axis=0)

        data_parts.append(DG)
        label_parts.append(i*np.ones((MaxC,1)))

    # Concatenate Samples from Each Class
    DataZ2 = np.concatenate(data_parts, axis=0)
    Labels2 = np.concatenate(label_parts, axis=0)
    return(DataZ2,list(Labels2))

### Training and Evaluation

In [None]:
# Five stratified 80/20 splits; each one yields a train+validation pool and a held-out test set.
sss = StratifiedShuffleSplit(n_splits=5,test_size=0.2,train_size=0.8,random_state=0)

# Per-fold test predictions, test labels and class-weight dictionaries.
y_pred_all = []; y_all = []; weights_per_fold = [];

# A single estimator is re-fitted from scratch on every fold.
svm = SVC(C=1.5,kernel='rbf')

# Split data into Training + Validation and Test Groups
for train_val_index, test_index in sss.split(X.values,y):

    X_train_val = X.values[train_val_index]
    y_train_val = y[train_val_index]
    X_test = X.values[test_index]
    y_test = y[test_index]

    # Split Train/Val Data into Training and Validation Groups
    # (n_splits=1, so take the single split directly instead of looping over it).
    sss2 = StratifiedShuffleSplit(n_splits=1,test_size=0.25,train_size=0.75,random_state=0)
    train_index, val_index = next(sss2.split(X_train_val,y_train_val))

    X_train = X_train_val[train_index]
    y_train = y_train_val[train_index]

    # Validation fold is carved out here but not used by the code in this cell.
    X_val = X_train_val[val_index]
    y_val = y_train_val[val_index]

    # Balance the training classes by random upsampling. The helper returns
    # labels as a column of length-1 arrays; flatten them to a 1-D vector in
    # the original label dtype so the estimator receives the shape it expects.
    X_train_new, y_train_new = random_upsample(X_train, y_train, 0)
    y_train_new = np.ravel(y_train_new).astype(y_train.dtype)

    # Create Weights for Model Classes (from the un-upsampled training labels).
    # classes/y are passed by keyword: newer scikit-learn releases accept them
    # only as keyword arguments. The weights are recorded per fold; they are
    # not passed to the SVC.
    values, counts = np.unique(y_train, return_counts=True)
    weights = class_weight.compute_class_weight(class_weight='balanced', classes=values, y=np.squeeze(y_train))
    class_weights = dict(zip(values, weights))

    # TRAIN AND TEST MODEL HERE
    svm.fit(X_train_new, y_train_new)

    y_pred_all.append(svm.predict(X_test))
    y_all.append(y_test)
    weights_per_fold.append(class_weights)

    print('Fold Completed.')

  return f(*args, **kwargs)


Fold Completed.


  return f(*args, **kwargs)


Fold Completed.


  return f(*args, **kwargs)


Fold Completed.


  return f(*args, **kwargs)


Fold Completed.


  return f(*args, **kwargs)


Fold Completed.


In [None]:
# Unpack Test Results
# Per-class precision / recall / F1 are computed for every stored fold, then
# summarised as mean and standard deviation across folds. Iterating over the
# stored folds keeps this cell in step with however many splits were run.
precision = []; recall = []; f1 = [];
for y_true_fold, y_pred_fold in zip(y_all, y_pred_all):
    # average=None returns one value per class; the fourth element (support) is not needed.
    fold_precision, fold_recall, fold_f1, _ = precision_recall_fscore_support(
        np.asarray(y_true_fold), np.asarray(y_pred_fold), beta=1.0, average=None)
    precision.append(fold_precision)
    recall.append(fold_recall)
    f1.append(fold_f1)
# Each line prints the per-class mean followed by the per-class standard deviation.
print('Precision')
print(str(np.mean(precision,axis=0)) + ' + ' + str(np.std(precision,axis=0)))
print('Recall')
print(str(np.mean(recall,axis=0)) + ' + ' + str(np.std(recall,axis=0)))
print('F1')
print(str(np.mean(f1,axis=0)) + ' + ' + str(np.std(f1,axis=0)))

Precision
[0.9493352  0.78959262 0.94873008] + [0.0035556  0.01567101 0.0028348 ]
Recall
[0.92420701 0.8337386  0.96464646] + [0.0059763  0.01902065 0.00598916]
F1
[0.93659201 0.81103297 0.95660724] + [0.00383673 0.01653093 0.00274344]
