In [264]:
from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.metrics import roc_auc_score, confusion_matrix
from sklearn.preprocessing import MinMaxScaler
import numpy as np


def weird_division(n, d):
    return n / d if d else 0


def validate_model(model, X, Y, fold):
    """
    validates the model with a k-fold validation which is iterated
    returns the mean accuracy, specificiy, recall, precision, f1 score and auc score
    """

    splits = 5
    iteration = 10

    acc_list = []
    specificity_list = []
    recall_list = []
    precision_list = []
    f1_list = []

    if fold == "Strat":
        folds = StratifiedKFold(n_splits=splits)
    elif fold == "K":
        folds = KFold(splits, shuffle=True)

    # Iterate "interation" times of k-fold
    for i in range(1, iteration):
        # print(f'Iteration {i}/{iteration}')

        acc_total = 0
        specificity_total = 0
        recall_total = 0
        precision_total = 0
        f1_total = 0
        # auc_total = 0

        for train_index, test_index in folds.split(X, Y):
            # x_train,y_train,x_test,y_test = X.iloc[train_index,:], Y.iloc[train_index,:], X.iloc[test_index,:],Y.iloc[test_index,:]
            x_train = X.iloc[train_index, :]
            x_test = X.iloc[test_index, :]
            y_train = Y.iloc[train_index, :]
            y_test = Y.iloc[test_index, :]

            # scale
            sc = MinMaxScaler()
            x_train = sc.fit_transform(x_train)
            x_test = sc.transform(x_test)

            # fit model and predict
            model.fit(x_train, np.ravel(y_train))
            y_pred = model.predict(x_test)

            conf_matrix = confusion_matrix(y_test, y_pred)
            TN = conf_matrix[0][0]
            FP = conf_matrix[0][1]
            FN = conf_matrix[1][0]
            TP = conf_matrix[1][1]

            accuracy = (weird_division((TP + TN), (TP + TN + FP + FN))) * 100
            recall = weird_division(TP, (TP + FN)) * 100  # recall
            specificity = weird_division(TN, (TN + FP)) * 100
            precision = weird_division(TP, (TP + FP)) * 100
            f1_score = weird_division(2 * (recall * precision), (recall + precision))

            # sum it up
            acc_total += accuracy
            recall_total += recall
            specificity_total += specificity
            precision_total += precision
            f1_total += f1_score
            # auc_total += roc_auc_score(y_test, y_pred)

        # avg
        accuracy_mean = acc_total / splits
        recall_mean = recall_total / splits
        specificity_mean = specificity_total / splits
        precision_mean = precision_total / splits
        f1_mean = f1_total / splits
        # auc_mean = auc_total / splits

        acc_list.append(accuracy_mean)
        recall_list.append(recall_mean)
        specificity_list.append(specificity_mean)
        precision_list.append(precision_mean)
        f1_list.append(f1_mean)
        # auc_list.append(auc_mean)

    print("Accuracy for the 10 iterations: ",  acc_list) #mean accuracy acros the 6 folds for each iteration
    print("Recall for the 10 iterations: ",  recall_list) #mean accuracy acros the 6 folds for each iteration
    print("Specificity for the 10 iterations: ",  specificity_list)
    print("Precision for the 10 iterations: ",  precision_list)
    print("F1  score for the 10 iterations: ",  f1_list)

    return (
        np.mean(acc_list),
        np.mean(specificity_list),
        np.mean(recall_list),
        np.mean(precision_list),
        np.mean(f1_list),
        # np.mean(auc_list),
    )


In [265]:
import pandas as pd
import os
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
#from validate_model import validate_model
from sklearn.svm import SVC  
from sklearn.model_selection import train_test_split

path = '/Users/athena.kam/Documents/Thesis/codebase/thesis-2023-athena'
os.chdir(path)

CV_SPLIT = 5

def get_best_param(x_train,y_train):
    param_grid = {'C': [0.1,1, 10, 100], 'gamma': [1,0.1,0.01,0.001],'kernel': ['rbf', 'poly', 'sigmoid']}
    grid = GridSearchCV(SVC(),param_grid,refit=True,verbose=0,return_train_score=True,cv = CV_SPLIT)
    #print(grid.cv_results_)
    grid.fit(x_train,y_train)
    print(grid.best_estimator_.get_params())
    return grid


def train_test_SVC(filename:str,hold_out:bool,include_personal_q:bool, grid_search:bool,model_weights:dict = {}):

    df = pd.read_csv(filename)

    if include_personal_q:
        df = df[df['noPersonalQ']!=1]
    else:
        df = df[df['personalQ']!=1]
    
    headers = df.columns
    non_embeddings_headers = []
    for header in headers:
        if header.find('embbedings')<0:
            non_embeddings_headers.append(header)
    
    X = df.drop(columns=non_embeddings_headers)
    Y = df['classification']
   
    if hold_out:
        x_train,x_test,y_train,y_test = train_test_split(X,Y,test_size=0.10)
    else:
        x_train = X
        y_train = Y

    if grid_search: 
        grid = get_best_param(x_train=x_train,y_train=y_train) 
        model_svc = grid.best_estimator_
    else:
        model_svc = SVC(C = model_weights['C'],gamma=model_weights['gamma'],kernel=model_weights['kernel'])
        
    accuracy, specificiy, recall, precision, f1_score =validate_model(model_svc,pd.DataFrame(x_train),pd.DataFrame(y_train),"Strat")
    print(f'\tAverage Accuracy: {accuracy} \n\
      Average Specificity: {specificiy} \n\
      Average Recall: {recall}\n\
      Average Precision:{precision}\n\
      Average F1 score {f1_score}\n\
      ')



# Google ASR

## BERT embeddings 
Trying out first the embedded transcripts without personal Q

In [268]:
params = {"C": 0.1, "degree": 3, "gamma": 1, "kernel": "poly"}
grid = train_test_SVC(
    "datasets/transformed/spontaneousDialogueOnly_google_bert_embeddings_transformed.csv",
    hold_out=False,
    include_personal_q=False,
    grid_search=True,
    model_weights=params,
)

{'C': 0.1, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 1, 'kernel': 'poly', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Accuracy for the 10 iterations:  [66.42857142857142, 66.42857142857142, 66.42857142857142, 66.42857142857142, 66.42857142857142, 66.42857142857142, 66.42857142857142, 66.42857142857142, 66.42857142857142]
Recall for the 10 iterations:  [60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0]
Specificity for the 10 iterations:  [71.0, 71.0, 71.0, 71.0, 71.0, 71.0, 71.0, 71.0, 71.0]
Precision for the 10 iterations:  [53.33333333333333, 53.33333333333333, 53.33333333333333, 53.33333333333333, 53.33333333333333, 53.33333333333333, 53.33333333333333, 53.33333333333333, 53.33333333333333]
F1  score for the 10 iterations:  [50.761904761904766, 50.761904761904766, 50.761904761904766, 50.761904761904766, 50.761904761904766, 50

with personal questions

In [269]:
params = {"C": 100, "degree": 3, "gamma": 0.001, "kernel": "sigmoid"}
grid = train_test_SVC(
    "datasets/transformed/spontaneousDialogueOnly_google_bert_embeddings_transformed.csv",
    hold_out=False,
    include_personal_q=True,
    grid_search=True,
    model_weights=params,
)

{'C': 100, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 0.001, 'kernel': 'sigmoid', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Accuracy for the 10 iterations:  [68.57142857142858, 68.57142857142858, 68.57142857142858, 68.57142857142858, 68.57142857142858, 68.57142857142858, 68.57142857142858, 68.57142857142858, 68.57142857142858]
Recall for the 10 iterations:  [59.999999999999986, 59.999999999999986, 59.999999999999986, 59.999999999999986, 59.999999999999986, 59.999999999999986, 59.999999999999986, 59.999999999999986, 59.999999999999986]
Specificity for the 10 iterations:  [76.0, 76.0, 76.0, 76.0, 76.0, 76.0, 76.0, 76.0, 76.0]
Precision for the 10 iterations:  [66.66666666666666, 66.66666666666666, 66.66666666666666, 66.66666666666666, 66.66666666666666, 66.66666666666666, 66.66666666666666, 66.66666666666666, 66.66666666666666]
F1  

In [267]:
params = {"C": 10, "degree": 3, "gamma": 0.001, "kernel": "sigmoid"}
grid = train_test_SVC(
    "datasets/transformed/spontaneousDialogueOnly_google_bert_sentence_embeddings_transformed.csv",
    hold_out=False,
    include_personal_q=False,
    grid_search=True,
    model_weights=params
)

{'C': 10, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 0.001, 'kernel': 'sigmoid', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Accuracy for the 10 iterations:  [62.43859649122807, 62.43859649122807, 62.43859649122807, 62.43859649122807, 62.43859649122807, 62.43859649122807, 62.43859649122807, 62.43859649122807, 62.43859649122807]
Recall for the 10 iterations:  [10.919540229885058, 10.919540229885058, 10.919540229885058, 10.919540229885058, 10.919540229885058, 10.919540229885058, 10.919540229885058, 10.919540229885058, 10.919540229885058]
Specificity for the 10 iterations:  [95.21739130434784, 95.21739130434784, 95.21739130434784, 95.21739130434784, 95.21739130434784, 95.21739130434784, 95.21739130434784, 95.21739130434784, 95.21739130434784]
Precision for the 10 iterations:  [34.26470588235294, 34.26470588235294, 34.26470588235294, 34

In [249]:
params = {"C": 10, "degree": 3, "gamma": 1, "kernel": "sigmoid"}
grid = train_test_SVC(
    "datasets/transformed/spontaneousDialogueOnly_google_gpt_embeddings_transformed.csv",
    hold_out=False,
    include_personal_q=False,
    grid_search=True,
    model_weights=params
)

{'C': 10, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 1, 'kernel': 'sigmoid', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Accuracy for the 10 iterations:  [61.07142857142858, 61.07142857142858, 61.07142857142858, 61.07142857142858, 61.07142857142858, 61.07142857142858, 61.07142857142858, 61.07142857142858, 61.07142857142858]
Recall for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Specificity for the 10 iterations:  [100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0]
Precision for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
F1  score for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
	Average Accuracy: 61.071428571428584 
      Average Specificity: 100.0 
      Average Recall: 0.0
      Average Precision:0.0
      Average F1 score 0.0
      


In [237]:
params = {"C": 10, "degree": 3, "gamma": 1, "kernel": "sigmoid"}
grid = train_test_SVC(
    "datasets/transformed/spontaneousDialogueOnly_google_gpt_embeddings_transformed.csv",
    hold_out=False,
    include_personal_q=True,
    grid_search=True,
    model_weights=params
)

{'C': 10, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 1, 'kernel': 'sigmoid', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Accuracy for the 10 iterations:  [60.06944444444444, 60.06944444444444, 60.06944444444444, 60.06944444444444, 60.06944444444444, 60.06944444444444, 60.06944444444444, 60.06944444444444, 60.06944444444444]
Recall for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Specificity for the 10 iterations:  [100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0]
Precision for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
F1  score for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
	Average Accuracy: 60.06944444444444 
      Average Specificity: 100.0 
      Average Recall: 0.0
      Average Precision:0.0
      Average F1 score 0.0
      


In [238]:
params = {"C": 0.1, "degree": 3, "gamma": 1, "kernel": "rbf"}
grid = train_test_SVC(
    "datasets/transformed/spontaneousDialogueOnly_google_gpt_sentence_embeddings_transformed.csv",
    hold_out=False,
    include_personal_q=False,
    grid_search=False,
    model_weights=params
)

Accuracy for the 10 iterations:  [61.11142217245241, 61.11142217245241, 61.11142217245241, 61.11142217245241, 61.11142217245241, 61.11142217245241, 61.11142217245241, 61.11142217245241, 61.11142217245241]
Recall for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Specificity for the 10 iterations:  [100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0]
Precision for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
F1  score for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
	Average Accuracy: 61.11142217245241 
      Average Specificity: 100.0 
      Average Recall: 0.0
      Average Precision:0.0
      Average F1 score 0.0
      


# WAV2VEC ASR

Since wav2vec doesn't transcribe with punctuations. I can't run the sentence embeddings method on it. For wav2vec i'll only be comparing the embeddings generated by BERT and ones generated by GPT.


## BERT 
I will first try WITHOUT personal Q

In [251]:
params = {"C": 0.1, "degree": 3, "gamma": 0.01, "kernel": "poly"}
grid = train_test_SVC(
    "datasets/transformed/spontaneousDialogueOnly_wav2vec_bert_embeddings_transformed.csv",
    hold_out=False,
    include_personal_q=False,
    grid_search=True,
    model_weights=params
)

{'C': 0.1, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 0.01, 'kernel': 'poly', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Accuracy for the 10 iterations:  [62.857142857142854, 62.857142857142854, 62.857142857142854, 62.857142857142854, 62.857142857142854, 62.857142857142854, 62.857142857142854, 62.857142857142854, 62.857142857142854]
Recall for the 10 iterations:  [19.999999999999996, 19.999999999999996, 19.999999999999996, 19.999999999999996, 19.999999999999996, 19.999999999999996, 19.999999999999996, 19.999999999999996, 19.999999999999996]
Specificity for the 10 iterations:  [90.0, 90.0, 90.0, 90.0, 90.0, 90.0, 90.0, 90.0, 90.0]
Precision for the 10 iterations:  [46.666666666666664, 46.666666666666664, 46.666666666666664, 46.666666666666664, 46.666666666666664, 46.666666666666664, 46.666666666666664, 46.666666666666664, 46.6666666

This is WITH personal questions added in

In [252]:
params = {"C": 1, "degree": 3, "gamma": 0.01, "kernel": "poly"}
grid = train_test_SVC(
    "datasets/transformed/spontaneousDialogueOnly_wav2vec_bert_embeddings_transformed.csv",
    hold_out=False,
    include_personal_q=True,
    grid_search=True,
    model_weights=params
)

{'C': 1, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 0.01, 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Accuracy for the 10 iterations:  [62.857142857142854, 62.857142857142854, 62.857142857142854, 62.857142857142854, 62.857142857142854, 62.857142857142854, 62.857142857142854, 62.857142857142854, 62.857142857142854]
Recall for the 10 iterations:  [6.666666666666666, 6.666666666666666, 6.666666666666666, 6.666666666666666, 6.666666666666666, 6.666666666666666, 6.666666666666666, 6.666666666666666, 6.666666666666666]
Specificity for the 10 iterations:  [100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0]
Precision for the 10 iterations:  [20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0]
F1  score for the 10 iterations:  [10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0]
	Average Accuracy: 62.85714285

## GPT 

In [253]:
params = {"C": 10, "degree": 3, "gamma": 1, "kernel": "rbf"}
grid = train_test_SVC(
    "datasets/transformed/spontaneousDialogueOnly_wav2vec_gpt_embeddings_transformed.csv",
    hold_out=False,
    include_personal_q=True,
    grid_search=True,
    model_weights=params
)

{'C': 10, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 1, 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Accuracy for the 10 iterations:  [60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0]
Recall for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Specificity for the 10 iterations:  [100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0]
Precision for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
F1  score for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
	Average Accuracy: 60.0 
      Average Specificity: 100.0 
      Average Recall: 0.0
      Average Precision:0.0
      Average F1 score 0.0
      


# Whisper ASR

In [254]:
params = {"C": 10, "degree": 3, "gamma": 0.01, "kernel": "rbf"}
grid = train_test_SVC(
    "datasets/transformed/spontaneousDialogueOnly_whisper_bert_embeddings_transformed.csv",
    hold_out=False,
    include_personal_q=False,
    grid_search=True,
    model_weights=params
)

{'C': 10, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 0.01, 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Accuracy for the 10 iterations:  [65.71428571428571, 65.71428571428571, 65.71428571428571, 65.71428571428571, 65.71428571428571, 65.71428571428571, 65.71428571428571, 65.71428571428571, 65.71428571428571]
Recall for the 10 iterations:  [33.33333333333333, 33.33333333333333, 33.33333333333333, 33.33333333333333, 33.33333333333333, 33.33333333333333, 33.33333333333333, 33.33333333333333, 33.33333333333333]
Specificity for the 10 iterations:  [85.0, 85.0, 85.0, 85.0, 85.0, 85.0, 85.0, 85.0, 85.0]
Precision for the 10 iterations:  [48.0, 48.0, 48.0, 48.0, 48.0, 48.0, 48.0, 48.0, 48.0]
F1  score for the 10 iterations:  [36.0, 36.0, 36.0, 36.0, 36.0, 36.0, 36.0, 36.0, 36.0]
	Average Accuracy: 65.71428571428571 
      Ave

In [258]:
params = {"C": 10, "degree": 3, "gamma": 0.01, "kernel": "rbf"}
grid = train_test_SVC(
    "datasets/transformed/spontaneousDialogueOnly_whisper_bert_embeddings_transformed.csv",
    hold_out=False,
    include_personal_q=True,
    grid_search=True,
    model_weights=params
)

{'C': 0.1, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 1, 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Accuracy for the 10 iterations:  [60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0]
Recall for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Specificity for the 10 iterations:  [100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0]
Precision for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
F1  score for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
	Average Accuracy: 60.0 
      Average Specificity: 100.0 
      Average Recall: 0.0
      Average Precision:0.0
      Average F1 score 0.0
      


In [255]:
params = {"C": 1, "degree": 3, "gamma": 1, "kernel": "rbf"}
grid = train_test_SVC(
    "datasets/transformed/spontaneousDialogueOnly_whisper_bert_sentence_embeddings_transformed.csv",
    hold_out=False,
    include_personal_q=False,
    grid_search=True,
    model_weights=params
)

{'C': 1, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 1, 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Accuracy for the 10 iterations:  [63.02460697197539, 63.02460697197539, 63.02460697197539, 63.02460697197539, 63.02460697197539, 63.02460697197539, 63.02460697197539, 63.02460697197539, 63.02460697197539]
Recall for the 10 iterations:  [2.1182266009852215, 2.1182266009852215, 2.1182266009852215, 2.1182266009852215, 2.1182266009852215, 2.1182266009852215, 2.1182266009852215, 2.1182266009852215, 2.1182266009852215]
Specificity for the 10 iterations:  [99.16666666666666, 99.16666666666666, 99.16666666666666, 99.16666666666666, 99.16666666666666, 99.16666666666666, 99.16666666666666, 99.16666666666666, 99.16666666666666]
Precision for the 10 iterations:  [33.33333333333333, 33.33333333333333, 33.33333333333333, 33.33333333

In [259]:
params = {"C": 0.1, "degree": 3, "gamma": 1, "kernel": "rbf"}
grid = train_test_SVC(
    "datasets/transformed/spontaneousDialogueOnly_whisper_gpt_embeddings_transformed.csv",
    hold_out=False,
    include_personal_q=False,
    grid_search=True,
    model_weights=params
)

{'C': 0.1, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 1, 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Accuracy for the 10 iterations:  [60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0]
Recall for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Specificity for the 10 iterations:  [100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0]
Precision for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
F1  score for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
	Average Accuracy: 60.0 
      Average Specificity: 100.0 
      Average Recall: 0.0
      Average Precision:0.0
      Average F1 score 0.0
      


In [260]:
params = {"C": 0.1, "degree": 3, "gamma": 1, "kernel": "rbf"}
grid = train_test_SVC(
    "datasets/transformed/spontaneousDialogueOnly_whisper_gpt_sentence_embeddings_transformed.csv",
    hold_out=False,
    include_personal_q=False,
    grid_search=True,
    model_weights=params
)

{'C': 0.1, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 1, 'kernel': 'rbf', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Accuracy for the 10 iterations:  [62.761449077238545, 62.761449077238545, 62.761449077238545, 62.761449077238545, 62.761449077238545, 62.761449077238545, 62.761449077238545, 62.761449077238545, 62.761449077238545]
Recall for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
Specificity for the 10 iterations:  [100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100.0]
Precision for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
F1  score for the 10 iterations:  [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
	Average Accuracy: 62.761449077238545 
      Average Specificity: 100.0 
      Average Recall: 0.0
      Average Precision:0.0
      Average F1 score 0.0
      
