In [11]:
import pandas as pd
import numpy as np
import librosa
import os
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

def extract_features(file_name, n_mfcc=40):
    """Summarise an audio file as the mean of its MFCCs.

    Parameters
    ----------
    file_name : str or path-like
        Audio file passed to ``librosa.load``.
    n_mfcc : int, default 40
        Number of MFCC coefficients requested from ``librosa.feature.mfcc``.
        The same value sets the length of the fallback vector, so the
        success and failure paths always agree on the feature length.

    Returns
    -------
    numpy.ndarray
        The MFCC matrix averaged over its second axis (one value per
        coefficient). If loading or feature extraction raises, the error is
        printed and an all-zero vector of length ``n_mfcc`` is returned
        instead, so callers can recognise a failed file by ``not vec.any()``.
    """
    try:
        audio_data, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        # Extract MFCCs (Mel-frequency cepstral coefficients)
        mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=n_mfcc)
        # Collapse the frame axis: one mean per coefficient
        mfccs_scaled = np.mean(mfccs.T, axis=0)
        return mfccs_scaled
    except Exception as e:
        # Best-effort by design: report the failure and keep the batch going.
        print(f"Error encountered while parsing file: {file_name}. Exception: {e}")
        return np.zeros(n_mfcc)  # Fallback length follows the requested n_mfcc

def main():
    """Train and evaluate a linear SVM on every audio chunk listed in the CSV.

    Reads ``truth_or_lie_input.csv``, rewrites each row's ``Chunk Filename``
    from the absolute path stored in the CSV to the local ``audio-files/``
    directory, extracts features with ``extract_features``, fits a
    class-weighted linear ``SVC`` on a stratified 80 % split and prints the
    classification report and confusion matrix for the held-out 20 %.

    Raises
    ------
    ValueError
        If no audio file could be turned into features.
    """
    # Absolute prefix the CSV's 'Chunk Filename' values are matched against,
    # and the local directory it is rewritten to.
    source_root = "/Users/milanvaghani/Desktop/Unstructed Machine Learning/"
    local_root = "audio-files/"

    # Load the training data
    train_df = pd.read_csv('truth_or_lie_input.csv')

    # Extract features and labels
    features = []
    labels = []
    skipped = 0

    for _, row in train_df.iterrows():
        chunk_filename = row["Chunk Filename"]
        if chunk_filename.startswith(source_root + "VP"):
            # ".../VP..." paths keep everything after the root.
            file_path = chunk_filename.replace(source_root, local_root)
        else:
            # Replace the prefix *including* its trailing slash so the result
            # is "audio-files/<name>" rather than "audio-files//<name>".
            file_path = chunk_filename.replace(source_root + "Audio Files/", local_root)

        data = extract_features(file_path)
        # extract_features reports a failed file as an all-zero vector.
        # Training on that placeholder with a real label would inject a
        # fabricated sample, so drop the row instead.
        if not np.any(data):
            skipped += 1
            continue

        features.append(data)
        labels.append(row['lie'])  # boolean column: True means the chunk is a lie

    if skipped:
        print(f"Skipped {skipped} file(s) whose features could not be extracted.")
    if not features:
        raise ValueError(
            "No audio features could be extracted; check the 'audio-files/' directory."
        )

    # Convert boolean labels to integers: True -> 1, False -> 0
    y = np.array([int(label) for label in labels])

    # Convert features to numpy array
    X = np.array(features)

    # Split the dataset (stratified so the rare positive class is in both parts)
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Scale the features; the scaler is fitted on the training part only
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)

    # Train the model. probability=True is left out: only predict() is used
    # below, and enabling it just adds an internal cross-validation to fit().
    model = SVC(kernel='linear', class_weight={0: 1, 1: 8})
    model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_val)
    print("Classification Report:")
    print(
        classification_report(
            y_val, y_pred, target_names=['False', 'True']
        )
    )
    print("Confusion Matrix:")
    print(confusion_matrix(y_val, y_pred))


if __name__ == "__main__":
    main()

Classification Report:
              precision    recall  f1-score   support

       False       0.97      0.82      0.89       101
        True       0.18      0.57      0.28         7

    accuracy                           0.81       108
   macro avg       0.57      0.70      0.58       108
weighted avg       0.91      0.81      0.85       108

Confusion Matrix:
[[83 18]
 [ 3  4]]


In [12]:
# Kamala Harris truth/lie prediction:
def main():
    """Train and evaluate a linear SVM on the Kamala Harris subset of the CSV.

    Reads ``truth_or_lie_input.csv`` and keeps only rows whose
    ``Chunk Filename`` starts with the ``VP`` or ``Audio Files/Vice`` prefix
    under the recorded root (presumably the Harris recordings; confirm
    against the data). Each kept path is rewritten to the local
    ``audio-files/`` directory, features are extracted with
    ``extract_features``, a class-weighted linear ``SVC`` is fitted on a
    stratified 80 % split, and the classification report and confusion
    matrix for the held-out 20 % are printed.

    Raises
    ------
    ValueError
        If no audio file could be turned into features.
    """
    # Absolute prefix the CSV's 'Chunk Filename' values are matched against,
    # and the local directory it is rewritten to.
    source_root = "/Users/milanvaghani/Desktop/Unstructed Machine Learning/"
    local_root = "audio-files/"

    # Load the training data
    train_df = pd.read_csv('truth_or_lie_input.csv')

    # Extract features and labels
    features = []
    labels = []
    skipped = 0

    for _, row in train_df.iterrows():
        chunk_filename = row["Chunk Filename"]
        if chunk_filename.startswith(source_root + "VP"):
            file_path = chunk_filename.replace(source_root, local_root)
        elif chunk_filename.startswith(source_root + "Audio Files/Vice"):
            file_path = chunk_filename.replace(
                source_root + "Audio Files/Vice", local_root + "Vice"
            )
        else:
            # Row belongs to a different speaker: not part of this subset.
            continue

        data = extract_features(file_path)
        # extract_features reports a failed file as an all-zero vector.
        # Training on that placeholder with a real label would inject a
        # fabricated sample, so drop the row instead.
        if not np.any(data):
            skipped += 1
            continue

        features.append(data)
        labels.append(row['lie'])  # boolean column: True means the chunk is a lie

    if skipped:
        print(f"Skipped {skipped} file(s) whose features could not be extracted.")
    if not features:
        raise ValueError(
            "No audio features could be extracted; check the 'audio-files/' directory."
        )

    # Convert boolean labels to integers: True -> 1, False -> 0
    y = np.array([int(label) for label in labels])

    # Convert features to numpy array
    X = np.array(features)

    # Split the dataset (stratified so the rare positive class is in both parts)
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    # Scale the features; the scaler is fitted on the training part only
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)

    # Train the model. probability=True is left out: only predict() is used
    # below, and enabling it just adds an internal cross-validation to fit().
    model = SVC(kernel='linear', class_weight={0: 1, 1: 10})
    model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_val)
    print("Classification Report:")
    print(
        classification_report(
            y_val, y_pred, target_names=['False', 'True']
        )
    )
    print("Kamala Harris Confusion Matrix:")
    print(confusion_matrix(y_val, y_pred))


if __name__ == "__main__":
    main()

Classification Report:
              precision    recall  f1-score   support

       False       0.93      0.88      0.90        32
        True       0.20      0.33      0.25         3

    accuracy                           0.83        35
   macro avg       0.57      0.60      0.58        35
weighted avg       0.87      0.83      0.85        35

Kamala Harris Confusion Matrix:
[[28  4]
 [ 2  1]]


In [13]:
# Trump lie/truth prediction:

def main():
    """Train and evaluate a linear SVM on the Donald Trump subset of the CSV.

    Reads ``truth_or_lie_input.csv`` and drops rows whose ``Chunk Filename``
    starts with the ``VP`` or ``Audio Files/Vice`` prefix under the recorded
    root; the remaining rows are presumably the Trump recordings (confirm
    against the data). Each kept path is rewritten to the local
    ``audio-files/`` directory, features are extracted with
    ``extract_features``, a class-weighted linear ``SVC`` is fitted on a
    stratified 80 % split, and the classification report and confusion
    matrix for the held-out 20 % are printed.

    Raises
    ------
    ValueError
        If no audio file could be turned into features.
    """
    # Absolute prefix the CSV's 'Chunk Filename' values are matched against,
    # and the local directory it is rewritten to.
    source_root = "/Users/milanvaghani/Desktop/Unstructed Machine Learning/"
    local_root = "audio-files/"
    # Prefixes that mark rows excluded from this subset.
    excluded_prefixes = (
        source_root + "VP",
        source_root + "Audio Files/Vice",
    )

    # Load the training data
    train_df = pd.read_csv('truth_or_lie_input.csv')

    # Extract features and labels
    features = []
    labels = []
    skipped = 0

    for _, row in train_df.iterrows():
        chunk_filename = row["Chunk Filename"]
        # str.startswith accepts a tuple and matches any of its entries.
        if chunk_filename.startswith(excluded_prefixes):
            continue

        file_path = chunk_filename.replace(source_root + "Audio Files/", local_root)

        data = extract_features(file_path)
        # extract_features reports a failed file as an all-zero vector.
        # Training on that placeholder with a real label would inject a
        # fabricated sample, so drop the row instead.
        if not np.any(data):
            skipped += 1
            continue

        features.append(data)
        labels.append(row['lie'])  # boolean column: True means the chunk is a lie

    if skipped:
        print(f"Skipped {skipped} file(s) whose features could not be extracted.")
    if not features:
        raise ValueError(
            "No audio features could be extracted; check the 'audio-files/' directory."
        )

    # Convert boolean labels to integers: True -> 1, False -> 0
    y = np.array([int(label) for label in labels])

    # Convert features to numpy array
    X = np.array(features)

    # Split the dataset (stratified so the rare positive class is in both parts)
    # NOTE(review): the seed here is 4 while the other cells use 42 - confirm
    # this is intentional and not a typo.
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.2, random_state=4, stratify=y
    )

    # Scale the features; the scaler is fitted on the training part only
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)

    # Train the model. probability=True is left out: only predict() is used
    # below, and enabling it just adds an internal cross-validation to fit().
    model = SVC(kernel='linear', class_weight={0: 1, 1: 7})
    model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_val)
    print("Classification Report:")
    print(
        classification_report(
            y_val, y_pred, target_names=['False', 'True']
        )
    )
    print("Donald Trump Confusion Matrix:")
    print(confusion_matrix(y_val, y_pred))


if __name__ == "__main__":
    main()

Classification Report:
              precision    recall  f1-score   support

       False       0.94      0.89      0.91        70
        True       0.00      0.00      0.00         4

    accuracy                           0.84        74
   macro avg       0.47      0.44      0.46        74
weighted avg       0.89      0.84      0.86        74

Donald Trump Confusion Matrix:
[[62  8]
 [ 4  0]]
