In [13]:
import pandas as pd
import numpy as np
import librosa
import os
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix

def extract_features(file_name, n_mfcc=40):
    """Return the time-averaged MFCC vector of one audio file.

    Parameters
    ----------
    file_name : str or path-like
        Audio file passed to ``librosa.load``.
    n_mfcc : int, default 40
        Number of MFCC coefficients to compute. It is also the length of the
        returned vector, so the success and failure paths always agree.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``n_mfcc`` holding the mean of each coefficient
        over all frames. If loading or feature extraction raises, the error
        is printed and an all-zero vector of the same length is returned
        instead, so callers can recognise a failed file by ``not result.any()``.
    """
    try:
        audio_data, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        # Extract MFCCs (Mel-frequency cepstral coefficients)
        mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=n_mfcc)
        # Average every coefficient across frames to get one fixed-size vector
        mfccs_scaled = np.mean(mfccs.T, axis=0)
        return mfccs_scaled
    except Exception as e:
        print(f"Error encountered while parsing file: {file_name}. Exception: {e}")
        # Failure placeholder: same length as a real feature vector
        return np.zeros(n_mfcc)

def main():
    """Train and evaluate a linear SVM truth/lie classifier on all audio chunks.

    Reads ``truth_or_lie_input.csv``, rewrites each absolute ``Chunk Filename``
    to the local ``audio-files/`` directory, extracts features with
    ``extract_features``, fits a class-weighted linear ``SVC`` on a stratified
    65/35 split and prints the classification report and confusion matrix for
    the held-out part.

    Raises
    ------
    ValueError
        If feature extraction failed for every row, leaving nothing to train on.
    """
    # Load the training data
    train_df = pd.read_csv('truth_or_lie_input.csv')

    # Absolute prefix recorded in the CSV; it is rewritten to the local folder.
    source_root = "/Users/milanvaghani/Desktop/Unstructed Machine Learning/"

    # Extract features and labels
    features = []
    labels = []
    failed = 0

    for chunk_path, label in zip(train_df["Chunk Filename"], train_df["lie"]):
        if chunk_path.startswith(source_root + "VP"):
            file_path = chunk_path.replace(source_root, "audio-files/")
        else:
            # Replace the folder together with its trailing slash so the result
            # is "audio-files/<name>" and not "audio-files//<name>".
            file_path = chunk_path.replace(source_root + "Audio Files/", "audio-files/")

        data = extract_features(file_path)
        # extract_features reports failure with an all-zero vector; training on
        # those placeholders would teach the model nothing but noise.
        if not np.any(data):
            failed += 1
            continue
        features.append(data)
        labels.append(label)  # value of the boolean 'lie' column

    if failed:
        print(f"Skipped {failed} of {len(features) + failed} chunks whose features could not be extracted.")
    if not features:
        raise ValueError(
            "No audio features could be extracted; check that the files exist under 'audio-files/'."
        )

    # Convert boolean labels to integers: True -> 1, False -> 0
    y = np.array([int(label) for label in labels])

    # Convert features to numpy array
    X = np.array(features)

    # Split the dataset, keeping the class ratio equal in both parts
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.35, random_state=42, stratify=y
    )

    # Scale the features; the scaler is fitted on the training part only
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)

    # Train the model; class 1 (lie) is weighted 7x relative to class 0.
    # NOTE(review): probability=True is kept as-is although only predict() is used below.
    model = SVC(kernel='linear', class_weight={0: 1, 1: 7}, probability=True)
    model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_val)
    print("Classification Report:")
    print(
        classification_report(
            y_val, y_pred, target_names=['False', 'True']
        )
    )
    print("Confusion Matrix:")
    print(confusion_matrix(y_val, y_pred))


# Inside the notebook kernel __name__ is "__main__", so running this cell
# executes main() immediately.
if __name__ == "__main__":
    main()

  audio_data, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Error encountered while parsing file: audio-files/VP Kamala Harris full speech at 2024 DNC (Aug. 22, 2024)_chunk0.wav. Exception: [Errno 2] No such file or directory: 'audio-files/VP Kamala Harris full speech at 2024 DNC (Aug. 22, 2024)_chunk0.wav'
Error encountered while parsing file: audio-files/VP Kamala Harris full speech at 2024 DNC (Aug. 22, 2024)_chunk1.wav. Exception: [Errno 2] No such file or directory: 'audio-files/VP Kamala Harris full speech at 2024 DNC (Aug. 22, 2024)_chunk1.wav'
Error encountered while parsing file: audio-files/VP Kamala Harris full speech at 2024 DNC (Aug. 22, 2024)_chunk2.wav. Exception: [Errno 2] No such file or directory: 'audio-files/VP Kamala Harris full speech at 2024 DNC (Aug. 22, 2024)_chunk2.wav'
Error encountered while parsing file: audio-files/VP Kamala Harris full speech at 2024 DNC (Aug. 22, 2024)_chunk3.wav. Exception: [Errno 2] No such file or directory: 'audio-files/VP Kamala Harris full speech at 2024 DNC (Aug. 22, 2024)_chunk3.wav'
Erro

In [14]:
# Kamala Harris truth/lie prediction:
def main():
    """Train and evaluate the truth/lie SVM on the Kamala Harris chunks only.

    Reads ``truth_or_lie_input.csv`` and keeps the rows whose ``Chunk Filename``
    starts with the ``VP`` or ``Audio Files/Vice`` prefix, rewriting them to the
    local ``audio-files/`` directory; every other row is ignored. Features come
    from ``extract_features``. A class-weighted linear ``SVC`` is fitted on a
    stratified 65/35 split and the classification report and confusion matrix
    for the held-out part are printed.

    Raises
    ------
    ValueError
        If no selected row yielded usable features.
    """
    # Load the training data
    train_df = pd.read_csv('truth_or_lie_input.csv')

    # Absolute prefix recorded in the CSV; it is rewritten to the local folder.
    source_root = "/Users/milanvaghani/Desktop/Unstructed Machine Learning/"
    vp_prefix = source_root + "VP"
    vice_prefix = source_root + "Audio Files/Vice"

    # Extract features and labels
    features = []
    labels = []
    failed = 0

    for chunk_path, label in zip(train_df["Chunk Filename"], train_df["lie"]):
        if chunk_path.startswith(vp_prefix):
            file_path = chunk_path.replace(source_root, "audio-files/")
        elif chunk_path.startswith(vice_prefix):
            file_path = chunk_path.replace(vice_prefix, "audio-files/Vice")
        else:
            # Row does not match either selected prefix
            continue

        data = extract_features(file_path)
        # extract_features reports failure with an all-zero vector; training on
        # those placeholders would teach the model nothing but noise.
        if not np.any(data):
            failed += 1
            continue
        features.append(data)
        labels.append(label)  # value of the boolean 'lie' column

    if failed:
        print(f"Skipped {failed} of {len(features) + failed} chunks whose features could not be extracted.")
    if not features:
        raise ValueError(
            "No audio features could be extracted; check that the files exist under 'audio-files/'."
        )

    # Convert boolean labels to integers: True -> 1, False -> 0
    y = np.array([int(label) for label in labels])

    # Convert features to numpy array
    X = np.array(features)

    # Split the dataset, keeping the class ratio equal in both parts
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.35, random_state=42, stratify=y
    )

    # Scale the features; the scaler is fitted on the training part only
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)

    # Train the model; class 1 (lie) is weighted 7x relative to class 0.
    # NOTE(review): probability=True is kept as-is although only predict() is used below.
    model = SVC(kernel='linear', class_weight={0: 1, 1: 7}, probability=True)
    model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_val)
    print("Classification Report:")
    print(
        classification_report(
            y_val, y_pred, target_names=['False', 'True']
        )
    )
    print("Kamala Harris Confusion Matrix:")
    print(confusion_matrix(y_val, y_pred))


# The guard is true in the notebook kernel, so this cell runs the
# main() defined directly above as soon as it is executed.
if __name__ == "__main__":
    main()

  audio_data, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Error encountered while parsing file: audio-files/VP Kamala Harris full speech at 2024 DNC (Aug. 22, 2024)_chunk0.wav. Exception: [Errno 2] No such file or directory: 'audio-files/VP Kamala Harris full speech at 2024 DNC (Aug. 22, 2024)_chunk0.wav'
Error encountered while parsing file: audio-files/VP Kamala Harris full speech at 2024 DNC (Aug. 22, 2024)_chunk1.wav. Exception: [Errno 2] No such file or directory: 'audio-files/VP Kamala Harris full speech at 2024 DNC (Aug. 22, 2024)_chunk1.wav'
Error encountered while parsing file: audio-files/VP Kamala Harris full speech at 2024 DNC (Aug. 22, 2024)_chunk2.wav. Exception: [Errno 2] No such file or directory: 'audio-files/VP Kamala Harris full speech at 2024 DNC (Aug. 22, 2024)_chunk2.wav'
Error encountered while parsing file: audio-files/VP Kamala Harris full speech at 2024 DNC (Aug. 22, 2024)_chunk3.wav. Exception: [Errno 2] No such file or directory: 'audio-files/VP Kamala Harris full speech at 2024 DNC (Aug. 22, 2024)_chunk3.wav'
Erro

In [15]:
# Donald Trump truth/lie prediction:

import pandas as pd
import numpy as np
import librosa
import os
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE

def extract_features(file_name, n_mfcc=40):
    """Return the time-averaged MFCC vector of one audio file.

    NOTE(review): this cell redefines the ``extract_features`` already defined
    in cell In [13]; keep the two copies in sync or drop one of them.

    Parameters
    ----------
    file_name : str or path-like
        Audio file passed to ``librosa.load``.
    n_mfcc : int, default 40
        Number of MFCC coefficients to compute. It is also the length of the
        returned vector, so the success and failure paths always agree.

    Returns
    -------
    numpy.ndarray
        1-D array of length ``n_mfcc`` holding the mean of each coefficient
        over all frames. If loading or feature extraction raises, the error
        is printed and an all-zero vector of the same length is returned
        instead, so callers can recognise a failed file by ``not result.any()``.
    """
    try:
        audio_data, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        # Extract MFCCs (Mel-frequency cepstral coefficients)
        mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=n_mfcc)
        # Average every coefficient across frames to get one fixed-size vector
        mfccs_scaled = np.mean(mfccs.T, axis=0)
        return mfccs_scaled
    except Exception as e:
        print(f"Error encountered while parsing file: {file_name}. Exception: {e}")
        # Failure placeholder: same length as a real feature vector
        return np.zeros(n_mfcc)

def main():
    """Train and evaluate the truth/lie SVM on the Donald Trump chunks only.

    Reads ``truth_or_lie_input.csv`` and drops the rows whose ``Chunk Filename``
    starts with the ``VP`` or ``Audio Files/Vice`` prefix; the remaining rows
    are rewritten to the local ``audio-files/`` directory. Features come from
    ``extract_features``. A class-weighted linear ``SVC`` is fitted on a
    stratified 65/35 split and the classification report and confusion matrix
    for the held-out part are printed.

    Raises
    ------
    ValueError
        If no selected row yielded usable features.
    """
    # Load the training data
    train_df = pd.read_csv('truth_or_lie_input.csv')

    # Absolute prefix recorded in the CSV; it is rewritten to the local folder.
    source_root = "/Users/milanvaghani/Desktop/Unstructed Machine Learning/"
    excluded_prefixes = (source_root + "VP", source_root + "Audio Files/Vice")

    # Extract features and labels
    features = []
    labels = []
    failed = 0

    for chunk_path, label in zip(train_df["Chunk Filename"], train_df["lie"]):
        if chunk_path.startswith(excluded_prefixes):
            # Row belongs to one of the excluded prefixes
            continue
        file_path = chunk_path.replace(source_root + "Audio Files/", "audio-files/")

        data = extract_features(file_path)
        # extract_features reports failure with an all-zero vector; training on
        # those placeholders would teach the model nothing but noise.
        if not np.any(data):
            failed += 1
            continue
        features.append(data)
        labels.append(label)  # value of the boolean 'lie' column

    if failed:
        print(f"Skipped {failed} of {len(features) + failed} chunks whose features could not be extracted.")
    if not features:
        raise ValueError(
            "No audio features could be extracted; check that the files exist under 'audio-files/'."
        )

    # Convert boolean labels to integers: True -> 1, False -> 0
    y = np.array([int(label) for label in labels])

    # Convert features to numpy array
    X = np.array(features)

    # Split the dataset, keeping the class ratio equal in both parts.
    # NOTE(review): the seed here is 4 while the other cells use 42 - confirm this is intentional.
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=0.35, random_state=4, stratify=y
    )

    # Scale the features; the scaler is fitted on the training part only
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_val = scaler.transform(X_val)

    # Train the model; class 1 (lie) is weighted 7x relative to class 0.
    # NOTE(review): probability=True is kept as-is although only predict() is used below.
    model = SVC(kernel='linear', class_weight={0: 1, 1: 7}, probability=True)
    model.fit(X_train, y_train)

    # Evaluate the model
    y_pred = model.predict(X_val)
    print("Classification Report:")
    print(
        classification_report(
            y_val, y_pred, target_names=['False', 'True']
        )
    )
    print("Donald Trump Confusion Matrix:")
    print(confusion_matrix(y_val, y_pred))


# __name__ equals "__main__" when a notebook cell is executed, so the
# main() defined in this cell is run on execution.
if __name__ == "__main__":
    main()

  audio_data, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Error encountered while parsing file: audio-files/Donald Trump full speech at Pennsylvania rally (October 5, 2024)_chunk0.wav. Exception: [Errno 2] No such file or directory: 'audio-files/Donald Trump full speech at Pennsylvania rally (October 5, 2024)_chunk0.wav'
Error encountered while parsing file: audio-files/Donald Trump full speech at Pennsylvania rally (October 5, 2024)_chunk1.wav. Exception: [Errno 2] No such file or directory: 'audio-files/Donald Trump full speech at Pennsylvania rally (October 5, 2024)_chunk1.wav'
Error encountered while parsing file: audio-files/Donald Trump full speech at Pennsylvania rally (October 5, 2024)_chunk2.wav. Exception: [Errno 2] No such file or directory: 'audio-files/Donald Trump full speech at Pennsylvania rally (October 5, 2024)_chunk2.wav'
Error encountered while parsing file: audio-files/Donald Trump full speech at Pennsylvania rally (October 5, 2024)_chunk3.wav. Exception: [Errno 2] No such file or directory: 'audio-files/Donald Trump full