# Import Required Libraries
Import the necessary libraries, including scikit-learn, pandas, and numpy.

In [1]:
# Import necessary libraries
import pandas as pd # for data manipulation and analysis
import numpy as np # for numerical operations
from sklearn.feature_extraction.text import CountVectorizer # for text feature extraction
from sklearn.model_selection import train_test_split # for splitting data into training and testing sets
from sklearn.linear_model import LogisticRegression # for building the logistic regression model
from sklearn.metrics import accuracy_score # for evaluating the model's accuracy

# Load and Preprocess Data
Load the dataset and preprocess the data by cleaning, transforming, and encoding it.

In [2]:
# Load the dataset (expects 'text' and 'label' columns, which are read below)
data = pd.read_csv('lie_detection_dataset.csv')

# Clean the data: drop rows with missing values, renumber the index,
# and lowercase the text column
data = data.dropna().reset_index(drop=True)
data['text'] = data['text'].str.lower()

# Raw labels as stored in the file; kept un-encoded because y is split again
# further down the notebook
y = data['label']

# Split the raw text BEFORE vectorizing so the vocabulary is learned from the
# training rows only. Fitting the vectorizer on the full corpus would let
# test-set tokens shape the feature space (data leakage).
text_train, text_test, y_train, y_test = train_test_split(
    data['text'], y, test_size=0.2, random_state=42
)

# Transform the data: fit on the training text, then reuse that vocabulary
vectorizer = CountVectorizer(stop_words='english')  # drops English stop words
X_train = vectorizer.fit_transform(text_train)  # token-count matrix (train)
X_test = vectorizer.transform(text_test)  # same vocabulary applied to test

# Full token-count matrix in the original row order. Code that splits X and y
# again with the same test_size and random_state selects the same rows as the
# split above, because the partition depends only on the row count and seed.
X = vectorizer.transform(data['text'])

# Encode the labels as binary values: 1 where the label is 'lie', 0 otherwise
y_train = np.where(y_train == 'lie', 1, 0)
y_test = np.where(y_test == 'lie', 1, 0)

FileNotFoundError: [Errno 2] No such file or directory: 'lie_detection_dataset.csv'

# Split Data into Training and Testing Sets
Split the preprocessed data into training and testing sets using the train_test_split() function.

In [None]:
# Split the preprocessed data into training and testing sets using the train_test_split() function
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# y holds the raw string labels, so the split above yields string targets.
# Re-apply the binary encoding here; without it the precision/recall/F1 calls
# (which treat label 1 as the positive class by default) and the `== 0` check
# on predictions further down would be working with strings instead of 0/1.
y_train = np.where(y_train == 'lie', 1, 0)  # 1 where the label is 'lie', else 0
y_test = np.where(y_test == 'lie', 1, 0)

# Train the Model
Train the lie detection model using a suitable algorithm, such as logistic regression or support vector machines.

In [None]:
# Fit a logistic regression classifier on the training split.
# fit() returns the estimator itself, so construction and training are chained.
model = LogisticRegression().fit(X_train, y_train)

# Predict a label for every row of the held-out test split
y_pred = model.predict(X_test)

# Accuracy: the share of test predictions that match the true labels
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Evaluate the Model
Evaluate the performance of the model using metrics such as accuracy, precision, recall, and F1 score.

In [None]:
# Import the additional metric functions used in this cell
from sklearn.metrics import precision_score, recall_score, f1_score

# Evaluate the model's precision, recall, and F1 score on the test split.
# With default arguments these functions score label 1 as the positive class.
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Report the scores; plain assignments produce no output in a notebook cell
print("Precision:", precision)
print("Recall:", recall)
print("F1 score:", f1)

# Make Predictions
Use the trained model to make predictions on new data and interpret the results.

In [None]:
# Example statements to classify with the trained model
new_data = ["I always tell the truth", "I never lie"]

# Reuse the already-fitted vectorizer so the new text is mapped onto the same
# feature columns the model was trained on, then predict a label per statement
new_data_transformed = vectorizer.transform(new_data)
new_data_pred = model.predict(new_data_transformed)

# Interpret the results: a prediction of 0 is reported as truthful,
# anything else as deceptive
for statement, predicted in zip(new_data, new_data_pred):
    if predicted != 0:
        print(f"'{statement}' is predicted to be deceptive.")
    else:
        print(f"'{statement}' is predicted to be truthful.")