In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score
import pandas as pd

In [None]:

# Train and evaluate a bag-of-words logistic-regression classifier on a
# tab-separated file of labelled statements, then print accuracy and a
# per-class report for a held-out 20% split.

# Load the data
file_path = '/mnt/data/train.tsv'
# header=None: columns are selected by position below, so the first line must
# be read as data rather than consumed as column names. If the file does carry
# a header line, its label cell is not a key of label_mapping, so that row is
# removed by the dropna() further down.
data = pd.read_csv(file_path, sep='\t', header=None)

# Extract text and labels by position (column 1 = label, column 2 = text).
# Positional selection avoids duplicate column names if the file already has a
# "text"/"label" column; .copy() makes the later column assignment operate on
# an independent frame instead of a slice of the raw data.
data = data.iloc[:, [2, 1]].copy()
data.columns = ["text", "label"]

# Map labels to binary values (customize mapping based on the data)
# NOTE(review): any label string not listed here maps to NaN and the row is
# dropped below. If the file uses additional labels (e.g. a six-way truthfulness
# scale), confirm that excluding them is intended.
label_mapping = {
    "false": 0,
    "half-true": 0,
    "mostly-true": 1,
    "true": 1
}
data["label"] = data["label"].map(label_mapping)

# Drop rows with missing text or an unmapped/missing label
data = data.dropna()

# map() upcasts to float when it produces NaN; restore integer class labels so
# the report shows classes 0/1 rather than 0.0/1.0.
data["label"] = data["label"].astype(int)

# Split the data into features and target
X = data["text"]
y = data["label"]

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert text data to Bag-of-Words representation.
# The vocabulary is fitted on the training split only and reused for the test
# split, so no test-set vocabulary leaks into training.
vectorizer = CountVectorizer(stop_words="english", max_features=5000)
X_train_bow = vectorizer.fit_transform(X_train)
X_test_bow = vectorizer.transform(X_test)

# Train logistic regression.
# The default max_iter=100 is often too low for the solver to converge on
# thousands of unscaled count features; allow more iterations.
model = LogisticRegression(max_iter=1000)
model.fit(X_train_bow, y_train)

# Predict on test data
y_pred = model.predict(X_test_bow)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
