In [None]:
import json
import os
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import svm
from sklearn.metrics import classification_report

def load_data(folder):
    """Load labelled article texts from every ``.json`` file in *folder*.

    Each JSON file is read as an object with a ``'label_text'`` entry and an
    ``'articles'`` list whose items carry ``'title'`` and ``'content'``.
    Every article yields one row: the title and content joined by a single
    space, paired with the label of the file it came from.

    Args:
        folder: Directory containing the JSON files. A leading ``~`` is
            expanded to the user's home directory.

    Returns:
        A ``pandas.DataFrame`` with columns ``'text'`` and ``'label'``
        (empty if the folder holds no ``.json`` files).

    Raises:
        FileNotFoundError: If *folder* does not exist.
        KeyError: If a file or article lacks one of the expected keys.
    """
    # os.listdir()/open() do not understand "~"; resolve it up front.
    folder = os.path.expanduser(folder)
    rows = []
    # os.listdir() returns entries in arbitrary order; sort so that the row
    # order is reproducible across machines and runs.
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith('.json'):
            continue
        # JSON is UTF-8 by specification; do not rely on the locale default.
        with open(os.path.join(folder, filename), encoding='utf-8') as f:
            file_data = json.load(f)
        rows.extend(
            [article['title'] + ' ' + article['content'], file_data['label_text']]
            for article in file_data['articles']
        )
    return pd.DataFrame(rows, columns=['text', 'label'])

# Load training and test data.
# The paths start with "~", which os.listdir()/open() treat as a literal
# directory name; expand it to the user's home directory before use.
train_folder = os.path.expanduser('~/data/task_4/train_json/')
test_folder = os.path.expanduser('~/data/task_4/dev_json/')
df_train = load_data(train_folder)
df_test = load_data(test_folder)

# Feature extraction: the TF-IDF vocabulary and weights are fitted on the
# training texts only; the test texts are merely transformed with them so no
# information from the evaluation set leaks into the features.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(df_train['text'])
y_train = df_train['label']
X_test = vectorizer.transform(df_test['text'])
y_test = df_test['label']

# Train model: support vector classifier with scikit-learn's default settings.
# NOTE(review): the recorded report shows recall of about 0.02 for the "low"
# and "mixed" classes; class weighting or a tuned kernel/C may be worth trying.
clf = svm.SVC()
clf.fit(X_train, y_train)

# Evaluate model: per-class precision/recall/F1 on the held-out set.
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

        high       0.61      1.00      0.76       663
         low       0.33      0.02      0.03       126
       mixed       0.83      0.02      0.03       304

    accuracy                           0.61      1093
   macro avg       0.59      0.34      0.27      1093
weighted avg       0.64      0.61      0.47      1093

