In [None]:
!pip install sentence-transformers

In [9]:
import json
import csv
from sentence_transformers import SentenceTransformer, util

# Candidate disaster categories; every article title is matched against these.
keywords = ['wildfire', 'avalanche', 'blizzard', 'heatwave', 'earthquake', 'flood', 'hurricane', 'drought', 'tsunami', 'landslide', 'tornado', 'volcano']

# Minimum cosine similarity a title must reach to be assigned a keyword;
# anything below it is labelled 'no_category'.
SIMILARITY_THRESHOLD = 0.3

# Label used when no keyword is similar enough (or the title is unusable).
NO_CATEGORY = 'no_category'

# Column order of the CSV export. Declared explicitly (instead of being read
# from the first row) so the header can still be written when the input
# contains no articles.
OUTPUT_FIELDS = ['title', 'author', 'date', 'matched_keyword', 'similarity_score', 'content']

with open('disaster_news.json', 'r', encoding='utf-8') as f:
    articles = json.load(f)

model = SentenceTransformer('all-MiniLM-L6-v2')
# The keyword embeddings are computed once and reused for every title.
keyword_embeddings = model.encode(keywords, convert_to_tensor=True)

classified_rows = []

for article in articles:
    # .get keeps a record without a 'title' key from aborting the whole run,
    # consistent with how the other fields are read below.
    title = article.get('title')

    if isinstance(title, str):
        title_embedding = model.encode(title, convert_to_tensor=True)

        # cos_sim returns a (titles x keywords) matrix; with a single title,
        # row 0 holds its similarity to each keyword.
        cosine_scores = util.cos_sim(title_embedding, keyword_embeddings)[0]
        best_index = cosine_scores.argmax().item()
        best_score = cosine_scores[best_index].item()

        if best_score >= SIMILARITY_THRESHOLD:
            best_keyword = keywords[best_index]
        else:
            best_keyword = NO_CATEGORY
    else:
        # Missing or non-text title: nothing to embed, so leave it uncategorised.
        best_keyword = NO_CATEGORY
        best_score = 0.0

    classified_rows.append({
        'title': title,
        'author': article.get('author'),
        'date': article.get('date'),
        'matched_keyword': best_keyword,
        'similarity_score': round(best_score, 4),
        'content': article.get('content')
    })

csv_file = 'classified_disaster_news.csv'
json_file = 'classified_disaster_news.json'

# newline='' lets the csv module control line endings itself.
with open(csv_file, 'w', encoding='utf-8', newline='') as f:
    writer = csv.DictWriter(f, fieldnames=OUTPUT_FIELDS)
    writer.writeheader()
    writer.writerows(classified_rows)

# ensure_ascii=False keeps non-ASCII characters readable in the JSON output.
with open(json_file, 'w', encoding='utf-8') as f:
    json.dump(classified_rows, f, ensure_ascii=False, indent=4)

print(f"Classified and saved {len(classified_rows)} articles to '{csv_file}' and '{json_file}'")


Classified and saved 42 articles to 'classified_disaster_news.csv' and 'classified_disaster_news.json'
