In [1]:
import os
import sys
import pandas as pd
import argparse
from datetime import datetime
import pathlib

# --- Django environment setup ------------------------------------------------
# Make the parent of the current working directory importable so that the
# settings package and the Django apps imported below can be resolved.
# NOTE(review): this assumes the kernel's working directory is a direct child
# of the Django project root -- confirm when moving the notebook.
parent_path = pathlib.Path().absolute().parent
# Only insert when the entry is not already at the front, so re-running this
# cell does not keep stacking duplicate entries onto sys.path.
if not sys.path or sys.path[0] != str(parent_path):
    sys.path.insert(0, str(parent_path))

os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'website_configs.settings')
# Important: allow synchronous ORM operations inside Jupyter's asynchronous
# environment. The flag is set before django.setup() so that it is already in
# effect for any database access triggered while the apps are being loaded.
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

# These imports must stay below the sys.path / environment configuration:
# Django can only be initialised once the settings module is resolvable.
import django
django.setup()

# Django is configured now, so the models can be imported.
from app_user_keyword_db.models import NewsData

In [5]:
# Read the preprocessed news CSV (pipe-delimited).
# The path is assembled with os.path.join instead of hard-coded "\\"
# separators: on Windows this yields the same string as before, and on
# Linux/macOS it produces a path that actually resolves.
csv_file_path = os.path.join(
    '..', 'app_sentiment', 'dataset', 'ithome_news_200_preprocessed.csv'
)
df = pd.read_csv(csv_file_path, sep='|')

# Create or update one NewsData record per CSV row, keyed by the row's id.
for idx, row in df.iterrows():
    try:
        # Convert the date string to a date object.
        date_obj = datetime.strptime(row['timestamp'], '%Y-%m-%d').date()

        # The photo column may hold any of the following:
        #   - an actual URL string
        #   - an empty string ("")
        #   - a pandas NaN (when the field is empty in the CSV file)
        #   - None
        # Everything except a real value is stored as None.
        photo_raw = row['photo']
        photo_value = None if pd.isna(photo_raw) or photo_raw == "" else photo_raw

        # Create or update the NewsData object.
        news_data, created = NewsData.objects.update_or_create(
            id=row['id'],
            defaults={
                'timestamp': date_obj,
                'category': row['category'],
                'title': row['title'],
                'content': row['content'],
                'sentiment': row['sentiment'],
                # 'summary': row['summary'],  # disabled: summary is not imported
                'top_key_freq': row['top_key_freq'],
                'tokens': row['tokens'],
                'tokens_v2': row['tokens_v2'],
                'entities': row['entities'],
                'token_pos': row['token_pos'],
                'link': row['link'],
                'photo': photo_value,
            }
        )
        if created:
            print(f"Created new NewsData object with id: {row['id']}")
        else:
            print(f"Updated existing NewsData object with id: {row['id']}")
    except Exception as e:
        # Best effort: report the failing row and keep importing the rest.
        print(f"Error at row {idx}: {e}")
        print(row)

Created new NewsData object with id: ithome_AI_20250307_1
Created new NewsData object with id: ithome_AI_20250307_2
Created new NewsData object with id: ithome_AI_20250307_3
Created new NewsData object with id: ithome_AI_20250304_4
Created new NewsData object with id: ithome_雲端_20250306_1
Created new NewsData object with id: ithome_雲端_20250306_2
Created new NewsData object with id: ithome_雲端_20250305_3
Created new NewsData object with id: ithome_雲端_20250304_4
Created new NewsData object with id: ithome_資安_20250307_1
Created new NewsData object with id: ithome_資安_20250307_2
Created new NewsData object with id: ithome_資安_20250307_3
Created new NewsData object with id: ithome_資安_20250307_4
