In [30]:
import os
import sys
import pandas as pd
import argparse
from datetime import datetime
import pathlib

# Django bootstrap so the ORM can be used from this notebook.
#
# Add the parent of the current working directory to sys.path
# (presumably the Django project root that contains `website_configs`
# and the app packages - verify against the repository layout).
# An equivalent os.path spelling would be:
#     sys.path.insert(0, os.path.dirname(os.path.abspath('')))
parent_path = pathlib.Path().absolute().parent
sys.path.insert(0, str(parent_path))

# Point Django at the settings module unless the variable is already set,
# then initialise Django.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'website_configs.settings')
import django
django.setup()
# Important: set this environment variable to allow synchronous operations
# inside Jupyter's asynchronous environment.
os.environ["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"

# Now we can import Django models
from app_top_person_db.models import TopPerson

In [31]:
# Load the per-category top-person table from CSV into a DataFrame.
# Alternative input kept for reference:
# csv_file_path = '../app_user_keyword/dataset/cna_news_preprocessed_12weeks.csv'
csv_file_path = '../app_top_person/dataset/news_top_person_by_category_via_ner.csv'

# Comma is read_csv's default delimiter, so it is not passed explicitly.
df = pd.read_csv(csv_file_path)

# Show the column labels as a quick sanity check of the file layout.
print(df.columns)



Index(['category', 'top_keys'], dtype='object')


In [26]:
# Trim leading/trailing whitespace from every category label so the values
# used as lookup keys later are normalised.
stripped_category = df['category'].str.strip()
df['category'] = stripped_category



In [28]:
# Print the category column to eyeball the labels that will be written.
print(df['category'])

0       即時
1     娛樂影劇
2       國際
3       政治
4     社會地方
5       財經
6       運動
7       玩樂
8       品味
9     遊戲3C
10      全部
Name: category, dtype: object


In [35]:
# Upsert one TopPerson record per category from the loaded CSV.
#
# - Rows are grouped by `category` in order of first appearance
#   (sort=False); rows whose category is missing are skipped because
#   groupby drops NaN keys by default.
# - `update_or_create` looks the record up by `category` alone, so when a
#   category has several rows the last one processed wins.
# - Errors are handled per row: a failing row is reported and skipped
#   without abandoning the remaining rows of the same category, and the
#   `idx` / `row` shown in the message are always the ones that failed.
for category, category_df in df.groupby('category', sort=False):
    for idx, row in category_df.iterrows():
        try:
            # Create the record for this category, or overwrite its top_keys.
            news_data, created = TopPerson.objects.update_or_create(
                category=category,  # Lookup key
                defaults={
                    'top_keys': row['top_keys'],
                }
            )
        except Exception as e:
            # Best-effort import: report the offending row and keep going.
            print(f"Error processing category {category} at row {idx}: {e}")
            print(row)
            continue

        if created:
            print(f"Created new TopPerson object for category: {category}")
        else:
            print(f"Updated existing TopPerson object for category: {category}")


Created new TopPerson object for category: 即時
Created new TopPerson object for category: 娛樂影劇
Created new TopPerson object for category: 國際
Created new TopPerson object for category: 政治
Created new TopPerson object for category: 社會地方
Created new TopPerson object for category: 財經
Updated existing TopPerson object for category: 運動
Created new TopPerson object for category: 玩樂
Created new TopPerson object for category: 品味
Created new TopPerson object for category: 遊戲3C
Created new TopPerson object for category: 全部


In [34]:
# Ad-hoc inspection of `created`, the flag left over from the most recent
# update_or_create call in the import loop (a variable leaked from that cell).
# NOTE(review): this cell's execution count is lower than the loop cell's, so
# the recorded output comes from an earlier run of the loop.
created

False