# Convert JSON to CSV (raw data: one row per article)

# In[72]:
import pandas as pd
import json

def extract_top_level_fields(json_data):
    """Collect the top-level fields of every article in a parsed JSON dump.

    Args:
        json_data: Mapping with an optional ``'articles'`` key holding a list
            of article dicts. A missing or ``None`` value is treated as an
            empty list.

    Returns:
        list[dict]: One shallow copy per article dict, in input order.
        Entries that are not dicts are reported on stdout and skipped.
    """
    data = []
    # `or []` also covers an explicit ``"articles": null`` in the JSON.
    for article in json_data.get('articles') or []:
        if not isinstance(article, dict):
            # A malformed entry has no `.get`, so its id cannot be looked up;
            # report it as 'Unknown' instead of failing inside the handler.
            print(
                "An error occurred with article: Unknown, "
                f"error: expected a dict, got {type(article).__name__}"
            )
            # Skip this entry and continue with the next article.
            continue
        data.append(dict(article))
    return data

import os

# Load the JSON dump.
with open('data/HatePolitics-4001-4100.json', 'r', encoding='utf-8') as file:
    json_data = json.load(file)


# Flatten the articles into a DataFrame (one row per article).
data = extract_top_level_fields(json_data)
df = pd.DataFrame(data)

# Append to the CSV file. The header is written only when the file is new or
# empty; otherwise every re-run would insert another header row in the middle
# of the data. Appending assumes the column layout matches earlier runs.
csv_file_path = 'raw.csv'
write_header = (
    not os.path.isfile(csv_file_path) or os.path.getsize(csv_file_path) == 0
)
df.to_csv(csv_file_path, mode='a', header=write_header, index=False)


# Expand every entry of `messages` into its own row

# In[76]:
import pandas as pd

def extract_complete_data(json_data):
    """Build one row per message, each carrying its article's basic fields.

    Args:
        json_data: Mapping with an optional ``'articles'`` key holding a list
            of article dicts. A missing or ``None`` value is treated as an
            empty list.

    Returns:
        list[dict]: For every message of every valid article, the article
        fields (``article_id``, ``article_title``, ``author``, ``content``,
        ``date``, ``ip``; ``''`` when absent) merged with the message's own
        keys. On a key clash the message value wins. Articles without
        messages contribute no rows. Malformed articles are reported on
        stdout and skipped as a whole.
    """
    article_fields = (
        'article_id', 'article_title', 'author', 'content', 'date', 'ip',
    )
    complete_data = []

    # `or []` also covers an explicit ``"articles": null`` in the JSON.
    for article in json_data.get('articles') or []:
        if not isinstance(article, dict):
            # A malformed entry has no `.get`, so its id cannot be looked up;
            # report it as 'Unknown' instead of failing inside the handler.
            print(
                "An error occurred with article: Unknown, "
                f"error: expected a dict, got {type(article).__name__}"
            )
            continue

        article_info = {field: article.get(field, '') for field in article_fields}

        try:
            # Build all rows first so a bad message cannot leave the article
            # half-emitted in the result.
            rows = [
                {**article_info, **message}
                for message in article.get('messages', [])
            ]
        except TypeError as e:
            # Raised when `messages` is not iterable or a message is not a mapping.
            print(f"An error occurred with article: {article.get('article_id', 'Unknown')}, error: {e}")
            # Skip this article and continue with the next one.
            continue

        complete_data.extend(rows)

    return complete_data


import os

# Load the JSON dump.
with open('data/HatePolitics-801-900.json', 'r', encoding='utf-8') as file:
    json_data = json.load(file)

# One row per message, each carrying its article's basic fields.
data = extract_complete_data(json_data)
df = pd.DataFrame(data)

# Append to the CSV file. The header is written only when the file is new or
# empty; otherwise every re-run would insert another header row in the middle
# of the data. Appending assumes the column layout matches earlier runs.
csv_file_path = 'message_2.csv'
write_header = (
    not os.path.isfile(csv_file_path) or os.path.getsize(csv_file_path) == 0
)
df.to_csv(csv_file_path, mode='a', header=write_header, index=False)


# Expand the fields of `message_count` into separate columns

# In[75]:
import pandas as pd
import json  # Import the json module

def extract_complete_data_with_message_count(json_data):
    """Build one row per article with its ``message_count`` dict flattened.

    Args:
        json_data: Mapping with an optional ``'articles'`` key holding a list
            of article dicts. A missing or ``None`` value is treated as an
            empty list.

    Returns:
        list[dict]: For every valid article, the fields ``article_id``,
        ``article_title``, ``author``, ``content``, ``date`` and ``ip``
        (``''`` when absent) plus one ``message_count_<key>`` entry per key
        of the article's ``message_count`` dict. An article without a
        ``message_count`` key gets no count entries. Malformed articles are
        reported on stdout and skipped.
    """
    article_fields = (
        'article_id', 'article_title', 'author', 'content', 'date', 'ip',
    )
    complete_data = []

    # `or []` also covers an explicit ``"articles": null`` in the JSON.
    for article in json_data.get('articles') or []:
        if not isinstance(article, dict):
            # A malformed entry has no `.get`, so its id cannot be looked up;
            # report it as 'Unknown' instead of failing inside the handler.
            print(
                "An error occurred with article: Unknown, "
                f"error: expected a dict, got {type(article).__name__}"
            )
            continue

        article_info = {field: article.get(field, '') for field in article_fields}

        try:
            expanded_message_count = {
                f'message_count_{key}': value
                for key, value in article.get('message_count', {}).items()
            }
        except AttributeError as e:
            # `message_count` is present but is not a dict (e.g. null).
            print(f"An error occurred with article: {article.get('article_id', 'Unknown')}, error: {e}")
            # Continue to the next article after logging the error.
            continue

        complete_data.append({**article_info, **expanded_message_count})

    return complete_data

import os

# Load the JSON dump.
with open('data/HatePolitics-4001-4100.json', 'r', encoding='utf-8') as file:
    json_data = json.load(file)

# One row per article with its message_count fields flattened into columns.
data = extract_complete_data_with_message_count(json_data)
df = pd.DataFrame(data)

# Append to the CSV file. The header is written only when the file is new or
# empty; otherwise every re-run would insert another header row in the middle
# of the data. Appending assumes the column layout matches earlier runs.
csv_file_path = 'message_count.csv'
write_header = (
    not os.path.isfile(csv_file_path) or os.path.getsize(csv_file_path) == 0
)
df.to_csv(csv_file_path, mode='a', header=write_header, index=False)
