In [6]:
import numpy as np
import pandas as pd
import xmltodict

class PageDataContainer:
    """Fields extracted from one thread, used to build one Q&A page.

    All attributes are assigned once in ``__init__``: ``id``, ``title``,
    ``question_channel``, ``question_date``, ``question_members``,
    ``answer_members``, ``tech_topics``, ``question_contains`` and
    ``answer_contains``.
    """

    def __init__(self, i, df_thread, anotation_master):
        """Extract the page fields from the rows of a single thread.

        Args:
            i: Integer page index; stored as ``id`` and zero-padded to four
                digits in ``title`` (``Q&A-xxxx``).
            df_thread: DataFrame with the rows of one thread. Columns read
                here: ``channel_name``, ``thread_ts``, ``reply_num``,
                ``user_name`` and ``talk_text``. Must contain at least one row.
            anotation_master: Mapping whose *values* are the words searched
                for in the thread text.

        Raises:
            IndexError: If ``df_thread`` has no rows.
        """
        # Index used for the page title.
        self.id = i
        # Title in the form "Q&A-xxxx", derived from the index.
        self.title = f'Q&A-{i:04d}'

        # Positional access (.iloc) is used throughout: index labels of the
        # frame (or of a filtered subset of it) are not guaranteed to contain 0.
        # Channel of the first row of the thread.
        self.question_channel = df_thread['channel_name'].iloc[0]
        # First 10 characters of the first row's thread_ts
        # (assumes thread_ts is a string starting with the date -- TODO confirm).
        self.question_date = df_thread['thread_ts'].iloc[0][:10]

        # Rows with reply_num == 0 are treated as the question; the rest as answers.
        question_rows = df_thread[df_thread.reply_num == 0]
        answer_rows = df_thread[df_thread.reply_num != 0]

        if len(question_rows) == 0:
            # No question row in this thread: placeholder questioner, and
            # every participant counts as an answerer.
            self.question_members = ('',)
            self.answer_members = tuple(df_thread.user_name.unique())
        else:
            self.question_members = tuple(question_rows.user_name)
            # Only the user of the first question row is excluded from the answerers.
            first_questioner = question_rows.user_name.iloc[0]
            self.answer_members = tuple(
                user for user in df_thread.user_name.unique()
                if user != first_questioner
            )

        # Words from the annotation values that occur in the thread text,
        # most frequent first. The text is joined once instead of once per
        # word, and rows with a missing body are skipped so the join cannot fail.
        thread_text = " ".join(df_thread.talk_text.dropna().astype(str))
        cnt_dict = {word: thread_text.count(word) for word in anotation_master.values()}
        hits = [(word, cnt) for word, cnt in cnt_dict.items() if cnt != 0]
        # Stable sort: words with equal counts keep their annotation order.
        hits.sort(key=lambda pair: pair[1], reverse=True)
        self.tech_topics = tuple(word for word, _ in hits)

        # Question bodies.
        self.question_contains = tuple(question_rows.talk_text)
        # Answer bodies.
        self.answer_contains = tuple(answer_rows.talk_text)

In [30]:
def setup():
    """Load the lookup tables and the XML template.

    Reads, in this order, ``../csv/user_name_master.csv``,
    ``../csv/annotation_master.csv`` and ``../template/import-template.xml``.

    Returns:
        tuple: ``(user_master, anotation_master, output_template)`` where

        * ``user_master`` maps ``(user_id, target_date)`` to ``user_name``,
        * ``anotation_master`` maps ``keyword`` to ``property``,
        * ``output_template`` is the result of ``xmltodict.parse`` on the
          template file's text.
    """
    # (user_id, target_date) -> user_name
    df_users = pd.read_csv(r'../csv/user_name_master.csv')
    user_keys = zip(df_users['user_id'], df_users['target_date'])
    user_master = dict(zip(user_keys, df_users['user_name']))

    # keyword -> property
    df_annotations = pd.read_csv(r'../csv/annotation_master.csv')
    anotation_master = dict(
        zip(df_annotations['keyword'], df_annotations['property'])
    )

    # Template is read as UTF-8 text and parsed into a nested mapping.
    with open(r'../template/import-template.xml', encoding='utf-8') as template_file:
        output_template = xmltodict.parse(template_file.read())

    return user_master, anotation_master, output_template
    
def df_to_container(i, df, anotation):
    """Wrap one thread's rows in a ``PageDataContainer``.

    Args:
        i: Page index forwarded to the container.
        df: DataFrame with the rows of a single thread.
        anotation: Annotation mapping forwarded to the container.

    Returns:
        PageDataContainer: The container built from the arguments.
    """
    page = PageDataContainer(i=i, df_thread=df, anotation_master=anotation)
    return page

def container_to_dict(container):
    """Copy the page fields of a container into a plain dict.

    Args:
        container: Object exposing the attributes ``id``, ``title``,
            ``question_channel``, ``question_date``, ``question_members``,
            ``answer_members``, ``tech_topics``, ``question_contains`` and
            ``answer_contains``.

    Returns:
        dict: One entry per attribute listed above, keyed by the attribute
        name, in that order.

    Raises:
        AttributeError: If the container lacks one of the attributes.
    """
    # Each key is read from the attribute of the same name, so a key can no
    # longer be filled from the wrong field ('question_members' used to
    # receive question_date).
    field_names = (
        'id',
        'title',
        'question_channel',
        'question_date',
        'question_members',
        'answer_members',
        'tech_topics',
        'question_contains',
        'answer_contains',
    )
    return {name: getattr(container, name) for name in field_names}

def dict_to_xml(dict):
    """Placeholder for turning a page dict into XML; not implemented yet.

    Left unwritten because it is still unclear how the template XML should
    be rewritten. The argument is ignored.

    Returns:
        None
    """
    return None
    
def main(input_path, output_xml_filename, output_folder_path):
    """Build one container and one output dict per thread in the talk CSV.

    Args:
        input_path: Path of the CSV read with ``pd.read_csv``; its
            ``thread_ts`` column is used to split the rows into threads.
        output_xml_filename: Accepted but not used by the current body.
        output_folder_path: Accepted but not used by the current body.

    Returns:
        None. The lists built here are not written out or returned yet.
    """
    df_talks = pd.read_csv(input_path)

    # Only anotation_master is consumed below; the other two are loaded but
    # not used yet.
    user_master, anotation_master, output_template = setup()

    # Pages per XML file. TODO: not used yet -- unclear how it should be applied.
    num_of_pages_in_xml = 1

    # One PageDataContainer per distinct thread_ts, in order of first
    # appearance; the page index is the position in that order.
    thread_ts_list = df_talks['thread_ts'].unique().tolist()
    container_list = [
        df_to_container(
            page_no,
            df_talks[df_talks['thread_ts'] == ts].reset_index(),
            anotation_master,
        )
        for page_no, ts in enumerate(thread_ts_list)
    ]

    # Output dicts, one per container. TODO: the final output format is
    # still undecided.
    output_dict_list = [container_to_dict(page) for page in container_list]

In [31]:
if __name__ == '__main__':
    # Input CSV, output file name and output folder handed to main().
    input_path, output_xml_filename, output_folder_path = (
        r'../csv/question_talk_data.csv',
        r'output_xml.xml',
        r'../xml/',
    )
    main(
        input_path=input_path,
        output_xml_filename=output_xml_filename,
        output_folder_path=output_folder_path,
    )