# Generate Docs

## Imports and definitions

In [1]:
import pandas as pd
from utils import create_folder, cleanup_folder, human_to_kebab_case
from doc_page import TagDocPage, PostQuestionDocPage

In [2]:
# Folder that receives every generated Markdown page.
output_dir = 'output'

# Number of questions documented for each tag.
n_questions_per_tag = 5

# Tags left out of the tag list shown on a question page.
redundant_tags = ['android']


def eval_answer(answer_row):
    """Return the value used to rank an answer row.

    The answer with the highest value is the one selected for a question
    page. The value is currently the row's ``score``.
    """
    return answer_row.score

## Create DataFrames

In [3]:
# Load every CSV under data/ into its own DataFrame.
comments_df = pd.read_csv('data/comments.csv')

# Questions: keep a single row per question id.
posts_questions_df = pd.read_csv('data/posts_questions.csv')
posts_questions_df = posts_questions_df.drop_duplicates(subset=['id'])

# Answers, looked up by their parent question when building question pages.
posts_answers_df = pd.read_csv('data/posts_answers.csv')

# Tag wiki bodies and their short excerpts.
posts_tag_wiki_df = pd.read_csv('data/posts_tag_wiki.csv')
posts_tag_wiki_excerpt_df = pd.read_csv('data/posts_tag_wiki_excerpt.csv')

# Tag tables: the tags selected for documentation, and the full tag list.
selected_tags_df = pd.read_csv('data/selected_tags.csv')
tags_df = pd.read_csv('data/tags.csv')

users_df = pd.read_csv('data/users.csv')

## Development

### Prepare files and variables

In [4]:
# Reset the output folder before generating pages.
# NOTE(review): presumably cleanup_folder removes pages left by earlier runs —
# confirm against utils.cleanup_folder.
cleanup_folder(output_dir)
# Accumulates one DataFrame of selected questions per tag; it is filled while
# generating the tag docs and concatenated before generating question docs.
questions_for_docs = []

### Generate docs for tags

Docs for tags are based on the `posts_tag_wiki` and `posts_tag_wiki_excerpt` tables from Stack Overflow. Each page should give a long description of what the tag represents and point to relevant complementary docs.

In [5]:
# Write one doc page per selected tag and collect the questions to document
# under that tag.
for _idx, tag_row in selected_tags_df.iterrows():
    tag_id = tag_row['id']
    tag_name = tag_row['tag_name']

    # Short description: body of the excerpt post the tag points to.
    excerpt_mask = posts_tag_wiki_excerpt_df['id'] == tag_row['excerpt_post_id']
    tag_description = posts_tag_wiki_excerpt_df.loc[excerpt_mask, 'body'].values[0]

    # Long description: body of the wiki post the tag points to.
    wiki_mask = posts_tag_wiki_df['id'] == tag_row['wiki_post_id']
    tag_wiki_body = posts_tag_wiki_df.loc[wiki_mask, 'body'].values[0]

    # Save the tag doc page.
    tag_page = TagDocPage(tag_name, tag_description, tag_wiki_body)
    tag_page.save(f"{output_dir}/{tag_name}.md")

    # Question pages for this tag are saved into a folder named after the tag.
    create_folder(f"{output_dir}/{tag_name}")

    # Keep the first n_questions_per_tag questions of this tag when ordered by
    # ascending question_view_count_rank.
    questions_of_tag = posts_questions_df[posts_questions_df['tag_id'] == tag_id]
    ranked_questions = questions_of_tag.sort_values(by='question_view_count_rank')
    tag_questions_df = ranked_questions.head(n_questions_per_tag)
    questions_for_docs.append(tag_questions_df)

### Generate docs for questions

In [6]:
# Gather the questions selected for every tag into a single table.
# pd.concat raises ValueError on an empty list, so fall back to an empty
# DataFrame (the loop below then does nothing) when no tag contributed questions.
questions_for_docs_df = pd.concat(questions_for_docs) if questions_for_docs else pd.DataFrame()

# Write one doc page per selected question, pairing it with its best answer.
for idx, row in questions_for_docs_df.iterrows():
    parent_tag_name = row.tag_name
    question_title = row.title
    question_body = row.body
    # Tags listed on the page: the question's pipe-separated tags minus the
    # parent tag and the configured redundant tags.
    question_tags = [
        tag for tag in row.tags.split('|')
        if tag not in redundant_tags and tag != parent_tag_name
    ]

    # select answer
    question_answers_df = posts_answers_df[posts_answers_df.parent_id == row.id].copy()
    if question_answers_df.empty:
        # Selecting the best answer below fails when there are no answers,
        # which would abort generation of all remaining pages; skip this
        # question and report it instead.
        print(f"Skipping question {row.id}: no answers found")
        continue
    # Rank the answers with eval_answer and keep the highest-ranked one.
    question_answers_df['eval'] = question_answers_df.apply(eval_answer, axis=1)
    selected_answer = question_answers_df.sort_values(by='eval', ascending=False).iloc[0]
    answer_body = selected_answer.body

    # save post question doc page, in the parent tag's folder, under a file
    # name derived from the question title
    output_file_name = human_to_kebab_case(question_title)
    PostQuestionDocPage(question_title, question_body, question_tags, answer_body).save(f"{output_dir}/{parent_tag_name}/{output_file_name}.md")