# Generate Docs

## Imports and definitions

In [1]:
import statistics
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from utils import create_folder, cleanup_folder, human_to_kebab_case
from comment_classifier.utils import preprocess_comment
from comment_classifier.sentence_scorer import SentenceScorer
from doc_page import TagDocPage, PostQuestionDocPage
import joblib

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/gabriel.dutradias/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/gabriel.dutradias/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
# Root folder under which the tag pages and per-tag question folders are written.
output_dir = 'output'
# Upper bound on how many top-ranked questions are kept for each tag.
n_questions_per_tag = 15
# Tags that are removed from the tag list shown on each question page.
redundant_tags = ['android']

## Create DataFrames

In [3]:
# Load the CSV exports used by the generator; paths are relative to the working directory.
comments_df = pd.read_csv('data/comments.csv')
# Keep a single row per question id (drop_duplicates retains the first occurrence).
posts_questions_df = pd.read_csv('data/posts_questions.csv').drop_duplicates(subset=['id'])
posts_answers_df = pd.read_csv('data/posts_answers.csv')
# Tag wiki bodies and excerpts, looked up by post id when building the tag pages.
posts_tag_wiki_df = pd.read_csv('data/posts_tag_wiki.csv')
posts_tag_wiki_excerpt_df = pd.read_csv('data/posts_tag_wiki_excerpt.csv')
# The tags for which docs are generated (iterated row by row further down).
selected_tags_df = pd.read_csv('data/selected_tags.csv')
tags_df = pd.read_csv('data/tags.csv')
users_df = pd.read_csv('data/users.csv')

## Development

### Evaluation functions

In [4]:
# Pre-compute the shared inputs used by the evaluation functions.

# Attach the author's reputation to every answer. The two users columns are prefixed
# before merging, so they appear in the result as `user_reputation` and `user_id`.
# NOTE(review): pd.merge defaults to an inner join, so answers whose owner_user_id has
# no match in users_df are dropped from the result — confirm this is intended.
posts_answers_with_user_df = pd.merge(left=posts_answers_df, right=users_df[['reputation', 'id']].add_prefix('user_'), left_on='owner_user_id', right_on='user_id')

# One MinMaxScaler instance shared by the add_scaled_fields_* helpers; each helper
# calls fit_transform on it, so it is refitted on every call.
scaler = MinMaxScaler()

In [5]:
def add_scaled_fields_to_questions(questions_df):
    """Add min-max scaled copies of `view_count` and `score` to `questions_df`.

    The frame is modified in place: the columns `scaled_view_count` and
    `scaled_score` are written from the output of the shared `scaler`, which is
    refitted on the rows of `questions_df` on every call. Nothing is returned.
    """
    raw_columns = ['view_count', 'score']
    scaled_columns = ['scaled_view_count', 'scaled_score']
    scaled_values = scaler.fit_transform(questions_df[raw_columns])
    questions_df[scaled_columns] = scaled_values

def add_scaled_fields_to_answers(answers_df):
    """Add min-max scaled copies of `score` and `user_reputation` to `answers_df`.

    The frame is modified in place: the columns `scaled_score` and
    `scaled_user_reputation` are written from the output of the shared `scaler`,
    which is refitted on the rows of `answers_df` on every call. Nothing is returned.
    """
    raw_columns = ['score', 'user_reputation']
    scaled_columns = ['scaled_score', 'scaled_user_reputation']
    scaled_values = scaler.fit_transform(answers_df[raw_columns])
    answers_df[scaled_columns] = scaled_values

def eval_answer(answer_row):
    """Return the quality score of one answer row.

    The score is the arithmetic mean of the row's `scaled_score` and
    `scaled_user_reputation` attributes.
    """
    components = (answer_row.scaled_score, answer_row.scaled_user_reputation)
    return statistics.mean(components)

def get_eval_question(answers_df):
    """Build a question-scoring function bound to `answers_df`.

    The returned callable takes one question row and looks up the answers whose
    `parent_id` equals the row's `id`. With no such answers it returns 0;
    otherwise it returns the mean of the question's `scaled_view_count` and
    `scaled_score` together with the `scaled_score` and `scaled_user_reputation`
    of the answer that has the highest `eval` value.
    """
    def eval_question(question_row):
        belongs_to_question = answers_df.parent_id == question_row.id
        ranked_answers = answers_df.loc[belongs_to_question].sort_values(by='eval', ascending=False)
        if ranked_answers.shape[0] == 0:
            # Unanswered questions get the lowest possible score.
            return 0
        top_answer = ranked_answers.iloc[0]
        components = [
            question_row.scaled_view_count,
            question_row.scaled_score,
            top_answer.scaled_score,
            top_answer.scaled_user_reputation,
        ]
        return statistics.mean(components)
    return eval_question

### Prepare files and variables

In [6]:
# Prepare the output folder before any page is written.
# NOTE(review): cleanup_folder is imported from utils; presumably it empties the folder — confirm.
cleanup_folder(output_dir)
# Collects (question_row, selected_answer) pairs: filled by the tag loop and
# consumed by the question-doc loop.
questions_for_docs = []

### Generate docs for tags

Docs for tags are based on the `posts_tag_wiki` and `posts_tag_wiki_excerpt` tables from Stack Overflow. Each page should contain a long description of what the tag represents and point to relevant complementary docs.

In [7]:
# Start from an empty list so that re-running this cell does not append the same
# (question, answer) pairs a second time.
questions_for_docs = []

for idx, row in selected_tags_df.iterrows():
    tag_id = row.id
    tag_name = row.tag_name
    # Look up the short excerpt and the long wiki body by their post ids.
    tag_description = posts_tag_wiki_excerpt_df.loc[posts_tag_wiki_excerpt_df.id == row.excerpt_post_id]['body'].values[0]
    tag_wiki_body = posts_tag_wiki_df.loc[posts_tag_wiki_df.id == row.wiki_post_id]['body'].values[0]
    # save tag doc page
    TagDocPage(tag_name, tag_description, tag_wiki_body).save(f"{output_dir}/{tag_name}.md")

    # prepare post questions for tag
    create_folder(f"{output_dir}/{tag_name}")
    tag_questions_df = posts_questions_df[posts_questions_df.tag_id == tag_id].copy()
    tag_answers_df = posts_answers_with_user_df[posts_answers_with_user_df.parent_id.isin(tag_questions_df.id)].copy()
    if tag_answers_df.empty:
        # Without answers there is nothing to document, and fitting the scaler on an
        # empty frame would raise; keep the tag page and move on to the next tag.
        print(f"Skipping questions for tag '{tag_name}': no answers found")
        continue

    # Scale within the tag so the scores compare questions/answers of the same tag.
    add_scaled_fields_to_answers(tag_answers_df)
    add_scaled_fields_to_questions(tag_questions_df)
    tag_answers_df['eval'] = tag_answers_df.apply(eval_answer, axis=1)
    tag_questions_df['eval'] = tag_questions_df.apply(get_eval_question(tag_answers_df), axis=1)

    # Rank all questions, then keep only those that have at least one answer before
    # taking the top N. Unanswered questions score 0 and could otherwise reach the
    # top N for small tags, where picking their best answer raises IndexError.
    ranked_questions_df = tag_questions_df.sort_values(by='eval', ascending=False)
    has_answer = ranked_questions_df.id.isin(tag_answers_df.parent_id)
    selected_questions_df = ranked_questions_df[has_answer].head(n_questions_per_tag)
    for _, question_row in selected_questions_df.iterrows():
        question_answers_df = tag_answers_df[tag_answers_df.parent_id == question_row.id].copy()
        # The best answer is the one with the highest eval score.
        selected_answer = question_answers_df.sort_values(by='eval', ascending=False).iloc[0]
        questions_for_docs.append((question_row, selected_answer))

### Generate docs for questions

In [8]:
# Write one markdown page per selected (question, best answer) pair.
for selected_question, selected_answer in questions_for_docs:
    parent_tag_name = selected_question.tag_name
    question_title = selected_question.title
    question_body = selected_question.body
    # Drop the redundant tags and the tag the page is filed under.
    question_tags = [
        tag
        for tag in selected_question.tags.split('|')
        if not (tag in redundant_tags or tag == selected_question.tag_name)
    ]

    # Body of the chosen answer plus its comments, highest-scored comment first.
    answer_body = selected_answer.body
    answer_comments_df = comments_df[comments_df.post_id == selected_answer.id]
    comments = list(answer_comments_df.sort_values(by='score', ascending=False)['text'])

    # The file name is derived from the question title.
    output_file_name = human_to_kebab_case(question_title)
    print(f"Saving '{output_dir}/{parent_tag_name}/{output_file_name}.md'")
    question_page = PostQuestionDocPage(question_title, question_body, question_tags, answer_body, comments)
    question_page.save(f"{output_dir}/{parent_tag_name}/{output_file_name}.md")
    print('save done')

Saving 'output/android-actionbar/how-do-i-hide-a-menu-item-in-the-actionbar.md'
save done
Saving 'output/android-actionbar/how-to-disable-action-bar-permanently.md'
save done
Saving 'output/android-actionbar/actionbar-text-color.md'
save done
Saving 'output/android-actionbar/full-screen-theme-for-appcompat.md'
save done
Saving 'output/android-actionbar/how-to-hide-action-bar-before-activity-is-created,-and-then-show-it-again.md'
save done
Saving 'output/android-actionbar/remove-application-icon-and-title-from-honeycomb-action-bar.md'
save done
Saving 'output/android-actionbar/how-to-use-searchview-in-toolbar-android.md'
save done
Saving 'output/android-actionbar/how-to-set-custom-actionbar-color--style.md'
save done
Saving 'output/android-actionbar/how-do-you-remove-the-title-text-from-the-android-actionbar.md'
save done
Saving 'output/android-actionbar/android:-how-to-hide-actionbar-on-certain-activities.md'
save done
Saving 'output/android-actionbar/how-to-customize-the-back-button-o

save done
Saving 'output/android-activity/how-to-show-a-dialog-to-confirm-that-the-user-wishes-to-exit-an-android-activity.md'
save done
Saving 'output/android-activity/how-to-get-current-foreground-activity-context-in-android.md'
save done
Saving 'output/android-activity/start-an-activity-with-a-parameter.md'
save done
Saving 'output/android-gradle-plugin/how-can-i-access-a-buildconfig-value-in-my-androidmanifest.xml-file.md'
save done
Saving 'output/android-gradle-plugin/multi-flavor-app-based-on-multi-flavor-library-in-android-gradle.md'
save done
Saving 'output/android-gradle-plugin/publish-an-android-library-to-maven-with-aar-and-source-jar.md'
save done
Saving 'output/android-gradle-plugin/how-to-use-local-aar-dependency.md'
save done
Saving 'output/android-gradle-plugin/is-it-possible-to-use-multiple-manifest-files-for-different-gradle-buildsflavors.md'
save done
Saving 'output/android-gradle-plugin/build-flavors-for-different-version-of-same-class.md'
save done
Saving 'output/a

save done
Saving 'output/android-layout/how-to-center-items-of-a-recyclerview.md'
save done
Saving 'output/android-layout/how-to-change-the-color-of-a-checkbox.md'
save done
Saving 'output/android-layout/changing-edittext-bottom-line-color-with-appcompat-v7.md'
save done
Saving 'output/android-layout/put-buttons-at-bottom-of-screen-with-linearlayout.md'
save done
Saving 'output/android-layout/border-in-shape-xml.md'
save done
Saving 'output/android-layout/how-to-animate-recyclerview-items-when-they-appear.md'
save done
Saving 'output/android-layout/placingoverlapping(z-index)-a-view-above-another-view-in-android.md'
save done
Saving 'output/android-layout/android:-how-to-draw-a-border-to-a-linearlayout.md'
save done
Saving 'output/android-layout/how-to-customize-a-spinner-in-android.md'
save done


In [9]:
# Dry run: print the path of every question page without writing anything.
# Only the tag name and the title are needed for the path, so the question body,
# tag list, answer body and the per-question comment lookup are not computed here.
for selected_question, selected_answer in questions_for_docs:
    parent_tag_name = selected_question.tag_name
    question_title = selected_question.title
    output_file_name = human_to_kebab_case(question_title)
    print(f"{output_dir}/{parent_tag_name}/{output_file_name}.md")

output/android-actionbar/how-do-i-hide-a-menu-item-in-the-actionbar.md
output/android-actionbar/how-to-disable-action-bar-permanently.md
output/android-actionbar/actionbar-text-color.md
output/android-actionbar/full-screen-theme-for-appcompat.md
output/android-actionbar/how-to-hide-action-bar-before-activity-is-created,-and-then-show-it-again.md
output/android-actionbar/remove-application-icon-and-title-from-honeycomb-action-bar.md
output/android-actionbar/how-to-use-searchview-in-toolbar-android.md
output/android-actionbar/how-to-set-custom-actionbar-color--style.md
output/android-actionbar/how-do-you-remove-the-title-text-from-the-android-actionbar.md
output/android-actionbar/android:-how-to-hide-actionbar-on-certain-activities.md
output/android-actionbar/how-to-customize-the-back-button-on-actionbar.md
output/android-actionbar/how-can-i-force-the-action-bar-to-be-at-the-bottom-in-ics.md
output/android-actionbar/how-to-get-the-actionbar-height.md
output/android-actionbar/getting-draw