# 0 Imports

In [1]:
import numpy as np
import matplotlib as pl
import seaborn as sns
import pandas as pd

import re

from bs4 import BeautifulSoup as bs
import nltk
from nltk import pos_tag
from nltk.corpus import stopwords, wordnet
from nltk.stem import SnowballStemmer, WordNetLemmatizer
# nltk.download()

pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

np.random.seed(0)


***
# 1 Config

In [2]:
config = {
    "raw_preproc": True,
    "processing": True
}

***
# 2 Raw data preprocessing

## 2.1 Parse raw data body html

In [3]:
if config["raw_preproc"]:
    raw_data_body = pd.read_csv("data/raw_data_body.csv")
    raw_data_body.shape

In [4]:
if config["raw_preproc"]:
    def preproc_raw_data_body(cell):
        soup = bs(cell, "html.parser")

        script_tags = soup.find_all("script")
        for script_tag in script_tags:
            script_tag.extract()

        code_tags = soup.find_all("code")
        for code_tag in code_tags:
            code_tag.extract()

        preproc_cell = soup.get_text()
        preproc_cell = preproc_cell.replace(',', ' ')

        return preproc_cell

In [5]:
if config["raw_preproc"]:
    raw_data_body["Body"] = raw_data_body.apply(lambda row: preproc_raw_data_body(row.Body), axis=1)

***
## 2.2 Merge body raw data to the rest

In [6]:
if config["raw_preproc"]:
    raw_data_rest = pd.read_csv("data/raw_data_id_title_tags.csv")

In [7]:
if config["raw_preproc"]:
    raw_data = raw_data_rest.join(raw_data_body)
    raw_data = raw_data.set_index("Id").reindex(["Title", "Body", "Tags"], axis="columns")

In [8]:
display = None
if config["raw_preproc"]:
    display = raw_data.head()
display

Unnamed: 0_level_0,Title,Body,Tags
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4,How to convert a Decimal to a Double in C#?,I want to use a to change a 's opacity.\nThis is my code:\n\nWhen I build the application it gives the following error:\n\n\n\nI have tried using and but then the doesn't work. This code worked fine in a past VB.NET project.\n,<c#><floating-point><type-conversion><double><decimal>
6,Why did the width collapse in the percentage width child element in an absolutely positioned parent on Internet Explorer 7?,I have an absolutely positioned containing several children one of which is a relatively positioned . When I use a on the child it collapses to on IE7 but not on Firefox or Safari.\nIf I use it works. If the parent is relatively positioned the percentage width on the child works.\n\nIs there something I'm missing here?\nIs there an easy fix for this besides the on the child?\nIs there an area of the CSS specification that covers this?\n\n,<html><css><internet-explorer-7>
9,How do I calculate someone's age based on a DateTime type birthday?,Given a representing a person's birthday how do I calculate their age in years?\n,<c#><.net><datetime>
11,Calculate relative time in C#,Given a specific value how do I display relative time like:\n\n2 hours ago\n3 days ago\na month ago\n\n,<c#><datetime><time><datediff><relative-time-span>
13,Determine a user's timezone,Is there a standard way for a web server to be able to determine a user's timezone within a web page? \nPerhaps from an HTTP header or part of the string?\n,<html><browser><timezone><user-agent><timezone-offset>


***
## 2.3 Tags to list

In [9]:
if config["raw_preproc"]:
    def tags_to_list(cell):
        return [tag for tag in re.split(r'[<>]', cell) if tag]

In [10]:
if config["raw_preproc"]:
    raw_data["Tags"] = raw_data.apply(lambda row: tags_to_list(row["Tags"]), axis="columns")

In [11]:
display = None
if config["raw_preproc"]:
    display = raw_data["Tags"].head()
display

Id
4     [c#, floating-point, type-conversion, double, decimal]
6                           [html, css, internet-explorer-7]
9                                       [c#, .net, datetime]
11        [c#, datetime, time, datediff, relative-time-span]
13    [html, browser, timezone, user-agent, timezone-offset]
Name: Tags, dtype: object

***
## 2.4 Lowering

In [12]:
if config["raw_preproc"]:
    def lower(cell):
        return cell.lower()

In [13]:
if config["raw_preproc"]:
    raw_data["Title"] = raw_data.apply(lambda row: lower(row["Title"]), axis="columns")
    raw_data["Body"] = raw_data.apply(lambda row: lower(row["Body"]), axis="columns")

In [14]:
display = None
if config["raw_preproc"]:
    display = raw_data[["Title", "Body"]].head()
display

Unnamed: 0_level_0,Title,Body
Id,Unnamed: 1_level_1,Unnamed: 2_level_1
4,how to convert a decimal to a double in c#?,i want to use a to change a 's opacity.\nthis is my code:\n\nwhen i build the application it gives the following error:\n\n\n\ni have tried using and but then the doesn't work. this code worked fine in a past vb.net project.\n
6,why did the width collapse in the percentage width child element in an absolutely positioned parent on internet explorer 7?,i have an absolutely positioned containing several children one of which is a relatively positioned . when i use a on the child it collapses to on ie7 but not on firefox or safari.\nif i use it works. if the parent is relatively positioned the percentage width on the child works.\n\nis there something i'm missing here?\nis there an easy fix for this besides the on the child?\nis there an area of the css specification that covers this?\n\n
9,how do i calculate someone's age based on a datetime type birthday?,given a representing a person's birthday how do i calculate their age in years?\n
11,calculate relative time in c#,given a specific value how do i display relative time like:\n\n2 hours ago\n3 days ago\na month ago\n\n
13,determine a user's timezone,is there a standard way for a web server to be able to determine a user's timezone within a web page? \nperhaps from an http header or part of the string?\n


***
## 2.5 Save raw_data

In [15]:
display = None
if config["raw_preproc"]:
    display = raw_data.head()
display

Unnamed: 0_level_0,Title,Body,Tags
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4,how to convert a decimal to a double in c#?,i want to use a to change a 's opacity.\nthis is my code:\n\nwhen i build the application it gives the following error:\n\n\n\ni have tried using and but then the doesn't work. this code worked fine in a past vb.net project.\n,"[c#, floating-point, type-conversion, double, decimal]"
6,why did the width collapse in the percentage width child element in an absolutely positioned parent on internet explorer 7?,i have an absolutely positioned containing several children one of which is a relatively positioned . when i use a on the child it collapses to on ie7 but not on firefox or safari.\nif i use it works. if the parent is relatively positioned the percentage width on the child works.\n\nis there something i'm missing here?\nis there an easy fix for this besides the on the child?\nis there an area of the css specification that covers this?\n\n,"[html, css, internet-explorer-7]"
9,how do i calculate someone's age based on a datetime type birthday?,given a representing a person's birthday how do i calculate their age in years?\n,"[c#, .net, datetime]"
11,calculate relative time in c#,given a specific value how do i display relative time like:\n\n2 hours ago\n3 days ago\na month ago\n\n,"[c#, datetime, time, datediff, relative-time-span]"
13,determine a user's timezone,is there a standard way for a web server to be able to determine a user's timezone within a web page? \nperhaps from an http header or part of the string?\n,"[html, browser, timezone, user-agent, timezone-offset]"


In [16]:
if config["raw_preproc"]:
    raw_data.to_csv("data/raw_data.csv")

***
# 3 Data preprocessing

## 3.0 Utils

In [17]:
if config["processing"]:
    def tokenize(text):
        tokenizer = nltk.RegexpTokenizer(r'\w+')
        tokens = tokenizer.tokenize(text)
        return tokens

In [18]:
if config["processing"]:
    def remove_stop_words(cell):
        return [word for word in cell if word not in stop_words]

In [19]:
if config["processing"]:
    def stemmize(cell):
        stemmer = SnowballStemmer("english")
        stemmed_tokens = [stemmer.stem(token) for token in cell]
        return stemmed_tokens

In [20]:
if config["processing"]:
    def get_wordnet_pos(treebank_tag):
        if treebank_tag.startswith('J'):
            return wordnet.ADJ
        elif treebank_tag.startswith('V'):
            return wordnet.VERB
        elif treebank_tag.startswith('N'):
            return wordnet.NOUN
        elif treebank_tag.startswith('R'):
            return wordnet.ADV
        else:
            return 'n'

In [21]:
if config["processing"]:
    def tag_pos(cell):
        treebank_tags = pos_tag(cell)
        pos = [(tag[0], get_wordnet_pos(tag[1])) for tag in treebank_tags]
        return pos

In [22]:
if config["processing"]:
    def lemmatize(cell, with_pos=False):
        lemmatizer = WordNetLemmatizer()
        if not with_pos:
            lemmatized_tokens = [lemmatizer.lemmatize(token) for token in cell]
        else:
            lemmatized_tokens = [lemmatizer.lemmatize(pair[0], pos=pair[1]) for pair in cell]
        return lemmatized_tokens

***
## 3.1 Loading data

In [23]:
if config["processing"]:
    data = pd.read_csv("data/raw_data.csv", index_col="Id")

In [24]:
display = None
if config["processing"]:
    display = data.head()
display

Unnamed: 0_level_0,Title,Body,Tags
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
4,how to convert a decimal to a double in c#?,i want to use a to change a 's opacity.\nthis is my code:\n\nwhen i build the application it gives the following error:\n\n\n\ni have tried using and but then the doesn't work. this code worked fine in a past vb.net project.\n,"['c#', 'floating-point', 'type-conversion', 'double', 'decimal']"
6,why did the width collapse in the percentage width child element in an absolutely positioned parent on internet explorer 7?,i have an absolutely positioned containing several children one of which is a relatively positioned . when i use a on the child it collapses to on ie7 but not on firefox or safari.\nif i use it works. if the parent is relatively positioned the percentage width on the child works.\n\nis there something i'm missing here?\nis there an easy fix for this besides the on the child?\nis there an area of the css specification that covers this?\n\n,"['html', 'css', 'internet-explorer-7']"
9,how do i calculate someone's age based on a datetime type birthday?,given a representing a person's birthday how do i calculate their age in years?\n,"['c#', '.net', 'datetime']"
11,calculate relative time in c#,given a specific value how do i display relative time like:\n\n2 hours ago\n3 days ago\na month ago\n\n,"['c#', 'datetime', 'time', 'datediff', 'relative-time-span']"
13,determine a user's timezone,is there a standard way for a web server to be able to determine a user's timezone within a web page? \nperhaps from an http header or part of the string?\n,"['html', 'browser', 'timezone', 'user-agent', 'timezone-offset']"


***
## 3.2 Creating unique corpus

In [25]:
if config["processing"]:
    unique_corpus = " ".join([str(item) for item in data["Body"].values])
    unique_corpus = tokenize(unique_corpus)

In [26]:
display = None
if config["processing"]:
    display = unique_corpus[:10]
display

['i', 'want', 'to', 'use', 'a', 'to', 'change', 'a', 's', 'opacity']

***
## 3.2 Tokenize

In [27]:
if config["processing"]:
    data["Body_Tokens"] = data.apply(lambda row: tokenize(row["Body"]), axis="columns")

In [28]:
display = None
if config["processing"]:
    display = data[["Body", "Body_Tokens"]].head()
display

Unnamed: 0_level_0,Body,Body_Tokens
Id,Unnamed: 1_level_1,Unnamed: 2_level_1
4,i want to use a to change a 's opacity.\nthis is my code:\n\nwhen i build the application it gives the following error:\n\n\n\ni have tried using and but then the doesn't work. this code worked fine in a past vb.net project.\n,"[i, want, to, use, a, to, change, a, s, opacity, this, is, my, code, when, i, build, the, application, it, gives, the, following, error, i, have, tried, using, and, but, then, the, doesn, t, work, this, code, worked, fine, in, a, past, vb, net, project]"
6,i have an absolutely positioned containing several children one of which is a relatively positioned . when i use a on the child it collapses to on ie7 but not on firefox or safari.\nif i use it works. if the parent is relatively positioned the percentage width on the child works.\n\nis there something i'm missing here?\nis there an easy fix for this besides the on the child?\nis there an area of the css specification that covers this?\n\n,"[i, have, an, absolutely, positioned, containing, several, children, one, of, which, is, a, relatively, positioned, when, i, use, a, on, the, child, it, collapses, to, on, ie7, but, not, on, firefox, or, safari, if, i, use, it, works, if, the, parent, is, relatively, positioned, the, percentage, width, on, the, child, works, is, there, something, i, m, missing, here, is, there, an, easy, fix, for, this, besides, the, on, the, child, is, there, an, area, of, the, css, specification, that, covers, this]"
9,given a representing a person's birthday how do i calculate their age in years?\n,"[given, a, representing, a, person, s, birthday, how, do, i, calculate, their, age, in, years]"
11,given a specific value how do i display relative time like:\n\n2 hours ago\n3 days ago\na month ago\n\n,"[given, a, specific, value, how, do, i, display, relative, time, like, 2, hours, ago, 3, days, ago, a, month, ago]"
13,is there a standard way for a web server to be able to determine a user's timezone within a web page? \nperhaps from an http header or part of the string?\n,"[is, there, a, standard, way, for, a, web, server, to, be, able, to, determine, a, user, s, timezone, within, a, web, page, perhaps, from, an, http, header, or, part, of, the, string]"


***
## 3.3 StopWords deletion

In [29]:
if config["processing"]:
    stop_words = stopwords.words("english")
    print(f"stopwords: {stop_words}")

stopwords: ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd", 'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers', 'herself', 'it', "it's", 'its', 'itself', 'they', 'them', 'their', 'theirs', 'themselves', 'what', 'which', 'who', 'whom', 'this', 'that', "that'll", 'these', 'those', 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', 'a', 'an', 'the', 'and', 'but', 'if', 'or', 'because', 'as', 'until', 'while', 'of', 'at', 'by', 'for', 'with', 'about', 'against', 'between', 'into', 'through', 'during', 'before', 'after', 'above', 'below', 'to', 'from', 'up', 'down', 'in', 'out', 'on', 'off', 'over', 'under', 'again', 'further', 'then', 'once', 'here', 'there', 'when', 'where', 'why', 'how', 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', 'no', 'nor', 'not', 'only', 'own', 'same', 'so'

In [30]:
if config["processing"]:
    data["Body_Tokens"] = data.apply(lambda row: remove_stop_words(row["Body_Tokens"]), axis="columns")

In [31]:
display = None
if config["processing"]:
    display = data[["Body", "Body_Tokens"]].head()
display

Unnamed: 0_level_0,Body,Body_Tokens
Id,Unnamed: 1_level_1,Unnamed: 2_level_1
4,i want to use a to change a 's opacity.\nthis is my code:\n\nwhen i build the application it gives the following error:\n\n\n\ni have tried using and but then the doesn't work. this code worked fine in a past vb.net project.\n,"[want, use, change, opacity, code, build, application, gives, following, error, tried, using, work, code, worked, fine, past, vb, net, project]"
6,i have an absolutely positioned containing several children one of which is a relatively positioned . when i use a on the child it collapses to on ie7 but not on firefox or safari.\nif i use it works. if the parent is relatively positioned the percentage width on the child works.\n\nis there something i'm missing here?\nis there an easy fix for this besides the on the child?\nis there an area of the css specification that covers this?\n\n,"[absolutely, positioned, containing, several, children, one, relatively, positioned, use, child, collapses, ie7, firefox, safari, use, works, parent, relatively, positioned, percentage, width, child, works, something, missing, easy, fix, besides, child, area, css, specification, covers]"
9,given a representing a person's birthday how do i calculate their age in years?\n,"[given, representing, person, birthday, calculate, age, years]"
11,given a specific value how do i display relative time like:\n\n2 hours ago\n3 days ago\na month ago\n\n,"[given, specific, value, display, relative, time, like, 2, hours, ago, 3, days, ago, month, ago]"
13,is there a standard way for a web server to be able to determine a user's timezone within a web page? \nperhaps from an http header or part of the string?\n,"[standard, way, web, server, able, determine, user, timezone, within, web, page, perhaps, http, header, part, string]"


***
## 3.4 Stemmize

In [32]:
if config["processing"]:
    data["Body_Stemmed"] = data.apply(lambda row: stemmize(row["Body_Tokens"]), axis="columns")

In [33]:
display = None
if config["processing"]:
    display = data[["Body_Tokens", "Body_Stemmed"]].head()
display

Unnamed: 0_level_0,Body_Tokens,Body_Stemmed
Id,Unnamed: 1_level_1,Unnamed: 2_level_1
4,"[want, use, change, opacity, code, build, application, gives, following, error, tried, using, work, code, worked, fine, past, vb, net, project]","[want, use, chang, opac, code, build, applic, give, follow, error, tri, use, work, code, work, fine, past, vb, net, project]"
6,"[absolutely, positioned, containing, several, children, one, relatively, positioned, use, child, collapses, ie7, firefox, safari, use, works, parent, relatively, positioned, percentage, width, child, works, something, missing, easy, fix, besides, child, area, css, specification, covers]","[absolut, posit, contain, sever, children, one, relat, posit, use, child, collaps, ie7, firefox, safari, use, work, parent, relat, posit, percentag, width, child, work, someth, miss, easi, fix, besid, child, area, css, specif, cover]"
9,"[given, representing, person, birthday, calculate, age, years]","[given, repres, person, birthday, calcul, age, year]"
11,"[given, specific, value, display, relative, time, like, 2, hours, ago, 3, days, ago, month, ago]","[given, specif, valu, display, relat, time, like, 2, hour, ago, 3, day, ago, month, ago]"
13,"[standard, way, web, server, able, determine, user, timezone, within, web, page, perhaps, http, header, part, string]","[standard, way, web, server, abl, determin, user, timezon, within, web, page, perhap, http, header, part, string]"


***
## 3.5 POS - Part-Of-Speech

In [34]:
if config["processing"]:
    data["Body_POS"] = data.apply(lambda row: tag_pos(row["Body_Tokens"]), axis="columns")

In [35]:
display = None
if config["processing"]:
    display = data[["Body_Tokens", "Body_POS"]].head()
display

Unnamed: 0_level_0,Body_Tokens,Body_POS
Id,Unnamed: 1_level_1,Unnamed: 2_level_1
4,"[want, use, change, opacity, code, build, application, gives, following, error, tried, using, work, code, worked, fine, past, vb, net, project]","[(want, n), (use, n), (change, n), (opacity, n), (code, n), (build, a), (application, n), (gives, v), (following, v), (error, n), (tried, v), (using, v), (work, n), (code, n), (worked, v), (fine, a), (past, n), (vb, a), (net, n), (project, n)]"
6,"[absolutely, positioned, containing, several, children, one, relatively, positioned, use, child, collapses, ie7, firefox, safari, use, works, parent, relatively, positioned, percentage, width, child, works, something, missing, easy, fix, besides, child, area, css, specification, covers]","[(absolutely, r), (positioned, v), (containing, v), (several, a), (children, n), (one, n), (relatively, r), (positioned, v), (use, n), (child, n), (collapses, n), (ie7, v), (firefox, n), (safari, n), (use, n), (works, v), (parent, n), (relatively, r), (positioned, v), (percentage, n), (width, n), (child, n), (works, v), (something, n), (missing, v), (easy, a), (fix, a), (besides, n), (child, a), (area, n), (css, n), (specification, n), (covers, n)]"
9,"[given, representing, person, birthday, calculate, age, years]","[(given, v), (representing, v), (person, n), (birthday, a), (calculate, a), (age, n), (years, n)]"
11,"[given, specific, value, display, relative, time, like, 2, hours, ago, 3, days, ago, month, ago]","[(given, v), (specific, a), (value, n), (display, n), (relative, a), (time, n), (like, n), (2, n), (hours, n), (ago, r), (3, n), (days, n), (ago, r), (month, n), (ago, n)]"
13,"[standard, way, web, server, able, determine, user, timezone, within, web, page, perhaps, http, header, part, string]","[(standard, a), (way, n), (web, n), (server, n), (able, a), (determine, n), (user, n), (timezone, n), (within, n), (web, a), (page, n), (perhaps, r), (http, a), (header, a), (part, n), (string, n)]"


***
## 3.6 Lemmatize

In [36]:
if config["processing"]:
    data["Body_Lemmatized"] = data.apply(lambda row: lemmatize(row["Body_POS"], with_pos=True), axis="columns")

In [37]:
display = None
if config["processing"]:
    display = data[["Body_Tokens", "Body_Lemmatized"]].head()
display

Unnamed: 0_level_0,Body_Tokens,Body_Lemmatized
Id,Unnamed: 1_level_1,Unnamed: 2_level_1
4,"[want, use, change, opacity, code, build, application, gives, following, error, tried, using, work, code, worked, fine, past, vb, net, project]","[want, use, change, opacity, code, build, application, give, follow, error, try, use, work, code, work, fine, past, vb, net, project]"
6,"[absolutely, positioned, containing, several, children, one, relatively, positioned, use, child, collapses, ie7, firefox, safari, use, works, parent, relatively, positioned, percentage, width, child, works, something, missing, easy, fix, besides, child, area, css, specification, covers]","[absolutely, position, contain, several, child, one, relatively, position, use, child, collapse, ie7, firefox, safari, use, work, parent, relatively, position, percentage, width, child, work, something, miss, easy, fix, besides, child, area, cs, specification, cover]"
9,"[given, representing, person, birthday, calculate, age, years]","[give, represent, person, birthday, calculate, age, year]"
11,"[given, specific, value, display, relative, time, like, 2, hours, ago, 3, days, ago, month, ago]","[give, specific, value, display, relative, time, like, 2, hour, ago, 3, day, ago, month, ago]"
13,"[standard, way, web, server, able, determine, user, timezone, within, web, page, perhaps, http, header, part, string]","[standard, way, web, server, able, determine, user, timezone, within, web, page, perhaps, http, header, part, string]"


***
## 3.7 Saving

In [38]:
display = None
if config["processing"]:
    display = data.head()
display

Unnamed: 0_level_0,Title,Body,Tags,Body_Tokens,Body_Stemmed,Body_POS,Body_Lemmatized
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
4,how to convert a decimal to a double in c#?,i want to use a to change a 's opacity.\nthis is my code:\n\nwhen i build the application it gives the following error:\n\n\n\ni have tried using and but then the doesn't work. this code worked fine in a past vb.net project.\n,"['c#', 'floating-point', 'type-conversion', 'double', 'decimal']","[want, use, change, opacity, code, build, application, gives, following, error, tried, using, work, code, worked, fine, past, vb, net, project]","[want, use, chang, opac, code, build, applic, give, follow, error, tri, use, work, code, work, fine, past, vb, net, project]","[(want, n), (use, n), (change, n), (opacity, n), (code, n), (build, a), (application, n), (gives, v), (following, v), (error, n), (tried, v), (using, v), (work, n), (code, n), (worked, v), (fine, a), (past, n), (vb, a), (net, n), (project, n)]","[want, use, change, opacity, code, build, application, give, follow, error, try, use, work, code, work, fine, past, vb, net, project]"
6,why did the width collapse in the percentage width child element in an absolutely positioned parent on internet explorer 7?,i have an absolutely positioned containing several children one of which is a relatively positioned . when i use a on the child it collapses to on ie7 but not on firefox or safari.\nif i use it works. if the parent is relatively positioned the percentage width on the child works.\n\nis there something i'm missing here?\nis there an easy fix for this besides the on the child?\nis there an area of the css specification that covers this?\n\n,"['html', 'css', 'internet-explorer-7']","[absolutely, positioned, containing, several, children, one, relatively, positioned, use, child, collapses, ie7, firefox, safari, use, works, parent, relatively, positioned, percentage, width, child, works, something, missing, easy, fix, besides, child, area, css, specification, covers]","[absolut, posit, contain, sever, children, one, relat, posit, use, child, collaps, ie7, firefox, safari, use, work, parent, relat, posit, percentag, width, child, work, someth, miss, easi, fix, besid, child, area, css, specif, cover]","[(absolutely, r), (positioned, v), (containing, v), (several, a), (children, n), (one, n), (relatively, r), (positioned, v), (use, n), (child, n), (collapses, n), (ie7, v), (firefox, n), (safari, n), (use, n), (works, v), (parent, n), (relatively, r), (positioned, v), (percentage, n), (width, n), (child, n), (works, v), (something, n), (missing, v), (easy, a), (fix, a), (besides, n), (child, a), (area, n), (css, n), (specification, n), (covers, n)]","[absolutely, position, contain, several, child, one, relatively, position, use, child, collapse, ie7, firefox, safari, use, work, parent, relatively, position, percentage, width, child, work, something, miss, easy, fix, besides, child, area, cs, specification, cover]"
9,how do i calculate someone's age based on a datetime type birthday?,given a representing a person's birthday how do i calculate their age in years?\n,"['c#', '.net', 'datetime']","[given, representing, person, birthday, calculate, age, years]","[given, repres, person, birthday, calcul, age, year]","[(given, v), (representing, v), (person, n), (birthday, a), (calculate, a), (age, n), (years, n)]","[give, represent, person, birthday, calculate, age, year]"
11,calculate relative time in c#,given a specific value how do i display relative time like:\n\n2 hours ago\n3 days ago\na month ago\n\n,"['c#', 'datetime', 'time', 'datediff', 'relative-time-span']","[given, specific, value, display, relative, time, like, 2, hours, ago, 3, days, ago, month, ago]","[given, specif, valu, display, relat, time, like, 2, hour, ago, 3, day, ago, month, ago]","[(given, v), (specific, a), (value, n), (display, n), (relative, a), (time, n), (like, n), (2, n), (hours, n), (ago, r), (3, n), (days, n), (ago, r), (month, n), (ago, n)]","[give, specific, value, display, relative, time, like, 2, hour, ago, 3, day, ago, month, ago]"
13,determine a user's timezone,is there a standard way for a web server to be able to determine a user's timezone within a web page? \nperhaps from an http header or part of the string?\n,"['html', 'browser', 'timezone', 'user-agent', 'timezone-offset']","[standard, way, web, server, able, determine, user, timezone, within, web, page, perhaps, http, header, part, string]","[standard, way, web, server, abl, determin, user, timezon, within, web, page, perhap, http, header, part, string]","[(standard, a), (way, n), (web, n), (server, n), (able, a), (determine, n), (user, n), (timezone, n), (within, n), (web, a), (page, n), (perhaps, r), (http, a), (header, a), (part, n), (string, n)]","[standard, way, web, server, able, determine, user, timezone, within, web, page, perhaps, http, header, part, string]"


In [39]:
if config["processing"]:
    data.to_csv("data/data_cleaned.csv", index_label="Id")