# Julia Assistant Tools for Machine Learning

**Author: [Oliveira, D. M.](http://br.linkedin.com/in/dmztheone)**

In [158]:
# Heading for the introduction. Both number arguments are empty, so the number badge
# contains only the "." separator that render_section always inserts.
# NOTE(review): render_section is defined in a later cell ("HTML Tools"); that cell has
# to be evaluated before this one.
render_section("", "", "Introduction")

Write a proper introduction...

In [155]:
# Heading for section 01; the subsection number is left empty.
render_section("01", "", "Import Packages")

In [3]:
using DataFrames
using MLBase

In [151]:
# Heading for section 02; the subsection number is left empty.
render_section("02", "", "HTML Assistant Tools")

### CSS Style

In [153]:
# Stylesheet for the HTML helpers of this notebook: `span.tt` (monospace text),
# `div.method*` (documentation cards emitted by render_doc) and `h1.section` /
# `span.section_*` (headings emitted by render_section).
style_css = """
<style>
span.tt {
    font-family: 'Lucida Sans Typewriter', 'Lucida Console', 
                  monaco, 'Bitstream Vera Sans Mono', monospace;
    color: #03396c;
}

div.method {
    margin-bottom: 10px;
}

div.method_title {
    padding: 4px 0px 4px 10px;
    background-color:#85bdde; color:#005b96;
    border-left: 8px solid #cb2c31;
}

div.method_body {
    padding: 5px 20px 20px 20px;
    border-left: 8px solid #e69598;
    background-color: #ccebfb;
}

div.method_cell {}

div.method_cell_title {
    padding: 20px 0px 0px 0px;
}

div.method_cell_body {}


h1.section {
    border-bottom:5px solid #696565;
}

span.section_number {
    background-color:#696565;
    color:#ffb400;
    padding:3px 5px 3px 5px;
}

span.section_title {
    padding:5px 5px 0px 5px;
    color:#ff9d23;
}

</style>
"""
# Emit the <style> element as text/html cell output.
# NOTE(review): presumably this makes the rules available to the other cells' HTML
# output on the same notebook page — confirm for the front end in use.
display("text/html", style_css)

### HTML Tools

In [148]:
"""
    render_doc(method_signature, description, arguments, outputs)

Display an HTML documentation card for a method as `text/html` output.

# Arguments
- `method_signature`: text shown in bold in the card title, after "Method: ".
- `description`: text of the "Description" cell. The template appends a ".", so pass
  it without a final period.
- `arguments`: collection of `(name, type, description)` tuples listed under
  "Arguments".
- `outputs`: collection of `(name, type, description)` tuples listed under "Outputs".

Both collections are converted to markup with `field_list_to_html`. All values are
interpolated into the markup as-is, without HTML escaping.

Returns the result of the `display` call.
"""
function render_doc(method_signature, description, arguments, outputs)
    arguments_html       = field_list_to_html(arguments)
    outputs_html         = field_list_to_html(outputs)
    # Every <div> opened in the template is closed explicitly. No self-closing `<div/>`
    # is emitted: HTML parsers treat it as an opening tag that is never closed.
    html_method_template = """
    <div class="method">
        <div class="method_title"><span class="tt">Method: </span><b>$method_signature</b></div>
            <div class="method_body">
                <div class="method_cell">
                    <div class="method_cell_title"><span class="tt"><b>Description</b></span></div>
                    <div class="method_cell_body">
                        <span class="tt">$description.</span>
                    </div>
                </div>
                <div class="method_cell">
                    <div class="method_cell_title"><span class="tt"><b>Arguments</b></span></div>
                    <div class="method_cell_body">
                        <ul>
                            $arguments_html
                        </ul>
                    </div>
                </div>
                <div class="method_cell">
                    <div class="method_cell_title"><span class="tt"><b>Outputs</b></span></div>
                    <div class="method_cell_body">
                        <ul>
                            $outputs_html
                        </ul>
                    </div>
                </div>
            </div>
        </div>
    """
    return display("text/html", html_method_template)
end

"""
    field_list_to_html(list)

Build the HTML list items for a collection of documented fields.

`list` is a collection of `(name, type, description)` tuples. Each tuple becomes one
`<li>` element followed by a newline, with the type in bold and a "." appended to the
description. An empty `list` yields a single "None." `<span>` instead.

Returns the markup as a string. Values are inserted without HTML escaping.
"""
function field_list_to_html(list)
    isempty(list) && return "<span class=\"tt\">None.</span>"
    # Build every item once and join them, instead of growing a string with repeated
    # `*=` (which re-copies the accumulated text on each iteration).
    items = [string("<li><span class=\"tt\">", name, ", <b>", kind, "</b>: ",
                    text, ".</span></li>\n") for (name, kind, text) in list]
    return join(items)
end

"""
    render_section(section_number, subsection_number, title)

Display a styled `<h1>` section heading as `text/html` output.

The number badge shows `section_number` and `subsection_number` joined by a literal
".", which is emitted even when either of them is empty. `title` follows in its own
`<span>`. All three values are interpolated into the markup without HTML escaping.

Returns the result of the `display` call.
"""
function render_section(section_number, subsection_number, title)
    heading_html = """
        <h1 class="section">
            <span class="section_number">$section_number.$subsection_number</span>
            <span class="section_title">$title</span>
        </h1>
    """
    return display("text/html", heading_html)
end

render_section (generic function with 1 method)

In [156]:
# Heading for section 03; the subsection number is left empty.
render_section("03", "", "Functions: Missing Values")

In [146]:
# Documentation cards for the two missing-value helpers defined in the next cell.
# render_doc appends a "." after the description and after each field description, so
# the texts below are written without a final period.
render_doc("get_default_values (df, features)", 
           """Calculates the mode for all features informed. 
              Usually it is used to replace missing data""", 
           [("df", "DataFrame", "Data used to calculate modes by column"),
            ("features", "Array{Symbol,1}", "Column names that will be considered")],
           [("default_values", "Dict", "Dictionary with the default values")])

# Card for the in-place counterpart that consumes the dictionary documented above.
render_doc("apply_default_values! (df, default_values)",
           """Apply a dictionary of default values to missing values (NAs) of the
              given dataframe. Only keys identified as column will be used""",
          [("df", "DataFrame", "Data that will be used to replace NAs to default values"),
           ("default_values", "Dict", "Dictionary with features/default values")],
          [("df", "DataFrame", "Original dataframe with NAs replaced by default values")])

In [None]:
"""
    get_default_values(df, features)

Compute a default value for each column listed in `features`.

For every entry of `features`, the column `df[feature]` is passed through `dropna`
and the `mode` of the result is stored under that feature.

Returns an untyped `Dict` mapping each feature to its mode.
"""
function get_default_values(df, features)
    modes = Dict()
    for column in features
        # NAs are removed first so the mode is taken over the remaining values only.
        modes[column] = mode(dropna(df[column]))
    end
    return modes
end

"""
    apply_default_values!(df, default_values)

Replace the NA entries of `df` in place with per-column default values.

# Arguments
- `df`: the data frame to modify.
- `default_values`: dictionary mapping a column name to the value written into the
  rows where that column is NA (as reported by `isna`).

Keys of `default_values` that are not columns of `df` are skipped, so a dictionary
built for a wider data frame can be applied safely.

Returns `df`.
"""
function apply_default_values!(df, default_values)
    for (feature, default_value) in default_values
        # Only keys that name an existing column are applied; indexing `df` with an
        # unknown key would raise instead of being ignored.
        haskey(df, feature) || continue
        df[isna(df[feature]), feature] = default_value
    end
    return df
end

In [157]:
# Heading for section 04; the subsection number is left empty.
render_section("04", "", "Functions: Label Encoding")

In [164]:
# Documentation cards for the two label-encoding helpers defined in the next cell.
# render_doc appends a "." after the description and after each field description, so
# the texts below are written without a final period.
render_doc("get_label_encoding (df, features)",
           "Get label encoding for features of a given dataframe",
           [("df", "DataFrame", "Data that will be used to create label encoding"), 
            ("features", "Array{Symbol,1}", "Features that will be created label encoding")],
            [("_", "Dict", "A dictionary with the encoding for each feature informed")])

# Card for the in-place counterpart that consumes the dictionary documented above.
render_doc("apply_encoding! (df, encoding)",
           "Apply encoding to a given dataframe",
           [("df", "DataFrame", "Dataframe that will be encoded"),
            ("encoding", "Dict", "A dictionary with the feature/encoding data")],
           [("df", "DataFrame", "Dataframe with encoded columns")])

In [161]:
"""
    get_label_encoding(df, features)

Build a label encoding for each column listed in `features`.

For every entry of `features`, the column `df[feature]` is passed through `dropna`
and the result is given to `labelmap`.

Returns a `Dict` mapping each feature to its label map.
"""
function get_label_encoding(df, features)
    return Dict([name => labelmap(dropna(df[name])) for name in features])
end

"""
    apply_encoding!(df, encoding)

Encode columns of `df` in place.

For every `feature => label_map` entry of `encoding`, the column `df[feature]` is
replaced by `labelencode(label_map, df[feature])`.

NOTE(review): `get_label_encoding` builds its maps from NA-dropped data, while the
whole column is encoded here — confirm how `labelencode` treats NA entries.

Returns `df`.
"""
function apply_encoding!(df, encoding)
    for (feature, label_map) in encoding
        df[feature] = labelencode(label_map, df[feature])
    end
    return df
end

apply_encoding! (generic function with 1 method)

In [165]:
# Heading for section 05; the subsection number is left empty.
render_section("05", "", "Functions: One-Hot-Encoding")

In [168]:
# Documentation card for get_all_values, defined in the next cell. render_doc appends
# a "." after the description and after each field description, so the texts below
# are written without a final period.
render_doc("get_all_values (df, features)",
           "Get all unique values from a given dataframe",
           [("df", "DataFrame", "Dataframe used to extract unique values"),
            ("features", "Array{Symbol,1}", "Features that will be extract unique values")],
           [("_", "Dict", "A dictionary with unique values for each feature informed")])



In [166]:
"""
    get_all_values(df, features)

Collect the distinct non-NA values of each column listed in `features`.

For every entry of `features`, the column `df[feature]` is passed through `dropna`
and the result is stored as a `Set`.

Returns a `Dict` mapping each feature to that set.
"""
function get_all_values(df, features)
    return Dict([name => Set(dropna(df[name])) for name in features])
end

get_all_values (generic function with 1 method)