## Setting up the environment

In [None]:
# Export the OS_* connection settings into this process's environment.
# Username and password are blank in this notebook; presumably they need to be
# filled in before the dataset can be fetched — confirm.
for (name, value) in [("OS_AUTH_URL", "https://keystone-yeg.cloud.cybera.ca:5000/v2.0"),
                      ("OS_TENANT_NAME", "julia_workshop"),
                      ("OS_PROJECT_NAME", "julia_workshop"),
                      ("OS_USERNAME", ""),
                      ("OS_PASSWORD", "")]
    ENV[name] = value
end

# Load the project's configuration code from the relative path ../src/lib/Config.jl.
include(joinpath("..", "src", "lib", "Config.jl"))

## Loading Modules

In [None]:
using FreqTables
using PlotlyJS
using MultivariateStats

## Fetching the dataset

In [None]:
# Fetch the :titanic dataset through the project's Dataset helper
# (presumably brought into scope by the Config.jl include — confirm).
titanic = Dataset.fetch(:titanic)

## Preparing data for visualization 

In [None]:
# Split the table into one sub-table per value of the :Sex column.
male = titanic[titanic[:Sex].=="male",:]
female = titanic[titanic[:Sex].=="female",:]

In [None]:
# Frequency tables used by the charts below:
# counts per :Sex and per :Survived over all passengers ...
sex_ftable = freqtable(titanic, :Sex)
survived_ftable = freqtable(titanic, :Survived)
# ... and counts per :Survived within the female and male sub-tables.
female_ftable = freqtable(female, :Survived)
male_ftable = freqtable(male, :Survived)

### Why use a JavaScript charting library?
- Easy to integrate with any application. Separates the visualization from
  the rest of the data wrangling and statistics work
- Dynamic and interactive

### Why Plotly?

Go see for yourselves: https://plot.ly/javascript/

- It’s open source and built on D3.js and stack.gl
- D3.js is the go-to, widely used charting library.
- Based on a declarative JSON schema

## Let's hit it 

In [None]:
# Pie chart of the passenger counts per sex, looked up by name in sex_ftable.
trace1 = PlotlyJS.pie(;values=[sex_ftable["male"],sex_ftable["female"]],labels=["Male","Female"])
PlotlyJS.plot([trace1], Layout(height=400))

In [None]:
# Pie chart of the survival counts, labelled "Dead" and "Survived".
# NOTE(review): survived_ftable is indexed with the integers 0 and 1; the table
# type may treat integers as positions rather than as the :Survived values, in
# which case [0] is out of range and [1] is the first entry — confirm.
trace1 = PlotlyJS.pie(;values=[survived_ftable[0],survived_ftable[1]],labels=["Dead","Survived"])
PlotlyJS.plot([trace1], Layout(height=400))

In [None]:
# Work on a copy: a plain `titanic_clean = titanic` only aliases the table, so
# the in-place delete! below would also strip the columns from `titanic` itself
# (and remove a further set of columns every time this cell is re-run).
titanic_clean = copy(titanic)
# Drop the columns at positions 1, 4, 9 and 11.
titanic_clean = delete!(titanic_clean, [1, 4, 9, 11])
# Keep only the rows where :Age, :Sex and :Survived are all present.
titanic_clean = titanic_clean[~isna(titanic_clean[:Age]), :]
titanic_clean = titanic_clean[~isna(titanic_clean[:Sex]), :]
titanic_clean = titanic_clean[~isna(titanic_clean[:Survived]), :]

In [None]:
# Box plot of :Age from the cleaned table, one box per value of :Sex.
layout = Layout(; title="Age Distribution by Gender", yaxis=attr(title="Age"))
trace1 = PlotlyJS.box(; x=titanic_clean[:Sex], y=titanic_clean[:Age])
PlotlyJS.plot([trace1], layout)

In [None]:
# Histogram of :Age from the cleaned table.
layout = Layout(; title="Distribution of Passenger Ages on Titanic",
                xaxis=attr(title="Distribution of Age"),
                yaxis=attr(title="Frequency of Bucket"))
trace1 = PlotlyJS.histogram(; x=titanic_clean[:Age])
PlotlyJS.plot([trace1], layout)


In [None]:
# Fare against age as markers, with a faint contour of :Survived drawn over
# the same x/y coordinates.
age = titanic[:Age]
fare = titanic[:Fare]
survival = titanic[:Survived]

layout = Layout(; title="Trying out...", yaxis_title="Fare")
trace1 = PlotlyJS.scatter(; x=age, y=fare,
                          mode="markers", marker_opacity=1, name="Training")
trace2 = PlotlyJS.contour(; x=age, y=fare, z=survival, opacity=0.2,
                          name="trace2_y, trace5_y, trace8_y, trace11_y, trace14_y, trace17_y, trace20_y, trace23_y, trace26_y")
data = [trace1, trace2]
PlotlyJS.plot(data, layout)

In [None]:
# Sex against age as markers, with a faint contour of :Survived drawn over
# the same x/y coordinates.
age = titanic[:Age]
sex = titanic[:Sex]
survival = titanic[:Survived]

layout = Layout(; title="Trying out...", yaxis_title="Gender")
trace1 = PlotlyJS.scatter(; x=age, y=sex,
                          mode="markers", marker_opacity=1, name="Training")
trace2 = PlotlyJS.contour(; x=age, y=sex, z=survival, opacity=0.2,
                          name="trace2_y, trace5_y, trace8_y, trace11_y, trace14_y, trace17_y, trace20_y, trace23_y, trace26_y")
data = [trace1, trace2]
PlotlyJS.plot(data, layout)

In [None]:
# Passenger class against sex as markers, with a faint contour of :Survived
# drawn over the same x/y coordinates. (The unused `age` lookup was removed.)
sex = titanic[:Sex]
survival = titanic[:Survived]
pclass = titanic[:Pclass]

trace1 = PlotlyJS.scatter(; x=sex, y=pclass,
                          marker_opacity=1, mode="markers", name="Training")
trace2 = PlotlyJS.contour(; x=sex, y=pclass, z=survival, opacity=0.2,
                          name="trace2_y, trace5_y, trace8_y, trace11_y, trace14_y, trace17_y, trace20_y, trace23_y, trace26_y")

data = [trace1, trace2]
# The keyword was misspelled `xaxis_titel`, so the x-axis title was never
# applied; `xaxis_title` mirrors the `yaxis_title` keyword next to it.
layout = Layout(; title="Trying out...",
                yaxis_title="Passenger class", xaxis_title="Gender")
PlotlyJS.plot(data, layout)

In [None]:
# Select by column name, not position: columns were deleted from titanic_clean
# earlier, so positions 2, 6 and 10 no longer refer to the columns they did in
# the original table (`:2` is just the integer 2, i.e. "second column").
# The later plotting cell treats these as survival, age and fare.
@time titanic_array_survived = array(titanic_clean[:Survived])
@time titanic_array = array(titanic_clean[:, [:Age, :Fare]])

In [None]:
# MultivariateStats treats each COLUMN of the data matrix as one observation,
# whereas titanic_array (built from table rows) has one passenger per ROW.
# Transpose before fitting so the PCA runs over the feature columns rather
# than treating every passenger as a separate dimension.
titanic_features = permutedims(titanic_array, [2, 1])
M = MultivariateStats.fit(PCA, titanic_features, maxoutdim=100)
Yte = MultivariateStats.transform(M, titanic_features)
# Xr has the same features-by-passengers layout as titanic_features.
Xr = MultivariateStats.reconstruct(M, Yte)
# Transpose back to one row per passenger so the table gets columns x1, x2.
final_titanic = convert(DataFrame, permutedims(Xr, [2, 1]))

In [None]:
# Same scatter-plus-contour view as before, but using the :x1 and :x2 columns
# of final_titanic as age and fare, with :Survived from the cleaned table.
age = final_titanic[:x1]
fare = final_titanic[:x2]
survival = titanic_clean[:Survived]

layout = Layout(; title="Trying out...", yaxis_title="Fare")
trace1 = PlotlyJS.scatter(; x=age, y=fare,
                          mode="markers", marker_opacity=1, name="Training")
trace2 = PlotlyJS.contour(; x=age, y=fare, z=survival, opacity=0.2,
                          name="trace2_y, trace5_y, trace8_y, trace11_y, trace14_y, trace17_y, trace20_y, trace23_y, trace26_y")
data = [trace1, trace2]
PlotlyJS.plot(data, layout)

In [None]:
# Load at top level so RGB is in scope before the function runs, instead of
# eval-ing a `using` statement from inside the function body.
using DataFrames, Colors

# Plot the passengers of the global `titanic` table in 3-D (age, log of fare,
# passenger class), one colour per distinct :Survived value, with a translucent
# mesh3d drawn over each group. Returns the resulting plot.
function clustering_alpha_shapes()
    palette = [RGB(0.89, 0.1, 0.1), RGB(0.21, 0.50, 0.72), RGB(0.28, 0.68, 0.3)]
    # Legend wording follows the survival pie chart; any other value falls
    # back to its string form so the trace name is always a string.
    outcome_labels = Dict(0 => "Dead", 1 => "Survived")

    # All three scene axes share the same styling and differ only in title.
    scene_axis(title) = attr(gridcolor="rgb(255, 255, 255)",
                             zerolinecolor="rgb(255, 255, 255)",
                             showbackground=true,
                             backgroundcolor="rgb(230, 230,230)",
                             title=title)

    data = GenericTrace[]
    for (i, nm) in enumerate(unique(titanic[:Survived]))
        df = titanic[titanic[:Survived] .== nm, :]
        # Cycle through the palette so that more groups than colours cannot
        # index past its end.
        color = palette[mod1(i, length(palette))]
        x = df[:Age]
        # NOTE(review): a fare of 0 would become -Inf here — confirm the data.
        y = log(df[:Fare])
        z = df[:Pclass]
        trace = PlotlyJS.scatter3d(; name=get(outcome_labels, nm, string(nm)),
                                   mode="markers", marker_size=3,
                                   marker_color=color, marker_line_width=0,
                                   x=x, y=y, z=z)
        push!(data, trace)
        cluster = PlotlyJS.mesh3d(; color=color, opacity=0.3, x=x, y=y, z=z)
        push!(data, cluster)
    end

    # Nested attrs build the nested JSON objects of the layout.
    layout = Layout(width=800, height=550, autosize=false, title="Titanic Survival",
                    scene=attr(xaxis=scene_axis("Age"),
                               yaxis=scene_axis("Log of Fare Price"),
                               zaxis=scene_axis("Class"),
                               aspectratio=attr(x=1, y=1, z=0.7),
                               aspectmode="manual"))
    return PlotlyJS.plot(data, layout)
end
clustering_alpha_shapes()

## How to get data to JSON?

Set the path where you want to save the JSON-formatted JavaScript file.

In [None]:
# Output file: titanic_survived.js inside the results directory from Config.
results_js_path = joinpath(Config.Path.results,"titanic_survived.js")

Create the directory to store the results if it does not already exist.

In [None]:
# mkpath creates any missing parent directories and does nothing when the
# directory already exists, so no separate existence check is needed.
mkpath(Config.Path.results)

To get data written to a file in JSON format we created a utility function in Julia:

http://juliabox.cloud.cybera.ca/edit/titanic-julia/src/lib/DataFrameUtil.jl

In [None]:
# Write the titanic table to results_js_path with the project's write_js helper.
# NOTE(review): the exact meaning of the [:Survived] argument and of
# append=true is defined in DataFrameUtil.jl — confirm there.
write_js(results_js_path, titanic, [:Survived], append=true)