### Set up your environment

In [None]:
# Endpoint and project settings for the shared cloud account. These OS_* variables
# are set before Config.jl is included (presumably Config.jl reads them — TODO confirm).
ENV["OS_AUTH_URL"]="https://keystone-yeg.cloud.cybera.ca:5000/v2.0"
ENV["OS_TENANT_NAME"]="julia_workshop"
ENV["OS_PROJECT_NAME"]="julia_workshop"
# Placeholders: supply your own username and password before running this cell,
# and avoid sharing the notebook with real credentials filled in.
ENV["OS_USERNAME"]=""
ENV["OS_PASSWORD"]=""

# Load the framework configuration file src/lib/Config.jl from the parent directory.
include(joinpath("..", "src", "lib", "Config.jl"))

### Exploring existing datasets

This framework is integrated with OpenStack Swift object storage, so you can fetch datasets from a shared container. You can also push new datasets or transformations back to the container to share them with your teammates.

Right now, let's see what's there.

In [None]:
# List the datasets currently available in the shared container.
Dataset.list()

We'll grab the `:titanic` dataset to start exploring.

In [None]:
# Fetch the :titanic dataset and bind it to `titanic`, which the following cells use.
titanic = Dataset.fetch(:titanic)

### Loading modules

We're going to load the `FreqTables` module and our own `Titanic` module. To do that, we use the `using` keyword, which brings the module's exported names into the current namespace. If you would rather not pollute the namespace and don't mind prefixing calls with the module name, you can use `import` instead.

In [4]:
using FreqTables
using Titanic

In [None]:
# One-way frequency table: number of rows for each value of the Survived column.
freqtable(titanic[:Survived])

In [None]:
# Two-way frequency table cross-tabulating Sex against Survived.
freqtable(titanic, :Sex, :Survived)

In [7]:
# Enum with named values Dead=0 and Survived=1.
@enum SurvivedType Dead=0 Survived=1
# Replace the Survived column with SurvivedType values. to_enum is not defined in
# this notebook (presumably exported by the Titanic module — TODO confirm).
titanic[:Survived] = to_enum(SurvivedType, titanic[:Survived])
# Pool the Sex and Survived columns in place.
# NOTE(review): presumably so they behave as categorical columns with a fixed set
# of levels — confirm.
pool!(titanic, [:Sex, :Survived])

In [None]:
# Inspect the levels of the Survived column.
levels(titanic[:Survived])

In [None]:
# Cross-tabulate Sex against Survived again, after the enum conversion and pooling.
freqtable(titanic, :Sex, :Survived)

In [33]:
using StatPlots
# Select the PyPlot backend for the plots that follow.
pyplot()

Plots.PyPlotBackend()

In [None]:
# Pie chart of passenger counts by Sex. The labels are hard-coded, so they must be
# listed in the same order as the freqtable entries (assumes "female" comes before
# "male" — TODO confirm).
pie(["Female", "Male"], freqtable(titanic, :Sex))

In [None]:
# Pie chart of passenger counts by Survived. The labels are hard-coded, so they
# must match the order of the freqtable entries (assumes Dead before Survived —
# TODO confirm).
pie(["Dead", "Survived"],freqtable(titanic, :Survived))

In [None]:
# Split the passengers by sex using a boolean row mask on the Sex column.
male = titanic[titanic[:Sex] .== "male",:]
female = titanic[titanic[:Sex] .== "female",:]
# Alternative via the framework's subset helper. These two assignments overwrite
# the ones above (presumably with equivalent results — TODO confirm).
male = subset(titanic, :Sex, "male")
female = subset(titanic, :Sex, "female")

In [None]:
# 1x2 grid layout shared by both pie charts.
layout = grid(1,2)
# First pie: survival counts for men (no subplot given; presumably drawn in the
# first grid cell — TODO confirm).
pie(["Dead","Survived"],freqtable(male, :Survived),title="Survival Portion of Men",layout=layout)
# pie! adds to the existing plot; subplot=2 places the women's chart in the second cell.
pie!(["Dead","Survived"],freqtable(female, :Survived),title="Survival Portion of Women",layout=layout,subplot=2)

In [None]:
# Start from the full table; this binds the same object rather than copying it.
titanic_clean = titanic
# Each step keeps only the rows whose value in the named column is not NA and
# rebinds titanic_clean to the filtered result: first Age, then Sex, then Survived.
titanic_clean = titanic_clean[~isna(titanic_clean[:Age]),:]
titanic_clean = titanic_clean[~isna(titanic_clean[:Sex]),:]
titanic_clean = titanic_clean[~isna(titanic_clean[:Survived]),:]

In [None]:
# Notched box plot of Age grouped by Sex, using the NA-filtered table.
boxplot(titanic_clean, :Sex, :Age, title="Age Distribution by Gender", notch=true)

In [None]:
# Notched box plot of Age grouped by Survived, using the NA-filtered table.
boxplot(titanic_clean, :Survived, :Age, title="Age Distribution By Survival", notch=true, ylabel="Age")

In [None]:
# Histogram of passenger ages, requesting 12 bins.
histogram(titanic_clean[:Age], xlabel="Distribution of Age", ylabel="Frequency of Bucket", title="Distribution of Passenger Ages on Titanic",bins=12)

In [None]:
# Density plot of Age with the Survived column passed as the grouping.
density(titanic_clean, :Age, groups=:Survived, linecolor=:auto, linewidth=3)

In [None]:
# Density plot of Age with the Sex column passed as the grouping.
density(titanic_clean, :Age, groups=:Sex, linecolor=:auto, linewidth=3)

In [None]:
# Enum for the age category, with named values Child=0 and Adult=1.
@enum ChildType Child=0 Adult=1

# Derive a Child column from Age: a missing age stays NA, ages below 13 map to
# Child, and every other age maps to Adult. The mapped values are passed through
# to_enum, which is not defined in this notebook (presumably exported by the
# Titanic module — TODO confirm).
titanic[:Child] = to_enum(ChildType, map(titanic[:Age]) do x
  if isna(x)
    NA
  elseif x < 13
    Child
  else
    Adult
  end
end)

In [30]:
# remove_na(titanic_df, colnames...)
#
# Return the rows of `titanic_df` that have no NA in any of the columns named in
# `colnames`. The columns are processed one after another, each pass narrowing
# the result of the previous one. With no column names the input is returned
# unchanged.
function remove_na(titanic_df, colnames...)
  filtered = titanic_df
  for col in colnames
    # Mask of the rows whose value in this column is present (not NA).
    present = ~isna(filtered[col])
    filtered = filtered[present, :]
  end
  return filtered
end

remove_na (generic function with 1 method)

In [None]:
# Rebuild titanic_clean with remove_na, this time also dropping rows whose Child
# value is NA.
titanic_clean = remove_na(titanic, :Age, :Sex, :Survived, :Child)

In [None]:
import CustomPlots

# Plot Fare from the NA-filtered table with the project's facetgridbox helper.
# NOTE(review): the keyword names suggest a grid faceted by Survived (x) and Sex (y)
# with boxes split by Child — confirm against CustomPlots.
CustomPlots.facetgridbox(titanic_clean, :Fare, xsplit=:Survived, ysplit=:Sex, boxsplit=:Child)