In [219]:
# Import Packages
using Pkg  # Package to install new packages

# Install packages 
#Pkg.add("DataFrames")
#Pkg.add("CSV")
#Pkg.add("Plots")
#Pkg.add("Lathe")
#Pkg.add("GLM")
#Pkg.add("StatsPlots")
#Pkg.add("MLBase")
#Pkg.add("flux")
#Pkg.add("ANOVA")


# Load the installed packages
using DataFrames
using CSV
using Plots
using Lathe
using GLM
using Statistics
using StatsPlots
using MLBase
using Printf
using Dates
using Flux: onehot
using Flux: onehotbatch
using Query
using VegaLite
using DataVoyager
using FileIO
using ElectronDisplay
using Lathe.preprocess: TrainTestSplit

# Enable printing of 1000 columns
ENV["COLUMNS"] = 1000

1000

In [220]:
data = DataFrame(CSV.File("../Datasets/sleepdata.csv"))

rename!(data, :"Sleep quality" => "SleepQuality", :"Time in bed" => "Time_in_bed", "Wake up" => "Wake_up", :"Sleep Notes" => "Sleep_Notes", :"Heart rate" => "Heart_rate", :"Activity (steps)" => "Activity_(steps)")

coalesce.(data.Sleep_Notes, "None")

dropmissing!(data)

first(data,5)

Unnamed: 0_level_0,Start,End,SleepQuality,Time_in_bed,Wake_up,Sleep_Notes,Heart_rate,Activity_(steps)
Unnamed: 0_level_1,String,String,String,Time,String,String,Int64,Int64
1,2014-12-30 21:17:50,2014-12-30 21:33:54,3%,00:16:00,:|,Stressful day,72,0
2,2015-01-01 22:12:10,2015-01-02 04:56:35,72%,06:44:00,:),Drank coffee:Drank tea,68,0
3,2015-01-03 00:34:57,2015-01-03 07:47:23,83%,07:12:00,:),Drank coffee:Drank tea,60,0
4,2015-01-04 21:34:44,2015-01-05 04:53:34,78%,07:18:00,:),Ate late:Drank coffee,57,0
5,2015-01-05 21:32:25,2015-01-06 05:00:03,69%,07:27:00,:),Drank coffee:Drank tea:Worked out,56,0


In [221]:
data.Start = Date.(data.Start, "yyyy-mm-dd HH:MM:SS");
data.End = Date.(data.End, "yyyy-mm-dd HH:MM:SS");
first(data, 5)

Unnamed: 0_level_0,Start,End,SleepQuality,Time_in_bed,Wake_up,Sleep_Notes,Heart_rate,Activity_(steps)
Unnamed: 0_level_1,Date,Date,String,Time,String,String,Int64,Int64
1,2014-12-30,2014-12-30,3%,00:16:00,:|,Stressful day,72,0
2,2015-01-01,2015-01-02,72%,06:44:00,:),Drank coffee:Drank tea,68,0
3,2015-01-03,2015-01-03,83%,07:12:00,:),Drank coffee:Drank tea,60,0
4,2015-01-04,2015-01-05,78%,07:18:00,:),Ate late:Drank coffee,57,0
5,2015-01-05,2015-01-06,69%,07:27:00,:),Drank coffee:Drank tea:Worked out,56,0


In [222]:
const nano = 60000000000 # how many nanoseconds are in a minute

insertcols!(data, :Minutes_in_bed => 1)
for i in range(1, length=nrow(data))
    data.Minutes_in_bed[i] = Dates.value(data.Time_in_bed[i]) / nano
end

In [223]:
first(data, 5)

Unnamed: 0_level_0,Start,End,SleepQuality,Time_in_bed,Wake_up,Sleep_Notes,Heart_rate,Activity_(steps),Minutes_in_bed
Unnamed: 0_level_1,Date,Date,String,Time,String,String,Int64,Int64,Int64
1,2014-12-30,2014-12-30,3%,00:16:00,:|,Stressful day,72,0,16
2,2015-01-01,2015-01-02,72%,06:44:00,:),Drank coffee:Drank tea,68,0,404
3,2015-01-03,2015-01-03,83%,07:12:00,:),Drank coffee:Drank tea,60,0,432
4,2015-01-04,2015-01-05,78%,07:18:00,:),Ate late:Drank coffee,57,0,438
5,2015-01-05,2015-01-06,69%,07:27:00,:),Drank coffee:Drank tea:Worked out,56,0,447


In [224]:
onehotDF = DataFrame(Stressful_day = Bool[], Drank_coffee = Bool[], Drank_tea = Bool[], Worked_out = Bool[], Ate_late = Bool[])

for i in range(1, length = nrow(data))
    currentSleepNote = data.Sleep_Notes[i]
    if occursin(":", data.Sleep_Notes[i])
        sleepList = []

        note1 = split(currentSleepNote, ":")[1]
        note2 = split(currentSleepNote, ":")[2]

        push!(sleepList, note1, note2)

        ohb = onehotbatch(sleepList, ["Stressful day", "Drank coffee", "Drank tea", "Worked out", "Ate late"])

        enc = ohb[:, 1] + ohb[:, 2]
        
        push!(onehotDF, enc)
    else
        sleepList = []

        push!(sleepList, currentSleepNote)

        enc = onehot(currentSleepNote, ["Stressful day", "Drank coffee", "Drank tea", "Worked out", "Ate late"])

        push!(onehotDF, enc)
    end
end

data = hcat(data, onehotDF)

Unnamed: 0_level_0,Start,End,SleepQuality,Time_in_bed,Wake_up,Sleep_Notes,Heart_rate,Activity_(steps),Minutes_in_bed,Stressful_day,Drank_coffee,Drank_tea,Worked_out,Ate_late
Unnamed: 0_level_1,Date,Date,String,Time,String,String,Int64,Int64,Int64,Bool,Bool,Bool,Bool,Bool
1,2014-12-30,2014-12-30,3%,00:16:00,:|,Stressful day,72,0,16,1,0,0,0,0
2,2015-01-01,2015-01-02,72%,06:44:00,:),Drank coffee:Drank tea,68,0,404,0,1,1,0,0
3,2015-01-03,2015-01-03,83%,07:12:00,:),Drank coffee:Drank tea,60,0,432,0,1,1,0,0
4,2015-01-04,2015-01-05,78%,07:18:00,:),Ate late:Drank coffee,57,0,438,0,1,0,0,1
5,2015-01-05,2015-01-06,69%,07:27:00,:),Drank coffee:Drank tea:Worked out,56,0,447,0,1,1,0,0
6,2015-01-06,2015-01-07,74%,07:35:00,:|,Drank tea:Worked out,64,0,455,0,0,1,1,0
7,2015-01-07,2015-01-08,81%,09:19:00,:),Drank coffee:Drank tea:Stressful day,62,0,559,0,1,1,0,0
8,2015-01-08,2015-01-09,88%,07:16:00,:),Drank coffee:Drank tea,58,0,436,0,1,1,0,0
9,2015-01-09,2015-01-10,77%,09:01:00,:|,Drank coffee:Drank tea,65,0,541,0,1,1,0,0
10,2015-01-10,2015-01-11,89%,08:50:00,:|,Drank coffee:Drank tea,65,0,530,0,1,1,0,0


In [225]:
first(data, 10)

Unnamed: 0_level_0,Start,End,SleepQuality,Time_in_bed,Wake_up,Sleep_Notes,Heart_rate,Activity_(steps),Minutes_in_bed,Stressful_day,Drank_coffee,Drank_tea,Worked_out,Ate_late
Unnamed: 0_level_1,Date,Date,String,Time,String,String,Int64,Int64,Int64,Bool,Bool,Bool,Bool,Bool
1,2014-12-30,2014-12-30,3%,00:16:00,:|,Stressful day,72,0,16,1,0,0,0,0
2,2015-01-01,2015-01-02,72%,06:44:00,:),Drank coffee:Drank tea,68,0,404,0,1,1,0,0
3,2015-01-03,2015-01-03,83%,07:12:00,:),Drank coffee:Drank tea,60,0,432,0,1,1,0,0
4,2015-01-04,2015-01-05,78%,07:18:00,:),Ate late:Drank coffee,57,0,438,0,1,0,0,1
5,2015-01-05,2015-01-06,69%,07:27:00,:),Drank coffee:Drank tea:Worked out,56,0,447,0,1,1,0,0
6,2015-01-06,2015-01-07,74%,07:35:00,:|,Drank tea:Worked out,64,0,455,0,0,1,1,0
7,2015-01-07,2015-01-08,81%,09:19:00,:),Drank coffee:Drank tea:Stressful day,62,0,559,0,1,1,0,0
8,2015-01-08,2015-01-09,88%,07:16:00,:),Drank coffee:Drank tea,58,0,436,0,1,1,0,0
9,2015-01-09,2015-01-10,77%,09:01:00,:|,Drank coffee:Drank tea,65,0,541,0,1,1,0,0
10,2015-01-10,2015-01-11,89%,08:50:00,:|,Drank coffee:Drank tea,65,0,530,0,1,1,0,0


In [226]:
insertcols!(data, :"Wake_up_ord" => 1)

#data[!, "Wake_up"] = convert.(Int, data[:, "Wake_up"])

Unnamed: 0_level_0,Start,End,SleepQuality,Time_in_bed,Wake_up,Sleep_Notes,Heart_rate,Activity_(steps),Minutes_in_bed,Stressful_day,Drank_coffee,Drank_tea,Worked_out,Ate_late,Wake_up_ord
Unnamed: 0_level_1,Date,Date,String,Time,String,String,Int64,Int64,Int64,Bool,Bool,Bool,Bool,Bool,Int64
1,2014-12-30,2014-12-30,3%,00:16:00,:|,Stressful day,72,0,16,1,0,0,0,0,1
2,2015-01-01,2015-01-02,72%,06:44:00,:),Drank coffee:Drank tea,68,0,404,0,1,1,0,0,1
3,2015-01-03,2015-01-03,83%,07:12:00,:),Drank coffee:Drank tea,60,0,432,0,1,1,0,0,1
4,2015-01-04,2015-01-05,78%,07:18:00,:),Ate late:Drank coffee,57,0,438,0,1,0,0,1,1
5,2015-01-05,2015-01-06,69%,07:27:00,:),Drank coffee:Drank tea:Worked out,56,0,447,0,1,1,0,0,1
6,2015-01-06,2015-01-07,74%,07:35:00,:|,Drank tea:Worked out,64,0,455,0,0,1,1,0,1
7,2015-01-07,2015-01-08,81%,09:19:00,:),Drank coffee:Drank tea:Stressful day,62,0,559,0,1,1,0,0,1
8,2015-01-08,2015-01-09,88%,07:16:00,:),Drank coffee:Drank tea,58,0,436,0,1,1,0,0,1
9,2015-01-09,2015-01-10,77%,09:01:00,:|,Drank coffee:Drank tea,65,0,541,0,1,1,0,0,1
10,2015-01-10,2015-01-11,89%,08:50:00,:|,Drank coffee:Drank tea,65,0,530,0,1,1,0,0,1


In [227]:
i = 1
for emoticon in data.Wake_up
    if occursin(":)", emoticon)
        data[i, "Wake_up_ord"] = 2 
        i += 1
    elseif occursin(":|", emoticon)
        data[i, "Wake_up_ord"] = 1
        i += 1
    elseif occursin(":()", emoticon)
        data[i, "Wake_up_ord"] = 0
        i += 1
    end
end

In [228]:
insertcols!(data, :"Sleep_quality_num" => 1.0)

i = 1
for percentage in data.SleepQuality
    number = percentage[1:end-1]
    #println(parse(Float64, number))
    data[i, "Sleep_quality_num"] = parse(Float64, number)
    i += 1
end

insertcols!(data, :"Sleep_quality_ord" => 1)

i = 1
for num in data.Sleep_quality_num
    #println(num)
    if num >= 0.0 && num < 25.0
        data[i, "Sleep_quality_ord"] = 1
        i += 1
    elseif num >= 25.0 && num < 50.0
        data[i, "Sleep_quality_ord"] = 2
        i += 1
    elseif num >= 50.0 && num < 75.0
        data[i, "Sleep_quality_ord"] = 3
        i += 1
    elseif num >= 75.0
        data[i, "Sleep_quality_ord"] = 4
        i += 1
    end
end

In [229]:
first(data, 10)
#filter(row -> row.Wake_up_ord ∈ [1], data)

Unnamed: 0_level_0,Start,End,SleepQuality,Time_in_bed,Wake_up,Sleep_Notes,Heart_rate,Activity_(steps),Minutes_in_bed,Stressful_day,Drank_coffee,Drank_tea,Worked_out,Ate_late,Wake_up_ord,Sleep_quality_num,Sleep_quality_ord
Unnamed: 0_level_1,Date,Date,String,Time,String,String,Int64,Int64,Int64,Bool,Bool,Bool,Bool,Bool,Int64,Float64,Int64
1,2014-12-30,2014-12-30,3%,00:16:00,:|,Stressful day,72,0,16,1,0,0,0,0,1,3.0,1
2,2015-01-01,2015-01-02,72%,06:44:00,:),Drank coffee:Drank tea,68,0,404,0,1,1,0,0,2,72.0,3
3,2015-01-03,2015-01-03,83%,07:12:00,:),Drank coffee:Drank tea,60,0,432,0,1,1,0,0,2,83.0,4
4,2015-01-04,2015-01-05,78%,07:18:00,:),Ate late:Drank coffee,57,0,438,0,1,0,0,1,2,78.0,4
5,2015-01-05,2015-01-06,69%,07:27:00,:),Drank coffee:Drank tea:Worked out,56,0,447,0,1,1,0,0,2,69.0,3
6,2015-01-06,2015-01-07,74%,07:35:00,:|,Drank tea:Worked out,64,0,455,0,0,1,1,0,1,74.0,3
7,2015-01-07,2015-01-08,81%,09:19:00,:),Drank coffee:Drank tea:Stressful day,62,0,559,0,1,1,0,0,2,81.0,4
8,2015-01-08,2015-01-09,88%,07:16:00,:),Drank coffee:Drank tea,58,0,436,0,1,1,0,0,2,88.0,4
9,2015-01-09,2015-01-10,77%,09:01:00,:|,Drank coffee:Drank tea,65,0,541,0,1,1,0,0,1,77.0,4
10,2015-01-10,2015-01-11,89%,08:50:00,:|,Drank coffee:Drank tea,65,0,530,0,1,1,0,0,1,89.0,4


In [230]:
train, test = TrainTestSplit(data, .75)

([1m114×17 DataFrame[0m
[1m Row [0m│[1m Start      [0m[1m End        [0m[1m SleepQuality [0m[1m Time_in_bed [0m[1m Wake_up [0m[1m Sleep_Notes                       [0m[1m Heart_rate [0m[1m Activity_(steps) [0m[1m Minutes_in_bed [0m[1m Stressful_day [0m[1m Drank_coffee [0m[1m Drank_tea [0m[1m Worked_out [0m[1m Ate_late [0m[1m Wake_up_ord [0m[1m Sleep_quality_num [0m[1m Sleep_quality_ord [0m
[1m     [0m│[90m Date       [0m[90m Date       [0m[90m String       [0m[90m Time        [0m[90m String  [0m[90m String                            [0m[90m Int64      [0m[90m Int64            [0m[90m Int64          [0m[90m Bool          [0m[90m Bool         [0m[90m Bool      [0m[90m Bool       [0m[90m Bool     [0m[90m Int64       [0m[90m Float64           [0m[90m Int64             [0m
─────┼─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

In [239]:
train.Stressful_day = categorical(train.Stressful_day)
train.Drank_coffee = categorical(train.Drank_coffee)
train.Drank_tea = categorical(train.Drank_tea)
train.Worked_out = categorical(train.Worked_out)
train.Ate_late = categorical(train.Ate_late)
train.Wake_up_ord = categorical(train.Wake_up_ord)
train.Sleep_quality_ord = categorical(train.Sleep_quality_ord)

first(train, 5)

fm = @formula(Minutes_in_bed ~ Sleep_quality_ord + Wake_up_ord)
linearRegressor = lm(fm, train) 

println(r2(linearRegressor))

println(adjr2(linearRegressor))

println(linearRegressor)

0.5345353008121816
0.5218408090161502
StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

Minutes_in_bed ~ 1 + Sleep_quality_ord + Wake_up_ord

Coefficients:
─────────────────────────────────────────────────────────────────────────────────
                         Coef.  Std. Error      t  Pr(>|t|)  Lower 95%  Upper 95%
─────────────────────────────────────────────────────────────────────────────────
(Intercept)            16.0        42.3056   0.38    0.7060   -67.8397    99.8397
Sleep_quality_ord: 3  444.157      45.0952   9.85    <1e-16   354.789    533.525
Sleep_quality_ord: 4  475.812      44.6351  10.66    <1e-17   387.356    564.269
Wake_up_ord: 2        -22.4698     14.7222  -1.53    0.1298   -51.6457     6.7062
─────────────────────────────────────────────────────────────────────────────────
