# Annotations Overview

In [1]:
function example(x::Int64)
    
end

example (generic function with 1 method)

In [2]:
function mean_and_inc(x::Array)
    newarray::Array{Int64} = x .+ 1
    mean::Float64 = sum(x) / length(x)
    return(mean::Float64, newarray::Array{Int64})
end

mean_and_inc (generic function with 1 method)

# Annotate everything?
How does Julia react when we annotate everything? Is this something important to consider and likely do whenever we go about writing some code in Julia? Today I wanted to compare a relatively complex model with and without these annotations.

In [3]:
"""
      Linear Regression is a well-known linear function used for predicting
      continuous features with a mostly linear or semi-linear slope.\n
      --------------------\n
      ==PARAMETERS==\n
     [y] <- Fill with your trainY values. Should be an array of shape (0,1) or (1,0)\n
     [x] <- Fill in with your trainX values. Should be an array of shape (0,1) or (1,0)\n
     --------------------\n
     ==Functions==\n
     predict(xt) <- Returns a prediction from the model based on the xtrain value passed (xt)
       """
mutable struct LinearRegression{A<:AbstractFloat, B<:AbstractFloat, P<:Function}
    a::A
    b::B
    predict::P
    function LinearRegression(x::Array,y::Array)
        # a = ((∑y)(∑x^2)-(∑x)(∑xy)) / (n(∑x^2) - (∑x)^2)
        # b = (x(∑xy) - (∑x)(∑y)) / n(∑x^2) - (∑x)^2
        if length(x) != length(y)
            throw(ArgumentError("The array shape does not match!"))
        end
        Σx::Float64 = sum(x)
        Σy::Float64 = sum(y)
        xy::Array = x .* y
        Σxy::Float64 = sum(xy)
        x2::Array{Float64} = x .^ 2
        Σx2::Float64 = sum(x2)
        n::Int64 = length(x)
        # Calculate a
        a::Float64 = (((Σy) * (Σx2)) - ((Σx * (Σxy)))) / ((n * (Σx2))-(Σx^2))
        b::Float64 = ((n*(Σxy)) - (Σx * Σy)) / ((n * (Σx2)) - (Σx ^ 2))
        predict(xt::Array) = (xt = [i = a + (b * i) for i in xt]::Array)
        return new{Float64, Float64, Function}(a::Float64, b::Float64, predict::Function)
    end
end

LinearRegression

In [4]:
"""
      Linear Regression is a well-known linear function used for predicting
      continuous features with a mostly linear or semi-linear slope.\n
      --------------------\n
      ==PARAMETERS==\n
     [y] <- Fill with your trainY values. Should be an array of shape (0,1) or (1,0)\n
     [x] <- Fill in with your trainX values. Should be an array of shape (0,1) or (1,0)\n
     --------------------\n
     ==Functions==\n
     predict(xt) <- Returns a prediction from the model based on the xtrain value passed (xt)
       """
mutable struct LinearRegressionNA
    a
    b
    predict
    function LinearRegressionNA(x, y)
        # a = ((∑y)(∑x^2)-(∑x)(∑xy)) / (n(∑x^2) - (∑x)^2)
        # b = (x(∑xy) - (∑x)(∑y)) / n(∑x^2) - (∑x)^2
        if length(x) != length(y)
            throw(ArgumentError("The array shape does not match!"))
        end
        Σx = sum(x)
        Σy = sum(y)
        xy = x .* y
        Σxy = sum(xy)
        x2 = x .^ 2
        Σx2 = sum(x2)
        n = length(x)
        # Calculate a
        a = (((Σy) * (Σx2)) - ((Σx * (Σxy)))) / ((n * (Σx2))-(Σx^2))
        b = ((n*(Σxy)) - (Σx * Σy)) / ((n * (Σx2)) - (Σx ^ 2))
        predict(xt) = (xt = [i = a + (b * i) for i in xt])
        return new(a, b, predict)
    end
end

LinearRegressionNA

In [5]:
mutable struct LinearRegressionNVA{A<:AbstractFloat, B<:AbstractFloat, P<:Function}
    a::A
    b::B
    predict::P
    function LinearRegressionNVA(x::Array, y::Array)
        # a = ((∑y)(∑x^2)-(∑x)(∑xy)) / (n(∑x^2) - (∑x)^2)
        # b = (x(∑xy) - (∑x)(∑y)) / n(∑x^2) - (∑x)^2
        if length(x) != length(y)
            throw(ArgumentError("The array shape does not match!"))
        end
        Σx = sum(x)
        Σy = sum(y)
        xy = x .* y
        Σxy = sum(xy)
        x2 = x .^ 2
        Σx2 = sum(x2)
        n = length(x)
        # Calculate a
        a = (((Σy) * (Σx2)) - ((Σx * (Σxy)))) / ((n * (Σx2))-(Σx^2))
        b = ((n*(Σxy)) - (Σx * Σy)) / ((n * (Σx2)) - (Σx ^ 2))
        predict(xt::Array) = (xt = [i = a + (b * i) for i in xt])
        return new{Float64, Float64, Function}(a::Float64, b::Float64, predict::Function)
    end
end

In [6]:
mutable struct LinearRegressionRA{A<:AbstractFloat, B<:AbstractFloat, P<:Function}
    a::A
    b::B
    predict::P
    function LinearRegressionRA(x::Array,y::Array)
        # a = ((∑y)(∑x^2)-(∑x)(∑xy)) / (n(∑x^2) - (∑x)^2)
        # b = (x(∑xy) - (∑x)(∑y)) / n(∑x^2) - (∑x)^2
        if length(x) != length(y)
            throw(ArgumentError("The array shape does not match!"))
        end
        Σx::Float64 = sum(x)
        Σy::Float64 = sum(y)
        xy::Array = x .* y
        Σxy::Float64 = sum(xy)
        x2::Array{Float64} = x .^ 2
        Σx2::Float64 = sum(x2)
        n::Int64 = length(x)
        # Calculate a
        a::Float64 = (((Σy) * (Σx2)) - ((Σx * (Σxy)))) / ((n * (Σx2))-(Σx^2))
        b::Float64 = ((n*(Σxy)) - (Σx * Σy)) / ((n * (Σx2)) - (Σx ^ 2))
        predict(xt::Array) = (xt = [i = a + (b * i) for i in xt]::Array)
        return new{Float64, Float64, Function}(a, b, predict)
    end
end

In [7]:
mutable struct LinearRegressionNFA
    a
    b
    predict
    function LinearRegressionNFA(x::Array, y::Array)
        # a = ((∑y)(∑x^2)-(∑x)(∑xy)) / (n(∑x^2) - (∑x)^2)
        # b = (x(∑xy) - (∑x)(∑y)) / n(∑x^2) - (∑x)^2
        if length(x) != length(y)
            throw(ArgumentError("The array shape does not match!"))
        end
        Σx = sum(x)
        Σy = sum(y)
        xy = x .* y
        Σxy = sum(xy)
        x2 = x .^ 2
        Σx2 = sum(x2)
        n = length(x)
        # Calculate a
        a = (((Σy) * (Σx2)) - ((Σx * (Σxy)))) / ((n * (Σx2))-(Σx^2))
        b = ((n*(Σxy)) - (Σx * Σy)) / ((n * (Σx2)) - (Σx ^ 2))
        predict(xt::Array) = (xt = [i = a + (b * i) for i in xt])
        return new(a::Float64, b::Float64, predict::Function)
    end
end

In [8]:
x = randn(50000000)

50000000-element Vector{Float64}:
  0.6633069614939759
  0.4156659206954698
 -0.5659964657209032
 -1.0471406687176998
  0.4501469257757367
 -1.0287395908363681
 -0.820501930364141
 -1.611218950404726
 -0.2903289821813945
 -0.7574910986473768
  0.13498916130150704
 -0.5206425203421942
 -1.369309926937457
  ⋮
  2.297516681377142
 -1.272848724044494
 -0.6807819724661306
  0.6459817289243162
 -0.3778089842895379
 -0.9753492610293001
 -1.0915261285587894
 -0.7164055598978412
  0.9766895032823073
  0.46841190861493803
 -1.5664692671850065
 -0.4079230116067243

In [9]:
y = randn(50000000)

50000000-element Vector{Float64}:
  0.46387035230452794
 -0.5900015563804207
  0.5069795191843152
  2.042457737142603
  0.6915274813749784
  0.30943280943515056
 -0.4475394220711997
  0.7471308042653596
 -1.2542272050132308
  0.37519485475058717
 -2.2044430561461166
  0.6226436747338088
 -1.121236156550528
  ⋮
  0.0749669386972949
  0.25947834583697127
 -0.23740712196497574
  1.419040518385935
 -0.5357882807001885
  0.20458262501709573
 -0.45869344471370177
  0.6051138154255613
  0.9116256326934371
  0.9845269389129089
 -0.9707227199076656
  0.8280097428680043

### Due to precompilation, these cells need to be re-ordered, with the annotation currently tested at the top.

In [10]:
@time LinearRegressionNFA(x, y).predict(y)

  1.004386 seconds (514.88 k allocations: 1.144 GiB, 14.45% gc time, 19.73% compilation time)


50000000-element Vector{Float64}:
 -0.00011134813461067394
 -0.00015301699680844052
 -0.00010964364859167011
 -4.8932636423583957e-5
 -0.00010234683809503484
 -0.00011745441435772198
 -0.00014738421070416145
 -0.00010014834776602669
 -0.0001792797017852147
 -0.00011485426003013244
 -0.0002168501247490422
 -0.00010507042321286016
 -0.00017402139133376352
  ⋮
 -0.0001267249205523944
 -0.00011942955539970429
 -0.0001390758254210278
 -7.358182383052553e-5
 -0.0001508734673197117
 -0.00012160006797076693
 -0.00014782522770880022
 -0.00010576353333928252
 -9.364441471540783e-5
 -9.076198236488916e-5
 -0.00016807026615555306
 -9.695048916522143e-5

In [11]:
@time LinearRegressionRA(x, y).predict(y)

  0.819315 seconds (93.98 k allocations: 1.122 GiB, 6.94% gc time, 15.14% compilation time)


50000000-element Vector{Float64}:
 -0.00011134813461067394
 -0.00015301699680844052
 -0.00010964364859167011
 -4.8932636423583957e-5
 -0.00010234683809503484
 -0.00011745441435772198
 -0.00014738421070416145
 -0.00010014834776602669
 -0.0001792797017852147
 -0.00011485426003013244
 -0.0002168501247490422
 -0.00010507042321286016
 -0.00017402139133376352
  ⋮
 -0.0001267249205523944
 -0.00011942955539970429
 -0.0001390758254210278
 -7.358182383052553e-5
 -0.0001508734673197117
 -0.00012160006797076693
 -0.00014782522770880022
 -0.00010576353333928252
 -9.364441471540783e-5
 -9.076198236488916e-5
 -0.00016807026615555306
 -9.695048916522143e-5

In [12]:
@time LinearRegressionNVA(x, y).predict(y)

  0.850578 seconds (89.39 k allocations: 1.122 GiB, 6.44% gc time, 13.51% compilation time)


50000000-element Vector{Float64}:
 -0.00011134813461067394
 -0.00015301699680844052
 -0.00010964364859167011
 -4.8932636423583957e-5
 -0.00010234683809503484
 -0.00011745441435772198
 -0.00014738421070416145
 -0.00010014834776602669
 -0.0001792797017852147
 -0.00011485426003013244
 -0.0002168501247490422
 -0.00010507042321286016
 -0.00017402139133376352
  ⋮
 -0.0001267249205523944
 -0.00011942955539970429
 -0.0001390758254210278
 -7.358182383052553e-5
 -0.0001508734673197117
 -0.00012160006797076693
 -0.00014782522770880022
 -0.00010576353333928252
 -9.364441471540783e-5
 -9.076198236488916e-5
 -0.00016807026615555306
 -9.695048916522143e-5

In [13]:
@time LinearRegression(x, y).predict(y)

  0.835412 seconds (92.70 k allocations: 1.122 GiB, 6.86% gc time, 12.77% compilation time)


50000000-element Vector{Float64}:
 -0.00011134813461067394
 -0.00015301699680844052
 -0.00010964364859167011
 -4.8932636423583957e-5
 -0.00010234683809503484
 -0.00011745441435772198
 -0.00014738421070416145
 -0.00010014834776602669
 -0.0001792797017852147
 -0.00011485426003013244
 -0.0002168501247490422
 -0.00010507042321286016
 -0.00017402139133376352
  ⋮
 -0.0001267249205523944
 -0.00011942955539970429
 -0.0001390758254210278
 -7.358182383052553e-5
 -0.0001508734673197117
 -0.00012160006797076693
 -0.00014782522770880022
 -0.00010576353333928252
 -9.364441471540783e-5
 -9.076198236488916e-5
 -0.00016807026615555306
 -9.695048916522143e-5

In [14]:
@time LinearRegressionNA(x, y).predict(y)

  0.812969 seconds (88.03 k allocations: 1.122 GiB, 8.19% gc time, 13.09% compilation time)


50000000-element Vector{Float64}:
 -0.00011134813461067394
 -0.00015301699680844052
 -0.00010964364859167011
 -4.8932636423583957e-5
 -0.00010234683809503484
 -0.00011745441435772198
 -0.00014738421070416145
 -0.00010014834776602669
 -0.0001792797017852147
 -0.00011485426003013244
 -0.0002168501247490422
 -0.00010507042321286016
 -0.00017402139133376352
  ⋮
 -0.0001267249205523944
 -0.00011942955539970429
 -0.0001390758254210278
 -7.358182383052553e-5
 -0.0001508734673197117
 -0.00012160006797076693
 -0.00014782522770880022
 -0.00010576353333928252
 -9.364441471540783e-5
 -9.076198236488916e-5
 -0.00016807026615555306
 -9.695048916522143e-5

In [15]:
no_annot = [0.982570, 1.006735, 1.012081, 0.962322]

4-element Vector{Float64}:
 0.98257
 1.006735
 1.012081
 0.962322

In [16]:
sum(no_annot) / length(no_annot)

0.990927

In [17]:
f_annot = [0.980571, 0.915812, 0.938984, 0.913891]

4-element Vector{Float64}:
 0.980571
 0.915812
 0.938984
 0.913891

In [18]:
sum(f_annot) / length(f_annot)

0.9373144999999999

In [19]:
e_annot = [0.936667, 0.955871, 0.954231, 0.955191]

4-element Vector{Float64}:
 0.936667
 0.955871
 0.954231
 0.955191

In [20]:
sum(e_annot) / length(e_annot)

0.9504900000000001

In [21]:
r_annot = [0.993342, 0.969020, 0.958454, 0.965281]

4-element Vector{Float64}:
 0.993342
 0.96902
 0.958454
 0.965281

In [22]:
sum(r_annot) / length(r_annot)

0.97152425

In [23]:
fa_annot = [1.047707, 0.903389, 0.898407, 0.946624]

4-element Vector{Float64}:
 1.047707
 0.903389
 0.898407
 0.946624

In [24]:
sum(fa_annot) / length(fa_annot)

0.94903175

In [37]:
using UnicodePlots, DataFrames

In [26]:
averages = DataFrame(:all_annotations => [sum(f_annot) / length(f_annot)],
    :no_annotations => [sum(no_annot) / length(no_annot)],
    :no_varannotations => [sum(e_annot) / length(e_annot)],
    :no_returnannotations => [sum(r_annot) / length(r_annot)],
    :no_fieldannotations => [sum(fa_annot) / length(fa_annot)]
)

Unnamed: 0_level_0,all_annotations,no_annotations,no_varannotations,no_returnannotations,no_fieldannotations
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64
1,0.937314,0.990927,0.95049,0.971524,0.949032


In [28]:
values = DataFrame(:all_annotations => f_annot,
    :no_annotations => no_annot,
    :no_varannotations => e_annot,
    :no_returnannotations => r_annot,
    :no_fieldannotations => fa_annot
)

Unnamed: 0_level_0,all_annotations,no_annotations,no_varannotations,no_returnannotations,no_fieldannotations
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Float64
1,0.980571,0.98257,0.936667,0.993342,1.04771
2,0.915812,1.00673,0.955871,0.96902,0.903389
3,0.938984,1.01208,0.954231,0.958454,0.898407
4,0.913891,0.962322,0.955191,0.965281,0.946624


In [38]:
histogram(values[!, :all_annotations], title = "Frequency of different times with all annotations")

                [1mFrequency of different times with all annotations[0m 
                [38;5;8m┌                                        ┐[0m 
   [0.9 , 0.95) [38;5;8m┤[0m[38;5;2m█████████████████████████████████████[0m[38;5;2m [0m 3[38;5;8m [0m [38;5;8m[0m
   [0.95, 1.0 ) [38;5;8m┤[0m[38;5;2m████████████[0m[38;5;2m▍[0m 1                         [38;5;8m [0m [38;5;8m[0m
                [38;5;8m└                                        ┘[0m 
                                 Frequency                 

In [39]:
histogram(values[!, :no_fieldannotations], title = "Frequency of different times with no field annotations")

                [1mFrequency of different times with no field annotations[0m 
                [38;5;8m┌                                        ┐[0m 
   [0.85, 0.9 ) [38;5;8m┤[0m[38;5;2m██████████████████[0m[38;5;2m▌[0m 1                   [38;5;8m [0m [38;5;8m[0m
   [0.9 , 0.95) [38;5;8m┤[0m[38;5;2m█████████████████████████████████████[0m[38;5;2m [0m 2[38;5;8m [0m [38;5;8m[0m
   [0.95, 1.0 ) [38;5;8m┤[0m[38;5;2m[0m[38;5;2m [0m 0                                     [38;5;8m [0m [38;5;8m[0m
   [1.0 , 1.05) [38;5;8m┤[0m[38;5;2m██████████████████[0m[38;5;2m▌[0m 1                   [38;5;8m [0m [38;5;8m[0m
                [38;5;8m└                                        ┘[0m 
                                 Frequency                 

In [40]:
histogram(values[!, :no_annotations], title = "Frequency of different times with no annotations")

                [1mFrequency of different times with no annotations[0m 
                [38;5;8m┌                                        ┐[0m 
   [0.96, 0.98) [38;5;8m┤[0m[38;5;2m██████████████████[0m[38;5;2m▌[0m 1                   [38;5;8m [0m [38;5;8m[0m
   [0.98, 1.0 ) [38;5;8m┤[0m[38;5;2m██████████████████[0m[38;5;2m▌[0m 1                   [38;5;8m [0m [38;5;8m[0m
   [1.0 , 1.02) [38;5;8m┤[0m[38;5;2m█████████████████████████████████████[0m[38;5;2m [0m 2[38;5;8m [0m [38;5;8m[0m
                [38;5;8m└                                        ┘[0m 
                                 Frequency                 

In [32]:
using UnicodePlots

In [33]:
function route_from_dir(dir)
    dirs = readdir(dir)
    routes = []
    for directory in dirs
        if isfile("$dir/" * directory)
            push!(routes, "$dir/$directory")
        else
            if ~(directory in routes)
                newread = dir * "/$directory"
                newrs = route_from_dir(newread)
                [push!(routes, r) for r in newrs]
            end
        end
    end
    routes
end

route_from_dir (generic function with 1 method)

In [34]:
function route_from_dira(dir::String)
    dirs::Vector{String} = readdir(dir)
    routes::Vector{String} = []
    for directory in dirs
        if isfile("$dir/" * directory)
            push!(routes, "$dir/$directory")
        else
            if ~(directory in routes)
                newread::String = dir * "/$directory"
                newrs::Vector{String} = route_from_dir(newread)
                [push!(routes, r) for r in newrs]
            end
        end
    end
    routes::Vector{String}
end

route_from_dira (generic function with 1 method)

In [35]:
@time route_from_dira(".")

  0.021062 seconds (8.58 k allocations: 799.109 KiB)


1130-element Vector{String}:
 "./.ipynb_checkpoints/10 Julia array methods-checkpoint.ipynb"
 "./.ipynb_checkpoints/15 Python Decorator-checkpoint.ipynb"
 "./.ipynb_checkpoints/7 Of My Favorite Pandas Tricks-checkpoint.ipynb"
 "./.ipynb_checkpoints/Algebraic Arrays-checkpoint.ipynb"
 "./.ipynb_checkpoints/All about constructors-checkpoint.ipynb"
 "./.ipynb_checkpoints/Analyzing Features-checkpoint.ipynb"
 "./.ipynb_checkpoints/Beauty of method errors-checkpoint.ipynb"
 "./.ipynb_checkpoints/Better functions-checkpoint.ipynb"
 "./.ipynb_checkpoints/Bring a model back with data-checkpoint.ipynb"
 "./.ipynb_checkpoints/Comprehensive Julia Tutorial 15-checkpoint.ipynb"
 "./.ipynb_checkpoints/Different paradigms of Julia-checkpoint.ipynb"
 "./.ipynb_checkpoints/Functions as types in Julia-checkpoint.ipynb"
 "./.ipynb_checkpoints/Julia Indexing-checkpoint.ipynb"
 ⋮
 "./oddframes testing.ipynb"
 "./pycall basic examples.ipynb"
 "./pycallcomparison.ipynb"
 "./python_recursionisslow.ipynb"
 "./

In [36]:
@time route_from_dir(".")

  0.005813 seconds (8.24 k allocations: 785.062 KiB)


1130-element Vector{Any}:
 "./.ipynb_checkpoints/10 Julia array methods-checkpoint.ipynb"
 "./.ipynb_checkpoints/15 Python Decorator-checkpoint.ipynb"
 "./.ipynb_checkpoints/7 Of My Favorite Pandas Tricks-checkpoint.ipynb"
 "./.ipynb_checkpoints/Algebraic Arrays-checkpoint.ipynb"
 "./.ipynb_checkpoints/All about constructors-checkpoint.ipynb"
 "./.ipynb_checkpoints/Analyzing Features-checkpoint.ipynb"
 "./.ipynb_checkpoints/Beauty of method errors-checkpoint.ipynb"
 "./.ipynb_checkpoints/Better functions-checkpoint.ipynb"
 "./.ipynb_checkpoints/Bring a model back with data-checkpoint.ipynb"
 "./.ipynb_checkpoints/Comprehensive Julia Tutorial 15-checkpoint.ipynb"
 "./.ipynb_checkpoints/Different paradigms of Julia-checkpoint.ipynb"
 "./.ipynb_checkpoints/Functions as types in Julia-checkpoint.ipynb"
 "./.ipynb_checkpoints/Julia Indexing-checkpoint.ipynb"
 ⋮
 "./oddframes testing.ipynb"
 "./pycall basic examples.ipynb"
 "./pycallcomparison.ipynb"
 "./python_recursionisslow.ipynb"
 "./ran