In [None]:
# Data Preprocessing Template

using DataFrames
using Statistics
using CSV
using CategoricalArrays
using MLLabelUtils
using MLDataUtils
using Random
using MLPreprocessing

# Seed the global random number generator so repeated runs start from the same state
Random.seed!(0)

# Importing the dataset
# NOTE(review): `DataFrame!` only exists in older DataFrames releases — confirm the
# pinned package version before upgrading.
df = DataFrames.DataFrame!(CSV.File("Data.csv"));

# Pipeline settings read by the steps below
predictions_for  = :Purchased                          # column used as the target `y`
categorical_f    = [ :Country, :Purchased ]            # columns converted to categorical
encode_one_hot   = [ :Country ]                        # columns expanded to one-hot blocks
extract_features = Not([ :Purchased, :Country ])       # every column except these goes into `X`
fill_missing     = [ :Age => mean, :Salary => mean ]   # column => function computing the fill value
split_size       = 1.0 - 0.2                           # passed as `at` to `splitobs`

# Taking care of missing data
# Every `column => callback` pair in `fill_missing` replaces the `missing` entries of
# that column with `callback` applied to the column's non-missing values.
for (entry, callback) in fill_missing
    observed  = collect(skipmissing(df[!, entry]))
    fill_with = callback(observed)
    # Promote to a single concrete element type. Without this, an integer column
    # filled with a Float64 mean becomes an abstractly typed Vector{Real}, and that
    # abstract element type carries over into the feature matrix built from `df`.
    T = promote_type(eltype(observed), typeof(fill_with))
    df[!, entry] = convert(Vector{T}, coalesce.(df[!, entry], fill_with))
end

# Encoding categorical data
categorical!(df, categorical_f);

# Feature matrix and target vector.
# `Matrix(...)` builds a new array; `y` is the column stored in `df`
# (use `:` instead of `!` to get a copy of the column).
X = Matrix(df[!, extract_features ]); # or df[!, 1:3]
y = df[!, predictions_for ];

# One-Hot encoding for columns
# Each encoded block is transposed so that its rows line up with the rows of `X`.
# The blocks are built in a comprehension and concatenated once at top level instead
# of reassigning `X` inside a `for` loop: that reassignment only works under the
# REPL/notebook soft-scope rule and raises UndefVarError when the code runs as a script.
# `reverse` keeps the original column order, where each later entry was prepended
# in front of the earlier ones.
onehot_blocks = [transpose(convertlabel(LabelEnc.OneOfK, df[!, entry]))
                 for entry in reverse(encode_one_hot)]
X = hcat(onehot_blocks..., X);

# Splitting the dataset into the Training set and Test set
# Observations are rows (obsdim = 1); the data is shuffled before being split at
# `split_size`.
(X_train, y_train), (X_test, y_test) = splitobs(
    shuffleobs((X, y), obsdim = 1),
    at = split_size,
    obsdim = 1,
);

# Feature scaling
# The scaler is fitted on the training rows only and then applied to both subsets.
# `fit` and `transform` are module-qualified because DataFrames also exports a
# `transform`, which makes the unqualified name ambiguous when both packages are loaded.
scaler = MLPreprocessing.fit(StandardScaler, X_train, obsdim = 1, operate_on = collect(1:size(X_train, 2)))

X_train = MLPreprocessing.transform(X_train, scaler);
X_test  = MLPreprocessing.transform(X_test, scaler);

In [None]:
# Simple Linear Regression: load the data, split it, and fit Salary ~ YearsExperience

using DataFrames
using CSV
using Random
using MLDataUtils
using GLM

# Seed the global random number generator so repeated runs start from the same state
Random.seed!(0)

# Importing the dataset
df = DataFrames.DataFrame!(CSV.File("Data.csv"));

# Shuffle the observations, then split them using `splitobs`' default ratio
shuffled = shuffleobs(df);
df_train, df_test = splitobs(shuffled);

# Fit a linear model of Salary on YearsExperience using the training part only.
# NOTE(review): the formula needs `Salary` and `YearsExperience` columns — confirm
# that "Data.csv" is the intended file for this cell.
regressor = lm(@formula(Salary ~ YearsExperience), df_train)