## Read the data with the DataFrames package

In [None]:
using DataFrames 
# Load the sample CSV; the path is relative to the current working directory.
# NOTE(review): `readtable` and `showcols` belong to older DataFrames releases and
# appear to have been removed later (CSV.jl's `CSV.read` and `describe` are the usual
# replacements) — confirm against the installed DataFrames version.
data = readtable("../sample10k.csv")
# Dimensions of the loaded table.
size(data)
# Presumably prints a per-column overview of the table — confirm in the DataFrames docs.
showcols(data)

## View the data table

In [None]:
# Preview the leading rows of the table.
# NOTE(review): newer DataFrames releases spell this `first(data, n)` — confirm version.
head(data)

## Split into train/test

In [None]:
using Random
"""
    partitionTrainTest(data, at = 0.6; rng = Random.GLOBAL_RNG)

Randomly split the rows of `data` into a training part and a test part.

The row indices `1:size(data, 1)` are shuffled with `rng`. The first
`floor(Int, at * n)` shuffled rows form the training part and the remaining rows form
the test part, so every row ends up in exactly one of the two.

# Arguments
- `data`: a two-dimensional table that supports `size(data, 1)` and `data[rows, :]`
  (for example a `DataFrame` or a `Matrix`).
- `at`: fraction of rows assigned to the training part; must lie in `[0, 1]`.

# Keywords
- `rng`: random number generator used for the shuffle. Pass a seeded generator to get
  a reproducible split.

# Returns
A tuple `(train, test)` holding the selected rows of `data`.

# Throws
- `ArgumentError` if `at` is outside `[0, 1]` (including `NaN`).
"""
function partitionTrainTest(data, at = 0.6; rng = Random.GLOBAL_RNG)
    # Fail early with a clear message instead of an opaque BoundsError further down.
    0 <= at <= 1 || throw(ArgumentError("at must be within [0, 1], got $at"))

    n = size(data, 1)
    cut = floor(Int, at * n)  # number of training rows; computed once for both slices
    order = shuffle(rng, 1:n)

    train_idx = order[1:cut]
    test_idx = order[(cut + 1):n]
    return data[train_idx, :], data[test_idx, :]
end

# 60% of the shuffled rows go to `train`; the remaining rows go to `test`.
train, test = partitionTrainTest(data, 0.6)

## Visualize with Histogram

In [None]:
using Plots, StatsPlots 
# Switch Plots to the PyPlot backend before drawing anything.
pyplot()
# Histogram of column `f1` of the training split, using 50 bins.
Plots.histogram(train[:f1]; bins=50, xlabel="Feature #1", labels="Frequency")

## Visualize with Boxplot

In [None]:
 # Box plot of column `f1` of the training split.
 Plots.boxplot(train[:f1]; xlabel="Feature #1")

## Import Machine Learning libraries

In [None]:
using ScikitLearn: fit!, predict, predict_proba, @sk_import, fit_transform!
 @sk_import preprocessing: LabelEncoder 
 @sk_import model_selection: cross_val_score  
 @sk_import metrics: roc_auc_score 
 @sk_import linear_model: LogisticRegression 
 @sk_import ensemble: RandomForestClassifier 
 @sk_import tree: DecisionTreeClassifier 


 """
     classification_model(model, predictors;
                          train_data = train, test_data = test, target = 2)

 Fit `model` on the training table and return its ROC AUC on the test table.

 The AUC is computed from the predicted probability of the second (positive) class
 rather than from hard class labels: scoring thresholded labels collapses the ROC curve
 to a single operating point and does not measure how well the model ranks samples.

 # Arguments
 - `model`: a ScikitLearn classifier supporting `fit!` and `predict_proba`.
 - `predictors`: column selector (e.g. a vector of column names) for the feature columns.

 # Keywords
 - `train_data`, `test_data`: tables to fit on and to evaluate on. They default to the
   globals `train` and `test`, looked up when the function is called.
 - `target`: index of the label column; defaults to the second column.

 # Returns
 The ROC AUC of `model` on `test_data`, as returned by `roc_auc_score`.

 # Throws
 - `ArgumentError` if `predict_proba` does not return exactly two class columns.
 """
 function classification_model(model, predictors;
                               train_data = train, test_data = test, target = 2)
     # Feature matrix and label vector for each split.
     X_train = convert(Array, train_data[predictors])
     y_train = convert(Array, train_data[target])
     X_test = convert(Array, test_data[predictors])
     y_test = convert(Array, test_data[target])

     # Fit the model on the training split only.
     fit!(model, X_train, y_train)

     # One column of probabilities per class; column 2 is the positive class.
     proba = predict_proba(model, X_test)
     size(proba, 2) == 2 || throw(ArgumentError(
         "ROC AUC needs a binary classifier, got $(size(proba, 2)) class column(s)"))

     return roc_auc_score(y_test, proba[:, 2])
 end

## Random Forest Training & AUC Calculation

In [None]:
# Feature columns f1 … f12 used as model inputs.
predictors = [Symbol("f", i) for i in 1:12]
# Random forest with 500 trees, each limited to depth 5.
rf = RandomForestClassifier(; n_estimators=500, max_depth=5, n_jobs=-1)
# Fit on the training split and report the AUC on the test split.
classification_model(rf, predictors)

## Feature Importances

In [None]:
# Fetch the importances from the fitted forest once instead of on every iteration.
importances = rf.feature_importances_
# Positional labels 1..n (one per predictor), kept as a concretely typed Vector{Int}
# for the bar chart in the next cell.
labels = collect(1:length(predictors))

# Tab-separated listing: one line per feature.
print("Feature\tImportance\n")
for (name, score) in zip(predictors, importances)
    print(name, "\t", score, "\n")
end

## Visualize Feature Importances

In [None]:
# Bar chart of the fitted forest's importances, one bar per entry of `labels`.
Plots.bar(labels, rf.feature_importances_; label="Importance")