In [1]:
from pycaret.datasets import get_data
import altair as alt

# Data

In [2]:
# Load PyCaret's bundled 'satellite' dataset; the preview below shows
# numbered Attribute columns plus the 'Class' target.
df_data = get_data(dataset='satellite')

Unnamed: 0,Attribute1,Attribute2,Attribute3,Attribute4,Attribute5,Attribute6,Attribute7,Attribute8,Attribute9,Attribute10,...,Attribute28,Attribute29,Attribute30,Attribute31,Attribute32,Attribute33,Attribute34,Attribute35,Attribute36,Class
0,80,102,102,79,76,102,102,79,76,102,...,87,79,107,109,87,79,107,113,87,3
1,76,102,102,79,76,102,106,83,76,102,...,87,79,107,113,87,79,103,104,83,3
2,80,98,106,79,76,94,102,76,76,94,...,79,79,95,100,79,79,95,96,75,4
3,76,94,102,76,76,94,102,76,76,94,...,79,79,95,96,75,79,95,100,75,4
4,76,94,102,76,76,94,102,76,76,89,...,75,79,95,100,75,75,95,100,79,4


# PCA - improve predictions

In [3]:
def model_construction_evaluation(with_PCA, data=None, session_id=None):
    """Fit a PyCaret logistic regression and score it on the hold-out split.

    Parameters
    ----------
    with_PCA : bool
        Forwarded to ``setup(pca=...)``; turns PyCaret's PCA step on or off.
    data : pandas.DataFrame, optional
        Frame holding the features and a ``'Class'`` target column.
        Defaults to the notebook-level ``df_data`` so existing calls keep
        working unchanged.
    session_id : int, optional
        Forwarded to ``setup(session_id=...)`` so a run can be pinned to a
        fixed seed. ``None`` (the default) leaves seeding to PyCaret, which
        is what the function did before this parameter existed.

    Returns
    -------
    The value returned by ``predict_model`` for the fitted model (the scored
    hold-out rows).

    Side effects
    ------------
    Prints a heading and the number of columns of the transformed feature
    matrix; ``predict_model`` additionally displays its metrics table.
    """
    # Kept local: the experiment functions live in a task-specific PyCaret
    # module, and this helper is the only place that needs them.
    import pycaret.classification as pc_clf

    if data is None:
        # Fall back to the dataset loaded earlier in the notebook.
        data = df_data

    # setup() configures the experiment as module-level state inside PyCaret;
    # its return value is not needed here.
    pc_clf.setup(data=data, target='Class',
                 data_split_shuffle=False,
                 pca=with_PCA,
                 session_id=session_id,
                 silent=True, verbose=False)

    # train logistic regression model
    lr = pc_clf.create_model('lr', verbose=False)

    # evaluate model on test data
    print('Evaluation on test data')
    predictions = pc_clf.predict_model(lr)

    # Column count of the transformed feature matrix, i.e. after PCA if enabled.
    print('# attr transformed dataset = ', pc_clf.get_config('X').shape[1])

    return predictions

In [4]:
# Baseline run: same pipeline with the PCA step disabled.
pred_nopca = model_construction_evaluation(with_PCA=False)

Evaluation on test data


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Logistic Regression,0.8156,0.9729,0.6908,0.8108,0.8103,0.7266,0.7275


# attr transformed dataset =  36


In [5]:
# Comparison run: identical pipeline with the PCA step enabled.
pred_pca = model_construction_evaluation(with_PCA=True)

Evaluation on test data


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
0,Logistic Regression,0.8685,0.9845,0.7901,0.8779,0.8721,0.8058,0.8062


# attr transformed dataset =  15


# PCA - visualization

In [7]:
# Flag misclassified rows. Both columns are compared as strings so the check
# does not depend on 'Class' and 'Label' sharing a dtype (a str-vs-int
# comparison would otherwise mark every row as an error).
pred_pca['Error'] = pred_pca['Class'].astype(str) != pred_pca['Label'].astype(str)
pred_pca

Unnamed: 0,Component_1,Component_2,Component_3,Component_4,Component_5,Component_6,Component_7,Component_8,Component_9,Component_10,Component_11,Component_12,Component_13,Component_14,Component_15,Class,Label,Score,Error
0,-28.275118,47.174454,-8.012526,-5.108666,10.971801,0.587040,-3.856696,2.293972,-3.295930,-8.887474,1.542313,1.477528,0.887585,5.611454,5.118178,7,7,0.7284,False
1,-35.006004,21.630428,0.873397,4.747522,-3.447356,19.787781,-3.640810,6.382989,-7.608947,-11.689558,7.403757,-2.485934,-3.965992,-0.493570,-1.711767,7,4,0.5406,True
2,-47.942829,-14.815460,-0.063192,3.804026,-7.804442,12.443890,-6.532684,-4.870722,3.899764,2.118107,-0.605306,3.599617,4.501104,-0.545925,-2.590748,7,4,0.7262,True
3,-54.240395,-26.378593,-1.263925,1.546785,-2.695212,10.262909,-5.258085,1.913638,3.754922,1.003019,-2.454178,3.279565,-0.291866,-2.594531,2.973466,3,3,0.5080,False
4,-56.705162,-28.209408,-2.483271,-3.582956,5.024928,-1.688777,-0.358829,-5.786405,3.581427,2.886940,-2.582798,-6.898177,-2.554672,-2.930027,-1.365450,3,3,0.5828,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1926,24.973282,-41.749512,2.027667,16.442945,8.862819,2.267540,-22.577454,1.280477,-2.901271,5.935664,-10.995161,0.126785,-2.371190,-1.969614,-5.185268,5,1,0.9226,True
1927,2.269403,-39.940613,0.639725,14.891664,1.459452,-7.480066,-18.892725,-0.064636,-7.800798,3.387546,-16.209526,0.645534,-1.009785,3.689035,0.642979,5,1,0.9422,True
1928,-18.133480,-28.942589,-2.751947,15.828526,0.005419,-10.100183,-17.583099,-6.696681,0.763916,-0.352354,-14.084351,1.230318,-2.581653,4.382914,4.960061,4,1,0.6837,True
1929,-28.493469,-24.236900,-4.106470,13.067345,-1.066252,-1.614430,-2.040647,2.593337,0.531666,-6.732936,-7.979114,2.095236,-0.018583,5.312784,10.185766,4,4,0.6238,False


In [8]:
# Scatter of the hold-out rows on the first two PCA components.
# Colour encodes the true class, point size encodes whether the prediction
# was wrong; titles are set so the figure can be read on its own.
(
    alt.Chart(pred_pca)
    .mark_point()
    .encode(
        x=alt.X('Component_1:Q', title='PCA component 1'),
        y=alt.Y('Component_2:Q', title='PCA component 2'),
        color=alt.Color('Class:N', title='True class'),
        size=alt.Size('Error:N', title='Misclassified'),
        tooltip=['Class', 'Label', 'Score', 'Error'],
    )
    .properties(title='Logistic regression with PCA: hold-out predictions')
    .interactive()
)