# <span style='color:Red'> 1. Classification : Basics </span>

### <span style='color:DarkBlue'>1.1 Data loading</span>

#### Get the installed PyCaret version

In [None]:
from pycaret.utils import version
version()

#### Loading dataset from pycaret

In [None]:
from pycaret.datasets import get_data

#### Get the list of datasets available in pycaret

In [None]:
# Internet connection is required
dataSets = get_data('index')
dataSets

#### Get diabetes dataset

In [None]:
# Internet connection is required
diabetes_df = get_data("diabetes")
# This is a classification dataset: the values in 'Class variable' are discrete (binary).

#### Get the dimensions of the dataset

In [None]:
diabetes_df.shape

#### Remove duplicates

In [None]:
# Show the shape before and after removing duplicate rows.
# drop_duplicates() returns a new DataFrame instead of modifying the original,
# so the result must be assigned back; otherwise both prints show the same shape
# and the duplicates are still present in later cells.
print(diabetes_df.shape)
diabetes_df = diabetes_df.drop_duplicates()
print(diabetes_df.shape)

### <span style='color:DarkBlue'>1.2 Parameter setting for all classification models</span>
- Train/Test division
- Sampling
- Normalization
- Transformation
- PCA (Dimensionality Reduction)
- Handling of Outliers
- Feature Selection

#### Setup parameters for classification models (defaults)

In [None]:
# Bring the PyCaret classification functions (setup, compare_models, create_model, ...)
# into the notebook namespace; later cells call them unqualified.
from pycaret.classification import *
# Initialise the experiment with defaults: only the data and the target column are given.
clf = setup(data=diabetes_df, target='Class variable')

### <span style='color:DarkBlue'>1.3 Run and compare the Model Performance</span>

#### Comparing models

In [None]:
compare_models()
# Explore more parameters

### <span style='color:DarkBlue'>1.4 Plot the Best Model</span>

##### Plot AUC

In [None]:
lrModel = create_model('lr')
plot_model(lrModel, plot='auc')

##### Plot Precision Recall Curve

In [None]:
plot_model(lrModel, plot='pr')

##### Plot Confusion Matrix

In [None]:
plot_model(lrModel, plot='confusion_matrix')

##### Plot error

In [None]:
plot_model(lrModel, plot='error')

##### Plot Class Report

In [None]:
plot_model(lrModel, plot='class_report')

##### Plot Boundary

In [None]:
plot_model(lrModel, plot='boundary')

##### Plot Learning

In [None]:
plot_model(lrModel, plot='learning')

##### Plot Validation Curve

In [None]:
# Takes a long time to run and may raise an error, so the call is left commented out.
#plot_model(lrModel, plot='vc')

##### Plot Feature Importance

In [None]:
plot_model(lrModel, plot='feature')

##### Plot Feature Importance (All)

In [None]:
plot_model(lrModel, plot='feature_all')

### <span style='color:DarkBlue'>2.1 Model Performance using Data Normalization</span>

In [None]:
# Re-run setup with normalization enabled, then compare the models again.
setup(data=diabetes_df, target='Class variable', normalize=True, normalize_method='zscore')
compare_models()

# Available normalize_method values: 'zscore', 'minmax', 'maxabs', 'robust'

### <span style='color:DarkBlue'>2.2 Model Performance using Feature Selection</span>

In [None]:
setup(data=diabetes_df, target='Class variable', feature_selection=True, feature_selection_threshold=0.8)
compare_models()

### <span style='color:DarkBlue'>2.3 Model Performance using Outlier Removal</span>

In [None]:
setup(data=diabetes_df, target='Class variable', remove_outliers=True, outliers_threshold=0.05)
compare_models()

### <span style='color:DarkBlue'>2.4 Model Performance using Transformation</span>

In [None]:
setup(data=diabetes_df, target='Class variable', transformation = True, transformation_method='yeo-johnson')
compare_models()

### <span style='color:DarkBlue'>2.5 Model Performance using PCA</span>

In [None]:
setup(data=diabetes_df, target='Class variable', pca = True, pca_method='linear')
compare_models()

### <span style='color:DarkBlue'>3.1 Running single model</span>

In [None]:
class_model_lr = create_model('lr', fold=10)
# Explore more parameters

#### Other models

In [None]:
# Create Other Models
# Reference list of model IDs that can be passed to create_model(), e.g. create_model('knn').
# Kept as comments so this cell is valid Python and can be run without a SyntaxError.
#
# lr       - Logistic Regression
# knn      - K Neighbors Classifier
# nb       - Naive Bayes
# dt       - Decision Tree Classifier
# svm      - SVM - Linear Kernel
# rbfsvm   - SVM - Radial Kernel
# gpc      - Gaussian Process Classifier
# mlp      - MLP Classifier
# ridge    - Ridge Classifier
# rf       - Random Forest Classifier
# qda      - Quadratic Discriminant Analysis
# ada      - Ada Boost Classifier
# gbc      - Gradient Boosting Classifier
# lda      - Linear Discriminant Analysis
# et       - Extra Trees Classifier
# xgboost  - Extreme Gradient Boosting
# lightgbm - Light Gradient Boosting Machine
# catboost - CatBoost Classifier

### <span style='color:DarkBlue'>3.2 Parameter tuning for model</span>

In [None]:
class_model_lr_tuned = tune_model(class_model_lr, n_iter=20, optimize = 'Accuracy')
# Explore more parameters

### <span style='color:DarkBlue'>3.3 Save the trained model </span>

In [None]:
save_model(class_model_lr_tuned, 'LRModel')

### <span style='color:DarkBlue'>3.4 Load the model </span>

In [None]:
LRModel = load_model('LRModel')

### <span style='color:DarkBlue'>3.5 Make prediction on new dataset</span>

#### Read New Data

In [None]:
data = get_data("diabetes")

#### Select some data

In [None]:
# Select top 10 rows
new_data = data.iloc[:10]
new_data

#### Make prediction on new dataset

In [None]:
newPredictions = predict_model(LRModel, data = new_data)
newPredictions

### <span style='color:DarkBlue'>3.6 Save prediction results to csv</span>

In [None]:
newPredictions.to_csv("NewPredictions.csv")
# Email the result to the user

### <span style='color:DarkBlue'>3.7 Plot the Model</span>

In [None]:
# Reference list of plot types and the value to pass as plot_model(..., plot=<name>).
# Kept as comments so this cell is valid Python and can be run without a SyntaxError.
#
# Area Under the Curve         'auc'
# Discrimination Threshold     'threshold'
# Precision Recall Curve       'pr'
# Confusion Matrix             'confusion_matrix'
# Class Prediction Error       'error'
# Classification Report        'class_report'
# Decision Boundary            'boundary'
# Recursive Feat. Selection    'rfe'
# Learning Curve               'learning'
# Manifold Learning            'manifold'
# Calibration Curve            'calibration'
# Validation Curve             'vc'
# Dimension Learning           'dimension'
# Feature Importance           'feature'
# Model Hyperparameter         'parameter'

#### Create Linear Model

In [None]:
lrModel = create_model('lr')

##### Plot AUC

In [None]:
plot_model(lrModel, plot='auc')

##### Plot Precision Recall Curve

In [None]:
plot_model(lrModel, plot='pr')

##### Plot Confusion Matrix

In [None]:
plot_model(lrModel, plot='confusion_matrix')

##### Plot error

In [None]:
plot_model(lrModel, plot='error')

##### Plot Class Report

In [None]:
plot_model(lrModel, plot='class_report')

##### Plot Boundary

In [None]:
plot_model(lrModel, plot='boundary')

##### Plot Learning

In [None]:
plot_model(lrModel, plot='learning')

##### Plot Validation Curve

In [None]:
# Takes a long time to run and may raise an error, so the call is left commented out.
#plot_model(lrModel, plot='vc')

### <span style='color:DarkBlue'>3.8 Feature Importance</span>

##### Plot Feature Importance

In [None]:
plot_model(lrModel, plot='feature')

##### Plot Feature Importance (All)

In [None]:
plot_model(lrModel, plot='feature_all')