# <span style='color:Red'> 1. Regression: Basics </span>

#### Get the installed PyCaret version

In [None]:
# Import PyCaret's version helper and call it to report the installed version.
from pycaret.utils import version
version()

#### Loading dataset from pycaret

In [None]:
from pycaret.datasets import get_data

#### Get the list of datasets available in pycaret

In [None]:
# Internet connection is required.
# Request the 'index' entry (per the heading above, the list of datasets
# available in PyCaret) and keep it in dataSets.
dataSets = get_data('index')
dataSets

#### Get boston dataset

In [None]:
# Internet connection is required.
# Load the "boston" dataset into boston_df.
boston_df = get_data("boston")
# This is a regression dataset: the values in the medv column are continuous.

#### Get the dimensions of the dataset

In [None]:
# Print the dataset's shape (presumably (rows, columns) -- assumes boston_df
# is a pandas DataFrame; confirm against get_data's return type).
print(boston_df.shape)

#### Remove duplicates

In [None]:
# Report the shape before and after removing duplicate rows.
# drop_duplicates() returns a new DataFrame and leaves the original untouched,
# so the result has to be assigned back; otherwise no rows are ever removed
# and both prints show the same shape.
print(boston_df.shape)  # shape before de-duplication
boston_df = boston_df.drop_duplicates()
print(boston_df.shape)  # shape after de-duplication

### <span style='color:DarkBlue'>1.2 Parameter setting for all regression models</span>
- Train/Test division
- Sampling
- Normalization
- Transformation
- PCA (Dimensionality Reduction)
- Handling of Outliers
- Feature Selection

#### Setup parameters for regression models (defaults)

In [None]:
from pycaret.regression import *

# Configure the regression experiment on boston_df with medv as the target,
# leaving every other setup option at its default. The result is kept in reg.
reg = setup(
    data=boston_df,
    target='medv',
)

### <span style='color:DarkBlue'>1.3 Run and compare the Model Performance</span>

#### Comparing models

In [None]:
# Compare the available regression models for the experiment configured by
# setup() above. The return value is not stored in a variable.
compare_models()
# Explore more parameters

### <span style='color:DarkBlue'>1.4 Plot the Best Model</span>

##### Plot Residuals

In [None]:
# Create a CatBoost model. The 'catboost' ID is hard-coded here rather than
# taken from the compare_models() result.
catboostModel = create_model('catboost')
# Residuals plot for that model.
plot_model(catboostModel, plot='residuals')

##### Plot Error (Scatter Plot)

In [None]:
# Prediction-error plot (scatter) for the CatBoost model created earlier.
plot_model(catboostModel, plot='error')

##### Plot Learning Curve

In [None]:
# Learning-curve plot for the CatBoost model created earlier.
plot_model(catboostModel, plot='learning')

##### Plot Validation Curve

In [None]:
# Takes a long time and may raise an error, so the call below is left disabled.
#plot_model(catboostModel, plot='vc')

### <span style='color:DarkBlue'>2.1 Model Performance using Data Normalization</span>

In [None]:
# Run setup with normalization switched on (z-score method), then compare
# models under that configuration.
# normalize_method options: {zscore, minmax, maxabs, robust}
setup(
    data=boston_df,
    target='medv',
    normalize=True,
    normalize_method='zscore',
)
compare_models()

### <span style='color:DarkBlue'>2.2 Model Performance using Feature Selection</span>

In [None]:
# Run setup with feature selection switched on (threshold 0.8), then compare
# models under that configuration.
setup(
    data=boston_df,
    target='medv',
    feature_selection=True,
    feature_selection_threshold=0.8,
)
compare_models()

### <span style='color:DarkBlue'>2.3 Model Performance using Outlier Removal</span>

In [None]:
# Run setup with outlier removal switched on (threshold 0.05), then compare
# models under that configuration.
setup(
    data=boston_df,
    target='medv',
    remove_outliers=True,
    outliers_threshold=0.05,
)
compare_models()

### <span style='color:DarkBlue'>2.4 Model Performance using Transformation</span>

In [None]:
# Run setup with the 'yeo-johnson' transformation switched on, then compare
# models under that configuration.
setup(
    data=boston_df,
    target='medv',
    transformation=True,
    transformation_method='yeo-johnson',
)
compare_models()

### <span style='color:DarkBlue'>2.5 Model Performance using PCA</span>

In [None]:
# Run setup with PCA switched on ('linear' method), then compare models under
# that configuration.
setup(
    data=boston_df,
    target='medv',
    pca=True,
    pca_method='linear',
)
compare_models()

### <span style='color:DarkBlue'>3.1 Running single model</span>

In [None]:
# Create a single CatBoost model with fold=10.
# NOTE(review): the most recent setup() call before this cell enables pca=True.
# If setup() state carries over between cells, this model (and everything
# derived from it below) is built under that PCA configuration -- confirm
# this is intended, or re-run setup() with the desired options first.
reg_model_catboost = create_model('catboost', fold=10)
# Explore more parameters

#### Other models

In [None]:
# Create other models by passing one of these IDs to create_model():
# Linear Regression             'lr'                   linear_model.LinearRegression
# Lasso Regression              'lasso'                linear_model.Lasso
# Ridge Regression              'ridge'                linear_model.Ridge
# Elastic Net                   'en'                   linear_model.ElasticNet
# Least Angle Regression        'lar'                  linear_model.Lars
# Lasso Least Angle Regression  'llar'                 linear_model.LassoLars
# Orthogonal Matching Pursuit   'omp'                  linear_model.OMP
# Bayesian Ridge                'br'                   linear_model.BayesianRidge
# Automatic Relevance Determ.   'ard'                  linear_model.ARDRegression
# Passive Aggressive Regressor  'par'                  linear_model.PAR
# Random Sample Consensus       'ransac'               linear_model.RANSACRegressor
# TheilSen Regressor            'tr'                   linear_model.TheilSenRegressor
# Huber Regressor               'huber'                linear_model.HuberRegressor
# Kernel Ridge                  'kr'                   kernel_ridge.KernelRidge
# Support Vector Machine        'svm'                  svm.SVR
# K Neighbors Regressor         'knn'                  neighbors.KNeighborsRegressor
# Decision Tree                 'dt'                   tree.DecisionTreeRegressor
# Random Forest                 'rf'                   ensemble.RandomForestRegressor
# Extra Trees Regressor         'et'                   ensemble.ExtraTreesRegressor
# AdaBoost Regressor            'ada'                  ensemble.AdaBoostRegressor
# Gradient Boosting             'gbr'                  ensemble.GradientBoostingRegressor
# Multi Level Perceptron        'mlp'                  neural_network.MLPRegressor
# Extreme Gradient Boosting     'xgboost'              xgboost.readthedocs.io
# Light Gradient Boosting       'lightgbm'             github.com/microsoft/LightGBM
# CatBoost Regressor            'catboost'             https://catboost.ai

### <span style='color:DarkBlue'>3.2 Parameter tuning for model</span>

In [None]:
# Tune the CatBoost model created earlier: 10 iterations, optimizing 'mae'.
# Explore more parameters
reg_model_catboost_tuned = tune_model(
    reg_model_catboost,
    n_iter=10,
    optimize='mae',
)

### <span style='color:DarkBlue'>3.3 Save the trained model </span>

In [None]:
# Persist the tuned CatBoost model under the name 'CatBoostModel'
# (the on-disk file name/extension is decided by save_model).
save_model(reg_model_catboost_tuned, 'CatBoostModel')

### <span style='color:DarkBlue'>3.4 Load the model </span>

In [None]:
# Load the model back using the same name it was saved under.
CatBoostModel = load_model('CatBoostModel')

### <span style='color:DarkBlue'>3.5 Make prediction on new dataset</span>

#### Read New Data

In [None]:
# Load the "boston" dataset again to stand in for new data. Note that this is
# the same dataset that was loaded into boston_df and used for setup().
data = get_data("boston")

#### Select some data

In [None]:
# Select top 10 rows
new_data = data.iloc[:10]
new_data

#### Make prediction on new dataset

In [None]:
# Run the loaded model on the selected rows. new_data is a slice of the full
# dataset, so it still carries every original column.
newPredictions = predict_model(CatBoostModel, data = new_data)
newPredictions

### <span style='color:DarkBlue'>3.6 Scatter plot between actual and predicted</span>

In [None]:
import matplotlib.pyplot as plt

# The second-to-last column of the prediction frame is used as the actual
# value and the last column as the predicted value.
# NOTE(review): this relies on column position -- confirm it matches the
# layout returned by predict_model.
actual = newPredictions.iloc[:, -2]
predicted = newPredictions.iloc[:, -1]

# scatter(x, y): actual values go on the x-axis and predictions on the y-axis,
# so the axis labels must follow the same order.
plt.scatter(actual, predicted)
plt.xlabel('Actual')
plt.ylabel('Predicted')
plt.title('Actual Vs Predicted')

# Save before show() so the figure is written while it still has content.
plt.savefig("result-scatter-plot-lss.jpg", dpi=300)
plt.show()

### <span style='color:DarkBlue'>3.7 Save prediction results to csv</span>

In [None]:
# Write the predictions to NewPredictions.csv (relative path, so it is created
# in the current working directory).
newPredictions.to_csv("NewPredictions.csv")
# TODO: email the result to the user

### <span style='color:DarkBlue'>3.8 Plot the Model</span>

In [None]:
# Plot names that can be passed as plot=... to plot_model():
# Residuals Plot               'residuals'
# Prediction Error Plot        'error'
# Cooks Distance Plot          'cooks'
# Recursive Feat. Selection    'rfe'
# Learning Curve               'learning'
# Validation Curve             'vc'
# Manifold Learning            'manifold'
# Feature Importance           'feature'
# Model Hyperparameter         'parameter'

#### Create RandomForest

In [None]:
# Create a Random Forest model ('rf'); the plot cells that follow use it.
rf = create_model('rf')

##### Plot Residuals

In [None]:
# Residuals plot for the Random Forest model.
plot_model(rf, plot='residuals')

##### Plot Error

In [None]:
# Prediction-error plot for the Random Forest model.
plot_model(rf, plot='error')

##### Plot Cook's Distance

In [None]:
# Cook's distance plot for the Random Forest model.
plot_model(rf, plot='cooks')

##### Plot Recursive Feature Selection

In [None]:
# Takes 3-4 minutes, so the call below is left disabled.
# plot_model(rf, plot='rfe')

##### Plot Learning Curve

In [None]:
# Learning-curve plot for the Random Forest model.
plot_model(rf, plot='learning')

##### Plot Validation Curve

In [None]:
# Validation-curve plot for the Random Forest model.
plot_model(rf, plot='vc')

##### Plot Manifold Learning

In [None]:
# Manifold-learning plot for the Random Forest model.
plot_model(rf, plot='manifold')

##### Plot Model Hyperparameter

In [None]:
# Model-hyperparameter view for the Random Forest model.
plot_model(rf, plot='parameter')

### <span style='color:DarkBlue'>3.9 Feature Importance</span>

#### Feature Importance-1

In [None]:
# Create a CatBoost model and plot its feature importance.
# `model` is rebound in every Feature Importance cell.
model = create_model('catboost')
plot_model(model, plot='feature')

#### Feature Importance-2

In [None]:
# Create an Extra Trees model ('et') and plot its feature importance.
# `model` is rebound in every Feature Importance cell.
model = create_model('et')
plot_model(model, plot='feature')

#### Feature Importance-3

In [None]:
# Create a LightGBM model and plot its feature importance.
# `model` is rebound in every Feature Importance cell.
model = create_model('lightgbm')
plot_model(model, plot='feature')

#### Feature Importance-4

In [None]:
# Create a Gradient Boosting model ('gbr') and plot its feature importance.
# `model` is rebound in every Feature Importance cell.
model = create_model('gbr')
plot_model(model, plot='feature')

#### Feature Importance-5

In [None]:
# Create an XGBoost model and plot its feature importance.
# `model` is rebound in every Feature Importance cell.
model = create_model('xgboost')
plot_model(model, plot='feature')

#### Feature Importance-6

In [None]:
# Create a Random Forest model ('rf') and plot its feature importance.
# `model` is rebound in every Feature Importance cell.
model = create_model('rf')
plot_model(model, plot='feature')