**Setting Up Environment**

In [1]:
! pip install pycaret[full]

Collecting pycaret[full]
[?25l  Downloading https://files.pythonhosted.org/packages/30/4b/c2b856b18c0553238908f34d53e6c211f3cc4bfa13a8e8d522567a00b3d7/pycaret-2.3.0-py3-none-any.whl (261kB)
[K     |████████████████████████████████| 266kB 5.3MB/s 
Collecting yellowbrick>=1.0.1
[?25l  Downloading https://files.pythonhosted.org/packages/3a/15/58feb940b6a2f52d3335cccf9e5d00704ec5ba62782da83f7e2abeca5e4b/yellowbrick-1.3.post1-py3-none-any.whl (271kB)
[K     |████████████████████████████████| 276kB 29.7MB/s 
Collecting scikit-learn==0.23.2
[?25l  Downloading https://files.pythonhosted.org/packages/f4/cb/64623369f348e9bfb29ff898a57ac7c91ed4921f228e9726546614d63ccb/scikit_learn-0.23.2-cp37-cp37m-manylinux1_x86_64.whl (6.8MB)
[K     |████████████████████████████████| 6.8MB 28.7MB/s 
Collecting Boruta
[?25l  Downloading https://files.pythonhosted.org/packages/b2/11/583f4eac99d802c79af9217e1eff56027742a69e6c866b295cce6a5a8fc2/Boruta-0.3-py3-none-any.whl (56kB)
[K     |████████████████████

In [2]:
# Switch PyCaret into its Colab mode; the call prints "Colab mode enabled."
# as confirmation.
# NOTE(review): presumably needed so PyCaret's interactive output renders
# inside Google Colab — confirm against the PyCaret docs.
from pycaret.utils import enable_colab
enable_colab()

Colab mode enabled.


**Importing Dataset**

In [9]:
# Load the 'traffic' sample dataset through PyCaret's dataset helper.
# The cell ends in an assignment, so the preview of the first rows shown
# below is displayed by get_data itself, not by the notebook's last-expression
# display.
from pycaret.datasets import get_data
dataset = get_data('traffic')

Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,Rush Hour,traffic_volume
0,,288.28,0.0,0.0,40,Clouds,1,5545
1,,289.36,0.0,0.0,75,Clouds,0,4516
2,,289.58,0.0,0.0,90,Clouds,0,4767
3,,290.13,0.0,0.0,90,Clouds,0,5026
4,,291.14,0.0,0.0,75,Clouds,0,4918


In [10]:
# Check the size of the full dataset as a (rows, columns) tuple before
# splitting it.
dataset.shape

(48204, 8)

In [11]:
# Split the dataset into a 70% modeling sample and a 30% hold-out ("unseen")
# set that the model never sees during setup/training.
#
# The sample is seeded so that Restart & Run All reproduces the same split;
# without random_state every run draws different rows and every downstream
# output changes. (The outputs recorded in this notebook came from an
# unseeded run, so row-level values will differ once this cell is re-run.)
SPLIT_SEED = 42

data = dataset.sample(frac=0.7, random_state=SPLIT_SEED)
# Everything not drawn into `data` becomes the hold-out set. This relies on
# the dataset index being unique; the assertion below guards that assumption.
data_unseen = dataset.drop(data.index)

# Re-number both frames from 0 by reassignment instead of inplace=True, so
# the cell is idempotent and does not mutate objects other cells may hold.
data = data.reset_index(drop=True)
data_unseen = data_unseen.reset_index(drop=True)

# The two parts must partition the original dataset exactly.
assert len(data) + len(data_unseen) == len(dataset), \
    "modeling and unseen splits do not add up to the full dataset"

print(f'Data for Modeling: {data.shape}')
print(f'Unseen Data For Predictions: {data_unseen.shape}')

Data for Modeling: (33743, 8)
Unseen Data For Predictions: (14461, 8)


In [12]:
# NOTE(review): the star import is what brings setup, create_model,
# assign_model, plot_model, predict_model, save_model and load_model into the
# notebook namespace for the cells below. It is kept as-is because other
# cells may rely on additional names from it; prefer an explicit import list
# once the full set of used functions is known.
from pycaret.anomaly import *

# Initialise the anomaly-detection experiment on the modeling sample.
# session_id is fixed so the experiment is reproducible: when it is omitted
# PyCaret picks a random one (8240 in the recorded run), and that value is
# what the trained model reports as its random_state. Reusing 8240 keeps the
# recorded setup summary and model repr valid.
exp_ano101 = setup(data, session_id=8240)

Unnamed: 0,Description,Value
0,session_id,8240
1,Original Data,"(33743, 8)"
2,Missing Values,False
3,Numeric Features,5
4,Categorical Features,3
5,Ordinal Features,False
6,High Cardinality Features,False
7,High Cardinality Method,
8,Transformed Data,"(33743, 30)"
9,CPU Jobs,-1


**Create Model**


In [13]:
# Train an Isolation Forest ('iforest') on the data registered by setup().
# No extra arguments are passed, so the model uses the library defaults; the
# printed repr below shows the resulting hyperparameters (e.g. the
# contamination value and the random_state).
iforest = create_model('iforest')
# print() gives the plain-text repr of the estimator for inspection.
print(iforest)

IForest(behaviour='new', bootstrap=False, contamination=0.05,
    max_features=1.0, max_samples='auto', n_estimators=100, n_jobs=-1,
    random_state=8240, verbose=0)


**Assign Anomaly Labels**

In [14]:
# Attach the trained model's results to the modeling data: the returned frame
# carries the original columns plus 'Anomaly' and 'Anomaly_Score' (see the
# output below).
# NOTE(review): Anomaly appears to be a 0/1 flag with 1 marking an outlier —
# confirm against the PyCaret docs.
iforest_results = assign_model(iforest)
# Show only the first rows rather than dumping the whole frame.
iforest_results.head()

Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,Rush Hour,traffic_volume,Anomaly,Anomaly_Score
0,,248.61,0.0,0.0,75,Haze,1,5494,1,0.001494
1,,267.43,0.0,0.0,90,Clouds,1,3449,0,-0.081886
2,,276.5,0.0,0.0,40,Clouds,1,4468,0,-0.075454
3,,296.41,0.0,0.0,1,Clear,1,4717,0,-0.063563
4,,288.62,3.45,0.0,90,Rain,0,6354,1,0.025396


**Analyze Model**

In [15]:
# Visualise the trained model with plot_model's default plot type.
# NOTE(review): which embedding the default uses is not visible here — check
# the plot_model documentation.
plot_model(iforest)

In [16]:
# Same model, rendered with the 'umap' plot type instead of the default, to
# inspect the anomalies in a second projection.
plot_model(iforest, plot = 'umap')

In [17]:
# Score the hold-out rows that were excluded from the modeling sample.
# The result has the input columns plus 'Anomaly' and 'Anomaly_Score', the
# same layout assign_model produced for the modeling data.
unseen_predictions = predict_model(iforest, data=data_unseen)
unseen_predictions.head()

Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,Rush Hour,traffic_volume,Anomaly,Anomaly_Score
0,,288.28,0.0,0.0,40,Clouds,1,5545,0,-0.07168
1,,284.63,0.0,0.0,1,Clear,0,506,0,-0.139766
2,,283.47,0.0,0.0,1,Clear,0,321,0,-0.137538
3,,278.12,0.0,0.0,1,Clear,1,6511,0,-0.030766
4,,291.97,0.0,0.0,1,Clear,0,5097,0,-0.137659


In [19]:
# Score the modeling data itself with predict_model. In the recorded run the
# first rows match the assign_model output above, which serves as a sanity
# check that both paths give the same labels and scores on the same rows.
data_predictions = predict_model(iforest, data = data)
data_predictions.head()

Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,Rush Hour,traffic_volume,Anomaly,Anomaly_Score
0,,248.61,0.0,0.0,75,Haze,1,5494,1,0.001494
1,,267.43,0.0,0.0,90,Clouds,1,3449,0,-0.081886
2,,276.5,0.0,0.0,40,Clouds,1,4468,0,-0.075454
3,,296.41,0.0,0.0,1,Clear,1,4717,0,-0.063563
4,,288.62,3.45,0.0,90,Rain,0,6354,1,0.025396


In [20]:
# Persist the trained model under the name 'traffic'. Per the message and
# repr below, what gets saved is the whole transformation pipeline with the
# trained model as its last step, not just the estimator.
# NOTE(review): the on-disk filename/extension derived from 'traffic' is not
# shown here — confirm before relying on a specific path.
save_model(iforest,'traffic')

Transformation Pipeline and Model Succesfully Saved


(Pipeline(memory=None,
          steps=[('dtypes',
                  DataTypes_Auto_infer(categorical_features=[],
                                       display_types=True, features_todrop=[],
                                       id_columns=[], ml_usecase='regression',
                                       numerical_features=[],
                                       target='UNSUPERVISED_DUMMY_TARGET',
                                       time_features=[])),
                 ('imputer',
                  Simple_Imputer(categorical_strategy='most frequent',
                                 fill_value_categorical=None,
                                 fill_value_numerical=None...
                 ('fix_perfect', 'passthrough'),
                 ('clean_names', Clean_Colum_Names()),
                 ('feature_select', 'passthrough'), ('fix_multi', 'passthrough'),
                 ('dfs', 'passthrough'), ('pca', 'passthrough'),
                 ['trained_model',
                  IFo

In [21]:
# Reload the pipeline saved above under the name 'traffic'; a confirmation
# message is printed on success.
# NOTE(review): if the saved artifact is pickle-based, only load files from
# trusted sources — confirm the serialization format.
traffic=load_model('traffic')

Transformation Pipeline and Model Successfully Loaded


In [22]:
# Display the reloaded object to confirm it is the full preprocessing
# Pipeline with the trained IForest as the final 'trained_model' step.
traffic


Pipeline(memory=None,
         steps=[('dtypes',
                 DataTypes_Auto_infer(categorical_features=[],
                                      display_types=True, features_todrop=[],
                                      id_columns=[], ml_usecase='regression',
                                      numerical_features=[],
                                      target='UNSUPERVISED_DUMMY_TARGET',
                                      time_features=[])),
                ('imputer',
                 Simple_Imputer(categorical_strategy='most frequent',
                                fill_value_categorical=None,
                                fill_value_numerical=None...
                ('fix_perfect', 'passthrough'),
                ('clean_names', Clean_Colum_Names()),
                ('feature_select', 'passthrough'), ('fix_multi', 'passthrough'),
                ('dfs', 'passthrough'), ('pca', 'passthrough'),
                ['trained_model',
                 IForest(behaviour='ne