In [3]:
import numpy as np
import pandas as pd
import seaborn as sns

In [4]:
from sklearn.datasets import load_iris

In [6]:
df = sns.load_dataset("iris")

In [8]:
df = df.sample(frac=1)

In [20]:
df.isna().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

In [16]:
col = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

### Checking Skew

In [19]:
for i in col:
    print(f'{i} is {df[i].skew()}')

sepal_length is 0.3149109566369726
sepal_width is 0.31896566471359966
petal_length is -0.27488417975101254
petal_width is -0.10296674764897927


In [21]:
def odiqr(df):
    q1 = df.quantile(0.25)
    q3 = df.quantile(0.75)
    iqr = q3 - q1
    low = q1 - (1.5 * iqr)
    high = q3 + (1.5 * iqr)
    m = df.mean()
    df = df.apply(lambda x : m if x < low else (m if x > high else x ) )
    return df

In [22]:
def odmsd(df):
    m = round(df.mean(),2)
    s = round(df.std(),2)
    low = round(m-(3*s),2)
    high = round(m+(3*s),2)
    ft1 = df[df<low]
    ft2 = df[df>high]
    df = df.map(lambda x : low if x < low else (high if x > high else x ))

In [23]:
for i in col:
    if df[i].skew() <= 0.5:
        odmsd(df[i])
    else:
        df[i] = odiqr(df[i])

### After Skew

In [24]:
for i in col:
    print(f'{i} is {df[i].skew()}')

sepal_length is 0.3149109566369726
sepal_width is 0.31896566471359966
petal_length is -0.27488417975101254
petal_width is -0.10296674764897927


## Encoding With Label Encoder Of df["species"]

In [25]:
from sklearn.preprocessing import LabelEncoder

In [26]:
le = LabelEncoder()

In [29]:
le.classes_

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [35]:
df = pd.concat([df,pd.DataFrame(le.fit_transform(df["species"]),columns=["SPECIES"])],axis=1).drop("species",axis=1)

In [36]:
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,SPECIES
0,5.1,3.5,1.4,0.2,2
1,4.9,3.0,1.4,0.2,2
2,4.7,3.2,1.3,0.2,1
3,4.6,3.1,1.5,0.2,1
4,5.0,3.6,1.4,0.2,1
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,1
146,6.3,2.5,5.0,1.9,2
147,6.5,3.0,5.2,2.0,1
148,6.2,3.4,5.4,2.3,0


In [37]:
X = df.drop("SPECIES",axis=1)

In [39]:
X.sample()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
83,6.0,2.7,5.1,1.6


In [40]:
y = df["SPECIES"]

In [41]:
y.sample()

105    2
Name: SPECIES, dtype: int64

In [217]:
from sklearn.model_selection import train_test_split

In [218]:
Xtrain,Xtest,ytrain,ytest = train_test_split(X,y,test_size=.20)

In [219]:
X.shape , Xtrain.shape , Xtest.shape

((150, 4), (120, 4), (30, 4))

In [220]:
y.shape , ytrain.shape , ytest.shape

((150,), (120,), (30,))

In [221]:
from sklearn.ensemble import RandomForestClassifier

In [222]:
rfc = RandomForestClassifier()

In [223]:
rfc.fit(Xtrain,ytrain)

RandomForestClassifier()

In [224]:
rfc.score(Xtest,ytest)

0.3

### With Feature Scaling

In [225]:
from sklearn.preprocessing import StandardScaler

In [226]:
ss= StandardScaler()

In [227]:
ss.fit(Xtrain)

StandardScaler()

In [228]:
Xtrain_ss = ss.transform(Xtrain)

In [229]:
Xtest_ss = ss.transform(Xtest)

In [230]:
rfc1 = RandomForestClassifier()

In [231]:
rfc1.fit(Xtrain_ss,ytrain)

RandomForestClassifier()

In [232]:
rfc1.score(Xtest_ss,ytest)

0.4

In [240]:
from sklearn.ensemble import BaggingClassifier
from sklearn.svm import SVC

In [234]:
bagcal = BaggingClassifier(SVC(),n_estimators=75)

In [235]:
bagcal.fit(Xtrain,ytrain)

BaggingClassifier(base_estimator=SVC(), n_estimators=75)

In [236]:
bagcal.score(Xtest,ytest)

0.3

In [237]:
bagcal1 = BaggingClassifier(SVC(),n_estimators=75)

In [238]:
bagcal1.fit(Xtrain_ss,ytrain)

BaggingClassifier(base_estimator=SVC(), n_estimators=75)

In [239]:
bagcal1.score(Xtest_ss,ytest)

0.3

In [249]:
from sklearn.linear_model import LogisticRegression

In [243]:
bagcal2 = BaggingClassifier(LogisticRegression(max_iter=1000),n_estimators=75)

In [244]:
bagcal2.fit(Xtrain,ytrain)

BaggingClassifier(base_estimator=LogisticRegression(max_iter=1000),
                  n_estimators=75)

In [245]:
bagcal2.score(Xtest,ytest)

0.3

In [246]:
bagcal3 = BaggingClassifier(LogisticRegression(max_iter=1000),n_estimators=75)

In [247]:
bagcal3.fit(Xtrain_ss,ytrain)

BaggingClassifier(base_estimator=LogisticRegression(max_iter=1000),
                  n_estimators=75)

In [248]:
bagcal3.score(Xtest_ss,ytest)

0.3