In [None]:
# Mount Google Drive into this Colab runtime at /content/drive so that files
# stored on Drive can be read with ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# installing modules/libraries
!pip install mlxtend
!pip install scikit-plot

In [None]:
# importing important modules/libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scikitplot.metrics as splt

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier


In [None]:
# Location of the CSV file on the mounted Drive.
# NOTE(review): this value looks like a placeholder — confirm the real path
# before running.
Dataset = "/content/drive/Youdataset"

# Parse the CSV into the base dataframe used by the rest of the notebook.
df = pd.read_csv(filepath_or_buffer=Dataset)

In [None]:
# Preview the first five rows of the dataframe exactly as loaded.
df.head(n=5)

# **Randomization**

In [None]:
# Shuffle the rows: frac=1 draws 100% of the rows, in random order.
# random_state fixes the shuffle so that the row order — and therefore the
# train/test split and every metric computed later — is the same on each
# Restart & Run All.
random_df = df.sample(frac=1, random_state=42)

In [None]:
# After shuffling, the rows still carry their original index labels.
# reset_index replaces them with a fresh sequential 0..n-1 index;
# drop=True discards the old labels instead of keeping them as a new column.
random_df = random_df.reset_index(drop=True)

In [None]:
# Preview the first five rows of the shuffled, re-indexed dataframe.
random_df.head(n=5)

In [None]:
# Number of missing values in each column.
random_df.isnull().sum()

In [None]:
# Summary statistics for the srv_count column.
random_df.loc[:, 'srv_count'].describe()

In [None]:
# Box plot of srv_count to inspect its spread before imputing missing values.
random_df.boxplot(['srv_count'])

# **Missing data imputation**

In [None]:
# Replace missing srv_count values with the column median
# (the median is computed over the non-missing values of the whole column).
srv_count_median = random_df['srv_count'].median()
random_df['srv_count'] = random_df['srv_count'].fillna(srv_count_median)

# Sanity check: the number of missing values left in srv_count.
random_df['srv_count'].isna().sum()

# **Data Encoding**

In [None]:
# One-hot encode the 'class' column: one indicator column per distinct value,
# named 'class_<value>'.
class_only = random_df[['class']]
encoding = pd.get_dummies(data=class_only)
encoding

In [None]:
# Append the indicator columns to the right of the shuffled dataframe.
encoded_df = pd.concat(objs=[random_df, encoding], axis='columns')

In [None]:
# Remove the original text label and the 'class_normal' indicator.
# NOTE(review): presumably this leaves a single indicator column for the
# non-normal class as the last column, which the next section uses as the
# target — confirm that 'class' has exactly two distinct values.
encoded_df = encoded_df.drop(columns=['class', 'class_normal'])
encoded_df


# **Splitting the randomized dataframe into train and test sets**

In [None]:
# Features are every column except the last; the target is the last column.
X, y = encoded_df.iloc[:, :-1], encoded_df.iloc[:, -1]

In [None]:
# Hold out 20% of the rows as the test set; the remaining rows are used for training.
# random_state keeps the partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=43,
)

# **Providing data to Models**

In [None]:
# Decision Tree baseline: fit on the training split, evaluate on the test split.
# random_state is fixed because the tree randomly permutes features when
# searching for splits; without it the fitted tree (and the metrics below)
# can differ between runs.
clf = DecisionTreeClassifier(random_state=42)
clf = clf.fit(X_train, y_train)
dt_pred = clf.predict(X_test)

acc1 = accuracy_score(y_test, dt_pred)
# Precision is averaged only over the labels that actually appear in the
# predictions (labels=np.unique(dt_pred)); recall and F1 below are averaged
# over all labels present in y_test or dt_pred.
# NOTE(review): the two label sets differ only if some class is never
# predicted — confirm this asymmetry is intended.
pre = precision_score(y_test, dt_pred, average='weighted', labels=np.unique(dt_pred))
rec = recall_score(y_test, dt_pred, average='weighted')
f1 = f1_score(y_test, dt_pred, average='weighted')

print('ACCURACY : ', acc1)
print('PRECISON : ', pre)
print('RECALL : ', rec)
print('F1_SCORE : ', f1)

In [None]:
# Confusion matrix of the decision tree's predictions against the test labels.
splt.plot_confusion_matrix(y_true=y_test, y_pred=dt_pred)

# **Bagging / Bootstrap Aggregation**

In [None]:
# Define the base estimator (in this case, a decision tree)
base_estimator = DecisionTreeClassifier()
# Define the bagging classifier
bagging = BaggingClassifier(base_estimator=base_estimator,
                            n_estimators=10,
                            max_samples=0.5,
                            max_features=0.5)
# Train the bagging classifier
bagging.fit(X, y)
# Make predictions using the bagging classifier
predictions = bagging.predict(X)
predictions #This will output an array of predicted labels for each data point in the dataset.
# To compare the predicted labels with the actual labels, you can also print the y variable that contains
# the true labels:

In [None]:
y

In [None]:
# Calculate the accuracy of the bagging classifier
accuracy = accuracy_score(y, predictions)

# Print the accuracy
print("Accuracy:", accuracy)

# **Voting / Stacking**

In [None]:
# Stacking: combine different kinds of base models and let a final estimator
# learn how to aggregate their predictions.
model1 = LogisticRegression()
model2 = KNeighborsClassifier()
# The tree is seeded so the stacked result is repeatable, consistent with the
# seeded AdaBoost and Random Forest models later in the notebook.
model3 = DecisionTreeClassifier(random_state=42)

# The final LogisticRegression is trained on the base models' predictions.
stacking = StackingClassifier(
    estimators=[('lr', model1), ('knn', model2), ('dt', model3)],
    final_estimator=LogisticRegression(),
)

# Fit on the training split, then predict the held-out test split.
stacking.fit(X_train, y_train)
predictions = stacking.predict(X_test)

# Share of test rows whose predicted label matches the true label.
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

# **Boosting**

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost over 100 decision stumps (trees of depth 1).
# The base learner is passed positionally because its keyword was renamed from
# `base_estimator` to `estimator` in newer scikit-learn releases; the first
# positional slot is the same argument under both names.
ada = AdaBoostClassifier(DecisionTreeClassifier(max_depth=1), n_estimators=100, random_state=42)

# Fit on the training split.
ada.fit(X_train, y_train)

# Predict the held-out test split.
y_pred = ada.predict(X_test)

# Share of test rows whose predicted label matches the true label.
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)

# **Random Forest**

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random Forest of 100 shallow trees (depth at most 2), seeded for repeatability.
# fit() returns the fitted estimator, so construction and training are chained.
rf = RandomForestClassifier(
    n_estimators=100,
    max_depth=2,
    random_state=42,
).fit(X_train, y_train)

# Predicted labels for the held-out test split.
y_pred = rf.predict(X_test)

# Share of test rows whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print('Accuracy:', accuracy)