# DECISION TREES

In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load the Iris dataset: X is the feature matrix, y the class labels.
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples for evaluation; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Seed the tree as well: scikit-learn permutes the candidate features at each
# split, so without random_state two runs can produce different trees even
# though the train/test split itself is fixed.
clf = DecisionTreeClassifier(criterion='gini', random_state=42)

clf.fit(X_train, y_train)

# Predict the held-out samples and preview the first ten predicted labels.
y_pred = clf.predict(X_test)
print(y_pred[0:10])

# Fraction of held-out samples whose predicted label matches the true label.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

[1 0 2 1 1 0 1 2 1 1]
Accuracy: 1.0


<hr>

# RANDOM FOREST

In [4]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Reload Iris so this cell does not depend on variables from earlier cells.
iris = load_iris()
X, y = iris.data, iris.target

# 80/20 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build a seeded forest of 100 trees and fit it in one step
# (fit() returns the estimator itself).
clf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Preview the first ten predictions on the held-out set.
y_pred = clf.predict(X_test)
print(y_pred[:10])

# Share of held-out samples classified correctly.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

[1 0 2 1 1 0 1 2 1 1]
Accuracy: 1.0


<hr>

# BAGGING and BOOSTING

In [7]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier

# Load Iris and make a seeded 70/30 split used by the ensemble cells below.
iris = load_iris()
X = iris.data
y = iris.target

xtrain, xtest, ytrain, ytest = train_test_split(
    X, y, test_size=0.3, random_state=101)


# BAGGING
# The base learner is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, so using either keyword breaks on some versions. The first
# positional argument is the base learner in both old and new releases.
bag = BaggingClassifier(DecisionTreeClassifier(criterion='entropy'),
                        n_estimators=10, random_state=101)
bag.fit(xtrain, ytrain)

# Preview the first ten predictions on the held-out set.
preds = bag.predict(xtest)
print(preds[0:10])

[0 0 0 1 1 2 1 1 2 0]




In [8]:
# ADABOOST
# Boost ten entropy-criterion decision trees. The base learner is passed
# positionally because its keyword was renamed from `base_estimator` to
# `estimator` in scikit-learn 1.2 (old name removed in 1.4); the positional
# form works on both old and new releases.
abo = AdaBoostClassifier(DecisionTreeClassifier(criterion='entropy'),
                         n_estimators=10, random_state=101)
abo.fit(xtrain, ytrain)

# Preview the first ten predictions on the held-out set.
preds = abo.predict(xtest)
print(preds[0:10])

[0 0 0 1 1 2 1 1 2 0]




In [9]:
# GRADIENT BOOST
# Ten seeded boosting stages, fitted in one step (fit() returns the estimator).
gbo = GradientBoostingClassifier(n_estimators=10, random_state=101).fit(xtrain, ytrain)

# Preview the first ten predictions on the held-out set.
preds = gbo.predict(xtest)
print(preds[:10])

[0 0 0 1 1 2 1 1 2 0]


In [11]:
import xgboost as xgb

# Three-class XGBoost classifier that outputs hard class labels.
model = xgb.XGBClassifier(
    objective='multi:softmax',
    num_class=3,
    seed=42,
)

# NOTE: X_train / X_test / y_train / y_test are the 80/20 split created in an
# earlier cell (random_state=42), not the xtrain / xtest split of this section.
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Share of held-out samples classified correctly.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 1.0


<hr>

# DIMENSIONALITY REDUCTION

In [12]:
import numpy as np
from sklearn.decomposition import PCA

# Toy dataset: four samples with three features each.
data = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
])

# PRINCIPAL COMPONENT ANALYSIS

# Fit a two-component PCA (fit() returns the estimator), then project the
# same samples onto the learned components.
pca = PCA(n_components=2).fit(data)
pca_transformed_data = pca.transform(data)

# Report the shapes before and after the projection, then the projected values.
print("Original data shape:", data.shape)
print("Transformed data shape:", pca_transformed_data.shape)
print("Transformed data:", pca_transformed_data)

Original data shape: (4, 3)
Transformed data shape: (4, 2)
Transformed data: [[-7.79422863e+00 -1.66533454e-15]
 [-2.59807621e+00 -5.55111512e-16]
 [ 2.59807621e+00  5.55111512e-16]
 [ 7.79422863e+00  1.66533454e-15]]


In [None]:
# LINEAR DISCRIMINANT ANALYSIS

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# LDA is supervised, so it needs exactly one class label per row of `data`
# (four rows -> four labels). The first two samples form class 0 and the
# last two form class 1.
labels = [0, 0, 1, 1]

# LDA can produce at most min(n_classes - 1, n_features) components; with two
# classes that limit is 1, so asking for 2 components would raise an error.
lda = LinearDiscriminantAnalysis(n_components=1)
lda.fit(data, labels)
lda_transformed_data = lda.transform(data)

# Report the shapes before and after the projection, then the projected values.
print("Original data shape:", data.shape)
print("Transformed data shape:", lda_transformed_data.shape)
print("Transformed data:", lda_transformed_data)

In [13]:
# SINGULAR VALUE DECOMPOSITION

from sklearn.decomposition import TruncatedSVD

# Full SVD of the data matrix. np.linalg.svd returns the right singular
# vectors already transposed, so each ROW of `V` is one right singular vector.
U, s, V = np.linalg.svd(data)

print("Original data shape:", data.shape)
print("Singular values:", s)
print("Left singular vectors (U):", U)
print("Right singular vectors (V):", V)


# SVD ALTERNATE
# TruncatedSVD uses a randomized solver by default, so it is seeded here to
# make the two-component projection reproducible between runs.
tsvd = TruncatedSVD(n_components=2, random_state=42)
tsvd.fit(data)
transformed_svd = tsvd.transform(data)
print(transformed_svd)

Original data shape: (4, 3)
Singular values: [2.54624074e+01 1.29066168e+00 2.40694596e-15]
Left singular vectors (U): [[-0.14087668 -0.82471435  0.53999635 -0.09167299]
 [-0.34394629 -0.42626394 -0.65166613  0.52472017]
 [-0.54701591 -0.02781353 -0.31665681 -0.77442137]
 [-0.75008553  0.37063688  0.42832658  0.34137419]]
Right singular vectors (V): [[-0.50453315 -0.5745157  -0.64449826]
 [ 0.76077568  0.05714052 -0.64649464]
 [-0.40824829  0.81649658 -0.40824829]]
[[ 3.58705934  1.06442721]
 [ 8.75770068  0.55016253]
 [13.92834202  0.03589786]
 [19.09898335 -0.47836682]]


<hr>

# BACKPROPAGATION

In [None]:
import numpy as np


def sigmoid(x):
    """Apply the logistic function 1 / (1 + e^(-x)) element-wise.

    Args:
        x: A scalar or NumPy array of pre-activation values.

    Returns:
        The logistic of ``x``, with the same shape as the input.
    """
    decay = np.exp(-x)
    return 1 / (1 + decay)


def sigmoid_derivative(x):
    """Return x * (1 - x), the sigmoid's slope expressed via its output.

    The derivative of the logistic function s(z) equals s(z) * (1 - s(z)),
    so this function gives that slope only when ``x`` is already a sigmoid
    OUTPUT (an activation), not the raw pre-activation value.

    Args:
        x: A scalar or NumPy array of sigmoid activations.

    Returns:
        ``x * (1 - x)``, with the same shape as the input.
    """
    complement = 1 - x
    return x * complement


# Training inputs: four samples; the third column is constant 1 in every row.
X = np.array([[0, 0, 1],
              [0, 1, 1],
              [1, 0, 1],
              [1, 1, 1]])

# Targets: XOR of the first two input columns.
y = np.array([[0],
              [1],
              [1],
              [0]])

# Seed NumPy's global RNG so the random initial weights, and therefore the
# trained output, are the same on every run.
np.random.seed(1)

# RANDOM WEIGHTS
# Uniform in [-1, 1): w1 maps 3 inputs -> 4 hidden units, w2 maps 4 hidden -> 1 output.
w1 = 2 * np.random.random((3, 4)) - 1
w2 = 2 * np.random.random((4, 1)) - 1

num_iterations = 60000

for i in range(num_iterations):

    # Forward propagation
    layer_0 = X
    layer_1 = sigmoid(np.dot(layer_0, w1))   # hidden activations, shape (4, 4)
    layer_2 = sigmoid(np.dot(layer_1, w2))   # network output, shape (4, 1)

    # Calculate error
    layer_2_error = y - layer_2

    # Backpropagation
    # Scale each error by the local sigmoid slope to get the layer's delta.
    layer_2_delta = layer_2_error * sigmoid_derivative(layer_2)
    # The output delta flows back to the hidden layer through w2, the weights
    # that connect hidden -> output (not through w1).
    layer_1_error = layer_2_delta.dot(w2.T)
    layer_1_delta = layer_1_error * sigmoid_derivative(layer_1)

    # Update weights
    # Each weight matrix is adjusted by (its input activations)^T . (delta of
    # the layer it feeds): w2 is fed by layer_1, w1 is fed by layer_0.
    w2 += layer_1.T.dot(layer_2_delta)
    w1 += layer_0.T.dot(layer_1_delta)

# layer_2 holds the output of the final forward pass.
print("Output after training:")
print(layer_2)


<hr>