In [None]:
import numpy as np
from matplotlib import pyplot as plt
import sklearn

### Matplotlib Example

In [None]:
# plt.plot(x coordinates, y coordinates) joins the points with a continuous
# line; plt.scatter(...) draws each point as a separate marker instead.
line_demo_x = [1, 2, 3]
line_demo_y = [3, 2, 1]
plt.plot(line_demo_x, line_demo_y)
plt.show()

In [None]:
# Same three coordinates as the line example, drawn as individual markers.
scatter_demo_x = [1, 2, 3]
scatter_demo_y = [3, 2, 1]
plt.scatter(scatter_demo_x, scatter_demo_y)
plt.show()

In [None]:
# Line and markers together: both calls draw onto the same axes, and
# plt.show() renders the combined figure.
fig, ax = plt.subplots()
ax.plot([1, 2, 3], [3, 2, 1])
ax.scatter([1, 2, 3], [3, 2, 1])
plt.show()

### Drawing a function

$-\left(\frac{2}{7}x^3 - \frac{9}{2}x^2 + 15x - 10\right)$

In [None]:
def foo(x):
    """Evaluate -(2/7*x^3 - 9/2*x^2 + 15*x - 10) at x.

    Works element-wise on NumPy arrays as well as on plain scalars.
    """
    cubic = 2 / 7 * x**3 - 9 / 2 * x**2 + 15 * x - 10.0
    return -cubic


# 100 evenly spaced x positions covering [0, 10], used to draw foo smoothly.
x_line = np.linspace(0, 10, 100)

# Quiz: Draw the function foo using x_line

# Evaluate foo at every x position, then join the results with a line.
y_line = foo(x_line)
fig, ax = plt.subplots()
ax.plot(x_line, y_line)
plt.show()


In [None]:
# Take `sample_size` (10) evenly spaced points of foo on the domain [0, 10],
# add Gaussian noise with mu=noise_mu (0) and sigma=noise_sigma (1),
# and visualize the noisy samples.

sample_size = 10
noise_mu, noise_sigma = 0, 1
x_sample = np.linspace(0, 10, sample_size)
np.random.seed(200)  # fixed seed so the noisy samples are reproducible
y_sample = foo(x_sample) + np.random.normal(loc=noise_mu, scale=noise_sigma, size=sample_size)

plt.scatter(x_sample, y_sample)
plt.show()

In [None]:
# put together?

# Noise-free curve as a line, noisy samples as markers, on shared axes.
fig, ax = plt.subplots()
ax.plot(x_line, y_line)
ax.scatter(x_sample, y_sample)
plt.show()

## Linear Regression

In the previous section, we first defined the target function, `foo`, and added Gaussian noise to the sampled data points. However, in real-world scenarios, the task is to infer the underlying data distribution function from noisy data points (training data) and predict the $y$-values for new data points. This time, we will work on the task of restoring the original function, `foo`, from the noisy samples generated earlier.









In [None]:
# Show the array shapes of the inputs and targets, one per line.
print(x_sample.shape, y_sample.shape, sep="\n")

In [None]:
from sklearn.linear_model import LinearRegression

# Why is there an error?
# fit() expects X as a 2-D array of shape (n_samples, n_features), but
# x_sample is 1-D, so scikit-learn rejects it with a ValueError.
# The error is the point of this cell, so it is caught and printed rather
# than left to abort a "Restart & Run All".
lr = LinearRegression()
try:
    lr.fit(x_sample, y_sample)
except ValueError as err:
    print(f"ValueError: {err}")

In [None]:
# scikit-learn estimators take X as a 2-D array (n_samples, n_features);
# reshape(-1, 1) turns the 1-D x_sample into a single-feature column.
from sklearn.linear_model import LinearRegression

lr = LinearRegression()
lr.fit(x_sample.reshape(-1, 1), y_sample)

In [None]:
# Now let's predict

# predict(), like fit(), needs X as a 2-D (n_samples, n_features) array,
# so the 1-D x_sample is reshaped into a single-feature column first.
y_hat = lr.predict(x_sample.reshape(-1, 1))

In [None]:
# Inputs, noisy targets and model predictions, one labelled line each.
print(
    f"X: {x_sample}",
    f"y: {y_sample}",
    f"y_hat: {y_hat}",
    sep="\n",
)

In [None]:
# Mean squared error: the sum of squared residuals divided by the number
# of samples.
residuals = y_hat - y_sample
squared_error_total = (residuals ** 2).sum()
mse = squared_error_total / x_sample.size
print(mse)

In [None]:
# Predictions at the sample x positions (line) against the noisy samples
# (orange markers).
fig, ax = plt.subplots()
ax.plot(x_sample, y_hat)
ax.scatter(x_sample, y_sample, color='orange')
plt.show()

In [None]:
# Adding Legend
# Each artist gets a label=...; plt.legend() then collects those labels
# into the legend box.
plt.plot(x_sample, y_hat, label='Predicted Function')  # Add label for the line
plt.scatter(x_sample, y_sample, color='orange', label='Actual Data')  # Add label for the scatter points
plt.legend()  # Display the legend
plt.show()

### Iris Dataset

The Iris dataset is a well-known dataset in machine learning and statistics. It contains 150 samples from three species of Iris flowers: Iris setosa, Iris versicolor, and Iris virginica. Each sample includes four features: sepal length, sepal width, petal length, and petal width, all measured in centimeters. These features are used to classify the species of the iris flowers.

In [None]:
from sklearn.datasets import load_iris

# Load the dataset, then pull out the feature matrix and the label vector.
iris = load_iris()
X, y = iris.data, iris.target

In [None]:
# Shapes of the feature matrix and the label vector, one per line.
print(X.shape, y.shape, sep="\n")

### Train Test Split

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as a test set; random_state=0 fixes the
# shuffle so the split is the same on every run.
split_parts = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=0,
)
X_train, X_test, y_train, y_test = split_parts

### Classifiers

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier as DTC

# Every model is trained on the first two feature columns only.
X_train_2d = X_train[:, :2]

# Logistic regression with a fixed random_state.
logistic = LogisticRegression(random_state=1234)
logistic.fit(X_train_2d, y_train)

# Support vector classifier with a degree-3 polynomial kernel.
svc = SVC(C=1.0, kernel='poly', degree=3)
svc.fit(X_train_2d, y_train)

# Decision tree limited to depth 2, with a fixed random_state.
tree = DTC(max_depth=2, random_state=1234)
tree.fit(X_train_2d, y_train)

In [None]:
# Use the model's predict() to calculate the test accuracy:
# the fraction of test labels that the SVM predicts correctly.

X_test_2d = X_test[:, :2]
y_svc = svc.predict(X_test_2d)
print(y_svc)
svc_accuracy = np.mean(y_svc == y_test)
print(svc_accuracy)

In [None]:
# A simpler way: score() predicts on the given features and returns the
# accuracy against the given labels.

svc_test_accuracy = svc.score(X_test[:, :2], y_test)
print(svc_test_accuracy)
# print(tree.score(X_test[:, :2], y_test))
# print(logistic.score(X_test[:, :2], y_test))

In [None]:
# Predict the test labels with each model, using the same first two
# feature columns the models were trained on.
y_logistic = logistic.predict(X_test[:, :2])
y_svc = svc.predict(X_test[:, :2])
y_tree = tree.predict(X_test[:, :2])

# One panel per model plus the true labels; the points are identical in
# every panel and only the colouring (c=...) changes.
panels = [
    ('Logistic Regression', y_logistic),
    ('SVM', y_svc),
    ('Decision Tree', y_tree),
    ('Ground Truth', y_test),
]

plt.figure(figsize=(20, 5))

for position, (panel_title, point_labels) in enumerate(panels, start=1):
    plt.subplot(1, len(panels), position)
    plt.title(panel_title)
    plt.scatter(X_test[:, 0], X_test[:, 1], c=point_labels)

plt.show()

# Adding Decision boundary Using AI504 Chatbot?