In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

In [None]:
# Load the iris dataset and wrap its feature matrix in a DataFrame whose
# columns carry the dataset's own feature names.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
# Preview only the first rows instead of dumping the whole frame.
df.head()

In [None]:
# Summary statistics per column; left as the cell's last expression so the
# notebook renders the DataFrame with its rich table display instead of the
# plain-text form produced by print().
df.describe()

In [None]:
# Show the class label names that ship with the loaded dataset.
iris.target_names

In [None]:
# Count the missing entries in every column of the feature table.
missing_per_column = df.isna().sum()
print(missing_per_column)

In [None]:
# Shape of the label array; useful to compare against the number of rows in df.
iris.target.shape

In [None]:
# Pairwise scatter plots of all features, coloured by class label.
# The labels are attached to a separate copy so that `df` stays a pure
# feature table: re-running earlier cells (e.g. df.describe()) keeps giving
# the same result, and later cells never pick up a stray `target` column.
iris_labeled = df.assign(target=iris.target)
sns.pairplot(iris_labeled, hue='target')
plt.show()

# Linear Regression

In statistics, linear regression is a linear approach to modelling the relationship between a dependent variable and one or more independent variables. Let $X$ be the independent variable and $Y$ be the dependent variable. We will define a linear relationship between these two variables as follows:

$$Y = mX + c$$

where $m$ is the slope of the line and $c$ is its intercept.

In [None]:
# Single-feature regression: petal length is the input, petal width the output.
# Both are pulled out of the DataFrame as plain NumPy arrays.
X = df.loc[:, 'petal length (cm)'].to_numpy()
y = df.loc[:, 'petal width (cm)'].to_numpy()

In [None]:
# Sanity-check the extracted arrays: first their types, then their shapes.
for arr in (X, y):
    print(type(arr))
for arr in (X, y):
    print(arr.shape)


In [None]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Scatter plot of the training pairs, labelled so the figure can be read
# without the surrounding code.
fig, ax = plt.subplots()
ax.scatter(X_train, y_train)
ax.set(
    xlabel='petal length (cm)',
    ylabel='petal width (cm)',
    title='Training data',
)
plt.show()

In [None]:
# Building the model: fit y = m*x + c by batch gradient descent on the
# mean squared error (MSE) of the training set.
m = 0  # slope, initial guess
c = 0  # intercept, initial guess

L = 0.0001  # The learning rate (step size of each update)
epochs = 1000  # The number of iterations to perform gradient descent

n = float(len(X_train))  # Number of training samples

# Performing Gradient Descent
losses = []  # MSE recorded at the start of every epoch
for i in range(epochs):
    y_pred = m * X_train + c  # The current predicted value of y
    residuals = y_train - y_pred

    # Track the same objective whose gradient is followed below (the MSE),
    # so the loss curve is on the scale of the quantity being minimised.
    loss = np.mean(residuals ** 2)
    losses.append(loss)

    # Gradients of the MSE; np.sum keeps the reduction vectorised instead of
    # iterating over the array elements with the Python builtin sum().
    D_m = (-2 / n) * np.sum(X_train * residuals)  # Derivative wrt m
    D_c = (-2 / n) * np.sum(residuals)  # Derivative wrt c

    m = m - L * D_m  # Update m
    c = c - L * D_c  # Update c

    # Progress report every 100 epochs: mean signed residual before the update.
    if i % 100 == 0:
        print(np.mean(residuals))
    

In [None]:
# Visualize the loss recorded at every epoch of gradient descent.
fig, ax = plt.subplots()
ax.plot(losses)
ax.set(xlabel='epoch', ylabel='training loss', title='Gradient descent loss curve')
# plt.show() renders the figure and keeps the Line2D repr out of the output.
plt.show()

In [None]:
# Run the model on the test set.
# The prediction is computed for the whole array at once: no loop variable
# overwrites the feature array `X`, and `y_pred` holds one prediction per
# test sample rather than a single value.
y_pred = m * X_test + c
pred = y_pred.tolist()  # plain list of the same predictions
print(pred)
print(y_test)

In [None]:
# Evaluate on the full vector of test predictions collected in `pred`, so
# every prediction is compared with its own target value.
test_pred = np.asarray(pred)
test_errors = test_pred - y_test

# Calculate the Mean Absolute Error
print("Mean Absolute Error:", np.mean(np.abs(test_errors)))

# Calculate the Mean Squared Error
print("Mean Squared Error:", np.mean(test_errors**2))


In [None]:
# Making predictions
y_pred = m*X_test + c

# Draw the fitted line through the predictions at the smallest and largest
# test inputs. Evaluating the model at those x values keeps each endpoint's
# x and y paired correctly whatever the sign of the slope.
x_line = np.array([X_test.min(), X_test.max()])
y_line = m*x_line + c

fig, ax = plt.subplots()
ax.scatter(X_test, y_test, label='actual')
ax.plot(x_line, y_line, color='red', label='predicted')  # predicted
ax.set(
    xlabel='petal length (cm)',
    ylabel='petal width (cm)',
    title='Fitted line on the test set',
)
ax.legend()
plt.show()

In [None]:
# Exercise statement for the reader; this cell holds only the string literal
# below and performs no computation.
'''
Your task:

Create a LinearRegression class
(Use the OOP Skeleton attached)

Use the class to train and evaluate
a model in the following columns of iris dataset:
# X petal width
# y sepal length

'''

In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from matplotlib import pyplot as plt

from LinearRegressionSkeleton import LinearRegression


# Rebuild the iris feature table and pull out one input / one output column.
# NOTE(review): the task statement asks for X = petal width and
# y = sepal length, whereas this cell uses petal length -> petal width.
# Confirm which pair is intended.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
X = df.loc[:, 'petal length (cm)'].to_numpy()
y = df.loc[:, 'petal width (cm)'].to_numpy()

# Reproducible 80/20 train/test split.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Train the imported LinearRegression class, then show its test-set
# predictions next to the true targets as the cell's output.
linReg = LinearRegression()
linReg.fit(X_train, Y_train)
(linReg.predict(X_test), Y_test)



1.1833333333333333
-0.037049758617444216
-0.06751312148702707
-0.06608896243537175
-0.06388378574669487
-0.0617306814483276
-0.059649566643510905
-0.05763859655374806
-0.05569542202508642
-0.05381775788059819


([1.5719142799291432,
  0.5345177185631234,
  2.3326717582642242,
  1.5027545091714085,
  1.6064941653080103,
  0.4653579478053888,
  1.1915355407616026,
  1.7102338214446122,
  1.5027545091714085,
  1.2952751968982046,
  1.7102338214446122,
  0.43077806242652145,
  0.39619817704765414,
  0.4653579478053888,
  0.4653579478053888,
  1.5719142799291432,
  1.9522930190966834,
  1.2952751968982046,
  1.5027545091714085,
  1.883133248338949,
  0.49993783318425616,
  1.6410740506868777,
  0.49993783318425616,
  1.883133248338949,
  2.1597723313698878,
  1.7448137068234797,
  1.9522930190966834,
  1.9868729044755509,
  0.43077806242652145,
  0.49993783318425616],
 array([1.2, 0.3, 2.3, 1.5, 1.4, 0.4, 1.3, 2.3, 1.5, 1.2, 2. , 0.1, 0.2,
        0.1, 0.3, 1.6, 2.2, 1.1, 1.3, 2.2, 0.2, 1.8, 0.4, 2.1, 2. , 2.3,
        1.8, 2.3, 0.3, 0.2]))