# Regression

In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge,SGDRegressor,HuberRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error,r2_score

### Load datasets

In [None]:

# Load the Boston Housing dataset
california_housing = fetch_california_housing()
X, y = california_housing.data, california_housing.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Ordinary Least Squares (OLS) Regression



### Linear regression

In [None]:
ols_model = LinearRegression()
ols_model.fit(X_train, y_train)
ols_pred = ols_model.predict(X_test)
ols_mse = mean_squared_error(y_test, ols_pred)



In [None]:
# Ridge Regression
ridge_model = Ridge(alpha=1.0)
ridge_model.fit(X_train, y_train)
ridge_pred = ridge_model.predict(X_test)
ridge_mse = mean_squared_error(y_test, ridge_pred)

print("OLS Mean Squared Error:", ols_mse)
print("Ridge Mean Squared Error:", ridge_mse)

OLS Mean Squared Error: 0.5558915986952422
Ridge Mean Squared Error: 0.5558034669932196


In [None]:
huber_regressor = HuberRegressor(epsilon=1.35)  # Epsilon is the threshold parameter
huber_regressor.fit(X_train, y_train)

# Predict on the testing set
y_pred = huber_regressor.predict(X_test)

# Evaluate the model using Mean Squared Error (MSE) and Mean Absolute Error (MAE)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("Huber Regressor Mean Squared Error (MSE):", mse)
print("Huber Regressor Mean Absolute Error (MAE):", mae)

### SGD Regression

In [None]:
'''Let's play around with different loss functions and different regularizers and see how the model can react.
Don't worry you cannot break the california housing dataset'''

# Initialize SGDRegressor with different parameters
# Below are some common parameters you can tune:

# Loss function: 'squared_loss' (default), 'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive'
# 'squared_loss': Ordinary least squares (OLS) loss
# 'huber': Huber loss, a combination of MSE and MAE that is less sensitive to outliers
# 'epsilon_insensitive': Linear Support Vector Regression (SVR)
# 'squared_epsilon_insensitive': SVR with squared hinge loss
loss_functions = ['squared_error', 'huber', 'epsilon_insensitive', 'squared_epsilon_insensitive']

# Penalty term:  'l2', 'l1', or 'elasticnet'
penalty_terms = ['elasticnet', 'l1', 'l2']
# Learning rate schedule: 'constant' (default), 'optimal', 'invscaling', 'adaptive'
# 'constant': a constant learning rate given by the 'eta0' parameter
# 'optimal': an optimal learning rate is chosen via theoretical analysis
# 'invscaling': gradually decreases the learning rate 'eta0' at each time step 't' using an inverse scaling exponent of 'power_t'
# 'adaptive': the learning rate is kept constant as long as the training loss decreases, then it is decreased by 'eta0' to improve convergence
learning_rate_schedules = ['constant', 'optimal', 'invscaling', 'adaptive']
penalty = penalty_terms[1]
loss = loss_functions[3]
lr_schedule = learning_rate_schedules[0]
# Initialize lists to store results

            # Initialize SGDRegressor with current parameters
sgd_regressor = SGDRegressor(loss=loss, penalty=penalty, learning_rate=lr_schedule, random_state=42,max_iter = 10)

            # Fit the model on the training data
sgd_regressor.fit(X_train, y_train)

            # Make predictions on the test set
y_pred = sgd_regressor.predict(X_test)

            # Calculate evaluation metrics
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

 # Store results
result = []
result.append((loss, penalty, lr_schedule, mse, mae))

# Print the results
print(result)
print("Loss function:", result[0][0])
print("Penalty term:", result[0][1])
print("Learning rate schedule:", result[0][2])
print("Mean Squared Error (MSE):", result[0][3])
print("Mean Absolute Error (MAE):", result[0][4])

[('squared_epsilon_insensitive', 'l1', 'constant', 2.123072719133613e+30, 1338361073750148.5)]
Loss function: squared_epsilon_insensitive
Penalty term: l1
Learning rate schedule: constant
Mean Squared Error (MSE): 2.123072719133613e+30
Mean Absolute Error (MAE): 1338361073750148.5


# Classification

In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score,precision_score,recall_score,f1_score

### Load dataset

In [None]:


# Load the Iris dataset
iris = load_breast_cancer()
X, y = iris.data, iris.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Logistic Regression
logistic_model = LogisticRegression(max_iter=100000)
logistic_model.fit(X_train, y_train)
logistic_pred = logistic_model.predict(X_test)
logistic_accuracy = accuracy_score(y_test, logistic_pred)

print("Logistic Regression Accuracy:", logistic_accuracy)


Logistic Regression Accuracy: 0.956140350877193


### Support Vector Machine

In [None]:

kernels = ['linear','poly','rbf','sigmoid']
kernel = kernels[2]
C_values = [0.1, 1, 10]
svm_classifier = SVC(kernel=kernel,C=C_values[0],max_iter=100)  # You can adjust kernel and other parameters

# Train the classifier
svm_classifier.fit(X_train, y_train)

# Predict on the testing set
y_pred = svm_classifier.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)


precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')




In [None]:
print(f"Kernel: {kernel}")
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1-score: {f1}")

Kernel: rbf
Accuracy: 0.9385964912280702
Precision: 0.9441070625281152
Recall: 0.9385964912280702
F1-score: 0.937318446911604


### Train Vs Test

In [None]:
training_accuracy = svm_classifier.score(X_train,y_train)
print("Training Accuracy: ",training_accuracy*100,"%")
testing_accuracy = svm_classifier.score(X_test, y_test)
print("testing accuracy ",testing_accuracy,"%")

Training Accuracy:  87.6923076923077 %
testing accuracy  0.9385964912280702 %
