In [5]:
# Step 1: Download the dataset
# The shell escape runs gdown with a Google Drive file id and saves the file as
# dataset.csv in the working directory, which is the path pd.read_csv reads later in this cell.
!gdown --id 1v-uxWEgTI0GDCOTZOX3shUMkTf1a_CL7 -O dataset.csv

# Step 2: Import the necessary libraries and load the data
import pandas as pd
import numpy as np
from sklearn.linear_model import SGDRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Read the downloaded CSV and keep only complete rows (any row containing a NaN is dropped)
data = pd.read_csv("dataset.csv").dropna()

# Step 3: 'Target' is the label column; every remaining column is used as a feature
y = data['Target']
X = data.drop(columns=['Target'])

# Step 4: Continue with plain NumPy arrays instead of pandas objects
X_array, y_array = X.values, y.values

# Step 5: Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X_array,
    y_array,
    test_size=0.3,
    random_state=10,
)

# Step 6: Show (rows, features) for both splits; the arrays are already 2-D, so no reshape is needed
print("Shape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)

# Step 7: Train incrementally -- each partial_fit call is one more pass over the training split
sgd_regressor = SGDRegressor(random_state=10)
for epoch in range(5):  # five passes in total; the reported values are those after the 5th pass
    sgd_regressor.partial_fit(X_train, y_train)

# Learned parameters after the final pass
intercept, coefficients = sgd_regressor.intercept_, sgd_regressor.coef_

# Step 8: Score the fitted model on the held-out rows
y_pred = sgd_regressor.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

# Report the results.
# Coefficients are looked up by position (index 2 -> third column, index 4 -> fifth column);
# NOTE(review): this assumes the CSV columns are ordered feature-1, feature-2, ... -- confirm.
for message in (
    f"Number of features in the dataset: {X.shape[1]}",
    f"Intercept after training: {intercept}",
    f"Coefficient for 'feature-3': {coefficients[2]}",
    f"R2 score on test data: {r2}",
    f"Coefficient for 'feature-5' after 5th iteration: {coefficients[4]}",
):
    print(message)


Downloading...
From: https://drive.google.com/uc?id=1v-uxWEgTI0GDCOTZOX3shUMkTf1a_CL7
To: /content/dataset.csv
100% 9.49M/9.49M [00:00<00:00, 201MB/s]
Shape of X_train: (63000, 10)
Shape of X_test: (27000, 10)
Number of features in the dataset: 10
Intercept after training: [0.00858904]
Coefficient for 'feature-3': 81.2538457066686
R2 score on test data: 0.9999919892315331
Coefficient for 'feature-5' after 5th iteration: 76.46446678410382


In [6]:
from sklearn import datasets
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC

def compute_GridSearchCV(kernel_params, reg_params):
    """Grid-search an SVC on the Iris training split and return the best CV score.

    The Iris data is split 70:30 (``random_state=0``) and only the training
    portion takes part in the search. Every combination of the supplied
    kernels and ``C`` values is evaluated with ``gamma='auto'`` using 4-fold
    cross-validation.

    Parameters
    ----------
    kernel_params : candidate values for the SVC ``kernel`` parameter.
    reg_params : candidate values for the SVC ``C`` parameter.

    Returns
    -------
    The ``best_score_`` attribute of the fitted ``GridSearchCV``.
    """
    features, labels = datasets.load_iris(return_X_y=True)

    # The held-out 30% is not used by the search, so it is discarded right away.
    X_train, _, y_train, _ = train_test_split(
        features, labels, test_size=0.3, random_state=0
    )

    search = GridSearchCV(
        estimator=SVC(random_state=0),
        param_grid={
            'kernel': kernel_params,
            'C': reg_params,
            'gamma': ['auto'],
        },
        cv=4,
    )

    # fit() returns the fitted search object, so the score can be read off directly.
    return search.fit(X_train, y_train).best_score_

# Candidate hyper-parameters: two kernels crossed with three values of C
kernels, regularization = ['linear', 'rbf'], [1, 15, 25]

# Run the search and report the best mean cross-validated score, rounded to 4 decimals
best_score = compute_GridSearchCV(kernels, regularization)
print(f"Mean cross-validated score of the best model: {best_score:.4f}")


Mean cross-validated score of the best model: 0.9808


  _data = np.array(data, dtype=dtype, copy=copy,


In [7]:
# Step 1: Download the dataset
# gdown fetches the Google Drive file with the given id and writes it to
# Social_Network_Ads.csv, the path pd.read_csv reads further down in this cell.
!gdown --id 1qUa1GlG4X4ZY_4E0e7jPR-z7AG7NIDbE -O Social_Network_Ads.csv

# Step 2: Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix

# Read the downloaded CSV into a DataFrame
data = pd.read_csv("Social_Network_Ads.csv")

# Step 3: 'Purchased' is the label; all other columns form the feature matrix
y = data['Purchased']
X = data.drop(columns=['Purchased'])

# Step 4: 75:25 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# Step 5: Fit the scaler on the training features only, then apply that same
# transformation to both splits so no test-set statistics leak into training
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Step 6: Train a linear-kernel SVM on the standardized training data
model = SVC(kernel='linear', random_state=0).fit(X_train, y_train)

# Step 7: Predict labels for the held-out rows
y_pred = model.predict(X_test)

# Steps 8-9: Summarize test-set performance
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Display the results
print(f"Accuracy Score: {accuracy:.4f}")
print("Confusion Matrix:", conf_matrix, sep="\n")

Downloading...
From: https://drive.google.com/uc?id=1qUa1GlG4X4ZY_4E0e7jPR-z7AG7NIDbE
To: /content/Social_Network_Ads.csv
100% 4.90k/4.90k [00:00<00:00, 16.3MB/s]
Accuracy Score: 0.9000
Confusion Matrix:
[[66  2]
 [ 8 24]]


In [8]:
from sklearn.datasets import fetch_openml
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

# Step 1: Fetch MNIST (OpenML dataset 'mnist_784', version 1)
mnist = fetch_openml('mnist_784', version=1)

# Step 2: Take features and labels, then carve out fixed row ranges:
# rows [0, 20000) for training and rows [20000, 25000) for testing
X, y = mnist.data, mnist.target
X_train, X_test = X[:20000], X[20000:25000]
y_train, y_test = y[:20000], y[20000:25000]

# Step 3: Chain MinMaxScaler with a linear-kernel SVC so scaling is fitted on the training rows only
pipeline = Pipeline(steps=[
    ('scaler', MinMaxScaler()),
    ('svc', SVC(kernel='linear', decision_function_shape='ovr', class_weight=None, random_state=0)),
])

# Step 4: Fit scaler and classifier together on the training rows
pipeline.fit(X_train, y_train)

# Step 5: Predict labels for the test rows
y_pred = pipeline.predict(X_test)

# Step 6: Confusion matrix; summing its main diagonal gives the number of correct predictions
conf_matrix = confusion_matrix(y_test, y_pred)
diagonal_sum = conf_matrix.diagonal().sum()

# Step 7: Macro-averaged precision, recall and F1, computed with identical arguments
precision, recall, f1 = (
    metric(y_test, y_pred, average='macro')
    for metric in (precision_score, recall_score, f1_score)
)

# Display the results
print(f"Sum of the main diagonal elements of the confusion matrix: {diagonal_sum}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

Sum of the main diagonal elements of the confusion matrix: 4623
Precision: 0.9240
Recall: 0.9234
F1 Score: 0.9233


In [9]:
from sklearn.datasets import fetch_openml
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Step 1: Fetch MNIST (OpenML dataset 'mnist_784', version 1) and take features and labels
mnist = fetch_openml('mnist_784', version=1)
X, y = mnist.data, mnist.target

# Step 2: Split the samples 50:50 into train and test sets, seeded with random_state=42
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=42
)

# Step 3: StandardScaler followed by a degree-3 polynomial SVC (C=10, balanced class weights)
pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('svc', SVC(kernel='poly', degree=3, decision_function_shape='ovr', class_weight='balanced', C=10, random_state=0)),
])

# Step 4: Fit scaler and classifier on the training half
pipeline.fit(X_train, y_train)

# Step 5: Predict labels for the test half
y_pred = pipeline.predict(X_test)

# Step 6: The dict form of the report gives programmatic access to the weighted-average F1
report = classification_report(y_test, y_pred, output_dict=True)
weighted_f1_score = report['weighted avg']['f1-score']

# Display the headline number first, then the full per-class text report
print(f"Weighted average F1 score: {weighted_f1_score:.4f}")
print("Full Classification Report:", classification_report(y_test, y_pred), sep="\n")

Weighted average F1 score: 0.9723
Full Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.98      0.99      3463
           1       0.99      0.99      0.99      3927
           2       0.96      0.97      0.96      3520
           3       0.98      0.96      0.97      3551
           4       0.96      0.98      0.97      3333
           5       0.97      0.97      0.97      3144
           6       0.98      0.98      0.98      3490
           7       0.98      0.97      0.97      3718
           8       0.96      0.96      0.96      3344
           9       0.96      0.96      0.96      3510

    accuracy                           0.97     35000
   macro avg       0.97      0.97      0.97     35000
weighted avg       0.97      0.97      0.97     35000

