In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from sklearn.metrics import precision_score
from sklearn.pipeline import Pipeline

# Step 1: Load the Iris dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Step 2: Drop rows where the class is "Iris-setosa" (class = 0)
X_filtered = X[y != 0]
y_filtered = y[y != 0]

# Step 3: Split the dataset into train and test sets (75:25 ratio) with random_state=0
X_train, X_test, y_train, y_test = train_test_split(X_filtered, y_filtered, test_size=0.25, random_state=0)

# Step 4: Create a pipeline with MinMaxScaler and SVC classifier
pipeline = Pipeline([
    ('Scaler', MinMaxScaler()),
    ('classifier', SVC())
])

# Step 5: Train the model
pipeline.fit(X_train, y_train)

# Step 6: Make predictions on the test set
y_pred = pipeline.predict(X_test)

# Step 7: Calculate the precision score
precision = precision_score(y_test, y_pred, average='macro')

# Display the precision score
print(f"Precision score: {precision:.4f}")

Precision score: 0.9286


In [2]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

def compute_score(X_train, y_train, X_test, y_test):
    # Initialize the SVM model with the specified parameters
    model = SVC(kernel='sigmoid', C=25, gamma='auto', random_state=42)

    # Train the model on the training data
    model.fit(X_train, y_train)

    # Make predictions on the test data
    y_pred = model.predict(X_test)

    # Compute the accuracy score on the test set
    accuracy = accuracy_score(y_test, y_pred)

    return accuracy

# Step 1: Load the Iris dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Step 2: Split the dataset into train and test sets with 70:30 ratio and random_state=42
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 3: Call the compute_score function and print the result
accuracy = compute_score(X_train, y_train, X_test, y_test)
print(f"Accuracy score on the test data: {accuracy:.4f}")

Accuracy score on the test data: 0.2889


In [3]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

def compute_score(X_train, y_train, X_test, y_test):
    # Initialize the SVM model with the specified parameters
    model = SVC(kernel='poly', C=10, gamma='auto', random_state=42)

    # Train the model on the training data
    model.fit(X_train, y_train)

    # Make predictions on the test data
    y_pred = model.predict(X_test)

    # Compute the accuracy score on the test set
    accuracy = accuracy_score(y_test, y_pred)

    return accuracy

# Step 1: Load the Iris dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Step 2: Split the dataset into train and test sets with 70:30 ratio and random_state=42
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 3: Call the compute_score function and print the result
accuracy = compute_score(X_train, y_train, X_test, y_test)
print(f"Accuracy score on the test data: {accuracy:.4f}")

Accuracy score on the test data: 1.0000


In [1]:
# Step 1: Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Step 2: Load the Iris dataset
iris = load_iris()
X = iris.data  # Features
y = iris.target  # Labels

# Step 3: Split the dataset into training and test sets (80:20 ratio, random_state=10)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

# Step 4: Scale the data using Normalizer
normalizer = Normalizer()
X_train_normalized = normalizer.fit_transform(X_train)
X_test_normalized = normalizer.transform(X_test)

# Step 5: Evaluate KNN classifier for different values of k
k_values = [2, 3, 4]
best_k = None
best_accuracy = 0

for k in k_values:
    # Initialize KNN classifier with current k value
    knn = KNeighborsClassifier(n_neighbors=k)

    # Train the classifier
    knn.fit(X_train_normalized, y_train)

    # Make predictions on the test set
    y_pred = knn.predict(X_test_normalized)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)

    print(f"Accuracy for k={k}: {accuracy:.4f}")

    # Track the best accuracy and corresponding k value
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_k = k

print(f"The best k value is: {best_k} with accuracy: {best_accuracy:.4f}")

Accuracy for k=2: 0.9667
Accuracy for k=3: 0.9667
Accuracy for k=4: 0.9667
The best k value is: 2 with accuracy: 0.9667


In [2]:
# Step 1: Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import f1_score

# Step 2: Load the Iris dataset
iris = load_iris()
X = iris.data  # Features
y = iris.target  # Labels

# Step 3: Split the dataset into training and test sets (80:20 ratio, random_state=10)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

# Step 4: Scale the data using Normalizer
normalizer = Normalizer()
X_train_normalized = normalizer.fit_transform(X_train)
X_test_normalized = normalizer.transform(X_test)

# Step 5: Initialize KNN classifier for k=3
k = 3
knn = KNeighborsClassifier(n_neighbors=k)

# Step 6: Train the classifier
knn.fit(X_train_normalized, y_train)

# Step 7: Make predictions on the test set
y_pred = knn.predict(X_test_normalized)

# Step 8: Compute the weighted F1 score
f1 = f1_score(y_test, y_pred, average='weighted')

# Step 9: Print the weighted F1 score
print(f"Weighted F1 score for k={k}: {f1:.4f}")

Weighted F1 score for k=3: 0.9671


In [6]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDRegressor
from sklearn.impute import SimpleImputer
import numpy as np

# Define column names
columns = ['OSM_ID', 'LONGITUDE', 'LATITUDE', 'ALTITUDE']

# Initialize scaler and regressor
scaler = StandardScaler()
sgd_regressor = SGDRegressor(random_state=10)

# Initialize variables for tracking results
num_samples = 0
iteration = 7
intercept_after_iteration = None
longitude_coefficient_after_iteration = None

# Step 1: Load the dataset in chunks
file_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00246/3D_spatial_network.txt"
chunk_size = 20000
iterator = pd.read_csv(file_url, sep=" ", header=None, names=columns, iterator=True, chunksize=chunk_size)

# Create an imputer to handle missing values before scaling
imputer = SimpleImputer(strategy='mean')

for chunk in iterator:
    # Step 2: Extract features and target (ALTITUDE is the target)
    X_chunk = chunk[['LONGITUDE', 'LATITUDE']].values
    y_chunk = chunk['ALTITUDE'].values

    # Check for any missing values in the features
    if np.any(np.isnan(X_chunk)) or np.any(np.isnan(y_chunk)):
        print("Missing values detected in the chunk")

    # Step 3: Handle missing values by imputing them
    X_chunk_imputed = imputer.fit_transform(X_chunk)  # Impute missing values
    y_chunk_imputed = imputer.fit_transform(y_chunk.reshape(-1, 1)).flatten()  # Impute missing values in target

    # Check if there are any columns left in X_chunk after imputation
    if X_chunk_imputed.shape[1] == 0:
        print(f"Warning: All feature columns are missing in the chunk, skipping this chunk.")
        continue  # Skip this chunk if no valid feature columns are left

    # Update the total number of samples
    num_samples += len(chunk)

    # Step 4: Scale the features incrementally using partial_fit
    scaler.partial_fit(X_chunk_imputed)
    X_scaled_chunk = scaler.transform(X_chunk_imputed)

    # Step 5: Fit the model incrementally using partial_fit
    sgd_regressor.partial_fit(X_scaled_chunk, y_chunk_imputed)

    # Check if we've reached the 7th iteration
    if sgd_regressor.n_iter_ == iteration:
        intercept_after_iteration = sgd_regressor.intercept_
        longitude_coefficient_after_iteration = sgd_regressor.coef_[0]  # Coefficient for longitude

# Output the results
print(f"Total number of samples in the dataset: {num_samples}")
print(f"Intercept after 7th iteration: {intercept_after_iteration}")
print(f"Longitude coefficient after 7th iteration: {longitude_coefficient_after_iteration}")


Missing values detected in the chunk
Missing values detected in the chunk
Missing values detected in the chunk
Missing values detected in the chunk




Missing values detected in the chunk
Missing values detected in the chunk
Missing values detected in the chunk




Missing values detected in the chunk
Missing values detected in the chunk




Missing values detected in the chunk
Missing values detected in the chunk
Missing values detected in the chunk
Missing values detected in the chunk
Missing values detected in the chunk




Missing values detected in the chunk
Missing values detected in the chunk
Missing values detected in the chunk
Missing values detected in the chunk
Missing values detected in the chunk
Missing values detected in the chunk
Missing values detected in the chunk
Missing values detected in the chunk
Total number of samples in the dataset: 0
Intercept after 7th iteration: None
Longitude coefficient after 7th iteration: None


