In [8]:
import numpy as np
import pandas as pd

# Location of the Iris CSV file.
# NOTE(review): this is an absolute, machine-specific path — the cell only runs
# on a machine where this exact file exists.
IRIS_CSV_PATH = r'C:\Users\abhis\Desktop\Aspire_assessment\numpy_assessment\Iris.csv'

# Read the CSV into a DataFrame, then materialize every row/column as a NumPy array.
df = pd.read_csv(IRIS_CSV_PATH)
data = df.to_numpy()

# Report the per-column dtypes (taken from the DataFrame) and the array dimensions.
print(f'Data Types:\n{df.dtypes}')
print(f'Shape of the array: {data.shape}')


Data Types:
Id                 int64
SepalLengthCm    float64
SepalWidthCm     float64
PetalLengthCm    float64
PetalWidthCm     float64
Species           object
dtype: object
Shape of the array: (150, 6)


2. Perform broadcasting operations to standardize the numerical features

In [9]:
# Split the raw array: the first column is skipped, the last column is the
# label, and everything in between is cast to float as the feature matrix.
features = data[:, 1:-1].astype(float)
labels = data[:, -1]

# Column-wise statistics: one mean and one standard deviation per feature.
means = features.mean(axis=0)
stds = features.std(axis=0)

# z-score: the 1-D statistics broadcast across every row of the 2-D matrix.
standardized_features = (features - means) / stds
print('Standardized Features:\n', standardized_features)


Standardized Features:
 [[-9.00681170e-01  1.03205722e+00 -1.34127240e+00 -1.31297673e+00]
 [-1.14301691e+00 -1.24957601e-01 -1.34127240e+00 -1.31297673e+00]
 [-1.38535265e+00  3.37848329e-01 -1.39813811e+00 -1.31297673e+00]
 [-1.50652052e+00  1.06445364e-01 -1.28440670e+00 -1.31297673e+00]
 [-1.02184904e+00  1.26346019e+00 -1.34127240e+00 -1.31297673e+00]
 [-5.37177559e-01  1.95766909e+00 -1.17067529e+00 -1.05003079e+00]
 [-1.50652052e+00  8.00654259e-01 -1.34127240e+00 -1.18150376e+00]
 [-1.02184904e+00  8.00654259e-01 -1.28440670e+00 -1.31297673e+00]
 [-1.74885626e+00 -3.56360566e-01 -1.34127240e+00 -1.31297673e+00]
 [-1.14301691e+00  1.06445364e-01 -1.28440670e+00 -1.44444970e+00]
 [-5.37177559e-01  1.49486315e+00 -1.28440670e+00 -1.31297673e+00]
 [-1.26418478e+00  8.00654259e-01 -1.22754100e+00 -1.31297673e+00]
 [-1.26418478e+00 -1.24957601e-01 -1.34127240e+00 -1.44444970e+00]
 [-1.87002413e+00 -1.24957601e-01 -1.51186952e+00 -1.44444970e+00]
 [-5.25060772e-02  2.18907205e+00 -1.4

3. Compute the correlation matrix for the numerical features

In [10]:
# Correlation coefficients between the feature columns.
# rowvar=False tells NumPy that each column (not each row) is a variable,
# so the result has one row and one column per feature.
correlation_matrix = np.corrcoef(standardized_features, rowvar=False)
print('Correlation Matrix:\n', correlation_matrix)


Correlation Matrix:
 [[ 1.         -0.10936925  0.87175416  0.81795363]
 [-0.10936925  1.         -0.4205161  -0.35654409]
 [ 0.87175416 -0.4205161   1.          0.9627571 ]
 [ 0.81795363 -0.35654409  0.9627571   1.        ]]


4. Implement k-Nearest Neighbors (k-NN) algorithm

In [11]:
from collections import Counter

def euclidean_distance(a, b):
    """Return the Euclidean distance between ``a`` and ``b`` along the last axis.

    Parameters
    ----------
    a, b : array_like
        Points, or stacks of points, whose shapes broadcast against each
        other, e.g. an ``(n, d)`` matrix and a single ``(d,)`` point.

    Returns
    -------
    numpy.ndarray or numpy scalar
        One distance per row when either input is 2-D; a single scalar when
        both inputs are 1-D vectors.
    """
    diff = np.asarray(a) - np.asarray(b)
    # Reduce over the last (feature) axis instead of a hard-coded axis=1 so
    # that two plain 1-D vectors are accepted as well as matrix-vs-point.
    return np.sqrt(np.sum(diff ** 2, axis=-1))

def k_nearest_neighbors(train_data, train_labels, test_data, k=3):
    """Classify each test point by majority vote among its k nearest training points.

    Parameters
    ----------
    train_data : array_like
        Training points, one per row.
    train_labels : array_like
        Label for each row of ``train_data``.
    test_data : iterable of array_like
        Points to classify; each item is compared against every training row.
    k : int, default 3
        Number of nearest neighbours that vote. If ``k`` exceeds the number
        of training points, all training points vote.

    Returns
    -------
    numpy.ndarray
        Predicted label for every item of ``test_data``, in order.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1. A negative ``k`` would otherwise slice
        from the wrong end and silently vote over almost the whole training set.
    """
    if k < 1:
        raise ValueError(f'k must be a positive integer, got {k}')

    # Coerce so that plain Python lists can be fancy-indexed below.
    train_data = np.asarray(train_data)
    train_labels = np.asarray(train_labels)

    predictions = []
    for test_point in test_data:
        distances = euclidean_distance(train_data, test_point)
        # Indices of the k smallest distances, nearest first.
        k_indices = np.argsort(distances)[:k]
        votes = Counter(train_labels[k_indices])
        # most_common(1) yields [(label, count)]; keep only the label.
        predictions.append(votes.most_common(1)[0][0])
    return np.array(predictions)

# Hold-out evaluation: shuffle the row indices once with a fixed seed, then
# cut them into a training part and a test part according to split_ratio.
# NOTE(review): standardized_features was scaled with statistics computed over
# all rows, so the test rows influenced the scaling — confirm this is acceptable.
np.random.seed(0)
n_samples = len(standardized_features)
indices = np.random.permutation(n_samples)
split_ratio = 0.8
split_index = int(n_samples * split_ratio)
train_indices, test_indices = np.split(indices, [split_index])

train_features = standardized_features[train_indices]
test_features = standardized_features[test_indices]
train_labels = labels[train_indices]
test_labels = labels[test_indices]

# Predict a label for every held-out row.
k = 5
predictions = k_nearest_neighbors(train_features, train_labels, test_features, k)

# Accuracy is the fraction of held-out rows whose prediction matches the label.
is_correct = predictions == test_labels
accuracy = np.mean(is_correct)
print(f'k-NN Accuracy: {accuracy:.2f}')


k-NN Accuracy: 0.90


5. Additional numerical operations or data manipulations

In [12]:
# Example of a degree-2 expansion: append the element-wise square of every
# column to the original columns (squares only — no cross-terms are built).
squared_features = np.square(standardized_features)
poly_features = np.concatenate((standardized_features, squared_features), axis=1)
print('Polynomial Features (degree 2):\n', poly_features)

# Further transformations can be appended to the matrix in the same way.


Polynomial Features (degree 2):
 [[-0.90068117  1.03205722 -1.3412724  ...  1.06514211  1.79901166
   1.72390789]
 [-1.14301691 -0.1249576  -1.3412724  ...  0.0156144   1.79901166
   1.72390789]
 [-1.38535265  0.33784833 -1.39813811 ...  0.11414149  1.95479017
   1.72390789]
 ...
 [ 0.79566902 -0.1249576   0.81962435 ...  0.0156144   0.67178407
   1.10993965]
 [ 0.4321654   0.80065426  0.93335575 ...  0.64104724  0.87115297
   2.09657554]
 [ 0.06866179 -0.1249576   0.76275864 ...  0.0156144   0.58180075
   0.6250338 ]]
