## Python Classes

In [2]:
class VehicleClass():
    
    def __init__(self, horsepower): #constructor
        "This is the 'init' method" 
        # this is a class attribute:
        self.horsepower = horsepower
        
    def horsepower_to_torque(self, rpm):
        "This is a regular method"
        torque = self.horsepower * rpm / 5252
        return torque
    
    def tune_motor(self):
        self.horsepower *= 2
    
    def _private_method(self): #special method
        print('this is private')
    
    def __very_private_method(self): #special method
        print('this is very private')

In [3]:
# instantiate an object:
car1 = VehicleClass(horsepower=123)
print(car1.horsepower)

123


In [4]:
car1.horsepower_to_torque(rpm=5000)

117.0982482863671

In [5]:
car1.tune_motor()
car1.horsepower_to_torque(rpm=5000)

234.1964965727342

In [6]:
car1._private_method()

this is private


In [8]:
# Excecuting the following would raise an error:
car1.__very_private_method()

AttributeError: 'VehicleClass' object has no attribute '__very_private_method'

In [9]:
# If we use "name mangling" we can access this private method:
car1._VehicleClass__very_private_method()

this is very private


## Inheritance

In [10]:
class CarClass(VehicleClass):

    def __init__(self, horsepower):
        super().__init__(horsepower)
        self.num_wheels = 4
    
new_car = CarClass(horsepower=123)
print('Number of wheels:', new_car.num_wheels)
print('Horsepower:', new_car.horsepower)
new_car.tune_motor()
print('Horsepower:', new_car.horsepower)

Number of wheels: 4
Horsepower: 123
Horsepower: 246


## K-Nearest Neighbors Implementation

- Below is a very simple implementation of a K-nearest Neighbor classifier.
- This is a very slow and inefficient implementation, and in real-world problems, it is always recommended to use established libraries (like scikit-learn) instead of implementing algorithms from scratch.
- A scenario where it is useful to implement algorithms from scratch is for learning and teaching purposes, or if we want to try out new algorithms, hence, the implementation below, which gently introduces how things are implemented in scikit-learn.

In [11]:
class KNNClassifier(object):
    def __init__(self, k, dist_fn=None):
        self.k = k
        if dist_fn is None:
            self.dist_fn = self._euclidean_dist
    
    def _euclidean_dist(self, a, b):
        dist = 0.
        for ele_i, ele_j in zip(a, b):
            dist += ((ele_i - ele_j)**2)
        dist = dist**0.5
        return dist
        
    def _find_nearest(self, x):
        dist_idx_pairs = []
        for j in range(self.dataset_.shape[0]):
            d = self.dist_fn(x, self.dataset_[j])
            dist_idx_pairs.append((d, j))
            
        sorted_dist_idx_pairs = sorted(dist_idx_pairs)

        return sorted_dist_idx_pairs
    
    def fit(self, X, y):
        self.dataset_ = X.copy()
        self.labels_ = y.copy()
        self.possible_labels_ = np.unique(y)

    def predict(self, X):
        predictions = np.zeros(X.shape[0], dtype=int)
        for i in range(X.shape[0]):
            k_nearest = self._find_nearest(X[i])[:self.k]
            indices = [entry[1] for entry in k_nearest]
            k_labels = self.labels_[indices]
            counts = np.bincount(k_labels,
                                 minlength=self.possible_labels_.shape[0])
            pred_label = np.argmax(counts)
            predictions[i] = pred_label
        return predictions

In [18]:
import pandas as pd
import numpy as np

df = pd.read_csv("data/iris.csv")
df

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Virginica
146,6.3,2.5,5.0,1.9,Virginica
147,6.5,3.0,5.2,2.0,Virginica
148,6.2,3.4,5.4,2.3,Virginica


In [19]:
d = {'Setosa': 0,
     'Versicolor': 1,
     'virginica': 2}
df['variety'] = df['variety'].map(d)

X = df.iloc[:, 1:5].values
y = df['variety'].values

indices = np.arange(X.shape[0])
rng = np.random.RandomState(123)
permuted_indices = rng.permutation(indices)

train_size, valid_size = int(0.65*X.shape[0]), int(0.15*X.shape[0])
test_size = X.shape[0] - (train_size + valid_size)
train_ind = permuted_indices[:train_size]
valid_ind = permuted_indices[train_size:(train_size + valid_size)]
test_ind = permuted_indices[(train_size + valid_size):]
X_train, y_train = X[train_ind], y[train_ind]
X_valid, y_valid = X[valid_ind], y[valid_ind]
X_test, y_test = X[test_ind], y[test_ind]

print(f'X_train.shape: {X_train.shape}')
print(f'X_valid.shape: {X_valid.shape}')
print(f'X_test.shape: {X_test.shape}')

X_train.shape: (97, 4)
X_valid.shape: (22, 4)
X_test.shape: (31, 4)
