In [42]:
#@title import library
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from abc import ABC,abstractmethod
from typing import Dict, Any
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [43]:
#@title import data train

# Column labels assigned to the training file: six accelerometer axes
# (wrist and thigh, x/y/z) followed by the activity label.
# NOTE(review): `names=` is given without `header=`, so pandas treats the
# first line of the file as data - confirm the CSV has no header row.
train_columns = [
    'wrist_x', 'wrist_y', 'wrist_z',
    'thigh_x', 'thigh_y', 'thigh_z',
    'class',
]
df_train = pd.read_csv(
    '/content/drive/MyDrive/up mlp uas/trainingA.csv',
    names=train_columns,
)
df_train.head()

Unnamed: 0,wrist_x,wrist_y,wrist_z,thigh_x,thigh_y,thigh_z,class
0,-0.265625,-1.5625,-0.15625,-1.390625,-0.390625,-0.078125,downstairs
1,-0.25,-1.4375,-0.171875,-1.453125,0.0625,-0.59375,downstairs
2,-0.171875,-0.890625,-0.015625,-1.0625,-0.5,-0.28125,downstairs
3,-1.484375,1.140625,-0.0625,0.234375,0.75,-0.390625,jogging
4,-0.671875,1.015625,0.109375,-3.328125,0.484375,0.75,jogging


In [44]:
# Distinct activity labels present in the training data, in the order
# value_counts() returns them (most frequent first by default).
class_counts = df_train['class'].value_counts()
classa = class_counts.index.tolist()
print(classa)

['jogging', 'upstairs', 'downstairs']


In [45]:
#@title ubah kolom class

def change_class(x):
    """
    Encode an activity label as an integer class id.
    Inputs:
    x -> label text; matched by substring, so any value supporting `in`
    with a string operand works
    Outputs:
    int -> 0 if x contains 'jogging', 1 if it contains 'upstairs',
    2 for everything else ('jogging' is tested first)
    """
    # position in the tuple is the class id returned for that keyword
    for code, keyword in enumerate(('jogging', 'upstairs')):
        if keyword in x:
            return code
    # fallback: every remaining label is encoded as 2
    return 2

# Encode the text labels as integers, in place on df_train.
# The dtype guard keeps this cell re-runnable: once the column is numeric,
# mapping it again would make change_class evaluate `'jogging' in <int>`
# and raise TypeError.
if not pd.api.types.is_numeric_dtype(df_train['class']):
    df_train['class'] = df_train['class'].map(change_class)

# drop_duplicates() returns a new frame; df_train itself keeps every row.
df = df_train.drop_duplicates()

df.head()

Unnamed: 0,wrist_x,wrist_y,wrist_z,thigh_x,thigh_y,thigh_z,class
0,-0.265625,-1.5625,-0.15625,-1.390625,-0.390625,-0.078125,2
1,-0.25,-1.4375,-0.171875,-1.453125,0.0625,-0.59375,2
2,-0.171875,-0.890625,-0.015625,-1.0625,-0.5,-0.28125,2
3,-1.484375,1.140625,-0.0625,0.234375,0.75,-0.390625,0
4,-0.671875,1.015625,0.109375,-3.328125,0.484375,0.75,0


In [46]:
#@title Generating kNN Class
class KNN(ABC):
    """
    Abstract base class for brute-force K-nearest-neighbour models.

    The class stores the training data, computes the distance from every
    query point to every training point, and hands the indices of the K
    closest training points to `_generate_predictions`, which subclasses
    must implement.
    """
    def __init__(self, K : int = 3, metric : str = 'minkowski', p : int = 2) -> None:
        """
        Initializer function. Ensures that the input parameters are compatible.
        Inputs:
        K -> integer specifying number of neighbours to consider (must be >= 1)
        metric -> string to indicate the distance metric to use (valid
        entries are 'minkowski' or 'cosine')
        p -> order of the minkowski metric (only checked when metric ==
        'minkowski'; must be >= 1)
        Raises:
        ValueError -> if metric is unknown, if p < 1 for the minkowski
        metric, or if K is not an integer >= 1
        """
        # check distance is a valid entry
        valid_distance = ['minkowski','cosine']
        if metric not in valid_distance:
            msg = "Entered value for metric is not valid. Pick one of {}".format(valid_distance)
            raise ValueError(msg)

        # check minkowski p parameter (the condition now matches the message: p >= 1)
        if (metric == 'minkowski') and (p < 1):
            msg = "Entered value for p is not valid. For metric = 'minkowski', p >= 1"
            raise ValueError(msg)

        # check K: predict slices the sorted neighbours with [:, :K], so K = 0
        # would select nothing and a negative K would select "all but the last |K|"
        if (not isinstance(K, (int, np.integer))) or (K < 1):
            msg = "Entered value for K is not valid. K must be an integer >= 1"
            raise ValueError(msg)

        # store/initialise input parameters
        self.K = K
        self.metric = metric
        self.p = p
        self.X_train = np.array([])
        self.y_train = np.array([])

    def __del__(self) -> None:
        """
        Destructor function. Drops the stored parameters and training data.
        """
        # __del__ also runs for instances whose __init__ raised before the
        # attributes were assigned, so only remove what is actually there.
        for name in ('K', 'metric', 'p', 'X_train', 'y_train'):
            self.__dict__.pop(name, None)

    def __minkowski(self, x : np.array) -> np.array:
        """
        Private function to compute the minkowski distance between point x and
        the training data X
        Inputs:
        x -> numpy data point of predictors to consider
        Outputs:
        np.array -> numpy array of the computed distances, one per training row
        """
        # (sum_j |X_train[i, j] - x[j]| ** p) ** (1 / p) for every training row i
        return np.power(np.sum(np.power(np.abs(self.X_train - x),self.p),axis=1),1/self.p)

    def __cosine(self, x : np.array) -> np.array:
        """
        Private function to compute the cosine distance between point x and the
        training data X
        Inputs:
        x -> numpy data point of predictors to consider
        Outputs:
        np.array -> numpy array of the computed distances, one per training row
        """
        # 1 - cosine similarity; the norms are not checked for zero, so an
        # all-zero vector on either side makes the division 0/0
        return (1 - (np.dot(self.X_train,x)/(np.linalg.norm(x)*np.linalg.norm(self.X_train,axis=1))))

    def __distances(self, X : np.array) -> np.array:
        """
        Private function to compute distances to each point x in X[x,:]
        Inputs:
        X -> numpy array of points [x]
        Outputs:
        D -> numpy array containing distances from each x to all points in the
        training set (row i holds the distances for X[i,:])
        Raises:
        ValueError -> if self.metric was changed to an unsupported value after
        construction
        """
        # cover distance calculation
        if self.metric == 'minkowski':
            D = np.apply_along_axis(self.__minkowski,1,X)
        elif self.metric == 'cosine':
            D = np.apply_along_axis(self.__cosine,1,X)
        else:
            # previously fell through and failed on the unbound local D
            raise ValueError("Unsupported metric: {}".format(self.metric))
        # return computed distances
        return D

    @abstractmethod
    def _generate_predictions(self, idx_neighbours : np.array) -> np.array:
        """
        Protected function to compute predictions from the K nearest neighbours
        Inputs:
        idx_neighbours -> indices into the training data of the nearest
        neighbours, one row per query point
        Outputs:
        np.array -> predictions, one per query point
        """
        pass

    def fit(self, X : np.array, y : np.array) -> None:
        """
        Public training function for the class. It is assumed input X has been normalised.
        Inputs:
        X -> numpy array containing the predictor features
        y -> numpy array containing the labels associated with each value in X
        """
        # store copies of the training data; predict indexes them positionally
        self.X_train = np.copy(X)
        self.y_train = np.copy(y)

    def predict(self, X : np.array) -> np.array:
        """
        Public prediction function for the class.
        It is assumed input X has been normalised in the same fashion as the input to the training function
        Inputs:
        X -> numpy array containing the predictor features
        Outputs:
        y_pred -> numpy array containing the predicted labels
        Raises:
        Exception -> if fit has not been called with non-empty data
        """
        # ensure we have already trained the instance
        if (self.X_train.size == 0) or (self.y_train.size == 0):
            raise Exception('Model is not trained. Call fit before calling predict.')
        # compute distances
        D = self.__distances(X)
        # obtain indices for the K nearest neighbours (smallest distances first)
        idx_neighbours = D.argsort()[:,:self.K]
        # compute predictions
        y_pred = self._generate_predictions(idx_neighbours)
        # return results
        return y_pred

    def get_params(self, deep : bool = False) -> Dict:
        """
        Public function to return model parameters
        Inputs:
        deep -> boolean input parameter (accepted but not used here)
        Outputs:
        Dict -> dictionary of stored class input parameters
        """
        return {'K':self.K,
                'metric':self.metric,
                'p':self.p}

In [47]:
#@title Generating kNN Classifier
class KNNClassifier(KNN):
    """
    KNN classification: each sample gets the most frequent label among its
    K nearest training neighbours.
    """
    def __init__(self, K : int = 3, metric : str = 'minkowski', p : int = 2) -> None:
        """
        Initializer function. All arguments are forwarded to the base class.
        Inputs:
        K -> integer specifying number of neighbours to consider
        metric -> string to indicate the distance metric to use (valid
        entries are 'minkowski' or 'cosine')
        p -> order of the minkowski metric (valid only when metric ==
        'minkowski')
        """
        # delegate validation and storage to the base class initialiser
        super().__init__(K=K, metric=metric, p=p)

    def _generate_predictions(self, idx_neighbours : np.array) -> np.array:
        """
        Protected function to compute predictions from the K nearest neighbours
        Inputs:
        idx_neighbours -> indices of nearest neighbours
        Outputs:
        y_pred -> numpy array of prediction results
        """
        # look up the training labels of every neighbour
        neighbour_labels = self.y_train[idx_neighbours]
        # most frequent label along each row
        vote = stats.mode(neighbour_labels, axis=1)
        # flatten so the result is 1-D whatever shape .mode comes back in
        return vote.mode.flatten()

In [48]:
# Features are every column of the de-duplicated frame except the label.
X = df.drop(columns='class')
y = df['class']

# Hold out 20% of the rows, stratified on the label, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [49]:
# Fit a 3-neighbour classifier (default minkowski metric, p = 2) on the
# training split and predict the held-out split.
knn = KNNClassifier(K=3)
knn.fit(X=X_train, y=y_train)
y_pred = knn.predict(X=X_test)
y_pred

array([1, 1, 1, ..., 1, 2, 1])

In [50]:
# Accuracy on the held-out split: fraction of predictions equal to the true label.
matches = (y_test == y_pred)
akurasi = np.mean(matches)
akurasi

0.8758389261744967

In [51]:
#@title import data test

# Column labels assigned to the unlabeled file: the same six accelerometer
# axes as the training data, without the 'class' column.
# NOTE(review): `names=` is given without `header=`, so pandas treats the
# first line of the file as data - confirm the CSV has no header row.
test_columns = [
    'wrist_x', 'wrist_y', 'wrist_z',
    'thigh_x', 'thigh_y', 'thigh_z',
]
df_test = pd.read_csv(
    '/content/drive/MyDrive/up mlp uas/SoalA.csv',
    delimiter=',',
    names=test_columns,
)
df_test.head()

Unnamed: 0,wrist_x,wrist_y,wrist_z,thigh_x,thigh_y,thigh_z
0,-0.3125,-1.09375,0.0,-0.828125,-0.265625,-0.078125
1,0.015625,-1.046875,0.03125,-1.484375,-0.390625,-0.359375
2,-0.21875,-1.03125,0.0625,-1.125,-0.953125,-0.46875
3,0.3125,-0.09375,0.78125,-1.265625,-0.53125,-1.3125
4,-1.625,0.234375,-0.0625,-0.5625,-0.71875,-0.078125


In [52]:
# Print the row count, per-column non-null counts and dtypes of the test features.
df_test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   wrist_x  100 non-null    float64
 1   wrist_y  100 non-null    float64
 2   wrist_z  100 non-null    float64
 3   thigh_x  100 non-null    float64
 4   thigh_y  100 non-null    float64
 5   thigh_z  100 non-null    float64
dtypes: float64(6)
memory usage: 4.8 KB


In [53]:
# Predict the unlabeled rows with the classifier fitted earlier.
# NOTE: this rebinds X_test and y_pred, which previously held the hold-out
# split and its predictions; the accuracy cell must not be re-run after this.
X_test = df_test
test_predictions = knn.predict(X_test)
y_pred = np.array(test_predictions)
y_pred

array([1, 2, 2, 2, 0, 1, 0, 1, 0, 1, 0, 2, 0, 2, 0, 0, 0, 2, 0, 1, 0, 2,
       1, 0, 0, 2, 1, 1, 0, 0, 1, 1, 0, 1, 2, 0, 1, 1, 1, 0, 0, 2, 0, 0,
       2, 1, 0, 0, 2, 2, 0, 1, 2, 1, 1, 0, 0, 0, 2, 1, 0, 1, 0, 1, 2, 2,
       1, 2, 1, 2, 0, 1, 0, 1, 1, 0, 0, 1, 0, 2, 0, 0, 1, 1, 0, 2, 0, 2,
       2, 1, 0, 0, 2, 1, 1, 2, 0, 1, 2, 2])

In [60]:
# One 1-based ID per row of the test set, derived from the data instead of a
# hard-coded 100 so it stays correct if the test file changes size.
ID = list(range(1, len(df_test) + 1))

In [62]:
# Inverse of change_class: integer class id -> activity name.
label_mapping = dict(enumerate(('jogging', 'upstairs', 'downstairs')))

# Translate every predicted id back to its text label.
predicted_labels = [label_mapping[code] for code in y_pred]

# NOTE: `df` is rebound here; it previously held the de-duplicated training frame.
df = pd.DataFrame({'ID': ID, 'label': predicted_labels})
df.to_csv('FarahAuliaSahirah_dataprediksibaru.csv', index=False)

In [64]:
# Display the ID/label table that was written to the submission CSV.
df

Unnamed: 0,ID,label
0,1,upstairs
1,2,downstairs
2,3,downstairs
3,4,downstairs
4,5,jogging
...,...,...
95,96,downstairs
96,97,jogging
97,98,upstairs
98,99,downstairs
