# ProgressLines

In [1]:
class ProgressLines():
    """Create and keep a group of ``enlighten`` progress counters.

    After :meth:`progress_lines` has run, the counters are available in
    ``self.progresses`` in creation order.
    """

    def progress_lines(self, num, total, description, unit, colour):
        """Create ``num`` counters and store them in ``self.progresses``.

        Args:
            num: Number of counters to create.
            total: Sequence holding the ``total`` of each counter.
            description: Sequence of counter labels; they are padded with
                trailing spaces to a common width before use.
            unit: Sequence holding the unit label of each counter.
            colour: Sequence holding the colour of each counter.
        """
        desc = self.set_strings_to_equal_len_(description)
        manager = enlighten.get_manager()
        progresses = []
        for i in range(num):
            prog = manager.counter(total=total[i], desc=desc[i], unit=unit[i], color=colour[i])
            # Draw the counter immediately instead of waiting for the first update.
            prog.refresh()
            progresses.append(prog)
        self.progresses = progresses

    def set_strings_to_equal_len_(self, description):
        """Return the strings right-padded with spaces to the longest length.

        Args:
            description: Iterable of strings.

        Returns:
            A new list with one padded string per input string; an empty
            list when ``description`` is empty.
        """
        # Materialise once so any iterable (not only lists) can be scanned twice.
        words = list(description)
        if not words:
            # max() would raise ValueError on an empty sequence.
            return []
        width = max(len(word) for word in words)
        return [word.ljust(width) for word in words]

# TimeseriesPredictionOfClassLabel

In [2]:
class TimeseriesPredictionOfClassLabel():
    """Forecast the next label of every class from its label time series.

    The input frame is grouped by ``class_id``; the ``label`` values of a
    group, ordered by ``time_stamp``, form one series. For each series
    longer than 10 points a k-nearest-neighbours classifier is trained on
    sliding windows of that series. The forecast is written into
    ``unique_class_predictions`` for every threshold that the hold-out
    accuracy exceeds.
    """

    def __init__(self, df_main, empty_unique_class_predictions, knn_model_params, threshold):
        """Store the inputs and reset the bookkeeping.

        Args:
            df_main: DataFrame with the columns ``class_id``,
                ``time_stamp`` and ``label``.
            empty_unique_class_predictions: DataFrame with a ``class_id``
                column; prediction columns are filled in place.
            knn_model_params: Mapping with the keys ``"train_size"``
                (fraction of the windows used for training) and
                ``"n_neighbors"``.
            threshold: Sequence of accuracy thresholds, e.g. [0.5, 0.55, ...].
        """
        self.gdf = df_main.groupby("class_id")
        self.unique_class_predictions = empty_unique_class_predictions
        self.threshold = threshold # [0.5, 0.55, ...]
        self.knn_model_params = knn_model_params

        self.time_to_train = None
        # Progress display; created by make_progress_lines().
        self.pl = None

        # "isctss" counts the series that passed testSeries_; every
        # threshold key counts the series whose accuracy exceeded it.
        self.accepted_num = {"isctss": 0}
        self.accepted_num.update(dict.fromkeys(threshold, 0))


    # ************************** helper ************************** #

    def prediction_column_name(self, th):
        """Return the name of the prediction column for threshold ``th``."""
        return "predictions_" + str(th)


    # ************************** progress_lines ************************** #
    def make_progress_lines(self):
        """Create one progress counter sized to the number of prediction rows."""
        # --- ProgressLines
        self.pl = ProgressLines()
        self.pl.progress_lines(1, [len(self.unique_class_predictions)],
                               ["time-series-prediction"], ["sample"], ["blue"])


    # ************************** prediction ************************** #
    def make_predictions(self):
        """Run the prediction flow for every class and record the elapsed time.

        Creates the progress counter first if :meth:`make_progress_lines`
        has not been called yet. The elapsed time reported by the counter
        is stored in ``self.time_to_train`` as ``"<h>h:<m>m:<s>s"``.

        Raises:
            RuntimeError: If :meth:`free_memory` already released the data.
        """
        if self.gdf is None:
            raise RuntimeError("grouped data was released by free_memory()")
        if self.pl is None:
            self.make_progress_lines()

        # --- make prediction
        # flow_ works purely through side effects, so iterate the groups
        # explicitly rather than going through GroupBy.apply.
        for class_id, group in self.gdf:
            self.flow_(group, class_id)

        # --- time to train
        total_seconds = int(self.pl.progresses[0].elapsed)
        hours, remainder = divmod(total_seconds, 3600)
        minutes, seconds = divmod(remainder, 60)
        self.time_to_train = f"{hours}h:{minutes}m:{seconds}s"


    def flow_(self, g, class_id=None):
        """Process one class: forecast its next label and store it.

        Args:
            g: DataFrame holding the rows of a single class.
            class_id: Key of the class. When omitted, ``g.name`` is used,
                which is how the key is exposed when this method is
                called through ``GroupBy.apply``.
        """
        if class_id is None:
            class_id = g.name

        # --- extract class time series.
        cts = g.sort_values(by="time_stamp", ascending=True)["label"].to_numpy()

        # --- If the series passes testSeries_, make a prediction and save
        # it for every threshold the accuracy exceeds.
        # --- Otherwise no prediction is stored for the class.
        if self.testSeries_(cts):
            self.accepted_num["isctss"] += 1
            accuracy, yhat = self.predictValue_(cts)
            passed = [th for th in self.threshold if accuracy > th]
            if passed:
                # The row selection does not depend on the threshold.
                row_mask = self.unique_class_predictions["class_id"] == class_id
                for th in passed:
                    self.accepted_num[th] += 1
                    column = self.prediction_column_name(th)
                    self.unique_class_predictions.loc[row_mask, column] = int(yhat)

        self.pl.progresses[0].update()


    def testSeries_(self, cts):
        """Return True when the series is long enough to be predicted.

        More than 10 points are required because predictValue_ uses
        query lengths of up to 9.
        """
        return len(cts) > 10


    def predictValue_(self, cts):
        """Try query lengths 3..9 and keep the most accurate forecast.

        Returns:
            Tuple ``(max_accuracy, best_yhat)``. On ties the shortest
            query length wins because only a strictly better accuracy
            replaces the current best.
        """
        max_accuracy = -1
        best_yhat = -1
        for query_length in range(3, 10):
            knnX, knnY = self.knnDS_(cts, query_length)
            # The most recent window is the query whose successor is forecast.
            query = np.array(cts[-query_length:])
            accuracy, yhat = self.knnPredict_(knnX, knnY, query)
            if accuracy > max_accuracy:
                max_accuracy = accuracy
                best_yhat = yhat
        return (max_accuracy, best_yhat)


    def knnDS_(self, cts, ql):
        """Build the sliding-window data set of window length ``ql``.

        Returns:
            Tuple ``(knnX, knnY)``: ``knnX[i]`` is ``cts[i:i + ql]`` and
            ``knnY[i]`` is the value that follows that window.
        """
        n_windows = len(cts) - ql
        knnX = [cts[i: i + ql] for i in range(n_windows)] # [[0, -1, 1, 1, ...], [1, 0, 0, 0, ...]]
        knnY = [cts[i + ql] for i in range(n_windows)]    # [0, 1, ...]
        return knnX, knnY


    # x: [[0, -1, 1, 1, ...], [1, 0, 0, 0, ...]]
    # y: [0, 1, ...]
    def knnPredict_(self, x, y, query):
        """Train a k-NN classifier on ``x``/``y`` and predict for ``query``.

        The first ``train_size`` fraction of the windows is used for
        training and the remainder for scoring.

        Args:
            x: Sequence of equally long windows.
            y: Sequence with the value following each window.
            query: 1-D numpy array with the window to predict for.

        Returns:
            Tuple ``(accuracy, yhat)`` with ``yhat`` as a scalar.
            ``(0, 0)`` is returned when the training set has fewer than 6
            windows or fewer than ``n_neighbors`` windows, or when no
            window is left for scoring.
        """
        # --- prepare train and test data
        size = int(len(x) * self.knn_model_params["train_size"])
        xtrain, xtest = x[:size], x[size:]
        ytrain, ytest = y[:size], y[size:]

        n_neighbors = self.knn_model_params["n_neighbors"]
        # The classifier cannot look up more neighbours than it was
        # fitted with, and scoring needs at least one held-out window.
        if len(xtrain) < max(6, n_neighbors) or len(xtest) == 0:
            return 0, 0

        # --- train model
        knn = KNeighborsClassifier(n_neighbors=n_neighbors)
        knn.fit(xtrain, ytrain)

        # --- test the model
        accuracy = knn.score(xtest, ytest)

        # --- make prediction
        # predict() returns one value per query row; unwrap the single
        # element so callers receive a scalar.
        yhat = knn.predict(query.reshape(1, -1))[0]

        return accuracy, yhat


    def free_memory(self):
        """Drop the reference to the grouped input data.

        ``make_predictions`` cannot be called again afterwards.
        """
        self.gdf = None
        # Kept so that code reading this attribute after the call still works.
        self.df_timeserAll = None