------------------------------- Important library used in our code -------------------------------

In [1]:
import numpy as np
from scipy import stats
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score
from tabulate import tabulate

------------------------------- Utility Function that are used in our code -------------------------------

In [2]:
# funtion that load the data set into our program
def loadData(data_set = None):

    if (data_set == None):
        return None
    else:
        data = []
        fptr = open(data_set, "r")
        lines = fptr.readlines()
        for line in lines:
            line = line.strip().split()
            for i in range(len(line)):
                line[i] = float(line[i])
            # print(line)
            data.append(line)
        
        return np.array(data)

In [3]:
# function that make the new data set based on statistical value
def makeNewDataSet(data_set=None):

    new_data_set = []

    for i in range(0, len(data_set), 32):
        
        temp = []
        x = data_set[i: i + 32, 0]
        y = data_set[i: i + 32, 1]
        z = data_set[i: i + 32, 2]
        op = data_set[i][3]
        # y = data_set[i][-1]

        # adding mean to data set
        temp.append(np.mean(x))
        temp.append(np.mean(y))
        temp.append(np.mean(z))

        # adding median to the data set
        temp.append(np.median(x))
        temp.append(np.median(y))
        temp.append(np.median(z))

        # adding mode to the data set
        temp.append(stats.mode(x, keepdims=True)[0][0])
        temp.append(stats.mode(y, keepdims=True)[0][0])
        temp.append(stats.mode(z, keepdims=True)[0][0])

        # adding the standard deviation to the data set
        temp.append(np.std(x))
        temp.append(np.std(y))
        temp.append(np.std(z))

        # adding the quantile @25 in the data set
        iq_1 = np.quantile(x, .25)
        iq_2 = np.quantile(y, .25)
        iq_3 = np.quantile(z, .25)
        temp.append(iq_1)
        temp.append(iq_2)
        temp.append(iq_3)

        # adding the quantile @50 in the data set
        temp.append(np.quantile(x, .5))
        temp.append(np.quantile(y, .5))
        temp.append(np.quantile(z, .5))

        # adding the quantile @75 in the data set
        iq_1_ = np.quantile(x, .75)
        iq_2_ = np.quantile(y, .75)
        iq_3_ = np.quantile(z, .75)
        temp.append(iq_1_)
        temp.append(iq_2_)
        temp.append(iq_3_)

        # adding inter quantile in the data set
        temp.append(iq_1_ - iq_1)
        temp.append(iq_2_ - iq_2)
        temp.append(iq_3_ - iq_3)

        # adding the output 
        temp.append(op)

        new_data_set.append(temp)
        # print(temp, op)
    
    return np.array(new_data_set)


In [4]:
# Finding number of left, right turn in the data
def countTurns(data):
    count_l = 0
    count_r = 0
    no_turn = 0

    for i in range(0, len(data)):

        if data[i][-1] == -1:
            count_l += 1
        
        elif data[i][-1] == 1:
            count_r += 1
        
        else:
            no_turn += 1
    
    return {count_l, count_r, no_turn}

In [5]:
# function that run the model at that size
def runModelAtSize(split_ratio, model_name, X, y):
    
    output = [round(split_ratio, 1), model_name]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=round(split_ratio, 1), random_state=42)

    if model_name == "Descison tree":
        model = DecisionTreeClassifier()
        model.fit(X_train, y_train)

        # Train Accuracy
        y_pred = model.predict(X_train)
        output.append(accuracy_score(y_pred, y_train)*100)

        # Test Accuracy
        y_pred = model.predict(X_test)
        output.append(accuracy_score(y_pred, y_test)*100)
    
    elif model_name == "Random Forest":
        model = RandomForestClassifier()
        model.fit(X_train, y_train)

        # Train Accuracy
        y_pred = model.predict(X_train)
        output.append(accuracy_score(y_pred, y_train)*100)

        # Test Accuracy
        y_pred = model.predict(X_test)
        output.append(accuracy_score(y_pred, y_test)*100)
    
    elif model_name == "KNN":
        model = KNeighborsClassifier()
        model.fit(X_train, y_train)

        # Train Accuracy
        y_pred = model.predict(X_train)
        output.append(accuracy_score(y_pred, y_train)*100)

        # Test Accuracy
        y_pred = model.predict(X_test)
        output.append(accuracy_score(y_pred, y_test)*100)

    elif model_name == "AdaBoost":
        model = AdaBoostClassifier()
        model.fit(X_train, y_train)

        # Train Accuracy
        y_pred = model.predict(X_train)
        output.append(accuracy_score(y_pred, y_train)*100)

        # Test Accuracy
        y_pred = model.predict(X_test)
        output.append(accuracy_score(y_pred, y_test)*100)
    
    elif model_name == "Gaussian NB":
        model = GaussianNB()
        model.fit(X_train, y_train)

        # Train Accuracy
        y_pred = model.predict(X_train)
        output.append(accuracy_score(y_pred, y_train)*100)

        # Test Accuracy
        y_pred = model.predict(X_test)
        output.append(accuracy_score(y_pred, y_test)*100)
    
    elif model_name == "SVM":
        model = SVC()
        model.fit(X_train, y_train)

        # Train Accuracy
        y_pred = model.predict(X_train)
        output.append(accuracy_score(y_pred, y_train)*100)

        # Test Accuracy
        y_pred = model.predict(X_test)
        output.append(accuracy_score(y_pred, y_test)*100)
    
    elif model_name == "Gradient Boosting":
        model = GradientBoostingClassifier()
        model.fit(X_train, y_train)

        # Train Accuracy
        y_pred = model.predict(X_train)
        output.append(accuracy_score(y_pred, y_train)*100)

        # Test Accuracy
        y_pred = model.predict(X_test)
        output.append(accuracy_score(y_pred, y_test)*100)
    
    elif model_name == "MLP":
        model = MLPClassifier()
        model.fit(X_train, y_train)

        # Train Accuracy
        y_pred = model.predict(X_train)
        output.append(accuracy_score(y_pred, y_train)*100)

        # Test Accuracy
        y_pred = model.predict(X_test)
        output.append(accuracy_score(y_pred, y_test)*100)
    
    
    return output

In [21]:
# finction that run all the models at differnt size
def runAllModels(data_set, file_name):
    X = data_set[:, :-1]
    y = data_set[:, -1]

    outputs = [["Split ratio", "Model", "Train Accuracy", "Test Accuracy"]]
    
    # Run model 1: Descision tree
    split_ratio = 0.2
    for i in range(5):
        outputs.append(runModelAtSize(split_ratio, "Descison tree", X, y))
        outputs.append(runModelAtSize(split_ratio, "Random Forest", X, y))
        outputs.append(runModelAtSize(split_ratio, "KNN", X, y))
        outputs.append(runModelAtSize(split_ratio, "AdaBoost", X, y))
        outputs.append(runModelAtSize(split_ratio, "Gaussian NB", X, y))
        outputs.append(runModelAtSize(split_ratio, "SVM", X, y))
        outputs.append(runModelAtSize(split_ratio, "Gradient Boosting", X, y))
        outputs.append(runModelAtSize(split_ratio, "MLP", X, y))


        outputs.append(["-------------"] * len(outputs[0]))
        split_ratio += 0.1

    out = tabulate(outputs, headers="firstrow")
    fptr = open(file_name, "w")
    fptr.write(out)
    fptr.close()


In [19]:
# function that find Individual Accuracy for new left, right, no turn on split ration 0.2
def individualAccuracyNew(data_set, file_name):
    
    X = data_set[:, :-1]
    y = data_set[:, -1]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    X_left_train = []
    y_left_train = []

    X_right_train = []
    y_right_train = []

    X_no_train = []
    y_no_train = []

    X_left_test = []
    y_left_test = []

    X_right_test = []
    y_right_test = []

    X_no_test = []
    y_no_test = []

    for i in range(len(X_train)):

        if (y_train[i] == 0):
            X_no_train.append(X_train[i])
            y_no_train.append(0)
        
        elif (y_train[i] == 1):
            X_right_train.append(X_train[i])
            y_right_train.append(1)
        
        else:
            X_left_train.append(X_train[i])
            y_left_train.append(-1)
    
    for i in range(len(X_test)):

        if (y_train[i] == 0):
            X_no_test.append(X_test[i])
            y_no_test.append(0)
        
        elif (y_train[i] == 1):
            X_right_test.append(X_test[i])
            y_right_test.append(1)
        
        else:
            X_left_test.append(X_test[i])
            y_left_test.append(-1)
    
    outputs = [["Model", "Left Train Accuracy", "Right Train Accuracy", "No Turn Accuracy", "Left Test Accuracy", "Right Test Accuracy", "No Test Accuracy"]]

    #--------------------------------------------------------------------------
    
    model = DecisionTreeClassifier()
    model.fit(X_train, y_train)

    output = ["Descison tree"]
    
    y_pred = model.predict(X_left_train)
    output.append(accuracy_score(y_pred, y_left_train)*100)
    y_pred = model.predict(X_right_train)
    output.append(accuracy_score(y_pred, y_right_train)*100)
    y_pred = model.predict(X_no_train)
    output.append(accuracy_score(y_pred, y_no_train)*100)

    y_pred = model.predict(X_left_test)
    output.append(accuracy_score(y_pred, y_left_test)*100)
    y_pred = model.predict(X_right_test)
    output.append(accuracy_score(y_pred, y_right_test)*100)
    y_pred = model.predict(X_no_test)
    output.append(accuracy_score(y_pred, y_no_test)*100)

    outputs.append(output)
   
    #--------------------------------------------------------------------------

    model = RandomForestClassifier()
    model.fit(X_train, y_train)

    output = ["Random Forest"]
    
    y_pred = model.predict(X_left_train)
    output.append(accuracy_score(y_pred, y_left_train)*100)
    y_pred = model.predict(X_right_train)
    output.append(accuracy_score(y_pred, y_right_train)*100)
    y_pred = model.predict(X_no_train)
    output.append(accuracy_score(y_pred, y_no_train)*100)

    y_pred = model.predict(X_left_test)
    output.append(accuracy_score(y_pred, y_left_test)*100)
    y_pred = model.predict(X_right_test)
    output.append(accuracy_score(y_pred, y_right_test)*100)
    y_pred = model.predict(X_no_test)
    output.append(accuracy_score(y_pred, y_no_test)*100)

    outputs.append(output)

    #--------------------------------------------------------------------------

    model = KNeighborsClassifier()
    model.fit(X_train, y_train)

    output = ["KNN"]
    
    y_pred = model.predict(X_left_train)
    output.append(accuracy_score(y_pred, y_left_train)*100)
    y_pred = model.predict(X_right_train)
    output.append(accuracy_score(y_pred, y_right_train)*100)
    y_pred = model.predict(X_no_train)
    output.append(accuracy_score(y_pred, y_no_train)*100)

    y_pred = model.predict(X_left_test)
    output.append(accuracy_score(y_pred, y_left_test)*100)
    y_pred = model.predict(X_right_test)
    output.append(accuracy_score(y_pred, y_right_test)*100)
    y_pred = model.predict(X_no_test)
    output.append(accuracy_score(y_pred, y_no_test)*100)

    outputs.append(output)

    #--------------------------------------------------------------------------
    
    model = AdaBoostClassifier()
    model.fit(X_train, y_train)

    output = ["AdaBoost"]
    
    y_pred = model.predict(X_left_train)
    output.append(accuracy_score(y_pred, y_left_train)*100)
    y_pred = model.predict(X_right_train)
    output.append(accuracy_score(y_pred, y_right_train)*100)
    y_pred = model.predict(X_no_train)
    output.append(accuracy_score(y_pred, y_no_train)*100)

    y_pred = model.predict(X_left_test)
    output.append(accuracy_score(y_pred, y_left_test)*100)
    y_pred = model.predict(X_right_test)
    output.append(accuracy_score(y_pred, y_right_test)*100)
    y_pred = model.predict(X_no_test)
    output.append(accuracy_score(y_pred, y_no_test)*100)

    outputs.append(output)

    #--------------------------------------------------------------------------

    model = GaussianNB()
    model.fit(X_train, y_train)

    output = ["Gaussian NB"]
    
    y_pred = model.predict(X_left_train)
    output.append(accuracy_score(y_pred, y_left_train)*100)
    y_pred = model.predict(X_right_train)
    output.append(accuracy_score(y_pred, y_right_train)*100)
    y_pred = model.predict(X_no_train)
    output.append(accuracy_score(y_pred, y_no_train)*100)

    y_pred = model.predict(X_left_test)
    output.append(accuracy_score(y_pred, y_left_test)*100)
    y_pred = model.predict(X_right_test)
    output.append(accuracy_score(y_pred, y_right_test)*100)
    y_pred = model.predict(X_no_test)
    output.append(accuracy_score(y_pred, y_no_test)*100)

    outputs.append(output)

    #--------------------------------------------------------------------------

    model = SVC()
    model.fit(X_train, y_train)

    output = ["SVM"]
    
    y_pred = model.predict(X_left_train)
    output.append(accuracy_score(y_pred, y_left_train)*100)
    y_pred = model.predict(X_right_train)
    output.append(accuracy_score(y_pred, y_right_train)*100)
    y_pred = model.predict(X_no_train)
    output.append(accuracy_score(y_pred, y_no_train)*100)

    y_pred = model.predict(X_left_test)
    output.append(accuracy_score(y_pred, y_left_test)*100)
    y_pred = model.predict(X_right_test)
    output.append(accuracy_score(y_pred, y_right_test)*100)
    y_pred = model.predict(X_no_test)
    output.append(accuracy_score(y_pred, y_no_test)*100)

    outputs.append(output)

    #--------------------------------------------------------------------------

    model = GradientBoostingClassifier()
    model.fit(X_train, y_train)

    output = ["Gradient Boosting"]
    
    y_pred = model.predict(X_left_train)
    output.append(accuracy_score(y_pred, y_left_train)*100)
    y_pred = model.predict(X_right_train)
    output.append(accuracy_score(y_pred, y_right_train)*100)
    y_pred = model.predict(X_no_train)
    output.append(accuracy_score(y_pred, y_no_train)*100)

    y_pred = model.predict(X_left_test)
    output.append(accuracy_score(y_pred, y_left_test)*100)
    y_pred = model.predict(X_right_test)
    output.append(accuracy_score(y_pred, y_right_test)*100)
    y_pred = model.predict(X_no_test)
    output.append(accuracy_score(y_pred, y_no_test)*100)

    outputs.append(output)

    #--------------------------------------------------------------------------

    model = MLPClassifier()
    model.fit(X_train, y_train)

    output = ["MLP"]
    
    y_pred = model.predict(X_left_train)
    output.append(accuracy_score(y_pred, y_left_train)*100)
    y_pred = model.predict(X_right_train)
    output.append(accuracy_score(y_pred, y_right_train)*100)
    y_pred = model.predict(X_no_train)
    output.append(accuracy_score(y_pred, y_no_train)*100)

    y_pred = model.predict(X_left_test)
    output.append(accuracy_score(y_pred, y_left_test)*100)
    y_pred = model.predict(X_right_test)
    output.append(accuracy_score(y_pred, y_right_test)*100)
    y_pred = model.predict(X_no_test)
    output.append(accuracy_score(y_pred, y_no_test)*100)

    outputs.append(output)

    fptr = open(file_name, "w")
    op = tabulate(outputs, headers="firstrow")

    fptr.write(op)

    fptr.write("\nFor training data")
    fptr.write(f"\nleft: {len(X_left_train)} right: {len(X_right_train)} no_turn: {len(X_no_train)}")
    fptr.write("\nFor testing data")
    fptr.write(f"\nleft: {len(X_left_test)} right: {len(X_right_test)} no_turn: {len(X_no_test)}")

    fptr.close()



------------------------------- variables that are used in our code -------------------------------

In [11]:
# variable that stores the data
data_set_1 = loadData("5_11/visual_annotion_5_11.txt")
new_data_set_1 = makeNewDataSet(data_set = data_set_1)

In [12]:
# let check the number of turns in the data set and total number of rows
print(countTurns(new_data_set_1))
print(len(new_data_set_1))

{449, 98, 151}
698


In [13]:
runAllModels(new_data_set_1, "5_11/models_output.txt")



In [20]:
individualAccuracyNew(new_data_set_1, "5_11/individual_accur.txt")

