In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import operator
import sklearn
import sklearn.model_selection
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

import re
import nltk

In [None]:
# Fetch the NLTK resources named 'punkt' and 'stopwords' first, so they are
# already on disk before the project modules below get imported.
for resource_name in ('punkt', 'stopwords'):
    nltk.download(resource_name)

from nltk.tokenize import word_tokenize as wt
from nltk.corpus import stopwords

# Import the project modules and immediately reload each one, so edits made to
# the files on disk are picked up without restarting the kernel.
import importlib

import src.models as model
importlib.reload(model)

import src.utils as util
importlib.reload(util);  # trailing ';' hides the module repr in the cell output

In [None]:
# global data
# Load the tab-separated training / test sets. By position, column 1 is used
# as the examples and column 2 as the labels.
training_set = pd.read_csv("data/covid_training.tsv", sep="\t")
test_set = pd.read_csv("data/covid_test_public.tsv", sep="\t")
preX = training_set.values[:, 1]
prey = training_set.values[:, 2]
X = np.array(preX).reshape(-1, 1)

# Sort the distinct labels so each one always maps to the same integer index.
# Iterating a bare set of strings can yield a different order in every kernel
# session (string hashing is randomized per process), which would silently
# change the meaning of 0/1/... in y, y_test and every reported metric.
# key=str keeps the sort well-defined even if the column mixes value types.
categories = sorted(set(prey), key=str)
y = np.array([categories.index(ele) for ele in prey]).reshape(-1, 1)

X_test = test_set.values[:, 1].reshape(-1, 1)
# A test label that never occurs in the training set has no index and makes
# categories.index raise ValueError here.
y_test = np.array([categories.index(ele) for ele in test_set.values[:, 2]]).reshape(-1, 1)

# Hold out 10% of the training data as a validation split (fixed seed).
X_train, X_val, y_train, y_val = sklearn.model_selection.train_test_split(
    X, y, train_size=0.9, random_state=0
)
print(X_test.shape)

In [None]:
#data format
# A cell renders only its final expression, so two separate `.head()` calls
# would show just the second one. Stack both previews into a single frame
# whose outer index level names the split, and label the test rows as test
# data rather than training data.
preview_columns = ["Examples", "Labels"]
pd.concat(
    [
        pd.DataFrame(data=np.column_stack([X, y]), columns=preview_columns).head(),
        pd.DataFrame(data=np.column_stack([X_test, y_test]), columns=preview_columns).head(),
    ],
    keys=["Training", "Test"],
)

In [None]:
# Training: build the NB_BOW model from src.models and fit it.
# (Presumably Naive Bayes over a bag-of-words representation - confirm in src.models.)
class_ids = np.unique(y)  # distinct encoded labels present in y
nb_bow = model.NB_BOW(class_ids)

print("---------------- Training In Progress --------------------")
nb_bow.train(X, y)  # fit on the full X / y arrays
print("----------------- Training Completed ---------------------")

In [None]:
# Testing: predict on the test examples and report plain accuracy.
predict_classes = nb_bow.test(X_test)

y_true_flat = y_test.reshape(-1)  # (n, 1) label column -> flat vector
n_test = y_test.shape[0]
n_correct = np.sum(predict_classes == y_true_flat)
test_acc = n_correct / float(n_test)

print("Test Set Examples: ", n_test)
print("Test Set Accuracy: ", test_acc * 100, "%")
# Raw predictions followed by the true labels, for eyeballing.
print(predict_classes)
print(y_true_flat)

In [None]:
# evaluation
# `classifier` is not defined in any visible cell (NameError on a fresh
# kernel). The fitted model is `nb_bow`, and its predictions for X_test are
# already held in `predict_classes` from the testing cell, so reuse those.
y_true = np.asarray(y_test).reshape(-1)  # flatten the (n, 1) label column
y_pred = np.asarray(predict_classes).reshape(-1)

cm = confusion_matrix(y_true, y_pred)
cr = classification_report(y_true, y_pred)
accuracy = accuracy_score(y_true, y_pred)

# Show the metrics; previously they were computed but never displayed.
print(cm)
print(cr)
print("Accuracy: ", accuracy)


# main
def process(input):
    """Run every search strategy on each sample puzzle and export the results.

    For each sample an ``xpuzzle.XPuzzle`` and a ``search.Search`` are built.
    Uniform-cost search runs once; greedy best-first search and A* each run
    once per heuristic id (0 and 1). After every run the first element of the
    result is printed and the whole result is passed to ``exportData``.

    Args:
        input: Iterable of puzzle samples. Each item is passed unchanged as
            the third argument of ``xpuzzle.XPuzzle``. The name shadows the
            builtin ``input``; it is kept so existing callers keep working.

    Returns:
        None. Results are only printed and exported.
    """
    # NOTE(review): xpuzzle, search and exportData are not imported in any
    # visible cell - confirm they are in scope before calling this function.
    for index, sample in enumerate(input):
        print(f"***\t\tsample {index}\t\t***")
        # Use the enumerated item directly instead of re-indexing input[index],
        # so any iterable works, not only subscriptable sequences.
        # Presumably 2 and 4 are the board dimensions and 30 a search limit -
        # TODO confirm against xpuzzle.XPuzzle / search.Search.
        puzzle = xpuzzle.XPuzzle(2, 4, sample)
        solver = search.Search(puzzle, 30)

        result = solver.uniformCost()
        print(result[0])
        exportData("ucs", index, result)

        for heuristic in range(2):
            # Same print/export steps for both heuristic-driven strategies,
            # in the original order: greedy best-first, then A*.
            for label, run_search in (("gbfs", solver.greedyBFS), ("astar", solver.aStar)):
                result = run_search(heuristic)
                print(result[0])
                exportData(label, index, result, heuristic)

In [None]:
# Read 'samplePuzzles.txt' through importData, echo what was read followed by
# a blank line, then run every search on it.
# NOTE(review): importData is not defined or imported in any visible cell -
# confirm where it comes from before relying on this cell.
inputData = importData('samplePuzzles.txt')
print(inputData)
print()
process(inputData)

# X_train, X_val, y_train, y_val = sklearn.model_selection.train_test_split(
#     X, y, train_size=0.9, random_state=0)
# X_val, X_test, y_val, y_test = sklearn.model_selection.train_test_split(
#     X_temp, y_temp, train_size=0.5, random_state=0)
# print(X_train.shape)
# print(X_val.shape)
# print(X_test.shape)