In [1]:
import io, os, sys, types
from IPython import get_ipython
from nbformat import read
from IPython.core.interactiveshell import InteractiveShell

def find_notebook(fullname, path=None):
    """Locate the ``.ipynb`` file backing a (possibly dotted) module name.

    Only the last component of ``fullname`` is used as the file name, so
    ``"foo.bar"`` is looked up as ``bar.ipynb`` in each directory of
    ``path``.  If that file does not exist, underscores in the file name
    are read as spaces, which lets ``import Foo_Bar`` find
    ``Foo Bar.ipynb``.

    Args:
        fullname: Fully qualified module name.
        path: Optional iterable of directories to search.  A falsy value
            means "search the current working directory".

    Returns:
        The path of the first matching notebook, or ``None`` when no
        directory contains one.
    """
    name = fullname.rsplit('.', 1)[-1]
    if not path:
        path = ['']
    for d in path:
        # The underscore -> space fallback is applied to the file name only.
        # Rewriting the joined path would also mangle underscores in the
        # directory part and make the lookup miss an existing notebook.
        for candidate in (name, name.replace("_", " ")):
            nb_path = os.path.join(d, candidate + ".ipynb")
            if os.path.isfile(nb_path):
                return nb_path
    return None
        
class NotebookLoader(object):
    """Module Loader for Jupyter Notebooks

    Executes the code cells of a notebook inside a fresh module object so
    the notebook can be used through a regular ``import`` statement.
    """

    def __init__(self, path=None):
        """Remember the search path and grab the IPython shell instance.

        Args:
            path: Directories handed to ``find_notebook`` when loading.
        """
        self.shell = InteractiveShell.instance()
        self.path = path

    def load_module(self, fullname):
        """Import a notebook as a module.

        Args:
            fullname: Fully qualified name of the module to create.

        Returns:
            The populated module, which is also stored in ``sys.modules``.

        Raises:
            ImportError: if no notebook can be found for ``fullname``.
            Exception: whatever a notebook cell raises while executing; in
                that case ``sys.modules`` is put back the way it was.
        """
        path = find_notebook(fullname, self.path)
        if path is None:
            # Fail the way the import system expects instead of letting
            # io.open(None) blow up with an unrelated TypeError.
            raise ImportError("no Jupyter notebook found for module %r" % fullname)

        print ("importing Jupyter notebook from %s" % path)

        # load the notebook object (parsed as notebook format version 4)
        with io.open(path, 'r', encoding='utf-8') as f:
            nb = read(f, 4)

        # create the module and add it to sys.modules
        mod = types.ModuleType(fullname)
        mod.__file__ = path
        mod.__loader__ = self
        # notebook code may call get_ipython(), so expose it in the module
        mod.__dict__['get_ipython'] = get_ipython

        # remember what was registered under this name so a failed import
        # can be rolled back
        missing = object()
        previous = sys.modules.get(fullname, missing)
        sys.modules[fullname] = mod

        # extra work to ensure that magics that would affect the user_ns
        # actually affect the notebook module's ns
        save_user_ns = self.shell.user_ns
        self.shell.user_ns = mod.__dict__

        try:
            for cell in nb.cells:
                if cell.cell_type == 'code':
                    # transform the input to executable Python
                    code = self.shell.input_transformer_manager.transform_cell(cell.source)
                    # run the code in the module
                    exec(code, mod.__dict__)
        except BaseException:
            # do not leave a half-initialised module importable
            if previous is missing:
                sys.modules.pop(fullname, None)
            else:
                sys.modules[fullname] = previous
            raise
        finally:
            self.shell.user_ns = save_user_ns
        return mod
    
class NotebookFinder(object):
    """Module finder that locates Jupyter Notebooks

    Implements both the legacy ``find_module`` hook and the ``find_spec``
    hook.  Python 3.12 and later no longer call ``find_module`` on
    ``sys.meta_path`` entries, so without ``find_spec`` notebook imports
    would stop being found there.
    """

    def __init__(self):
        # one loader per distinct search path, created lazily
        self.loaders = {}

    def _loader_for(self, path):
        """Return the cached NotebookLoader for ``path``, creating it if needed."""
        key = path
        if path:
            # lists aren't hashable
            key = os.path.sep.join(path)

        if key not in self.loaders:
            self.loaders[key] = NotebookLoader(path)
        return self.loaders[key]

    def find_module(self, fullname, path=None):
        """Legacy finder hook.

        Returns:
            A loader when a notebook exists for ``fullname``, else ``None``.
        """
        nb_path = find_notebook(fullname, path)
        if not nb_path:
            return None
        return self._loader_for(path)

    def find_spec(self, fullname, path=None, target=None):
        """Spec-based finder hook.

        Args:
            fullname: Fully qualified module name being imported.
            path: Search path supplied by the import system (may be None).
            target: Unused; accepted because the import system passes it.

        Returns:
            A module spec whose loader is the cached NotebookLoader and
            whose origin is the notebook path, or ``None`` when no
            notebook matches.
        """
        # Imported here so the class body itself needs nothing beyond the
        # names the file already imports.
        import importlib.util

        nb_path = find_notebook(fullname, path)
        if not nb_path:
            return None
        return importlib.util.spec_from_loader(
            fullname, self._loader_for(path), origin=nb_path)
    
# Register the notebook finder on the import system.  Finders installed by an
# earlier run of this cell are dropped first, so re-running the cell neither
# stacks duplicates on sys.meta_path nor keeps a stale class definition active.
sys.meta_path[:] = [
    finder for finder in sys.meta_path
    if (type(finder).__module__, type(finder).__name__)
    != (NotebookFinder.__module__, NotebookFinder.__name__)
]
sys.meta_path.append(NotebookFinder())

In [2]:
import classifierModuleshallowcopy
import torch
import torch.nn as nn
from torch.autograd import Variable   
from torch import optim
import numpy as np

# Extend the module search path with the sibling Preprocessor directory
# (presumably where format_module and preprocessorModule live — confirm).
sys.path.append("../Preprocessor")
import format_module
# Bind the review database currently attached to FormattedReview.
# NOTE(review): `reviewDB` is re-bound to a fresh format_module.ReviewDB in the
# model-setup cell further down, so this value is only used until then.
reviewDB = format_module.FormattedReview.reviewDB
import preprocessorModule

import rnn
import naivebayesian
import cnn
import conclude
import mlp

importing Jupyter notebook from classifierModuleshallowcopy.ipynb
importing Jupyter notebook from rnn.ipynb




In [3]:
# Hyper-parameters for this notebook.
# input_size and batch_size are handed to the classifier constructor and
# learning_rate to the Adam optimizer in the setup cell below.
input_size = 100  # word2vec k size
batch_size = 100
n_epochs = 40
learning_rate = 5e-3

In [4]:
# Attach the training review database to FormattedReview before any
# formatted reviews are loaded.
reviewDB = format_module.ReviewDB("../Preprocessor/pkl/train")
format_module.FormattedReview.setDB(reviewDB)

# Only the first 100 stored formatted reviews are kept.
FRlist = classifierModuleshallowcopy.load_object(
    "../Preprocessor/pkl/save_formatted_review_train.pkl"
)[:100]

# NOTE(review): the meaning of the trailing True flag is defined by
# classifierModule, which is not visible here — confirm.
model = classifierModuleshallowcopy.classifierModule(
    input_size,
    batch_size,
    FRlist,
    "./models/test_model.mdl",
    True,
)

optimizer = optim.Adam(model.parameters(), lr=learning_rate)
# Per-class rescaling weights of 1 and 6 for the cross-entropy loss.
criterion = nn.CrossEntropyLoss(weight=torch.FloatTensor([1, 6]))

In [5]:
def spamFilterModule(train_or_not, input_excel_file_path, input_excel_file_for_validation_path=None,
                     sensitivity=0.5, run_mode='default'):
    """Train the classifier, or run it on a test sheet and print a report.

    Uses the notebook-level globals ``reviewDB`` and ``model``.

    Args:
        train_or_not: Truthy selects training, falsy selects inference.
        input_excel_file_path: Excel file passed to the preprocessor
            (training data when training, test data otherwise).
        input_excel_file_for_validation_path: Excel file with validation
            data; training is aborted with a message when it is None.
        sensitivity: Forwarded to ``train_net`` when training.
            NOTE(review): the inference branch passes a fixed 0.5 to
            get_accuracy and ignores this value — confirm whether it
            should be used there.
        run_mode: Forwarded to ``train_net`` (training) or to ``model``
            (inference).

    Returns:
        None.  In inference mode the overall accuracy percentage and one
        line per review (model output column 1, target, per-row accuracy)
        are printed; rows whose target is truthy are indented.
    """
    if train_or_not:
        if input_excel_file_for_validation_path is None:
            print("no excel for validation.. abort training")
            return
        formatted_review_list_for_training = preprocessorModule.preprocessModule(
            input_excel_file_path, reviewDB, "train")
        formatted_review_list_for_validating = preprocessorModule.preprocessModule(
            input_excel_file_for_validation_path, reviewDB, "validation")
        # NOTE(review): train_net is not defined in the cells visible here; it
        # has to exist in the notebook namespace before this branch is used.
        train_net(formatted_review_list_for_training, formatted_review_list_for_validating,
                  sensitivity, run_mode)
        return

    formatted_review_list = preprocessorModule.preprocessModule(input_excel_file_path, reviewDB, "test")
    out = model(formatted_review_list, None, run_mode)
    tg = classifierModuleshallowcopy.get_targets(formatted_review_list, model)
    # NOTE(review): the encoder result is never used below; the call is kept
    # only in case it matters for model state — confirm and remove.
    _, formatted, _ = model.encoder(formatted_review_list)

    print()
    print("############# print test accuracy and infer results for each ##############")
    overall = classifierModuleshallowcopy.get_accuracy(out, tg, 0.5)
    print("%5.3f%%" % (100 * overall / len(formatted_review_list)))

    # Walk over the rows that actually exist.  A fixed count of 500 raised
    # IndexError on smaller test sets and skipped rows on larger ones.
    for i in range(min(len(out), len(tg))):
        acc = classifierModuleshallowcopy.get_accuracy(out[i:i+1], tg[i:i+1], 0.5)
        if tg.data[i]:
            print("            ", out.data[i, 1], tg.data[i], acc)
        else:
            print(out.data[i, 1], tg.data[i], acc)

In [6]:
# Inference-only run on the test workbook; prints the accuracy report.
spamFilterModule(
    train_or_not=False,
    input_excel_file_path="../Preprocessor/Commonreviews_snuproject _test.xlsx",
)

processing 0 th unit...
adding review to DB...
=> Postag initiated
postagging [komoran] 0 ith unit...
processing word embedding...
processing formatted review...
saving formatted review...

############# print test accuracy and infer results for each ##############
64.264%
0.5428406000137329 0 0
0.4562237560749054 0 1
0.5368159413337708 0 0
0.5855391025543213 0 0
             0.4953310191631317 1 0
0.4748134911060333 0 1
0.4389099180698395 0 1
0.46217024326324463 0 1
0.523930013179779 0 0
0.4314444959163666 0 1
0.4223302900791168 0 1
0.41480475664138794 0 1
             0.47989100217819214 1 0
             0.4854913651943207 1 0
0.4556526839733124 0 1
0.5261133313179016 0 0
             0.5103979706764221 1 1
0.5191811323165894 0 0
0.44691839814186096 0 1
0.47286146879196167 0 1
0.538532018661499 0 0
0.4651523232460022 0 1
0.46222981810569763 0 1
0.4340062439441681 0 1
0.4760284721851349 0 1
0.5305610299110413 0 0
0.47800731658935547 0 1
0.4811856150627136 0 1
0.4823729395866394 0 1
0.

0.5122172236442566 0 0
0.49731701612472534 0 1
0.4352349638938904 0 1
0.45432841777801514 0 1
0.4871222674846649 0 1
0.5359777212142944 0 0
0.43213579058647156 0 1
0.49823054671287537 0 1
             0.537956953048706 1 1
             0.5244581699371338 1 1
0.5455837249755859 0 0
0.42213186621665955 0 1
0.42177245020866394 0 1
0.410987913608551 0 1
0.44640904664993286 0 1
0.5083188414573669 0 0
0.45856809616088867 0 1
0.45846888422966003 0 1
0.4486788809299469 0 1
0.5128364562988281 0 0
             0.46863311529159546 1 0
0.52371746301651 0 0
0.5183959007263184 0 0
0.408786803483963 0 1
0.41491031646728516 0 1
0.4343656301498413 0 1
0.4475230276584625 0 1
0.4852852523326874 0 1
0.49402734637260437 0 1
             0.4254184067249298 1 0
0.47279486060142517 0 1
             0.4828529953956604 1 0
0.49161961674690247 0 1
0.4793287515640259 0 1
0.47694483399391174 0 1
0.5063652396202087 0 0
0.4589933156967163 0 1
0.5270665884017944 0 0
0.3942972719669342 0 1
0.5227900147438049 0 0
     