In [1]:
import io, os, sys, types
from IPython import get_ipython
from nbformat import read
from IPython.core.interactiveshell import InteractiveShell

def find_notebook(fullname, path=None):
    """find a notebook, given its fully qualified name and an optional path

    This turns "foo.bar" into "foo/bar.ipynb"
    and tries turning "Foo_Bar" into "Foo Bar" if Foo_Bar
    does not exist.
    """
    name = fullname.rsplit('.', 1)[-1]
    if not path:
        path = ['']
    for d in path:
        nb_path = os.path.join(d, name + ".ipynb")
        if os.path.isfile(nb_path):
            return nb_path
        # let import Notebook_Name find "Notebook Name.ipynb"
        nb_path = nb_path.replace("_", " ")
        if os.path.isfile(nb_path):
            return nb_path
        
class NotebookLoader(object):
    """Module Loader for Jupyter Notebooks"""
    def __init__(self, path=None):
        self.shell = InteractiveShell.instance()
        self.path = path

    def load_module(self, fullname):
        """import a notebook as a module"""
        path = find_notebook(fullname, self.path)

        print ("importing Jupyter notebook from %s" % path)

        # load the notebook object
        with io.open(path, 'r', encoding='utf-8') as f:
            nb = read(f, 4)


        # create the module and add it to sys.modules
        # if name in sys.modules:
        #    return sys.modules[name]
        mod = types.ModuleType(fullname)
        mod.__file__ = path
        mod.__loader__ = self
        mod.__dict__['get_ipython'] = get_ipython
        sys.modules[fullname] = mod

        # extra work to ensure that magics that would affect the user_ns
        # actually affect the notebook module's ns
        save_user_ns = self.shell.user_ns
        self.shell.user_ns = mod.__dict__

        try:
          for cell in nb.cells:
            if cell.cell_type == 'code':
                # transform the input to executable Python
                code = self.shell.input_transformer_manager.transform_cell(cell.source)
                # run the code in themodule
                exec(code, mod.__dict__)
        finally:
            self.shell.user_ns = save_user_ns
        return mod
    
class NotebookFinder(object):
    """Module finder that locates Jupyter Notebooks"""
    def __init__(self):
        self.loaders = {}

    def find_module(self, fullname, path=None):
        nb_path = find_notebook(fullname, path)
        if not nb_path:
            return

        key = path
        if path:
            # lists aren't hashable
            key = os.path.sep.join(path)

        if key not in self.loaders:
            self.loaders[key] = NotebookLoader(path)
        return self.loaders[key]
    
sys.meta_path.append(NotebookFinder())

In [2]:
import torch
import torch.nn as nn
from torch.autograd import Variable   

sys.path.append("../Preprocessor")
import format_module

import rnn

importing Jupyter notebook from rnn.ipynb


In [3]:
import pickle

def save_object(obj, filename):
    with open(filename, 'wb') as output:
        pickle.dump(obj, output, pickle.HIGHEST_PROTOCOL)

def load_object(filename):
    with open(filename, 'rb') as input:
        return pickle.load(input)

In [20]:
input_size = 100  # word2vec k size
batch_size = 10

In [16]:
class classifierModule(nn.Module):
    def __init__(self):
        super(classifierModule, self).__init__()
        self.rnn_model = rnn.RNN_model(input_size)
        #TODO
        
    def encoder(self, formattedList):
        length = len(formattedList)
        contextList = [formattedList[i].context for i in range(length)]

        lengths = torch.LongTensor([len(contextList[i]) for i in range(length)])
        max_len = torch.max(lengths)
        
        data = np.zeros((length, max_len, input_size))

        for i in range(length):
            context = contextList[i]
            if not (context.size == 0):
                data[i, :context.shape[0],:] = context
            else:
                lengths[i] = 1
            i+=1
            
        return self.sort_batch(torch.FloatTensor(data), formattedList, lengths)
        
    def sort_batch(self, context, formatted, seq_len):
        batch_size = context.size(0)
        sorted_seq_len, sorted_idx = seq_len.sort(0, descending = True)
        
        sorted_context = context[sorted_idx]
        sorted_formatted = [formatted[i] for i in sorted_idx]

        for f in sorted_formatted:
            print(len(f.context))
        
        return Variable(sorted_context), sorted_formatted, sorted_seq_len
    
    def resize_input(self, input):
        list_ = list()
        for i in range(0, len(input), batch_size):
            list_.append(input[i:i+batch_size])
        return list_
        
    def forward(self, formatted_list, hidden=None):
        batch_list = self.resize_input(formatted_list)
        
        for bl in batch_list:
            context, formatted, lengths = self.encoder(bl)
        
            print(self.rnn_model(context, lengths))

In [19]:
formatted_list = load_object("../Preprocessor/save_formatted_review.pkl")
test_classifier = classifierModule()

#print(formatted_list[0].context.shape)
test_classifier(formatted_list)

for param in test_classifier.parameters():
     print(type(param.data), param.size())

162
141
137
124
120
112
104
96
94
94
88
86
85
83
83
73
71
70
68
63
55
54
52
49
48
47
46
45
44
43
43
42
41
34
33
33
32
31
28
28
27
27
27
27
26
25
25
25
25
22
22
20
20
20
19
17
16
15
15
15
13
13
12
10
9
8
7
7
6
0
Variable containing:
-0.0749  0.2547 -0.0749 -0.0939  0.0044  0.2277 -0.0597  0.1489 -0.1973  0.0118
-0.0698  0.2462 -0.0908 -0.0529  0.0186  0.2233 -0.0800  0.1419 -0.1765 -0.0335
-0.0633  0.2417 -0.0762 -0.0731  0.0070  0.2399 -0.0730  0.1664 -0.1798 -0.0199
-0.0787  0.2565 -0.0877 -0.0646  0.0097  0.2140 -0.0754  0.1369 -0.1853 -0.0182
-0.0906  0.2470 -0.0854 -0.0767  0.0029  0.1876 -0.0531  0.1167 -0.1892  0.0248
-0.0612  0.2394 -0.0835 -0.0676  0.0081  0.2258 -0.0701  0.1341 -0.1947 -0.0030
-0.0762  0.2321 -0.0869 -0.0628  0.0140  0.2086 -0.0612  0.1201 -0.1831  0.0063
-0.0636  0.2339 -0.0749 -0.0827  0.0086  0.2049 -0.0518  0.1242 -0.1916  0.0162
-0.0976  0.2595 -0.0993 -0.0737 -0.0131  0.1938 -0.0641  0.1365 -0.1722 -0.0169
-0.0865  0.2333 -0.0924 -0.0685  0.0043  0.1908 

In [37]:
a = ["ㄱ", "ㄴ", "ㄷ", "ㄹ", "ㅁ"]
b = [4, 3, 2, 1, 0]
c = [4, 2, 1, 0, 3]

d = [x for _,x in sorted(zip(c,a))]
print(d)
list.reverse(d)
print(d)

['ㄹ', 'ㄷ', 'ㄴ', 'ㅁ', 'ㄱ']
['ㄱ', 'ㅁ', 'ㄴ', 'ㄷ', 'ㄹ']


In [10]:
a = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
a_ = list()
for i in range(0, len(a), 3):
    a_.append(a[i:i+3])
print(a_)

[[1, 2, 3], [4, 5, 6], [7, 8, 9], [10]]
