In [26]:
import numpy as np
import sklearn.preprocessing as prep
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from autoencoder_models.VariationalAutoencoder import VariationalAutoencoder
from scipy import sparse
import pandas as pd
from IPython.core.display import Image, display, HTML

In [2]:
docs = sparse.load_npz('../../page_vecs_sparse.npz')

In [3]:
def abs_scale(X_train, X_test):
    ''' Scale by each feature's max value '''
    preprocessor = prep.MaxAbsScaler().fit(X_train)
    X_train = preprocessor.transform(X_train)
    X_test = preprocessor.transform(X_test)
    return X_train, X_test

def get_random_docs(data, batch_size):
    ''' Select a random subset of the docs'''
    random_indexes = np.random.permutation(np.arange(data.shape[0]))[:batch_size]
    return data[random_indexes]

In [4]:
X_train, X_test = abs_scale(docs[:250000,:], docs[250000:,:])

In [5]:
n_samples = int(X_train.shape[0])
training_epochs = 4
batch_size = 512
display_step = 1
n_samples, training_epochs, batch_size, display_step

(250000, 4, 512, 1)

In [6]:
autoencoder = VariationalAutoencoder(n_input = docs.shape[1],
                                     n_hidden = 200,
                                     optimizer = tf.train.AdamOptimizer(learning_rate = 0.001))

In [7]:
for epoch in range(training_epochs):
    avg_cost = 0.
    total_batch = int(n_samples / batch_size)
    # Loop over all batches
    for i in range(total_batch):
        batch_xs = get_random_docs(X_train, batch_size)

        # Fit training using batch data
        cost = autoencoder.partial_fit(batch_xs.toarray())
        # Compute average loss
        avg_cost += cost / n_samples * batch_size

    # Display logs per epoch step
    if epoch % display_step == 0:
        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))

print("Total cost: " + str(autoencoder.calc_total_cost(X_test.toarray())))

Epoch: 0001 cost= 633.131289000
Epoch: 0002 cost= 629.585562000
Epoch: 0003 cost= 628.867468125
Epoch: 0004 cost= 631.037214750
Total cost: 47619.8


- 1024 batch, 2 epoch: total cost: 46129.7
- 512 batch, 4 epoch: total cost: 47619.8

In [22]:
tl = []
for i in range(int(X_train.shape[0]/100)):
    transformed = autoencoder.transform(X_train[i*100:(i+1)*100].toarray())
    tl.append(transformed)
all_transformed = np.vstack(tl)

In [82]:
all_transformed

array([[ -1.55527629e-02,  -4.09963652e-02,  -3.35380528e-03, ...,
          2.28860416e-04,  -4.89446009e-03,  -1.47530641e-02],
       [  8.44246801e-03,  -5.38064446e-03,   9.92728863e-03, ...,
         -2.16895118e-02,  -3.87283484e-03,  -2.44846512e-02],
       [ -1.63031416e-03,  -3.28462832e-02,   1.43629564e-02, ...,
         -2.37468518e-02,  -4.01131026e-02,   3.57775912e-02],
       ..., 
       [  1.01382751e-02,  -1.16517514e-01,   1.98340937e-02, ...,
          9.38276574e-02,  -9.94200073e-03,   8.33654925e-02],
       [  3.15664150e-03,   2.18842342e-01,   1.90707184e-02, ...,
          2.53360439e-02,  -3.26796956e-02,  -1.33861117e-02],
       [ -5.47616817e-02,   2.80428290e-01,   7.47864693e-02, ...,
          6.90316856e-02,  -1.31352931e-01,   4.06380221e-02]], dtype=float32)

In [29]:
page_ref = pd.read_hdf('../../page_vecs_order_ref.h5')
with pd.HDFStore('../../cookbooks/ref.h5') as store:
    dictionary = store['dictionary']
    metadata = store['metadata']
    volids = store['volids']

# Saving model

In [1170]:
saver = tf.train.Saver(autoencoder.weights)
saver.save(autoencoder.sess, '../../autoencoder-model-13529-200.tf')

'../../autoencoder-model-13529-200.tf'

To restore:

```
autoencoder = VariationalAutoencoder(n_input = 13529,
                                     n_hidden = 200,
                                     optimizer = tf.train.AdamOptimizer(learning_rate = 0.001))
saver = tf.train.Saver(autoencoder.weights)
saver.restore(autoencoder.sess, '../../autoencoder-model-13529-200.tf')
```

In [1169]:
autoencoder.x

<tf.Tensor 'Placeholder:0' shape=(?, 13529) dtype=float32>

# Examine autoencoder dimensions

In [533]:
term_weights = autoencoder.getWeights()
term_weights.shape

(13529, 200)

In [557]:
sortdim[:-10:-1]

8907     0.113686
13380    0.112235
13015    0.106644
9214     0.100868
10726    0.100818
12441    0.099627
38       0.099288
10125    0.098253
6858     0.098244
dtype: float32

### What do the dimensions (roughly) mean

Note that 200 is still a large dimension count for such a tight space, and that values range from negative to positive. Topic modeling or clustering on these dimensions will give a better impression of what they're about.

In [565]:
print_tokens = lambda indices: ", ".join(dictionary.iloc[indices].token.tolist())
for i in range(0, 200):
    sortdim = pd.Series(term_weights[:,i]).sort_values()
    print("Dimension %d is about\t\t" % i, print_tokens(sortdim[:-10:-1].index.values))
    print("Dimension %d is inversely about\t" % i, print_tokens(sortdim[:10].index.values))

Dimension 0 is about		 ajo, siguiente, alentejo, insight, perforation, auberge, brown, eastem, ๑/๒
Dimension 0 is inversely about	 qq, =l, anonymous, appetiser, boar, ratafia, thrift, arugula, liqueﬁed, lancaster
Dimension 1 is about		 combine, many, serve, sprinkle, grate, green, cut, dish, lay
Dimension 1 is inversely about	 add, heat, simmer, water, remove, stir, bring, meet, fare, boil
Dimension 2 is about		 combine, place, grease, remove, woolen, bisquit, shorten, doufu, baking
Dimension 2 is inversely about	 tablespoon, cook, add, stir, heat, thicken, plat, prohibition, austin, cornstarch
Dimension 3 is about		 sprinkle, place, side, follower, remove, barbados, wool, ting, lievre
Dimension 3 is inversely about	 use, add, water, kelley, grub, connect, cuvee, gage, paddle, cook
Dimension 4 is about		 food, ог, centigrade, palmer, jerky, skinny, temperance, fermentation, jaggery
Dimension 4 is inversely about	 have, remove, sterilization, saumon, ﬂeſh, triﬂe, crepe, warehouse, frito

Dimension 76 is about		 place, ft, piedmont, remove, warm, 60g, distributed, lucia, let
Dimension 76 is inversely about	 have, mulberry, lndian, catarrh, fur, patna, urn, carob, nugget, dam
Dimension 77 is about		 preheat, yolk, sprinkle, cream, cheese, grate, sole, crumb, layer
Dimension 77 is inversely about	 teaspoon, cup, tablespoon, shorten, cut, word, combine, sift, soda, knob
Dimension 78 is about		 tablespoon, teaspoon, heat, thicken, cornstarch, brown, debone, muller, stir
Dimension 78 is inversely about	 green, cream, preheat, teaspoonful, serve, white, cupful, nippy, use, cheese
Dimension 79 is about		 cream, yolk, thicken, beat, whip, heavy, arc, combine, heat
Dimension 79 is inversely about	 place, let, water, yeast, rise, lukewarm, cover, knead, dough, stand
Dimension 80 is about		 buzz, c., appendix, global, combine, birch, alta, yield, pav
Dimension 80 is inversely about	 tablespoon, teaspoon, cup, hemp, inadequate, albacore, omelet, cumberland, cookies., agregue
Dimens

Dimension 152 is about		 preheat, place, grave, always, pone, ;§, sopaipillas, let, manicotti
Dimension 152 is inversely about	 have, garniture, mush, fla, poele, val, shakespeare, peapods, suspension, gritty
Dimension 153 is about		 teaspoon, tablespoon, juice, cup, lemon, striped, stuart, escort, fever
Dimension 153 is inversely about	 smooth, cook, pomerol, thicken, milk, use, gun, pub, swede, prik
Dimension 154 is about		 teaspoon, cup, cut, tablespoon, have, juice, posole, ignorance, meat
Dimension 154 is inversely about	 smooth, greengage, preheat, c., heat, malaga, stir, jalapeño, pompadour, southerner
Dimension 155 is about		 serve, combine, sq, cook, side, .v, cut, immune, wehlener
Dimension 155 is inversely about	 add, have, l-, chaud, shrimps, dry, medieval, teaspoon, о, tablespoon
Dimension 156 is about		 reduce, cream, whip, yolk, thicken, sweeten, jerk, stiff, 1-
Dimension 156 is inversely about	 teaspoon, tablespoon, brown, cut, cup, soda, dry, baking, salt, sift
Dimensi

### Related words

In [567]:
from scipy.spatial.distance import pdist, cdist

In [653]:
q = 'sugar'
def similar_words(q, dict_size=1000):
    q_id = dictionary[dictionary.token == q].id.values
    qvec = term_weights[q_id, :]
    # Similar words
    similarities = cdist(qvec, term_weights)
    ranked = pd.Series(similarities[0], index=dictionary.token)
    # Trim to only the #{dict_size} most frequent words
    return ranked[:dict_size].sort_values(ascending=True)[1:]

In [1480]:
for q in ['steak', 'salmon', 'ham']:
    print(q.center(22, '='))
    print(similar_words(q)[:5])

token
short     0.496073
rib       0.498609
skewer    0.500571
breast    0.507584
string    0.508437
dtype: float64
token
asparagus    0.482121
oyster       0.485063
split        0.492362
caper        0.497022
perfect      0.500313
dtype: float64
token
turkey    0.443072
duck      0.467179
stewed    0.470433
stuff     0.472402
short     0.475259
dtype: float64


### Document Similarity

In [994]:
import spacy
nlp = spacy.load('en')

# First, an example of a sentence as a 'document' query
def fake_doc(sentence, target_recipes=True):
    '''
    If target_recipes is true, the fake doc includes some counts for 'cup', teaspoon' 'tablespoon'
    '''
    modifier = 8
    lemmas = [word.lemma_ for word in nlp(sentence)]
    if target_recipes:
        boost = dictionary.token.isin(['teaspoon', 'cup', 'tablespoon']).apply(int).multiply(0.1*len(lemmas)).values
    else:
        boost = np.zeros(len(dictionary))
    per_word_mod = float(modifier)/len(lemmas)
    fake_doc = dictionary.token.isin(lemmas).apply(int).values
    transformed = autoencoder.transform([per_word_mod * fake_doc + boost])
    return transformed

def sentence_query(q):
    qvec = fake_doc(q)
    similarities = cdist(qvec, all_transformed)
    ranked = pd.Series(similarities[0])
    top_pages = ranked[pdfilter].sort_values().index.to_series()
    return top_pages

def page_images(result_index, sample=10):
    arr = extended_page_ref.iloc[result_index].image_url.values
    imagehtml = "".join(["<img style='float:left' src='%s' />" % img for img in arr])
    return HTML(imagehtml)

In [731]:
HT_IMAGE_TEMPLATE = "https://babel.hathitrust.org/cgi/imgsrv/image?id={};seq={};width={}"
extended_page_ref = pd.merge(page_ref,
                             pd.merge(volids,
                                      metadata.rename(columns={'id':'htid'})[['htid', 'year','title', 'rights_attributes']]
                                     )
                            )
extended_page_ref['image_url'] = extended_page_ref.apply(lambda x: HT_IMAGE_TEMPLATE.format(x['htid'], x['page'], 550) if x['rights_attributes'] =='pd' else '', 1)
pdfilter = (extended_page_ref.rights_attributes == 'pd')
extended_page_ref.head()

Unnamed: 0,volid,page,htid,year,title,rights_attributes,image_url
0,1978,7,uc1.$b713642,1970,Wine and food.,und,
1,1978,8,uc1.$b713642,1970,Wine and food.,und,
2,1978,9,uc1.$b713642,1970,Wine and food.,und,
3,1978,10,uc1.$b713642,1970,Wine and food.,und,
4,1978,11,uc1.$b713642,1970,Wine and food.,und,


In [1482]:
q = 'salmon asparagus'
results = sentence_query(q)
img = page_images(results[:10])
img

### Topic modelling on the dimensions

Here, I topic model on the `doc x autoencoder_dim` matrix, then I expand the resulting models to translate the topics of autocoder dimensions into topics of words.

In [999]:
from sklearn.decomposition import LatentDirichletAllocation
lda = LatentDirichletAllocation(n_topics=50, batch_size=1024)

In [1171]:
training_epochs = 1

In [1172]:
for epoch in range(training_epochs):
    avg_score = 0.
    total_batch = int(n_samples / batch_size)
    # Loop over all batches
    for i in range(total_batch):
        batch_xs = get_random_docs(X_train, batch_size)

        # Fit training using batch data
        lda.partial_fit(batch_xs)
        score = lda.score(batch_xs)
        # Compute average log-liklihood
        avg_score += score / n_samples * batch_size
        if i % 50 == 0:
            print("batch %d" % i)
        
    # Display logs per epoch step
    if epoch % display_step == 0:
        print("Epoch:", '%04d' % (epoch + 1), "score=", "{:.9f}".format(avg_score))

batch 0
batch 50
batch 100
batch 150
batch 200
batch 250
batch 300
batch 350
batch 400
batch 450
Epoch: 0001 score= -599095.820911306


In [1158]:
print("Epoch:", '%04d' % (epoch + 1), "score=", "{:.9f}".format(avg_score))

Epoch: 0010 score= -599032.204400121


In [1160]:
def get_topic_terms(i):
    return pd.Series(lda.components_[i], index=dictionary.token).sort_values(ascending=False)

def print_top_topic_terms(i, n=10):
    topics = get_topic_terms(i)
    tokens = topics.index.values[:n]
    print(i, ", ".join(tokens))

for k in range(lda.n_topics):
    print_top_topic_terms(k)

0 worthy, century, food, occasional, book, adapt, include, american, world, devise
1 amazing, и, с, —, appreciative, nd, chmn, wk, newcomer, choco
2 tasteless, acquaint, anxious, tions, dried, h‘, lemonjuice, diﬁerent, tract, tidbit
3 k, f, %, ﬁ, w, tc, t, ч, \, ww
4 colorful, repertoire, reduce, ﬁnishing, prepare, same, surround, suppress, architect, heady
5 put, take, lay, diſh, little, let, ſalt, boil, ſome, ſweet
6 endless, impressive, intelligent, mental, mouthful, myriad, dependent, festive, harmless, sh
7 add, cup, beat, sugar, milk, teaspoon, baking, cream, salt, mix
8 ſtand, ake, pleaſe, cullender, ;", gs, perfumed, pretty, nora, ſweeten
9 this, status, discoloration, prepared, retur, liberal, ubiquitous, magical, offset, reputable
10 afraid, diﬂicult, goodness, except, execution, amusing, persuade, innumerable, some, depart
11 fare, wine, au, meet, occasion, cafe, du, grand, le, society
12 use, be, many, have, other, dry, most, fresh, delicate, cook
13 be, have, food, other, 

In [None]:

print("Total cost: " + str(autoencoder.calc_total_cost(X_test.toarray())))

In [1053]:
def words_for_dim():
pd.Series(expandedweights[:,0], index=dictionary.token).sort_values(ascending=False).index.values

array(['ajo', 'siguiente', 'alentejo', ..., 'penguin', 'but', 'harsh'], dtype=object)

In [1077]:
(lda.components_[0] * expandedweights)

array([[ 0.01689948,  0.06104647, -0.        , ..., -0.        ,
        -0.        ,  0.00662822],
       [ 0.01651793, -0.        , -0.        , ..., -0.        ,
        -0.        ,  0.00584206],
       [ 0.00212098, -0.        ,  0.00900096, ..., -0.        ,
        -0.        ,  0.00104464],
       ..., 
       [ 0.02163254, -0.        ,  0.00540285, ...,  0.00468644,
        -0.        , -0.        ],
       [ 0.01285331,  0.00014013,  0.00353292, ...,  0.01191507,
        -0.        ,  0.01341815],
       [-0.        , -0.        , -0.        , ..., -0.        ,
        -0.        ,  0.00536898]])

In [1097]:
np.hstack=[(expandedweights[:,[i]] * lda.components_[0].T).mean(1) for i in range(0,400)]

[array([ 0.01427825,  0.01395588,  0.001792  , ...,  0.01827718,
         0.01085967,  0.        ]),
 array([  3.96204978e-02,   0.00000000e+00,   0.00000000e+00, ...,
          0.00000000e+00,   9.09492561e-05,   0.00000000e+00]),
 array([ 0.        ,  0.        ,  0.0092405 , ...,  0.00554664,
         0.00362694,  0.        ]),
 array([ 0.        ,  0.        ,  0.        , ...,  0.00214537,
         0.        ,  0.        ]),
 array([ 0.00255109,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.00569769]),
 array([ 0.00541456,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]),
 array([ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.00661448,  0.00072851]),
 array([ 0.03000596,  0.00047128,  0.03207867, ...,  0.00553223,
         0.00691735,  0.00153601]),
 array([ 0.01515207,  0.        ,  0.00933452, ...,  0.        ,
         0.        ,  0.        ]),
 array([ 0.        ,  0.        ,  0.02797264, ...,  0.014331

In [1078]:
lda.components_[0].shape

(400,)

In [None]:
lda

## Clusters words from autoencoder weights

In [89]:
from scipy.cluster import hierarchy

In [172]:
Z = hierarchy.linkage(term_weights, method='ward')

In [530]:
n = 70
c = hierarchy.fcluster(Z, n, criterion='maxclust')
d = pd.Series(c)
for cluster in range(1,n+1):
    wordsample = dictionary.token[d==cluster].iloc[:10].tolist()
    print(cluster, ", ".join(wordsample))

1 c., serving, yield, pkg, owc, pres, afb
2 l, lb, t, mrs., casserole, c, degree, tsp, margarine, t.
3 mix, ingredient, salad, vinegar, celery, mince, dress, dice, lettuce, mustard
4 dry, large, fresh, tomato, taste, garlic, red, peel, ground, clove
5 green
6 cheese, grate, sprinkle, crumb
7 salt, pepper, chop, onion, season, parsley
8 cream, juice, lemon, whip, chill
9 serve
10 combine
11 water, boil
12 add
13 cover, hour, let, stand, quart, jar, night, kettle, morning, next
14 teaspoonful, tablespoonful, cupful
15 cold, soft, dissolve, double, boiler, scald, enough
16 place
17 remove
18 roll, work, round, size, shape, hand, press, sheet, center, edge
19 dough, warm, loaf, rise, yeast, knead, lukewarm, bulk, active, elastic
20 smooth
21 wine, dinner, hold, au, meet, occasion, fare, chateau, wines, grand
22 la, chef, guest, present, hotel, port, champagne, le, society, member
23 y, se, el, que, sal, para, una, con, en, por
24 break, fork, burn, air, manner, appear, wet, touch, careful,

In [531]:
#To do: distributions by year for different clusters

## Cluster on a specific word

Find pages that *exactly* match a query, then use the auto-encoder weights to cluster the pages. e.g. cluster on pages that talk about 'cookies'

In [None]:
from sklearn.cluster import KMeans

In [1483]:
q = "salmon"
qids = dictionary[dictionary.token == q].id.values
matchpages = (X_train[:, qids] > 0).toarray()[:,0]
# Public Domain, so we can view pages. Also, hopefully more esoterica
matchpagespd = (matchpages & pdpagefilter[:250000])
matchdata = all_transformed[matchpagespd,:]

In [1484]:
kmeans = KMeans(n_clusters=10)
Z = kmeans.fit_predict(matchdata)
pd.Series(Z).value_counts()

2    740
1    474
8    211
5    205
7    131
6    114
0     61
3     25
4     16
9     10
dtype: int64

In [1485]:
# After a manual review of the first cookie clusters,
# Trim the clusters that aren't really about cookie recipes,
# or where page lengths are distractingly long
exclude = [] #[4, 6, 7, 3,2,0]
Zfilter = ~np.in1d(Z,exclude)
kmeans = KMeans(n_clusters=40)
Y = kmeans.fit_predict(matchdata[Zfilter])
pd.Series(Y).value_counts()

9     387
39    200
5     161
30     91
36     89
13     88
26     77
14     71
27     68
1      63
8      61
7      60
0      55
32     50
19     45
29     40
21     37
6      34
11     32
3      32
20     31
33     24
23     23
15     22
38     18
35     16
4      15
2      15
24     15
25     11
28     10
12      9
22      7
10      6
34      6
16      5
31      5
18      4
17      3
37      1
dtype: int64

In [1486]:
for cluster in [9, 39, 5, 30, 36]:
    page_ids = extended_page_ref.iloc[:250000][matchpagespd][Zfilter][(Y == cluster)].sample(2).index
    print(cluster)
    display(page_images(page_ids[:2]))

9


39


5


30


36


In [1198]:
Z = hierarchy.linkage(term_weights, method='ward')
n = 20
c = hierarchy.fcluster(Z, n, criterion='maxclust')
d = pd.Series(c)
for cluster in range(1,n+1):
    wordsample = dictionary.token[d==cluster].iloc[:10].tolist()
    print(cluster, ", ".join(wordsample))

array([False, False, False, ..., False, False, False], dtype=bool)

## 'Burrowing' Search

A fun idea for exploring the clusters. Using agglomerative clustering on the public domain pages:

    1. split the tree into four clusters
    2. show examples of each cluster for selection
    3. cut tree 

In [1330]:
Z = hierarchy.linkage(all_transformed[pdpagefilter[:250000]], method='ward')

In [1333]:
all_transformed[pdpagefilter[:250000]].shape

(71720, 200)

In [1362]:
pdpageids = pdpagefilter[:250000][pdpagefilter[:250000]].index.values

In [1351]:
a = hierarchy.cut_tree(Z, n_clusters=[2**i for i in range(2, 15)])

array([[   0,    0,    0, ...,    0,    0,    0],
       [   0,    0,    0, ...,    0,    0,    0],
       [   0,    0,    0, ...,    0,    0,    0],
       ..., 
       [   1,    3,    4, ..., 1717, 2996, 4664],
       [   1,    3,    4, ...,  491,  610,  786],
       [   1,    3,    4, ..., 2059, 3794, 5938]])

In [None]:
a[:,level]

In [1491]:
include = np.array([True]*a.shape[0])
level = 0

In [1493]:
include = (clusters==1)
level += 2
print(level)

2


In [1494]:
clusters = pd.Series(a[:,level], index=pdpageids)

print("Choose a cluster")
for cluster in clusters[include].unique():
    print("Cluster %d, with %d pages" % (cluster, clusters[clusters==cluster].count()))
    sample = clusters[clusters==cluster].sample(4).index
    display(page_images(sample))

Choose a cluster
Cluster 4, with 12766 pages


Cluster 5, with 3420 pages


Cluster 6, with 2960 pages


Cluster 7, with 4023 pages


Cluster 9, with 2678 pages


Cluster 10, with 772 pages


Cluster 11, with 438 pages


Cluster 13, with 1716 pages


In [1385]:
clusters.unique()

array([0, 1, 2, 3, 4, 5, 6, 7])

In [1387]:
clusters[include].unique()

array([3, 4, 5])

In [1371]:
page_images(clusters[clusters==cluster].sample(2).index)

In [1350]:
[2**i for i in range(2, 15)]

[4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384]

In [1340]:
Z[:100, ].shape

ValueError: Linkage 'Z' uses non-singleton cluster before it is formed.

In [1336]:
a.shape

(71720, 1)

In [None]:
n = 20
c = hierarchy.fcluster(Z, n, criterion='maxclust')
d = pd.Series(c)
for cluster in range(1,n+1):
    wordsample = dictionary.token[d==cluster].iloc[:10].tolist()
    print(cluster, ", ".join(wordsample))

## Reconstruct Pages

In [984]:
page_sample = extended_page_ref.query('rights_attributes=="pd"').sample(1)
page_images(page_sample.index)

In [987]:
encoded_page = all_transformed[page_sample.index,:]
regenerated_page = pd.Series(autoencoder.generate(encoded_page)[0], index=dictionary.token)
" ".join(regenerated_page[regenerated_page > np.random.rand(len(dictionary))].index.values)

'remove let tongue hotel few europe selection harder ped loop surgeon harmonize'