In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply the 'bmh' style sheet to every figure created after this point.
mpl.style.use('bmh')

In [2]:
import torch
import attr
import pandas as pd

from textblob import TextBlob
from cached_property import cached_property
from annoy import AnnoyIndex
from itertools import combinations
from tqdm import tqdm_notebook
from scipy.spatial.distance import cosine

from sent_order.models.kt_regression import Sentence, SentenceEncoder

In [3]:
# Load the trained sentence encoder checkpoint.
# NOTE: torch.load unpickles the file, which can execute arbitrary code;
# only load checkpoints that come from a trusted source.
sent_encoder = torch.load(
    '../../data/models/new/kt-reg/sent_encoder.366.bin',
    # Keep every storage on the CPU regardless of the device it was saved
    # from. A {'cuda:0': 'cpu'} mapping only covers the first GPU and fails
    # on a CPU-only machine for checkpoints saved from any other device.
    map_location=lambda storage, loc: storage,
)

In [4]:
@attr.s
class Text:
    """A text document with lazily computed sentence segmentation."""

    # Full, unprocessed text of the document (handed to TextBlob as-is).
    raw = attr.ib()

    @classmethod
    def from_path(cls, path, encoding='utf-8'):
        """Build a ``Text`` from the contents of a file.

        Args:
            path: Location of the text file to read.
            encoding: Codec used to decode the file. Defaults to UTF-8 so
                that reading does not depend on the platform's locale
                (the default when ``open`` is called without ``encoding``).

        Returns:
            A new instance whose ``raw`` is the decoded file contents.
        """
        with open(path, encoding=encoding) as fh:
            return cls(fh.read())

    @cached_property
    def blob(self):
        """``TextBlob`` wrapping ``raw``; built on first access, then cached."""
        return TextBlob(self.raw)

    def sentence_variables(self):
        """Yield one encoder input per sentence of the text.

        Each sentence in ``self.blob.sentences`` is reduced to a list of its
        tokens, wrapped in a ``Sentence``, and yielded as the result of
        ``Sentence.variable()``.
        """
        for blob_sent in self.blob.sentences:
            yield Sentence(list(blob_sent.tokens)).variable()

In [8]:
# Load the first novel from disk.
# NOTE(review): the `gg` prefix does not match the file name
# (invisible-man.txt) — possibly a leftover name; confirm before renaming.
gg = Text.from_path('../../data/novels/invisible-man.txt')

In [9]:
# Encode every sentence of `gg` with the loaded sentence encoder.
# NOTE(review): later cells call len() on the result and index it by
# sentence position, so it is presumably one vector per sentence — confirm.
gg_sents = sent_encoder(gg.sentence_variables())

In [10]:
# Load the second novel (sun-also-rises.txt) from disk.
sar = Text.from_path('../../data/novels/sun-also-rises.txt')

In [11]:
# Encode every sentence of `sar` with the same sentence encoder, so the two
# novels' sentence vectors come from the same model.
sar_sents = sent_encoder(sar.sentence_variables())

In [12]:
# Approximate nearest-neighbour index over the `gg` sentence vectors.
# 1000 is the vector length Annoy will expect for every item; presumably it
# matches the sentence encoder's output size — confirm against the model.
# The metric is spelled out because relying on Annoy's implicit default
# ('angular') is deprecated.
gg_idx = AnnoyIndex(1000, metric='angular')

In [13]:
# Register each `gg` sentence vector in the index under its sentence position,
# so ids returned by the index can be used directly on gg.blob.sentences.
for i in range(len(gg_sents)):
    vector = gg_sents[i].data.tolist()
    gg_idx.add_item(i, vector)

In [14]:
# Build the index once all items are added; per Annoy's API the argument is
# the number of trees in the forest.
gg_idx.build(10)

True

In [19]:
# Sanity check: query the index with one of its own vectors (sentence 10) and
# ask for the 10 closest items with their distances. The first hit should be
# sentence 10 itself.
gg_idx.get_nns_by_vector(gg_sents[10].data.tolist(), 10, include_distances=True)

([10, 4744, 10870, 3664, 2178, 7620, 4120, 8510, 844, 611],
 [0.0,
  0.8908292055130005,
  0.9553898572921753,
  0.9601327180862427,
  0.9620949625968933,
  0.9629205465316772,
  0.966036319732666,
  0.9708313345909119,
  0.9754887819290161,
  0.9767999649047852])

In [21]:
# Show the text of the query sentence used in the sanity check above.
gg.blob.sentences[10]

Sentence("It is sometimes advantageous to be unseen, although it is most often rather wearing on the nerves.")

In [22]:
# Show the text of sentence 4744 for comparison with sentence 10.
gg.blob.sentences[4744]

Sentence("It was a most painful position, for at the same time, Mary reminded me constantly that something was expected of me, some act of leadership, some newsworthy achievement; and I was torn between resenting her for it and loving her for the nebulous hope she kept alive.")

In [23]:
# For every `sar` sentence, look up its 10 nearest `gg` sentences and record
# (sar_id, gg_id, distance) for each neighbour whose distance is strictly
# positive.
matches = []
for sar_id in tqdm_notebook(range(len(sar_sents))):
    query = sar_sents[sar_id].data.tolist()
    gg_ids, ds = gg_idx.get_nns_by_vector(query, 10, include_distances=True)
    matches.extend(
        (sar_id, gg_id, d)
        for gg_id, d in zip(gg_ids, ds)
        if d > 0
    )




In [24]:
# Tabulate the matches: one row per (sar sentence, gg sentence) pair, with
# `d` holding the distance reported by the index.
df = pd.DataFrame(matches, columns=('sar_id', 'gg_id', 'd'))

In [25]:
# Display the 1000 pairs with the smallest distance, closest first.
df.sort_values('d').head(1000)

Unnamed: 0,sar_id,gg_id,d
41072,4117,3200,0.000392
1610,161,3200,0.000392
26089,2616,10822,0.000414
65447,6557,10822,0.000414
34313,3440,10822,0.000414
46742,4685,6646,0.000507
4199,421,7191,0.390053
10784,1080,5642,0.409994
23993,2406,3667,0.445693
44065,4417,5644,0.525096


In [28]:
# Print the text of the 1000 closest pairs, closest first: the `sar` sentence
# on the left, its `gg` neighbour on the right, with a separator after each.
for r in df.sort_values('d').head(1000).itertuples():
    sar_sentence = sar.blob.sentences[r.sar_id]
    gg_sentence = gg.blob.sentences[r.gg_id]
    print(sar_sentence, '|', gg_sentence)
    print('---')

He shook his head. | He shook his head.
---
He shook his head. | He shook his head.
---
“No,” I said. | “No,” I said.
---
“No,” I said. | “No,” I said.
---
“No,” I said. | “No,” I said.
---
“Let’s get out of here. | “Let’s get out of here.
---
“I don’t know,” she said. | “I don’t know,” he said.
---
It’s so simple. | It’s that simple.
---
You can’t do it. | You can’t trust any of them.
---
What do you think of that?”

“I don’t know.”

“That’s it. | What do you think of that?”

“Why, I think it’s fine,” I said, trying to take in the full meaning of his words.
---
It certainly was hot. | It was hot.
---
Then we went up past the old fort and out to the local Syndicat d’Initiative office, where the bus was supposed to start from. | Then I saw the sullen-faced crowd, looking at a building where two white men were totting out a chair in which an old woman sat; who, as I watched, struck at them feebly with her fists.
---
I looked at the count. | I looked at the Exhorter.
---
I looked at them 

---
You cannot compete with bull fighters on their own ground. | You can drink till you blue in the face in here, but I wouldn’t sell you enough to spit through your teeth to take outside.”

“But I’ve got a sick man out in the car.”

“What car?
---
I thought it was accidental and went on. | I knew very little about unions—but most of these men seemed hostile … And before I could answer a fat man with shaggy gray hair leaped to his feet, shouting angrily.
---
“Let’s take a drive. | “Let’s see you.
---
“Let’s take a drive. | “Let’s see you.
---
You don’t care, do you, Bill?”

Bill put his arm around Mike’s shoulder. | You don’t have to worry, son.
---
They were all quiet now, their heads down. | They were fast people, all right.
---
He had a chance to behave so well.”

“He’s probably waiting just outside the door now.”

“Yes. | He had struggled for Brotherhood on a hundred street corners and he thought it would make him more human, but he died like any dog in a road.
---
There was one wi