In [1]:
import numpy as np
from nltk.stem import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import Normalizer

In [23]:
import string

from nltk.tokenize import TreebankWordTokenizer
from nltk.tokenize import word_tokenize
from nltk.tokenize import sent_tokenize
from nltk.util import ngrams
from nltk.tokenize import RegexpTokenizer

from nltk.stem import PorterStemmer

from sklearn.feature_extraction.text import CountVectorizer

class text_processor:

    def __init__(self, remover_function=None, tokenizer_function=None, 
                 cleaning_function=None, stemmer_function=None,
                     vectorizer_function = CountVectorizer()):
        self.remover = remover_function
        self.tokenizer = tokenizer_function
        self.cleaner = cleaning_function
        self.stemmer = stemmer_function
        self.vectorizer = vectorizer_function


        if remover_function == 'no_punctuation':
            self.remover = self.no_punctuation
        if tokenizer_function == 'tk_word':
            self.tokenizer = self.tk_word
        if not tokenizer_function:
            self.tokenizer = self.splitter
        if cleaning_function == 'lowstem':
            self.cleaner = self.lowstem
                
   # cleaning functions

    def lower(self,X):
        sentences = []
        for sentence in X:
            sentences.append(sentence.lower()) 
        return sentences


    def no_punctuation(self,X):
    # remove the punctuation
        pos = []
        for sentence in X:
            for punc in string.punctuation:
                print("sentence",sentence)
                sentence = sentence.replace(punc,'')
            pos.append(sentence)
        return pos
    
 # tokenizer functions   
    
    def tk_word(self,X):
        vocabulary = []
        for x in X:
            vocabulary.append(word_tokenize(x)) 
        return vocabulary        
    
    
    def splitter(self, text):
        """
        Default tokenizer that splits on spaces naively
        """
        return text.split(' ')

   # stemmer function
    

    def stem(self,X):
        stemmed = []
        for word in (X):
            stem_word = stemmer.stem(word)
            stemmed.append(stem_word)
        return stemmed


    
    
    # vectorizing function
    def vectorize(self, X):
        self.vectorizer.fit(X)
        self.columns=self.vectorizer.get_feature_names()
        return self.vectorizer.transform(X).toarray()
        
        
    def fit(self,X):
        clear_text = self.remover(X)
        clear_text = self.lower(clear_text)
#        clear_text = self.stem(clear_text)
        self.matrix = self.vectorize(clear_text)
   

In [27]:
def generator(text):
    
    nlp = text_processor(remover_function='no_punctuation',tokenizer_function = 'tk_word'
                    ,stemmer_function = PorterStemmer,
#                    vectorizer_function=TfidfVectorizer(min_df=0.3, max_df=0.8))
                    vectorizer_function=TfidfVectorizer(min_df=0, max_df=1))
    
    df = pd.read_excel('./data/bukowski1.xlsx')
    X = df['verses']
    pos = [text]
    for x in X:
        print("xbeforepunc",x)
        for punc in string.punctuation:
            x = x.replace(punc,'')
        print("xafter",x)
        pos.append(x)
    nlp.fit(pos)
    pos_matrix = nlp.matrix
    pos_columns = nlp.columns
    lsa = TruncatedSVD(2, algorithm = 'arpack')
    dtm_lsa = lsa.fit_transform(pos_matrix)
    dtm_lsa = Normalizer(copy=False).fit_transform(dtm_lsa)
    pd.DataFrame(lsa.components_.round(5),
                 index = ["component_1","component_2"],columns = pos_columns)
    df3 = pd.DataFrame(dtm_lsa.round(5), index = pos, columns = ["component_1","component_2" ])
    df4 = df3.nlargest(len(df3), 'component_1')
    df4 = df4.reset_index()
    mask1 = (df4['index'] == text)
    n = df4[mask1].index[0]
    df5 = df3.nlargest(len(df3), 'component_2')
    df5 = df5.reset_index()
    mask5 = (df5['index'] == text)
    m = df5[mask5].index[0]

    return (df4.iloc[n+1]['index'])

In [28]:
def verse():
    x = input()
    return generator(x)

In [29]:
verse()

just do it
xbeforepunc Cannot beat the real thing,
xafter Cannot beat the real thing
xbeforepunc  don’t ever get the idea I am a poet you can see me
xafter  don’t ever get the idea I am a poet you can see me
xbeforepunc  at the racetrack any day half drunk
xafter  at the racetrack any day half drunk
xbeforepunc  betting quarters sidewheelers and straight thoroughs
xafter  betting quarters sidewheelers and straight thoroughs
xbeforepunc  but let me tell you there are some women there
xafter  but let me tell you there are some women there
xbeforepunc  who go where the money goes and sometimes when you
xafter  who go where the money goes and sometimes when you
xbeforepunc  look at these whores these onehundreddollar whores
xafter  look at these whores these onehundreddollar whores
xbeforepunc  you wonder sometimes if nature isn’t playing a joke
xafter  you wonder sometimes if nature isn’t playing a joke
xbeforepunc  dealing out so much breast and ass and the way
xafter  dealing out so muc

sentence  at the racetrack any day half drunk
sentence  at the racetrack any day half drunk
sentence  at the racetrack any day half drunk
sentence  at the racetrack any day half drunk
sentence  at the racetrack any day half drunk
sentence  at the racetrack any day half drunk
sentence  at the racetrack any day half drunk
sentence  at the racetrack any day half drunk
sentence  at the racetrack any day half drunk
sentence  at the racetrack any day half drunk
sentence  at the racetrack any day half drunk
sentence  at the racetrack any day half drunk
sentence  at the racetrack any day half drunk
sentence  at the racetrack any day half drunk
sentence  betting quarters sidewheelers and straight thoroughs
sentence  betting quarters sidewheelers and straight thoroughs
sentence  betting quarters sidewheelers and straight thoroughs
sentence  betting quarters sidewheelers and straight thoroughs
sentence  betting quarters sidewheelers and straight thoroughs
sentence  betting quarters sidewheelers a

sentence  drinking Old Grandad
sentence  drinking Old Grandad
sentence  drinking Old Grandad
sentence  drinking Old Grandad
sentence  drinking Old Grandad
sentence  drinking Old Grandad
sentence  drinking Old Grandad
sentence  drinking Old Grandad
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  and she said
sentence  what’s

sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  impossibly
sentence  listen he said you ever seen a bunch of crabs in axa0 bucket
sentence  listen he said you ever seen a bunch of crabs in axa0 bucket
sentence  listen he said you ever seen a bunch of crabs in axa0 bucket
sentence  listen he said you ever seen a bunch of crabs in axa0 bucket
sentence  listen he said you ever seen a bun

sentence  are they
sentence  are they
sentence  are they
sentence  are they
sentence  are they
sentence  are they
sentence  are they
sentence  are they
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  guarenteed this
sentence  is it 

sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine
sentence  16 hours a day in a coal mine


sentence  nothing but little Japanese prints on the wall
sentence  nothing but little Japanese prints on the wall
sentence  nothing but little Japanese prints on the wall
sentence  nothing but little Japanese prints on the wall
sentence  nothing but little Japanese prints on the wall
sentence  nothing but little Japanese prints on the wall
sentence  nothing but little Japanese prints on the wall
sentence  nothing but little Japanese prints on the wall
sentence  nothing but little Japanese prints on the wall
sentence  nothing but little Japanese prints on the wall
sentence  nothing but little Japanese prints on the wall
sentence  all those tiny people sitting by red rivers
sentence  all those tiny people sitting by red rivers
sentence  all those tiny people sitting by red rivers
sentence  all those tiny people sitting by red rivers
sentence  all those tiny people sitting by red rivers
sentence  all those tiny people sitting by red rivers
sentence  all those tiny people sitting by red ri

sentence  the German air ace
sentence  the German air ace
sentence  the German air ace
sentence  the German air ace
sentence  the German air ace
sentence  the German air ace
sentence  the German air ace
sentence  the German air ace
sentence  the German air ace
sentence  the German air ace
sentence  the German air ace
sentence  the German air ace
sentence  the German air ace
sentence  the German air ace
sentence  hed shot down 80 of their best
sentence  hed shot down 80 of their best
sentence  hed shot down 80 of their best
sentence  hed shot down 80 of their best
sentence  hed shot down 80 of their best
sentence  hed shot down 80 of their best
sentence  hed shot down 80 of their best
sentence  hed shot down 80 of their best
sentence  hed shot down 80 of their best
sentence  hed shot down 80 of their best
sentence  hed shot down 80 of their best
sentence  hed shot down 80 of their best
sentence  hed shot down 80 of their best
sentence  hed shot down 80 of their best
sentence  hed shot d

sentence  perhaps three or four a
sentence  perhaps three or four a
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  year
sentence  the problem was that each
sentence  the problem was that each
sentence  the problem was that each
sentence  the problem was that each
sentence  the problem was that each
sentence  the problem was that each
sentence  the problem was that each
sentence  the problem was that each
sentence  the problem was that each
sentence  the problem was that each
sentence  the problem was that each
sentence  the problem was that each
sentence  the proble

sentence  volition
sentence  volition
sentence  volition
sentence  volition
sentence  volition
sentence  volition
sentence  volition
sentence  volition
sentence  volition
sentence  volition
sentence  volition
sentence  I decided then I should take a whole week
sentence  I decided then I should take a whole week
sentence  I decided then I should take a whole week
sentence  I decided then I should take a whole week
sentence  I decided then I should take a whole week
sentence  I decided then I should take a whole week
sentence  I decided then I should take a whole week
sentence  I decided then I should take a whole week
sentence  I decided then I should take a whole week
sentence  I decided then I should take a whole week
sentence  I decided then I should take a whole week
sentence  I decided then I should take a whole week
sentence  I decided then I should take a whole week
sentence  I decided then I should take a whole week
sentence  I decided then I should take a whole week
sentence  I

sentence  the starch mountains begin out in the willow
sentence  the starch mountains begin out in the willow
sentence  the starch mountains begin out in the willow
sentence  the starch mountains begin out in the willow
sentence  the starch mountains begin out in the willow
sentence  the starch mountains begin out in the willow
sentence  the starch mountains begin out in the willow
sentence  the starch mountains begin out in the willow
sentence  the starch mountains begin out in the willow
sentence  the starch mountains begin out in the willow
sentence  the starch mountains begin out in the willow
sentence  the starch mountains begin out in the willow
sentence  the starch mountains begin out in the willow
sentence  the starch mountains begin out in the willow
sentence  the starch mountains begin out in the willow
sentence  the starch mountains begin out in the willow
sentence  the starch mountains begin out in the willow
sentence  the starch mountains begin out in the willow
sentence  

sentence  then its best to go back to the bars the same bars
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sentence  wooden stale merciless green
sen

sentence  its the worst
sentence  its the worst
sentence  its the worst
sentence  its the worst
sentence  its the worst
sentence  its the worst
sentence  its the worst
sentence  its the worst
sentence  its the worst
sentence  its the worst
sentence  its the worst
sentence  its the worst
sentence  its the worst
sentence  its the worst
sentence  its the worst
sentence  its the worst
sentence  its the worst
sentence  its the worst
sentence  its the worst
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in madhouses
sentence  in ma

sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
sentence  replacing outdated bullets
s

sentence  the window and in a million rooms
sentence  the window and in a million rooms
sentence  the window and in a million rooms
sentence  the window and in a million rooms
sentence  the window and in a million rooms
sentence  the window and in a million rooms
sentence  the window and in a million rooms
sentence  the window and in a million rooms
sentence  the window and in a million rooms
sentence  the window and in a million rooms
sentence  the window and in a million rooms
sentence  the window and in a million rooms
sentence  lovers lie entwined and lost
sentence  lovers lie entwined and lost
sentence  lovers lie entwined and lost
sentence  lovers lie entwined and lost
sentence  lovers lie entwined and lost
sentence  lovers lie entwined and lost
sentence  lovers lie entwined and lost
sentence  lovers lie entwined and lost
sentence  lovers lie entwined and lost
sentence  lovers lie entwined and lost
sentence  lovers lie entwined and lost
sentence  lovers lie entwined and lost
sent

sentence  selfishness unjust judgments murder
sentence  selfishness unjust judgments murder
sentence  selfishness unjust judgments murder
sentence  selfishness unjust judgments murder
sentence  selfishness unjust judgments murder
sentence  selfishness unjust judgments murder
sentence  selfishness unjust judgments murder
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
sentence  They dont make it
senten

' and chased me under the bed'