In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow import keras

%matplotlib inline

from google.colab import drive
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive


In [0]:
def read_txt(PATH, Ln=600):
  '''
  Cut raw texts in to chunks of 600 characters.
  '''

  txt = (open(PATH, 'r')).read()
  txt = txt.replace('\n', ' ')
  l_1 = []
  for i in range(len(txt)//Ln):
    l_1.append(txt[i*Ln:(i+1)*Ln])
  return l_1

In [0]:
def replace_broken_words(df, author):
  '''
  Broken words at the beginnig and the end of each chunk are removed.
  '''

  l1 = []
  for t in df['text']:
    l2 = []
    for i, v in enumerate(t):
      if v == ' ':
        l2.append(i)
    s = l2[0]
    e = l2[-1]
    l1.append(t[s:e])
  
  df1 = pd.DataFrame(l1)
  df1['author'] = author
  df1.rename(columns={0:'text'}, inplace=True)
  return df1

In [0]:
# Data Preparation

PATH = './gdrive/My Drive/DL/Style/Nabokov-all.txt'
natxt = read_txt(PATH)

PATH2 = './gdrive/My Drive/DL/Style/Austen-all.txt'
autxt = read_txt(PATH2)

PATH3 = './gdrive/My Drive/DL/Style/Dumas-all.txt'
dutxt = read_txt(PATH3)

dict1 = {'text': natxt, 'author': 'Nabokov'}
dict2 = {'text': autxt, 'author': 'Austen'}
dict3 = {'text': dutxt, 'author': 'Dumas'}


na = pd.DataFrame(dict1)
au = pd.DataFrame(dict2)
du = pd.DataFrame(dict3)

na = replace_broken_words(na, 'Nabokov')
au = replace_broken_words(au, 'Austen')
du = replace_broken_words(du, 'Dumas')

author = [na, au, du]

df = pd.concat(author)

In [0]:
X = df.text.astype('str')

from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

max_words = 10000  # We will keep only the 10000 most common words

tokenizer = Tokenizer(num_words=max_words, oov_token='<oov>') 
tokenizer.fit_on_texts(X) 
sequences = tokenizer.texts_to_sequences(X) # list: string - numbers(indices)
word_index = tokenizer.word_index # dict: word - number(index)

# Furthermore, we need to pad the sequences so that their lengths are the same and do not exceed a specific maximum length.
maxlen = 256
X = pad_sequences(sequences, maxlen=maxlen, truncating="post")

from sklearn.preprocessing import OneHotEncoder

embeddings_index = {}
gl_PATH = './gdrive/My Drive/DL/NLP/GloVe/glove.6B.200d.txt' 
f = open(gl_PATH)
for line in f:
  values = line.split()
  word = values[0]
  coefs = np.asarray(values[1:], dtype='float32')
  embeddings_index[word] = coefs
f.close()

embedding_dim = 200
embedding_matrix = np.zeros((max_words, embedding_dim))
for word, i in word_index.items(): 
  if i < max_words:
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
      embedding_matrix[i] = embedding_vector

Using TensorFlow backend.


Found 80526 unique tokens.
Shape of data tensor:  (35874, 256)
Shape of label tensor:  (35874, 3)
Found 400000 word vectors.


In [0]:
# Test demo using donor texts, results are stored on github
from keras.models import load_model
model = load_model('./gdrive/My Drive/DL/Style/model_base.h5', custom_objects={'LayerNormalization': LayerNormalization})







Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.








Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where




In [0]:
PATH = './gdrive/My Drive/DL/Style/donor.csv'
dn = pd.read_csv(PATH)

X_ts = dn.text.astype('str')
y_ts = dn.author.astype('category')

tokenizer.fit_on_texts(X_ts) 
sequences = tokenizer.texts_to_sequences(X_ts) 
# word_index = tokenizer.word_index 

X_ts = pad_sequences(sequences, maxlen=maxlen, truncating="post")

y_ts = np.asarray(y_ts)
onehot_encoder = OneHotEncoder(sparse=False)
encoded = y_ts.reshape(len(y_ts), 1)
y_ts = onehot_encoder.fit_transform(encoded)

yhat = model.predict(X_ts)

In [0]:
# CS230/outputs/Rand-donor-text (original)
txt1 = ["She sat at the window of the train, her head thrown back, one leg stretched across to the empty seat before her. The window frame trembled with the speed of the motion, the pane hung over empty darkness, and dots of light slashed across the glass as luminous streaks, once in a while.", 
        "Her leg, sculptured by the tight sheen of the stocking, its long line running straight, over an arched instep, to the tip of a foot in a high-heeled pump, had a feminine elegance that seemed out of place in the dusty train car and oddly incongruous with the rest of her.", 
        "She wore a battered camel's hair coat that had been expensive, wrapped shapelessly about her slender, nervous body. The coat collar was raised to the slanting brim of her hat. A sweep of brown hair fell back, almost touching the line of her shoulders.", 
        "Her face was made of angular planes, the shape of her mouth clear-cut, a sensual mouth held closed with inflexible precision. She kept her hands in the coat pockets, her posture taut, as if she resented immobility, and unfeminine, as if she were unconscious of her own body and that it was a woman's body.", 
        "She sat listening to the music. It was a symphony of triumph. The notes flowed up, they spoke of rising and they were the rising itself, they were the essence and the form of upward motion, they seemed to embody every human act and thought that had ascent as its motive.", 
        "It was a sunburst of sound, breaking out of hiding and spreading open. It had the freedom of release and the tension of purpose. It swept space clean, and left nothing but the joy of an unobstructed effort.", 
        "Only a faint echo within the sounds spoke of that from which the music had escaped, but spoke in laughing astonishment at the discovery that there was no ugliness or pain, and there never had had to be. It was the song of an immense deliverance."]

# CS230/outputs/Rand_117M_10000_Nabokov-All-3.txt
txt2 = ["She sat at the window of the train, sighing as she did that, but then she got up and move to the seat opposite her and sat there too, reclining. The window creaked as it swung into motion, the pane of the window half opened, and there was darkness again, and dots of light slashed across the glass as luminous streaks, once in a while.", 
        "Her leg, sculptured by the tight sheen of the stocking, its long line running straight, now stretched out to an angle, above the knee, to the tip of a foot in a high-heeled pump, had a feminine elegance that seemed out of place in the dusty train car and even more so because of the comical expression on her face.",  
        "The calves of her high-heeled shoes were bare, and her bag was full of sand, wrapped shapelessly about her slender, nervous body. She put on the shoes, and the collar turned out to be long. A sweep of brown hair fell back, touching her left shoulder.",  
        "Her forehead, cheek and all of her arms were those of a babushka, blush seeping into the spectrum of her cheeks, a sensual mouth held closed with inflexible precision. She straightened, fidgeted, brushed her bare arms with her handkerchief, and then her hand reached down, as if just barely, on her knees, for she had grabbed something and her face was quivering over a woman's body.", 
        "In the familiarity of the room, she looked up, relaxed. It was a symphony of triumph. The air was warmly flowing music, notes spoke of rising and they were the rising itself, they were the essence and the form of upward motion, and the closer they came, the more distinct and overwhelming became their conviction that if they attained.",  
        "It was a sunburst of sound, breaking out of hiding and spreading open. It had the freedom of release and the tension of purpose. It swept space clean, and immediately the air, having at once filled with a tempestuous sigh.",
        "The sonorous note of the sound grew and filled with its ominous premonition, but spoke in laughing astonishment at the discovery that there was no ugliness or pain in its every pronouncement. It was the song of an immense deliverance."]


# CS230/outputs/Rand-output-ngram.txt
txt3 = ["She sat at the light of the draw, her one thrown behind, one leg stretched against to the empty seat before her. The light state trembled with the streak of the pass, the skin hung on let dark, and dots of light smashed across the stone so one streaks, once in a while.",
        "Her foot, cut by the thin sheen of the stocking, its large print running straight, else an rounded foot, to the snap of a sum in a dear-heeled up, had a affected poetry that seemed past of time in the gray head van and oddly several with the bed of her.", 
        "She wore a battered mule's little hat that had been expensive, covered shapelessly about her little, emotional one. The face collar was formed to the garbling brim of her jacket. A reflection of do little down back, almost touching the line of her shoulders.", 
        "Her face was made of angular planes, the state of her tongue clear-except, a straight mouth new close with firm care. She kept her hand in the wash pockets, her turn taut, as if she resented standing, and offensive, so if she were dead of her own head and that it was a woman's body.", 
        "She sat ear to the music. It was a brass of skin. The notes flowed knight, they step of back and they were the rising her, they were the case and the form of upward stream, they seemed to connect every hand act and thought that had current how its motive.",  
        "It was a sunburst of road, breaking apparently of destruction and spreading distributed. It had the freedom of release and the tension of reason. It swept field plain, and pink lightweight even the joy of an open crack.",  
        "Merely a dim reply within the sounds check of that out which the arrangement had escaped, without spoke in laughing astonishment at the find that there was none ugliness or spasm, and there not had had to be. It was the lay of an gigantic issue."]

def prepare_inputs(X): 
  tokenizer.fit_on_texts(X) 
  sequences = tokenizer.texts_to_sequences(X) 
  X = pad_sequences(sequences, maxlen=maxlen, truncating="post")
  return X

X1 = prepare_inputs(txt1)
X2 = prepare_inputs(txt2)
X3 = prepare_inputs(txt3)

In [0]:
yhat1 = model.predict(X1)
yhat2 = model.predict(X2)
yhat3 = model.predict(X3)

# The Rand text looks extremely Nabokov-like for the model.
print(yhat1)
print('\n', yhat2)
print('\n', yhat3)

# We should expect a possitive number if the text style is tranferred to be more like Nabokov.
print('\n', yhat1[:, -1] - yhat2[:, -1])
print('\n', yhat1[:, -1] - yhat3[:, -1])


 [[1.9874492e-06 1.5040224e-03 9.9849403e-01]
 [6.0623637e-07 2.0041092e-05 9.9997938e-01]
 [8.2889194e-08 1.6244162e-06 9.9999833e-01]
 [8.3698222e-04 1.6383591e-04 9.9899918e-01]
 [5.3102599e-04 2.1514185e-02 9.7795480e-01]
 [6.2312851e-05 1.2646959e-04 9.9981123e-01]
 [7.2156760e-04 4.0704547e-03 9.9520797e-01]]

 [[2.08121864e-03 2.50375215e-02 9.72881198e-01]
 [2.57652991e-05 1.79159618e-03 9.98182654e-01]
 [1.22583215e-05 7.13620238e-06 9.99980569e-01]
 [5.20651869e-04 6.49833382e-05 9.99414325e-01]
 [3.17843743e-02 7.82148913e-02 8.90000761e-01]
 [9.02135980e-06 5.02097129e-04 9.99488831e-01]
 [1.75791024e-06 1.64208643e-04 9.99834061e-01]]

 [[9.77837626e-05 2.66789142e-02 9.73223329e-01]
 [1.11950875e-07 4.16828925e-06 9.99995708e-01]
 [7.34423338e-07 3.48073104e-06 9.99995828e-01]
 [6.22700536e-05 1.05061314e-04 9.99832630e-01]
 [6.21210262e-02 5.43152587e-03 9.32447493e-01]
 [1.73683584e-05 2.58376695e-05 9.99956846e-01]
 [1.91121435e-04 2.90205772e-03 9.96906817e-01]]

 [ 

In [0]:
pd.DataFrame(y_ts, columns=['Austen', 'Dumas', 'Nabokov']).to_csv(r'./gdrive/My Drive/DL/Style/donor_y.csv', index=False)
pd.DataFrame(yhat, columns=['Austen', 'Dumas', 'Nabokov']).to_csv(r'./gdrive/My Drive/DL/Style/donor_yhat.csv', index=False)