In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw Seattle pet licence records (one row per issued licence).
df = pd.read_csv(filepath_or_buffer='Seattle_Pet_Licenses.csv')

In [3]:
# Preview the first five rows to check the column layout.
df.head(n=5)

Unnamed: 0,License Issue Date,License Number,Animal's Name,Species,Primary Breed,Secondary Breed,ZIP Code
0,April 19 2003,200097.0,Tinkerdelle,Cat,Domestic Shorthair,,98116
1,February 07 2006,75432.0,Pepper,Cat,Manx,Mix,98103
2,August 31 2012,578859.0,Grey Fox,Cat,Siamese,Mix,98125
3,November 14 2013,832989.0,Hannah,Cat,Domestic Longhair,,98133
4,April 03 2014,433713.0,Daisy,Cat,Domestic Shorthair,,98117


In [4]:
# Keep only the licence rows whose Species is exactly 'Dog'.
df_dog = df.loc[df["Species"].eq('Dog')]

In [6]:
# Distinct primary breeds among licensed dogs, in order of first appearance.
df_dog.loc[:, "Primary Breed"].unique()

array(['Pug', 'Boxer', 'Hound', 'Kuvasz', 'Mix', 'Retriever, Labrador',
       'Dachshund, Miniature Smooth Haired', 'Spaniel, Irish Water',
       'Chihuahua, Short Coat', 'Spaniel, English Springer',
       'Australian Shepherd', 'Schnauzer, Miniature', 'Havanese',
       'Shih Tzu', 'Retriever, Golden', 'Bulldog, French', 'Setter, Irish',
       'Terrier, Cairn', 'Spaniel, American Cocker', 'Rhodesian Ridgeback',
       'Terrier, Fox, Toy', 'Shetland Sheepdog', 'Bearded Collie',
       'Welsh Corgi, Cardigan', 'Beagle', 'Terrier, Staffordshire Bull',
       'Terrier, Soft Coated Wheaten', 'Pomeranian',
       'Welsh Corgi, Pembroke', 'Spaniel', 'Terrier, Airedale',
       'Schipperke', 'Terrier, American Pit Bull', 'Greyhound',
       'Poodle, Standard', 'Portuguese Water Dog', 'Poodle, Miniature',
       'Terrier', 'Poodle, Toy', 'Xoloitzcuintli', 'Brittany',
       'Terrier, American Staffordshire', 'Pointer, German Shorthaired',
       'Terrier, Boston', 'Terrier, Manchester', 'S

In [8]:
# Write one unique dog breed per line. header=False keeps the auto-generated
# column label "0" out of the file, so the text that is later read back as the
# training corpus contains breed names only.
pd.DataFrame(df_dog["Primary Breed"].unique()).to_csv('dog_breeds.csv', index=False, header=False)

In [10]:
!head dog_breeds.csv

Pug
Boxer
Hound
Kuvasz
Mix
"Retriever, Labrador"
"Dachshund, Miniature Smooth Haired"
"Spaniel, Irish Water"
"Chihuahua, Short Coat"
"Spaniel, English Springer"


In [None]:
# Let's give dog breeds a generative treatment: train a character-level RNN on the breed list and sample new, made-up breed names from it

In [11]:
# Read the whole breed list into one string; it is the character-level corpus.
# DataFrame.to_csv writes UTF-8 by default, so read the file back with the same
# encoding instead of relying on the platform's locale default.
with open('dog_breeds.csv', encoding='utf-8') as f:
    file = f.read()

In [12]:
import random
import string
import re

# Vocabulary of the character-level model and its size.
all_characters, n_characters = string.printable, len(string.printable)

In [13]:
chunk_len = 100
file_len = len(file)


def random_chunk():
    '''Return a random slice of ``file`` that is exactly ``chunk_len + 1`` characters long.

    The extra character lets a caller derive an input sequence (all but the
    last character) and a target sequence (all but the first character) that
    are both ``chunk_len`` long.

    Raises:
        ValueError: if ``file`` holds fewer than ``chunk_len + 1`` characters.
    '''
    # random.randint includes both endpoints, so the largest usable start is
    # file_len - (chunk_len + 1). Allowing file_len - chunk_len would return a
    # chunk that is one character short.
    last_start = file_len - chunk_len - 1
    if last_start < 0:
        raise ValueError(
            'file has %d characters; at least %d are needed for one chunk'
            % (file_len, chunk_len + 1)
        )
    start_index = random.randint(0, last_start)
    end_index = start_index + chunk_len + 1
    return file[start_index:end_index]

In [14]:
# Build the Model
import torch
import torch.nn as nn
from torch.autograd import Variable

class RNN(nn.Module):
    """Character-level recurrent model: embedding -> GRU -> linear projection.

    ``forward`` handles one step at a time; its reshapes assume a single
    character index per call.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1):
        super().__init__()
        # Keep the sizes around so init_hidden() and callers can inspect them.
        self.input_size, self.hidden_size = input_size, hidden_size
        self.output_size, self.n_layers = output_size, n_layers

        # Sub-modules are created in the same order as data flows through them.
        self.encoder = nn.Embedding(input_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, n_layers)
        self.decoder = nn.Linear(hidden_size, output_size)

    def forward(self, input, hidden):
        """Advance the model by one step.

        Returns a tuple of (scores reshaped to one row, updated hidden state).
        """
        embedded = self.encoder(input.view(1, -1))
        gru_out, next_hidden = self.gru(embedded.view(1, 1, -1), hidden)
        scores = self.decoder(gru_out.view(1, -1))
        return scores, next_hidden

    def init_hidden(self):
        """Return an all-zero hidden state of shape (n_layers, 1, hidden_size)."""
        zeros = torch.zeros(self.n_layers, 1, self.hidden_size)
        return Variable(zeros)




In [15]:
def char_tensor(string_):
    """Encode ``string_`` as a 1-D long tensor of indices into ``all_characters``.

    The result is wrapped in a ``Variable``. A character that is not part of
    ``all_characters`` makes ``str.index`` raise ``ValueError``.
    """
    indices = torch.zeros(len(string_)).long()
    for position, character in enumerate(string_):
        indices[position] = all_characters.index(character)
    return Variable(indices)

print(char_tensor('abcDEF'))

Variable containing:
 10
 11
 12
 39
 40
 41
[torch.LongTensor of size 6]



In [16]:
def random_training_set():
    """Build one (input, target) pair from a random chunk of the corpus.

    The target is the input shifted one character to the right, so position
    ``i`` of the target is the character that follows position ``i`` of the input.
    """
    text = random_chunk()
    return char_tensor(text[:-1]), char_tensor(text[1:])



In [17]:
# Evaluating
def evaluate(prime_str='A', predict_len=100, temperature=0.8):
    '''Generate text by sampling from the module-level ``decoder`` one character at a time.

    Args:
        prime_str: non-empty seed text. All but its last character are fed
            through the model to build up the hidden state; the last character
            is the first input of the sampling loop.
        predict_len: number of characters to sample after ``prime_str``.
        temperature: positive divisor applied to the model's output scores
            before sampling. Smaller values sharpen the distribution, larger
            values flatten it.

    Returns:
        ``prime_str`` followed by ``predict_len`` sampled characters.

    Raises:
        ValueError: if ``prime_str`` is empty or ``temperature`` is not positive.
    '''
    if not prime_str:
        raise ValueError('prime_str must contain at least one character')
    if temperature <= 0:
        raise ValueError('temperature must be positive')

    hidden = decoder.init_hidden()
    prime_input = char_tensor(prime_str)
    predicted = prime_str

    # Build up the hidden state from every priming character except the last.
    for p in range(len(prime_str) - 1):
        _, hidden = decoder(prime_input[p], hidden)
    inp = prime_input[-1]

    for _ in range(predict_len):
        output, hidden = decoder(inp, hidden)

        scores = output.data.view(-1).div(temperature)
        # Subtracting the maximum before exp() leaves the sampling
        # probabilities unchanged (multinomial normalises its weights) but
        # prevents overflow to inf when temperature is small.
        output_dist = (scores - scores.max()).exp()
        # int() gives a plain Python index whether the sampled entry comes
        # back as a number or as a one-element tensor.
        top_i = int(torch.multinomial(output_dist, 1)[0])

        predicted_char = all_characters[top_i]
        predicted += predicted_char
        inp = char_tensor(predicted_char)

    return predicted

In [18]:
# Training
import time, math

def time_since(since):
    """Format the wall-clock time elapsed since ``since`` as '<minutes>m <seconds>s'.

    ``since`` is a timestamp as returned by ``time.time()``.
    """
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    return '%dm %ds' % (minutes, seconds)

In [19]:
def train(inp, target):
    '''Run one optimisation step on a single character sequence.

    Uses the module-level ``decoder``, ``decoder_optimizer`` and ``criterion``.

    Args:
        inp: 1-D tensor of character indices fed to the model one at a time.
        target: 1-D tensor of the same length; ``target[i]`` is the character
            the model should predict after seeing ``inp[i]``.

    Returns:
        The loss summed over the sequence divided by the sequence length,
        as a Python float.

    Raises:
        ValueError: if ``inp`` is empty or ``inp`` and ``target`` differ in length.
    '''
    # Use the sequence's own length instead of the global chunk_len, so a
    # shorter or longer sequence is handled rather than raising IndexError
    # part-way through the loop.
    n_steps = inp.size(0)
    if n_steps == 0:
        raise ValueError('inp must contain at least one character')
    if target.size(0) != n_steps:
        raise ValueError(
            'inp and target must have the same length (got %d and %d)'
            % (n_steps, target.size(0))
        )

    hidden = decoder.init_hidden()
    decoder.zero_grad()
    loss = 0

    for c in range(n_steps):
        output, hidden = decoder(inp[c], hidden)
        # The model returns one row of scores, so give the criterion a matching
        # batch of one class index. On old PyTorch target[c] already has one
        # element; on newer versions it is 0-dimensional and needs the reshape.
        loss += criterion(output, target[c].view(1))

    loss.backward()
    decoder_optimizer.step()

    # loss.data[0] only works on old PyTorch; newer versions expose .item().
    total_loss = loss.item() if hasattr(loss, 'item') else loss.data[0]
    return total_loss / n_steps

In [20]:
# Hyperparameters
n_epochs = 2000      # number of training iterations (one random chunk each)
print_every = 100    # print progress and a generated sample every N iterations
plot_every=10        # record an averaged loss every N iterations
hidden_size = 100
n_layers = 1
lr = 0.005

decoder = RNN(n_characters, hidden_size, n_characters, n_layers)
decoder_optimizer = torch.optim.Adam(decoder.parameters(), lr=lr)
criterion = nn.CrossEntropyLoss()

start = time.time()
all_losses=[]
loss_avg = 0
n_skipped = 0

for epoch in range(1, n_epochs+1):
    try:
        loss = train(*random_training_set())
    except (ValueError, IndexError) as err:
        # Best effort: skip a chunk that cannot be encoded or is too short,
        # but say so. A bare `except` here would also swallow
        # KeyboardInterrupt and hide every real training failure.
        n_skipped += 1
        if n_skipped == 1:
            print('Skipping unusable training chunk: %r' % (err,))
        continue

    loss_avg += loss

    if epoch % print_every == 0:
        print('[%s (%d %d%%) %.4f]' % (time_since(start), epoch, epoch / n_epochs * 100, loss))
        print(evaluate('R', 100), '\n')

    if epoch % plot_every == 0:
        all_losses.append(loss_avg / plot_every)
        loss_avg = 0

if n_skipped:
    print('Skipped %d of %d iterations because of unusable chunks' % (n_skipped, n_epochs))

[0m 12s (100 5%) 2.0617]
Rul"
"Griff
Setan Bulkande Doglier, ins Sheped Estinse"
Rusater"
"Terrierre Mard Aund
"Terrier
Norlis 

[0m 25s (200 10%) 1.1439]
Rerpieva
"Terrier, Redlese
Janese Shephound, Ming Welsh Sheepdog
Sitcan Bull"
Catrie
Petter, Reepdol
 

[0m 37s (300 15%) 1.3358]
R"
"Terrier, For, Minarian Russel
"Terrier, Boodlergel, Gordog
"Terrier, Golden Chad Cancoo
Retriever 

[0m 49s (400 20%) 0.8015]
Rie, Miniature White"
Spanian
Tascher 
Coon
Terman Blue Frane Basse Kker
"Spaniel, Russell"
Iberman F 

[1m 1s (500 25%) 0.8501]
Royer"
Durchound
"Terrier, Geamatter"
"Setter, Mott"
American Ban Sherd, Franca Mountain Kish Watue"
 

[1m 13s (600 30%) 1.0672]
Retre"
Wheater"
Schow fully grown)"
Oldogge Bastar-"Retriever, Shepherd
"Foxhound, Long Coated White" 

[1m 25s (700 35%) 0.5338]
Retriever, Flat-Cate de Ball"
"Terrier, Fox, des arkman"
"Terrier, Staffordshire"
Retire Haired
Austr 

[1m 37s (800 40%) 0.4892]
Redhound
"Retriever, Cheepand White"
Japanese Masaffordshire Bulld

In [21]:
# Sample 300 characters at a moderate temperature.
print(evaluate(prime_str='f', predict_len=300, temperature=0.5))

fulling"
Canadian Eskimo
Caucasian Mastiff
"Terrier, Welsh"
"Terrier, Miniature Smooth Haired"
"Spaniel, English Cocker"
"Terrier, English Corgi, Cardigan"
"Terrier, Miniature Wire Haired"
"Spaniel, American Water"
"Terrier, Border"
Royal Gallego
Shiloh Shepherd
"Dachshund, Miniature Smooth Haired"
"


In [22]:
# Sample 300 characters at a low temperature.
print(evaluate(prime_str='f', predict_len=300, temperature=0.1))

fulling"
Canadian Elkhoundl, English"
Pharaoha Dogooke de Blancher Mascaremanese Mamute
"Terrier, Bedican"
Barbee
"Terricancais Mancais Gascogne
Austalian Sheepdog
"German Shar-Peike
Mascair Dinian
Bascain Dinoo
"
Porton Elg
Oldo English Coated"
"Bulldog, English"
Austaliaffon Veendeen, German"
Basca


In [34]:
# Sample 400 characters at a high temperature.
print(evaluate(prime_str='f', predict_len=400, temperature=0.95))

fghund, Miniature Pinscher
Beaglo
"Coonhound, Treeing Walker"
Alkitz
Carolitte
Saint Hound
"Vizsla, Wire Haired"
Perdeiured
American Mountain Dog
Maremma Shar-Pei
Miniature Highund
Shiba Inca Orchimamo
Cantanian
"Terrier, Soft Coated"
"Retriever, Nova Scotia Duck TollingCoat"
Portuguese Water Dog
"Terrier, English Staffordshire"
"Terrier, Border"
Royal Galled Shortuesioun
Akita
Retriever
Cotondon
"
