In [51]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import sigmoid_kernel
from sklearn.metrics.pairwise import linear_kernel
import ipywidgets as widgets
from IPython.display import display, clear_output
from sklearn.metrics.pairwise import cosine_similarity

In [22]:
# Relative paths of the breed-characteristic CSV files
data_cat, data_dog = 'cat_breed_characteristics.csv', 'dog_breed_characteristics.csv'

# Read each file into its own DataFrame (cat first, then dog)
df_cat, df_dog = (pd.read_csv(csv_path) for csv_path in (data_cat, data_dog))

In [23]:
# Preview the first cat row to see which columns the raw file provides
df_cat.head(1)

Unnamed: 0,BreedName,AltBreedName,LapCat,Fur,MaleWtKg,Temperment,AvgKittenPrice,MalaysiaPopularity,PopularityUS2017
0,Abyssinian,,Lap,Short,4.0,"Active, Energetic, Independent, Intelligent, G...",1050.0,,7.0


In [24]:
# Preview the first dog row to see which columns the raw file provides
df_dog.head(1)

Unnamed: 0,BreedName,AltBreedName,Group1,Group2,MaleWtKg,Temperment,AvgPupPrice,Intelligence,Watchdog,MalaysiaGuardedDog,MalaysiaProhibitedDog,MalaysiaPopularity,PopularityUS2017
0,Affenpinscher,,Toy,Companion,4.0,"Active, Adventurous, Curious, Fun-loving, Play...",1000.0,37.0,1.0,,,,147.0


In [25]:
# Drop columns not used
# errors='ignore' keeps this cell re-runnable: on a second execution the
# columns are already gone and a plain drop() would raise KeyError.
df_cat = df_cat.drop(
    columns=['AltBreedName', 'LapCat', 'Fur', 'MaleWtKg', 'AvgKittenPrice',
             'MalaysiaPopularity', 'PopularityUS2017'],
    errors='ignore',
)

In [26]:
# Drop columns not used
# errors='ignore' keeps this cell re-runnable: on a second execution the
# columns are already gone and a plain drop() would raise KeyError.
df_dog = df_dog.drop(
    columns=['AltBreedName', 'Group1', 'Group2', 'MaleWtKg', 'AvgPupPrice',
             'Intelligence', 'Watchdog', 'MalaysiaGuardedDog',
             'MalaysiaProhibitedDog', 'MalaysiaPopularity', 'PopularityUS2017'],
    errors='ignore',
)

In [27]:
# Confirm which columns remain in the cat frame after the drop
df_cat.head(1)

Unnamed: 0,BreedName,Temperment
0,Abyssinian,"Active, Energetic, Independent, Intelligent, G..."


In [28]:
# Confirm which columns remain in the dog frame after the drop
df_dog.head(1)

Unnamed: 0,BreedName,Temperment
0,Affenpinscher,"Active, Adventurous, Curious, Fun-loving, Play..."


In [9]:
# Pet features
# Temperament text per breed, one Series per species.
# NOTE(review): these names are not referenced again in the cells visible
# here - confirm whether they are still needed.
cat_features = df_cat.Temperment
dog_features = df_dog.Temperment

In [29]:
# Check null values
# Count breeds whose temperament text is missing. isnull().sum() on a Series
# is already a scalar, so one sum() is enough. The dog label now says "null"
# as well, matching the cat label and what is actually being counted.
print(f'Total null values cat dataframe: {df_cat.Temperment.isnull().sum()}\nTotal null values dog dataframe: {df_dog.Temperment.isnull().sum()}')

Total null values cat dataframe: 1
Total values dog dataframe: 1


In [30]:
# Clean dataframes from null values
# Breeds without a temperament description cannot be vectorised, so drop them.
# The index is renumbered afterwards so that row labels equal row positions:
# later cells store df_cat.index values and use them to address rows of the
# similarity matrix, which is positional. Without the reset, every breed after
# a dropped row would point at the wrong matrix row.
df_cat = df_cat.dropna(subset=['Temperment']).reset_index(drop=True)
df_dog = df_dog.dropna(subset=['Temperment']).reset_index(drop=True)

In [31]:
# Check null values after dropna
# Both counts are expected to be 0 here. isnull().sum() on a Series is already
# a scalar, so one sum() is enough. The dog label now says "null" as well,
# matching the cat label and what is actually being counted.
print(f'Total null values cat dataframe: {df_cat.Temperment.isnull().sum()}\nTotal null values dog dataframe: {df_dog.Temperment.isnull().sum()}')

Total null values cat dataframe: 0
Total values dog dataframe: 0


In [32]:
# Create matrix of TF-IDF features / Cat
vectorizer_cat = TfidfVectorizer(use_idf=True)

# Guard against missing text so the vectoriser only ever sees strings
df_cat['Temperment'] = df_cat['Temperment'].fillna('')
cat_matrix = vectorizer_cat.fit_transform(df_cat['Temperment'])

# get_feature_names() was removed in scikit-learn 1.2; prefer the replacement
# and fall back to the old method on versions that predate it.
if hasattr(vectorizer_cat, 'get_feature_names_out'):
    cat_vocabulary = vectorizer_cat.get_feature_names_out().tolist()
else:
    cat_vocabulary = vectorizer_cat.get_feature_names()

# NOTE: the printed vocabulary contains misspelled tokens (e.g. 'affectioante',
# 'genlte'); each one is treated as a separate feature from its correct spelling.
print(cat_vocabulary)

['active', 'adaptable', 'affectioante', 'affectionate', 'agile', 'calm', 'clever', 'curious', 'demanding', 'dependent', 'devoted', 'easygoing', 'energetic', 'friendly', 'genlte', 'gentle', 'independent', 'inquisitive', 'intelligent', 'interactive', 'lively', 'loving', 'loyal', 'mischevious', 'outgoing', 'patient', 'peaceful', 'playful', 'quiet', 'rambunctious', 'sedate', 'sensible', 'sensitive', 'shy', 'six', 'sociable', 'social', 'sweet', 'tempered', 'tenacious', 'toed']


In [33]:
# Sparse matrix
# Show the repr of the fitted TF-IDF matrix (one row per breed, one column per
# vocabulary term) to check its shape and number of stored values.
cat_matrix

<66x41 sparse matrix of type '<class 'numpy.float64'>'
	with 356 stored elements in Compressed Sparse Row format>

In [34]:
# The lower the IDF value of a word, the less unique it is to any particular document.
# Idf weights
# get_feature_names() was removed in scikit-learn 1.2; prefer the replacement
# and fall back to the old method on versions that predate it.
if hasattr(vectorizer_cat, 'get_feature_names_out'):
    idf_terms = vectorizer_cat.get_feature_names_out()
else:
    idf_terms = vectorizer_cat.get_feature_names()

# One row per vocabulary term, holding the IDF weight learned during fitting
df_idf = pd.DataFrame(vectorizer_cat.idf_, index=idf_terms, columns=["IDF Value"])

# sort ascending
df_idf.sort_values(by=['IDF Value'])

Unnamed: 0,IDF Value
playful,1.376051
intelligent,1.541131
active,1.837397
affectionate,1.908856
curious,1.985817
social,2.069198
gentle,2.11365
easygoing,2.16017
loyal,2.260254
calm,2.496642


In [55]:
# Cosine similarity
# Pairwise similarity between all cat breeds; given a single argument,
# cosine_similarity compares the matrix against itself.
cat_cosine = cosine_similarity(cat_matrix)

In [56]:
# Cat name and index
# Lookup table: breed name -> index label of that breed's row in df_cat
cat_title_index = pd.Series(data=df_cat.index, index=df_cat.BreedName)

In [112]:
def recommend_pets(breed, pet_title, df, cosine=None, top_n=5):
    """Return the breeds most similar to ``breed``.

    Parameters
    ----------
    breed : str
        Name of the breed to find neighbours for.
    pet_title : pandas.Series
        Maps breed name -> index label of that breed's row in ``df``.
    df : pandas.Series
        Breed names, in the same row order as the rows of ``cosine``.
    cosine : array-like, optional
        Square similarity matrix; row/column ``i`` belongs to the ``i``-th row
        of ``df``. Defaults to the notebook-level ``cat_cosine``, looked up
        when the function is called.
    top_n : int, default 5
        Number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Frame with no columns whose index, named ``'Recommended breeds'``,
        lists the recommended breed names from most to least similar.

    Raises
    ------
    KeyError
        If ``breed`` is not in ``pet_title`` or its label is not in ``df.index``.
    """
    if cosine is None:
        # Resolve at call time so a recomputed matrix is picked up; a default
        # argument would freeze whatever matrix existed when this cell ran.
        cosine = cat_cosine

    # pet_title stores index *labels*, but the similarity matrix is addressed
    # by row *position*. The two differ once rows have been dropped from the
    # frame, so translate explicitly instead of using the label as a position.
    label = pet_title[breed]
    position = df.index.get_loc(label)

    scores = np.asarray(cosine[position]).ravel()

    # Stable descending sort: equally similar breeds keep their original order
    ranked = np.argsort(-scores, kind='stable')

    # Exclude the query breed by position rather than assuming it sorts first;
    # another breed with an identical description also scores 1.0.
    neighbours = [int(i) for i in ranked if i != position][:top_n]

    # Positional lookup, matching the positional indices produced above
    recommended = df.iloc[neighbours].to_numpy()

    result = pd.DataFrame({'Recommended breeds': recommended})
    return result.set_index('Recommended breeds')

In [113]:
# Example call: show the recommendations for one cat breed
recommend_pets('Turkish Angora', cat_title_index, df_cat['BreedName'])

Japanese Bobtail
Chausie
Birman
Munchkin
Tiger


# **Cat breed recommender**
Click the buttons below and get recommendations for 5 similar cat breeds!

In [69]:
# Create widget buttons for 5 cats
button_angora = widgets.Button(description="Turkish Angora", button_style='success')
button_abyssinian = widgets.Button(description="Abyssinian", button_style='success')
button_bengal = widgets.Button(description="Bengal", button_style='success')
button_maine_coon = widgets.Button(description="Maine Coon", button_style='success')
button_ocicat = widgets.Button(description="Ocicat", button_style='success')

# Output area shared by all buttons
output = widgets.Output(layout={'border': '1px solid black'})

# Display buttons
display(button_angora, button_abyssinian, button_bengal, button_maine_coon, button_ocicat, output)


def _make_breed_handler(breed):
    """Build a click callback that shows the recommendations for ``breed``.

    The five handlers differ only in the breed name, so they are generated
    from this one factory instead of being written out five times.
    """
    def _on_click(b):
        # `b` is the clicked button, passed in by ipywidgets; it is not needed.
        with output:
            # wait=True: keep the old table until the new one is ready
            clear_output(True)
            display(recommend_pets(breed, cat_title_index, df_cat['BreedName']))
    return _on_click


# One handler per cat breed (names kept so existing references still work)
angora_button = _make_breed_handler('Turkish Angora')
abyssinian_button = _make_breed_handler('Abyssinian')
bengal_button = _make_breed_handler('Bengal')
maine_coon_button = _make_breed_handler('Maine Coon')
ocicat_button = _make_breed_handler('Ocicat')

button_angora.on_click(angora_button)
button_abyssinian.on_click(abyssinian_button)
button_bengal.on_click(bengal_button)
button_maine_coon.on_click(maine_coon_button)
button_ocicat.on_click(ocicat_button)

Button(button_style='success', description='Turkish Angora', style=ButtonStyle())

Button(button_style='success', description='Abyssinian', style=ButtonStyle())

Button(button_style='success', description='Bengal', style=ButtonStyle())

Button(button_style='success', description='Maine Coon', style=ButtonStyle())

Button(button_style='success', description='Ocicat', style=ButtonStyle())

Output(layout=Layout(border='1px solid black'))

None
