In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import sigmoid_kernel
import ipywidgets as widgets
from IPython.display import display, clear_output

In [2]:
# Locations of the breed-characteristics CSV files (relative to the notebook)
data_cat = 'cat_breed_characteristics.csv'
data_dog = 'dog_breed_characteristics.csv'

# Load both datasets; the unpacking order matches (cat, dog)
df_cat, df_dog = (pd.read_csv(csv_path) for csv_path in (data_cat, data_dog))

In [3]:
#df_cat.head(1)

In [4]:
#df_dog.head(1)

In [5]:
# Remove the cat columns that the rest of the notebook does not use
df_cat = df_cat.drop(
    columns=[
        'AltBreedName',
        'LapCat',
        'Fur',
        'MaleWtKg',
        'AvgKittenPrice',
        'MalaysiaPopularity',
        'PopularityUS2017',
    ]
)

In [6]:
# Remove the dog columns that the rest of the notebook does not use
df_dog = df_dog.drop(
    columns=[
        'AltBreedName',
        'Group1',
        'Group2',
        'MaleWtKg',
        'AvgPupPrice',
        'Intelligence',
        'Watchdog',
        'MalaysiaGuardedDog',
        'MalaysiaProhibitedDog',
        'MalaysiaPopularity',
        'PopularityUS2017',
    ]
)

In [7]:
#df_cat.head(1)

In [8]:
#df_dog.head(1)

In [9]:
# Temperament column of each dataset (the column is spelled 'Temperment' in the CSVs)
cat_features, dog_features = df_cat['Temperment'], df_dog['Temperment']

In [10]:
# Check null values
#print(f'Total null values cat dataframe: {df_cat.Temperment.isnull().sum().sum()}\nTotal values dog dataframe: {df_dog.Temperment.isnull().sum().sum()}')

In [11]:
# Discard rows that have no temperament description.
# Note: the surviving rows keep their original index labels.
df_cat, df_dog = (
    frame.dropna(subset=['Temperment']) for frame in (df_cat, df_dog)
)

In [12]:
# Check null values after dropna
#print(f'Total null values cat dataframe: {df_cat.Temperment.isnull().sum().sum()}\nTotal values dog dataframe: {df_dog.Temperment.isnull().sum().sum()}')

In [13]:
# TF-IDF features built from the cat temperament text
# Replace any missing temperament with an empty string before vectorizing
df_cat['Temperment'] = df_cat['Temperment'].fillna('')

vectorizer_cat = TfidfVectorizer(use_idf=True)
cat_matrix = vectorizer_cat.fit_transform(df_cat['Temperment'])
#print(vectorizer_cat.get_feature_names())

In [14]:
# Sparse matrix
#cat_matrix

In [15]:
# The lower the IDF value of a word, the less unique it is to any particular document.

# Idf weights, one row per vocabulary term.
# `get_feature_names()` was deprecated in scikit-learn 1.0 and removed in 1.2,
# so prefer `get_feature_names_out()` and fall back only on older releases.
if hasattr(vectorizer_cat, "get_feature_names_out"):
    cat_terms = vectorizer_cat.get_feature_names_out()
else:
    cat_terms = vectorizer_cat.get_feature_names()
df_idf = pd.DataFrame(vectorizer_cat.idf_, index=cat_terms, columns=["IDF Value"])

# sort ascending
#df_idf.sort_values(by=['IDF Value'])

In [16]:
# Sigmoid-kernel similarity of the cat TF-IDF matrix against itself
cat_sigmoid = sigmoid_kernel(X=cat_matrix, Y=cat_matrix)

In [17]:
# Lookup table: breed name -> that breed's row label in df_cat
cat_title_index = pd.Series(data=df_cat.index, index=df_cat['BreedName'])

In [18]:
def recommend_pets(breed, pet_title, df, sigmoid=None):
    """Return the five breeds most similar to `breed`.

    Parameters
    ----------
    breed : str
        Name of the breed to find neighbours for.
    pet_title : pandas.Series
        Maps breed name -> row label of that breed in `df`.
        Assumes each breed name appears once -- TODO confirm for the dataset.
    df : pandas.Series
        Breed names, indexed by the same row labels that `pet_title` holds.
        Its row order must match the row order of `sigmoid`.
    sigmoid : 2-D array-like, optional
        Pairwise similarity matrix, indexed by row *position*. When omitted,
        the notebook-level `cat_sigmoid` is looked up at call time, so a
        re-computed matrix is picked up without redefining this function.

    Returns
    -------
    pandas.DataFrame
        A frame with no columns whose index, named 'Recommended breeds',
        lists up to five breed names in descending order of similarity.

    Raises
    ------
    KeyError
        If `breed` is not present in `pet_title`, or its label is not in `df`.
    """
    if sigmoid is None:
        sigmoid = cat_sigmoid

    # `pet_title` stores row labels, but the similarity matrix is positional.
    # After rows are dropped from the frame, labels and positions diverge,
    # so translate the label into a position before touching the matrix.
    label = pet_title[breed]
    position = df.index.get_loc(label)

    # Drop the queried breed explicitly instead of assuming it sorts first:
    # a breed with an identical score could otherwise push it out of slot 0.
    scores = [
        (pos, score)
        for pos, score in enumerate(sigmoid[position])
        if pos != position
    ]
    scores.sort(key=lambda pair: pair[1], reverse=True)

    # Top 5 recommendations, as positions into `df`
    top_positions = [pos for pos, _ in scores[:5]]

    # Positional lookup (.iloc) keeps matrix positions and frame rows aligned
    breed_names = df.iloc[top_positions].tolist()

    result = pd.DataFrame(breed_names, columns=['Recommended breeds'])
    return result.set_index('Recommended breeds')

In [19]:
#recommend_pets('Turkish Angora', cat_title_index, df_cat['BreedName'])

# **Cat breed recommender**
Click the buttons below and get recommendations for 5 similar cat breeds!

In [20]:
# Create widget buttons for 5 cats
button_angora = widgets.Button(description="Turkish Angora", button_style='success')
button_abyssinian = widgets.Button(description="Abyssinian", button_style='success')
button_bengal = widgets.Button(description="Bengal", button_style='success')
button_maine_coon = widgets.Button(description="Maine Coon", button_style='success')
button_ocicat = widgets.Button(description="Ocicat", button_style='success')

# Output area that the click handlers render into
output = widgets.Output(layout={'border': '1px solid black'})

# Display buttons followed by the output area
display(button_angora, button_abyssinian, button_bengal, button_maine_coon, button_ocicat, output)


def _make_breed_handler(breed):
    """Build an on_click callback that shows the recommendations for `breed`.

    The returned callback accepts the clicked button (unused) and replaces the
    contents of `output` with the result of `recommend_pets` for `breed`.
    """
    def _on_click(b):
        with output:
            # wait=True: hold the old content until the new display arrives
            clear_output(True)
            display(recommend_pets(breed, cat_title_index, df_cat['BreedName']))
    return _on_click


# One handler per cat breed, all built from the same factory
angora_button = _make_breed_handler('Turkish Angora')
abyssinian_button = _make_breed_handler('Abyssinian')
bengal_button = _make_breed_handler('Bengal')
maine_coon_button = _make_breed_handler('Maine Coon')
ocicat_button = _make_breed_handler('Ocicat')

button_angora.on_click(angora_button)
button_abyssinian.on_click(abyssinian_button)
button_bengal.on_click(bengal_button)
button_maine_coon.on_click(maine_coon_button)
button_ocicat.on_click(ocicat_button)

Button(button_style='success', description='Turkish Angora', style=ButtonStyle())

Button(button_style='success', description='Abyssinian', style=ButtonStyle())

Button(button_style='success', description='Bengal', style=ButtonStyle())

Button(button_style='success', description='Maine Coon', style=ButtonStyle())

Button(button_style='success', description='Ocicat', style=ButtonStyle())

Output(layout=Layout(border='1px solid black'))