In [1]:
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import euclidean_distances,cosine_similarity

In [2]:
# pickle_in = open('app_data/translation_dict.pickle','rb')
# translation_dict = pickle.load(pickle_in)

## Load in dataframe created by scraping dogtime.com

In [3]:
df = pd.read_csv('app_data/breed_traits.csv',index_col='Unnamed: 0')

In [62]:
#Residual code dealing with NaN value for jindo at Bark or Howl cell
##df.loc['jindo','Tendency To Bark Or Howl'] = 2.0
##df.loc['jindo','Tendency To Bark Or Howl']
##df.to_csv(r'/home/ubuntu/dog_app/app_data/breed_traits.csv')

## Recommendation Functions 
* Turn the row for a breed into an array
*  Calculate the Cosine Similarity / Euclidean Distance of all other breeds/rows from the initial row

In [5]:
def overall_recommender(breed,dist='cosine'):
    '''
    Input: Name of breed (string)
    Output: 5 Breeds with most similar temperaments according to dogtime.com ratings
    '''
    y = df.loc[[breed],:]
    euc_dists = euclidean_distances(df.values,y.values)
    euc_ind = np.argsort(euc_dists.flatten())
    cos_dists = cosine_similarity(df.values,y.values)
    cos_ind = np.argsort(cos_dists.flatten())
    if dist == 'euclidean':
        return [df.iloc[ind,:].name for ind in euc_ind][1:6]
    elif dist == 'cosine':
        return [df.iloc[ind,:].name for ind in cos_ind][-1:-6:-1]
    
def predictions_recommender(breed,photo_list,dist='cosine'):
    '''
    Input: Name of breed (string), List of dogs you're considering (list)
    Output: Ordered list starting from most similar to least
    '''
    y = df.loc[[breed],:]
    X = df.loc[photo_list,:]
    euc_dists = euclidean_distances(X.values,y.values)
    euc_ind = np.argsort(euc_dists.flatten())
    cos_dists = cosine_similarity(X.values,y.values)
    cos_ind = np.argsort(cos_dists.flatten())
    if dist == 'euclidean':
        return [X.iloc[ind,:].name for ind in euc_ind]
    elif dist == 'cosine':
        return [X.iloc[ind,:].name for ind in cos_ind][::-1]

In [63]:
overall_recommender('golden-retriever')

['golden-retriever',
 'labrador-retriever',
 'irish-setter',
 'english-setter',
 'brittany']

In [64]:
overall_recommender('golden-retriever',dist='euclidean')

['labrador-retriever',
 'irish-setter',
 'english-setter',
 'brittany',
 'newfoundland']

In [65]:
photo_list = ['american-staffordshire-terrier','miniature-schnauzer',
              'rottweiler','newfoundland']
predictions_recommender('golden-retriever',photo_list)

['newfoundland',
 'rottweiler',
 'american-staffordshire-terrier',
 'miniature-schnauzer']

In [66]:
predictions_recommender('golden-retriever',photo_list, dist='euclidean')

['newfoundland',
 'rottweiler',
 'american-staffordshire-terrier',
 'miniature-schnauzer']

## Need to create questions that reflect the 31 traits in the dataframe

In [67]:
len(df.columns)

31

In [80]:
df.columns

Index([' Adaptability', ' All Around Friendliness', ' Exercise Needs',
       ' Health Grooming', ' Trainability', 'Adapts Well to Apartment Living',
       'Affectionate with Family', 'Amount Of Shedding', 'Dog Friendly',
       'Drooling Potential', 'Easy To Groom', 'Easy To Train', 'Energy Level',
       'Exercise Needs', 'Friendly Toward Strangers', 'General Health',
       'Good For Novice Owners', 'Incredibly Kid Friendly Dogs',
       'Intelligence', 'Intensity', 'Potential For Mouthiness',
       'Potential For Playfulness', 'Potential For Weight Gain', 'Prey Drive',
       'Sensitivity Level', 'Size', 'Tendency To Bark Or Howl',
       'Tolerates Being Alone', 'Tolerates Cold Weather',
       'Tolerates Hot Weather', 'Wanderlust Potential'],
      dtype='object')

In [83]:
for enumerate(df.columns)

<enumerate at 0x1a24664d38>

In [82]:
columns = list(df.columns)
a = np.zeros(31)
profile_df = pd.DataFrame(a.reshape(-1, len(a)),columns=columns)
profile_df

Unnamed: 0,Adaptability,All Around Friendliness,Exercise Needs,Health Grooming,Trainability,Adapts Well to Apartment Living,Affectionate with Family,Amount Of Shedding,Dog Friendly,Drooling Potential,...,Potential For Playfulness,Potential For Weight Gain,Prey Drive,Sensitivity Level,Size,Tendency To Bark Or Howl,Tolerates Being Alone,Tolerates Cold Weather,Tolerates Hot Weather,Wanderlust Potential
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [69]:
# Are there any correlations that indicate how we can group together certain traits?
trait_corrs = df.corr(method='pearson')
trait_corrs

Unnamed: 0,Adaptability,All Around Friendliness,Exercise Needs,Health Grooming,Trainability,Adapts Well to Apartment Living,Affectionate with Family,Amount Of Shedding,Dog Friendly,Drooling Potential,...,Potential For Playfulness,Potential For Weight Gain,Prey Drive,Sensitivity Level,Size,Tendency To Bark Or Howl,Tolerates Being Alone,Tolerates Cold Weather,Tolerates Hot Weather,Wanderlust Potential
Adaptability,1.0,0.203614,-0.126795,-0.274403,-0.053194,0.550922,0.094827,-0.052036,0.238838,-0.269333,...,0.082962,0.059336,-0.117818,0.218321,-0.302114,-0.027699,0.192661,0.195134,0.195739,-0.064511
All Around Friendliness,0.203614,1.0,0.237759,-0.009336,0.227386,-0.014888,0.494942,0.043279,0.758163,-0.11043,...,0.390929,0.135973,0.133176,0.161098,-0.018553,-0.003427,-0.138564,0.043105,0.137761,0.155863
Exercise Needs,-0.126795,0.237759,1.0,0.075548,0.335317,-0.357212,0.209012,0.033417,0.134836,-0.156749,...,0.636527,-0.103627,0.149587,0.016251,0.111044,0.065862,-0.010237,0.10615,0.28122,0.450288
Health Grooming,-0.274403,-0.009336,0.075548,1.0,-0.024824,-0.363099,0.069146,0.529184,-0.113293,0.573769,...,-0.057509,0.2925,0.025659,0.056506,0.496627,-0.025099,-0.089244,0.051092,-0.098584,0.101973
Trainability,-0.053194,0.227386,0.335317,-0.024824,1.0,-0.209415,0.105125,-0.017608,0.1856,-0.145588,...,0.221463,-0.170944,0.544139,0.108356,-0.032186,0.424478,-0.109347,0.019207,0.330847,0.521648
Adapts Well to Apartment Living,0.550922,-0.014888,-0.357212,-0.363099,-0.209415,1.0,0.001764,-0.208688,0.00542,-0.245419,...,-0.082233,0.067894,-0.075905,0.089809,-0.614309,-0.019812,0.007504,-0.286183,-0.057382,-0.281122
Affectionate with Family,0.094827,0.494942,0.209012,0.069146,0.105125,0.001764,1.0,0.087799,0.269307,0.046355,...,0.432321,0.143474,0.047155,0.326955,0.042899,-0.033842,-0.305651,0.045671,0.021662,0.029791
Amount Of Shedding,-0.052036,0.043279,0.033417,0.529184,-0.017608,-0.208688,0.087799,1.0,-0.032984,0.142738,...,0.007003,0.16173,0.070883,0.035421,0.231719,0.043039,-0.091508,0.259374,-0.177472,0.039589
Dog Friendly,0.238838,0.758163,0.134836,-0.113293,0.1856,0.00542,0.269307,-0.032984,1.0,-0.15271,...,0.264885,0.097395,0.107208,0.154694,-0.048049,-0.017328,-0.054933,0.048467,0.128558,0.201424
Drooling Potential,-0.269333,-0.11043,-0.156749,0.573769,-0.145588,-0.245419,0.046355,0.142738,-0.15271,1.0,...,-0.208898,0.19659,-0.068834,0.051882,0.436406,-0.093216,-0.10043,-0.006454,-0.255268,-0.102076


In [70]:
# Sample that shows strong relationships
test = trait_corrs.sort_values(by=[' Exercise Needs'],ascending=False).T.loc[' Exercise Needs',:]
list(test[(test!=1.0)&((test>0.5) | (test<-0.4))].index)

['Energy Level', 'Exercise Needs', 'Intensity', 'Potential For Playfulness']

In [73]:
dictionary = {}


for column in trait_corrs.columns:    
    sample = trait_corrs.sort_values(by=[column],ascending=False).T.loc[column,:]
    summary = sample[(sample!=1.0)&((sample>0.5) | (sample<-0.4))]
    if len(summary) == 0:
        pass
    else:
        if column in set([item for sublist in list(dictionary.values()) for item in sublist]):
            pass
        else:
            print('Trait: {}'.format(column))
            print('--------------------')
            print(summary)
            print('====================')
            print('')

            dictionary[column] = list(summary.index)
            print('Values')
            print(set([item for sublist in list(dictionary.values()) for item in sublist]))
            print('--------------------')
            print(len(set([item for sublist in list(dictionary.values()) for item in sublist])))
            print('====================')
            print('')
            print('')

Trait:  Adaptability
--------------------
Good For Novice Owners             0.632658
Adapts Well to Apartment Living    0.550922
Name:  Adaptability, dtype: float64

Values
{'Good For Novice Owners', 'Adapts Well to Apartment Living'}
--------------------
2


Trait:  All Around Friendliness
--------------------
Incredibly Kid Friendly Dogs    0.760396
Dog Friendly                    0.758163
Friendly Toward Strangers       0.752394
Name:  All Around Friendliness, dtype: float64

Values
{'Dog Friendly', 'Good For Novice Owners', 'Adapts Well to Apartment Living', 'Incredibly Kid Friendly Dogs', 'Friendly Toward Strangers'}
--------------------
5


Trait:  Exercise Needs
--------------------
Energy Level                 0.798173
Exercise Needs               0.766113
Intensity                    0.700995
Potential For Playfulness    0.636527
Name:  Exercise Needs, dtype: float64

Values
{'Dog Friendly', 'Intensity', 'Good For Novice Owners', 'Adapts Well to Apartment Living', 'Potential 

In [74]:
dictionary

{' Adaptability': ['Good For Novice Owners',
  'Adapts Well to Apartment Living'],
 ' All Around Friendliness': ['Incredibly Kid Friendly Dogs',
  'Dog Friendly',
  'Friendly Toward Strangers'],
 ' Exercise Needs': ['Energy Level',
  'Exercise Needs',
  'Intensity',
  'Potential For Playfulness'],
 ' Health Grooming': ['Drooling Potential', 'Amount Of Shedding'],
 ' Trainability': ['Prey Drive', 'Wanderlust Potential'],
 'Size': ['Good For Novice Owners', 'Adapts Well to Apartment Living']}

# Survey Questions

Survey:
    
Adapts Well to Apartment Living: House or apartment?  House: 1 ; Apartment: 5

Good For Novice Owners: Experience Training Dogs? Little: 5 Some: 3 Lots: 1

Sensitivity Level: Hectic Life, young kids, low patience: Yes: 1 No: 5

Tolerates Being Alone: How much time do you spend away from home? Little: 1 Some: 3 Lots: 5

Tolerates Cold Weather: Live in a cold climate?: Yes: 5 No: 1

Tolerates Hot Weather: Live in a hot climate?: Yes: 5 No: 1

Affectionate with Family: How important is affection from your dog? Not: 1 Kind of: 3 Very: 5

Incredibly Kid Friendly Dogs: Do you have kids? Yes: 5 No: 1

Dog Friendly: Do you have other dogs?: Yes: 5 No: 1

Friendly Toward Strangers: How social do you plan to be with your dog?: Not: 1 Some: 3 Very: 5

Amount Of Shedding: Are you willing to deal with hair?: 
No: 1, A little: 3, I don't care: 5

Drooling Potential: Are you willing to deal with drool?:
No: 1, A little: 3, I don't care: 5

Easy To Groom: Are you willing to groom your dog?:
Yes: 1 Sometimes: 3 No: 5

General Health: N/A

Potential For Weight Gain: How active will you be with your dog?:
Not very: 1 Somewhat: 3 Very: 5

Size: Do you prefer small or big dogs?
Small: 1, Don't care: 3, Big: 5

Easy To Train: Experience with dogs, patient with training?:
Yes: 1 Somewhat: 3 No: 5

Intelligence: N/A

Potential For Mouthiness: N/A

Prey Drive: Have small dogs or pets(birds,hampsters)?
Yes: 1 No: 5

Tendency To Bark Or Howl: Does barking or howling bother you?:
Yes: 1, A little: 3, No: 5

Wanderlust Potential: N/A

Energy Level: How active will you be with your dog?:
Not very: 1 Somewhat: 3 Very: 5

Intensity: Do you have kids or grandparents living with you?:
Yes: 1 No: 5

Exercise Needs: How active will you be with your dog?:
Not very: 1 Somewhat: 3 Very: 5

Potential For Playfulness: How active will you be with your dog?:
Not very: 1 Somewhat: 3 Very: 5