# Correlation between dog size and intelligence

In [1]:
# First, let's import all the libraries!
import plotly.graph_objs as go
import numpy as np
import pandas as pd
from plotly.subplots import make_subplots
import plotly.express as p

In [2]:
# Load the raw breed data set; the relative path is resolved against the
# current working directory.
csv_path = 'dog_data.csv'
df = pd.read_csv(csv_path, engine='python')

In [3]:
# So in the table displayed:

# Breed:The breed of the dogs considered in the experiment.
# reps_lower: The lower limit of repetitions to understand new commands.
# reps_upper: The upper limit of repetitions to understand new commands.
# height_low_inches: The lower limit of the height of the dogs in inches.
# height_high_inches: The upper limit of the height of the dogs in inches.
# weight_low_lbs: The lower limit of the weight of the dogs in pounds.
# weight_high_lbs: The upper limit of the weight of the dogs in pounds.

# In this case we measure intelligence by the number of repetitions a dog needs to understand a new command.
# So the more repetitions a command takes, the slower a learner the dog is.
# (Coren notes that this type of intelligence is the type most people seem interested in, and the type of intelligence that dog obedience trials measure.)
# Now let's clean our data.
# If there is no data in the intelligence columns, we omit the breed from our analysis... sorry, Chihuahua!

In [4]:
# Build the cleaned working frame `df1` from the raw frame `df`.
#
# 1. Drop rows that contain missing values.
# 2. Keep only the first row for each breed.
# The two steps are chained so that the de-duplication operates on the
# NaN-free rows (calling drop_duplicates on `df` again would silently undo
# the dropna). `.copy()` makes df1 an independent frame, so the column
# assignments below do not write into a slice of `df`.
df1 = df.dropna().drop_duplicates(subset='Breed').copy()

# Tidy the breed names: strip leading '+'/'-' markers and remove any digits.
# Both steps are applied to df1's own column so neither result is discarded.
# NOTE: trailing letters are deliberately NOT stripped -- rstrip('aAbBcC')
# would truncate real names such as "Akita" -> "Akit".
df1['Breed'] = (
    df1['Breed']
    .str.lstrip('+-')
    # raw string + regex=True: newer pandas treats the pattern literally
    # unless regex matching is requested explicitly.
    .str.replace(r'\d+', '', regex=True)
)

print(df1)

                             Breed  height_low_inches  height_high_inches  \
0                            Akita               26.0                28.0   
1                 Atolian Sheepdog               27.0                29.0   
2             Bernese Mountain Dog               23.0                27.0   
3                       Bloodhound               24.0                26.0   
4                           Borzoi               26.0                28.0   
5                      Bullmastiff               25.0                27.0   
6                       Great Dane               32.0                32.0   
7                   Great Pyrenees               27.0                32.0   
8         Great Swiss Mountain Dog               23.0                28.0   
9                  Irish Wolfhound               28.0                35.0   
10                          Kuvasz               28.0                30.0   
11                         Mastiff               27.0                30.0   

In [5]:
# Now let's determine the size of our doggos from the info given.
# For size, we will take the average weight and height based on the range given.

In [6]:
# Silence pandas' SettingWithCopyWarning once, *before* the column
# assignments that would otherwise print it (previously the option was set
# only after the first assignment, and then set a second time).
pd.options.mode.chained_assignment = None  # default='warn'

# Size proxies: the row-wise mean of the low/high bound of each range.
df1['weight_avg'] = df1[['weight_low_lbs', 'weight_high_lbs']].mean(axis=1)
df1['height_avg'] = df1[['height_low_inches', 'height_high_inches']].mean(axis=1)

In [7]:
# Same approach for learning speed: "Intelligence" is the row-wise mean of
# the lower and upper repetition counts (a lower value means fewer
# repetitions were needed).
rep_cols = ['reps_lower', 'reps_upper']
df1['Intelligence'] = df1[rep_cols].mean(axis=1)
pd.options.mode.chained_assignment = None  # default='warn'

# Discard breeds whose weight AND height averages are both zero.
# NOTE(review): a row with only one of the two averages equal to zero is
# kept -- confirm that this is the intended filter.
both_zero = (df1[['weight_avg', 'height_avg']] == 0).all(axis=1)
df1 = df1[~both_zero]

In [8]:
# Now let's classify our dogs into sizes. This will be based on crate sizes, so:
# - XS: Less than 12lb 
# - S: 13lb to 25lb 
# - M: 26lb to 57lb 
# - L: 58lb to 99lb
# - XL: 100lb+ 
# This will make the groups easier to see in our plot.
# Let's just focus on weight for this exercise, for simplicity.

In [16]:
# Add a 'size' column that buckets each breed by its average weight (lb).
#
# The bins are contiguous and right-inclusive:
#   XS: <= 12 | S: (12, 25] | M: (25, 57] | L: (57, 99] | XL: > 99
# Using one contiguous set of edges closes the gap the separate comparisons
# left between 99 and 100: an average such as 99.5 matched neither
# "<= 99" nor ">= 100" and ended up with no size at all.
size_edges = [float('-inf'), 12, 25, 57, 99, float('inf')]
size_labels = ['XS', 'S', 'M', 'L', 'XL']

# astype(object) keeps plain string labels (NaN where the weight is missing)
# rather than a categorical column.
df1['size'] = pd.cut(
    df1['weight_avg'],
    bins=size_edges,
    labels=size_labels,
    right=True,
).astype(object)

In [24]:
# Expose the average weight under a display-friendly column name.
# This is a copy, not a rename: the original 'weight_avg' column stays in df1.
display_name = 'Weight Average'
df1[display_name] = df1['weight_avg']

In [25]:
# Visualize the data: average weight on the x-axis, repetitions needed
# ("Intelligence") on the y-axis, one color per size class, and the breed
# name shown on hover.
import plotly.express as px

fig = px.scatter(
    df1,
    x="Weight Average",
    y="Intelligence",
    color='size',
    hover_name="Breed",
    title='Dog Size vs Intelligence',
)
fig.show()

In [26]:
# What do we see? (Hover over the plot to see the breeds and maybe find yours.)
# Looks like the larger dogs need fewer repetitions to understand a command!

# Now let's filter this a little bit more to see which breeds land in the 10 smartest, and see if the devious Zeus makes the cut! (Siberian Husky)

In [29]:
# Rank breeds by the repetitions they need (fewest first).
df1 = df1.sort_values('Intelligence')

# Build a compact view without the raw range columns and the intermediate
# averages. The `columns=` keyword is used because passing the axis
# positionally (`drop(labels, 1)`) is rejected by current pandas.
helper_columns = [
    'height_low_inches',
    'height_high_inches',
    'weight_low_lbs',
    'weight_avg',
    'weight_high_lbs',
    'height_avg',
    'reps_lower',
    'reps_upper',
]
df2 = df1.drop(columns=helper_columns)

# Show the ten breeds at the top of the ranking.
print(df2.head(10))

                      Breed  Intelligence size   Size  Weight Average
0                     Akita           2.5   XL  100.0           100.0
8  Great Swiss Mountain Dog           2.5   XL  140.0           140.0
7            Great Pyrenees           2.5   XL  107.5           107.5
6                Great Dane           2.5   XL  140.0           140.0
4                    Borzoi           2.5    L   85.0            85.0
5               Bullmastiff           2.5   XL  115.0           115.0
3                Bloodhound           2.5   XL  100.0           100.0
2      Bernese Mountain Dog           2.5    L   97.5            97.5
1          Atolian Sheepdog           2.5   XL  125.0           125.0
9           Irish Wolfhound           2.5   XL  120.0           120.0


In [30]:
# Wow, he didn't make the top 10...
# We do see, however, that the top smartest dogs are mostly over 100 lb!
# So where is Zeus?
# Build the mask from df2 itself, so the lookup uses the same rows and the
# same breed names as the frame being filtered (not those of the raw `df`).
findzeus = df2.loc[df2['Breed'] == 'Siberian Husky']
print(findzeus)

             Breed  Intelligence size  Size  Weight Average
91  Siberian Husky          33.0    M  50.0            50.0


In [31]:
# At least he's in the top 100!
# Thank you for running my code. I hope you enjoyed it, and if you have a smaller dog, I'm sure he is just as smart; this is just
# an older study/dataset that should be revisited.

#### Suggestions for future research (including improvements)

In [32]:
# - Something else we can consider in this analysis is the other data in the dataset. We could use the height data to get a more accurate read on each dog's size. However, in line with the industry standard, we just used weight.
# - Include other resources on dog intelligence such as the ability to solve problems such as detouring to a goal or emotional intelligence
# - Get more data points for intelligence so look at different studies
# - Calculate the baseline for the data
# - Get more data points for weight and height instead of a range
# - Do a more interactive analysis where you can input your dog and it tell you where it ranks