# Using Linear Regression to Predict Canine Intelligence
This notebook utilizes the dog intelligence dataset hosted on Kaggle and provided by the American Kennel Club in order to estimate dog intelligence using a linear regression model.

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/canine-intelligence-and-size/AKC Breed Info.csv
/kaggle/input/canine-intelligence-and-size/dog_intelligence.csv


## Data Loading

In [2]:
# load data
breed_info = pd.read_csv('/kaggle/input/canine-intelligence-and-size/AKC Breed Info.csv')
dog_intel = pd.read_csv('/kaggle/input/canine-intelligence-and-size/dog_intelligence.csv')

## Data Cleaning & Exploratory Data Analysis (EDA)

In [3]:
# Dog Breed Info
breed_info.head()

Unnamed: 0,index,Breed,height_low_inches,height_high_inches,weight_low_lbs,weight_high_lbs
0,0,Akita,26,28,80,120
1,1,Anatolian Sheepdog,27,29,100,150
2,2,Bernese Mountain Dog,23,27,85,110
3,3,Bloodhound,24,26,80,120
4,4,Borzoi,26,28,70,100


In [4]:
breed_info.shape

(150, 6)

In [5]:
breed_info.dtypes

index                  int64
Breed                 object
height_low_inches     object
height_high_inches    object
weight_low_lbs        object
weight_high_lbs       object
dtype: object

---

In [6]:
# Dog Intelligence
dog_intel.head()

Unnamed: 0,index,Breed,Classification,obey,reps_lower,reps_upper
0,0,Border Collie,Brightest Dogs,95%,1,4
1,1,Poodle,Brightest Dogs,95%,1,4
2,2,German Shepherd,Brightest Dogs,95%,1,4
3,3,Golden Retriever,Brightest Dogs,95%,1,4
4,4,Doberman Pinscher,Brightest Dogs,95%,1,4


In [7]:
breed_info.shape

(150, 6)

In [8]:
dog_intel.dtypes

index              int64
Breed             object
Classification    object
obey              object
reps_lower         int64
reps_upper         int64
dtype: object

In [9]:
# depict null values as a histogram

## Data Cleaning

In [10]:
# find & clear null values
obj_cols = ['height_low_inches','height_high_inches','weight_low_lbs','weight_high_lbs']
for i in obj_cols:
    print(breed_info[i].unique())

# alaskan malamute and coton de tulear: have no data for all fields in height/weight, so we can't use them
# dropping 2 rows based on value
breed_info = breed_info[breed_info['height_low_inches'] != 'na']
breed_info = breed_info[breed_info['height_low_inches'] != 'not found']

# convert numeric columns to numbers
for i in obj_cols:
    breed_info[i] = pd.to_numeric(breed_info[i])
    
# drop the 'index' column
breed_info = breed_info.drop('index', axis=1)

# calculate average columns based on height and weight
# drop the 'index' column
breed_info['avg_height'] = (breed_info['height_high_inches'] + breed_info['height_low_inches'])/2
breed_info['avg_weight'] = (breed_info['weight_high_lbs'] + breed_info['weight_low_lbs'])/2

['26' '27' '23' '24' '25' '32' '28' '22' 'na' '21' '19' '20' '26.5' '15'
 '17' '18' '14' '12' '10' '13' '16' '48' '9' '9.5' '7' 'not found' '11'
 '8' '6']
['28' '29' '27' '26' '32' '35' '30' 'na' '25' '20' '24' '35.5' '19' '18'
 '23' '14' '22' '21' '16' '17' '15' '66' '10' '11.5' '8' '12' 'not found'
 '11' '13' '9']
['80' '100' '85' '70' '120' '95' '130' '90' '175' '65' '110' '50' 'na'
 '60' '75' '74' '55' '35' '45' '140' '40' '25' '30' '22' '31' '42' '44'
 '24' '33' '10' '20' '18' '12' '15' '6' '14' 'not found' '16' '8' '9' '17'
 '34' '29' '13' '27' '2' '5' '4' '3']
['120' '150' '110' '100' '130' '160' '190' '60' 'na' '70' '65' '75' '140'
 '95' '76' '80' '90' '135' '45' '85' '170' '50' '40' '55' '28' '35' '27'
 '66' '33' '25' '30' '14' '22' '23' '18' '15' '12' '20' 'not found' '32'
 '11' '36' '17' '16' '21' '5' '10' '6' '8' '7']


---

In [11]:
breed_info.head()

Unnamed: 0,Breed,height_low_inches,height_high_inches,weight_low_lbs,weight_high_lbs,avg_height,avg_weight
0,Akita,26.0,28.0,80,120,27.0,100.0
1,Anatolian Sheepdog,27.0,29.0,100,150,28.0,125.0
2,Bernese Mountain Dog,23.0,27.0,85,110,25.0,97.5
3,Bloodhound,24.0,26.0,80,120,25.0,100.0
4,Borzoi,26.0,28.0,70,100,27.0,85.0


In [12]:
breed_info['Breed'].unique()

array(['Akita', 'Anatolian Sheepdog', 'Bernese Mountain Dog',
       'Bloodhound', 'Borzoi', 'Bullmastiff', 'Great Dane',
       'Great Pyrenees', 'Great Swiss Mountain Dog', 'Irish Wolfhound',
       'Kuvasz', 'Mastiff', 'Neopolitan Mastiff', 'Newfoundland',
       'Otter Hound', 'Rottweiler', 'Saint Bernard', 'Afghan Hound',
       'American Foxhound', 'Beauceron', 'Belgian Malinois',
       'Belgian Sheepdog', 'Belgian Tervuren', 'Black And Tan Coonhound',
       'Black Russian Terrier', 'Bouvier Des Flandres', 'Boxer', 'Briard',
       'Chesapeake Bay Retriever', 'Clumber Spaniel',
       'Collie (Rough) & (Smooth)', 'Curly Coated Retriever',
       'Doberman Pinscher', 'English Foxhound', 'English Setter',
       'German Shepherd Dog', 'German Shorthaired Pointer',
       'German Wirehaired Pointer', 'Giant Schnauzer', 'Golden Retriever',
       'Gordon Setter', 'Greyhound', 'Irish Setter', 'Komondor',
       'Labrador Retriever', 'Old English Sheepdog (Bobtail)',
       'Poodle S

In [13]:
# clean 'dog_intel' data


# drop 'index' column
dog_intel = dog_intel.drop('index', axis=1)

# convert 'Classification' to categorical datatype
dog_intel['Classification'] = dog_intel['Classification'].astype('category')



In [14]:
# convert 'Classification' to categorical datatype
dog_intel['Classification'] = dog_intel['Classification'].astype('category')

In [15]:
dog_intel.dtypes

Breed               object
Classification    category
obey                object
reps_lower           int64
reps_upper           int64
dtype: object

In [16]:
dog_intel.columns

Index(['Breed', 'Classification', 'obey', 'reps_lower', 'reps_upper'], dtype='object')

In [17]:
for i in dog_intel.columns:
    print(dog_intel[i].unique())

['Border Collie' 'Poodle' 'German Shepherd' 'Golden Retriever'
 'Doberman Pinscher' 'Shetland Sheepdog' 'Labrador Retriever' 'Papillon'
 'Rottweiler' 'Australian Cattle Dog' 'Pembroke Welsh Corgi'
 'Miniature Schnauzer' 'English Springer Spaniel'
 'Belgian Shepherd Dog (Tervuren)' 'Schipperke' 'Belgian Sheepdog'
 'Collie' 'Keeshond' 'German Shorthaired Pointer' 'Flat-Coated Retriever'
 'English Cocker Spaniel' 'Standard Schnauzer' 'Brittany' 'Cocker Spaniel'
 'Weimaraner' 'Belgian Malinois' 'Bernese Mountain Dog' 'Pomeranian'
 'Irish Water Spaniel' 'Vizsla' 'Cardigan Welsh Corgi'
 'Chesapeake Bay Retriever' 'Puli' 'Yorkshire Terrier' 'Giant Schnauzer'
 'Portuguese Water Dog' 'Airedale Terrier' 'Bouvier des Flandres'
 'Border Terrier' 'Briard' 'Welsh Springer Spaniel' 'Manchester Terrier'
 'Samoyed' 'Field Spaniel' 'Newfoundland' 'Australian Terrier'
 'American Staffordshire Terrier' 'Gordon Setter' 'Bearded Collie'
 'Cairn Terrier' 'Kerry Blue Terrier' 'Irish Setter' 'Norwegian Elkhoun

In [18]:
dog_intel.head()

Unnamed: 0,Breed,Classification,obey,reps_lower,reps_upper
0,Border Collie,Brightest Dogs,95%,1,4
1,Poodle,Brightest Dogs,95%,1,4
2,German Shepherd,Brightest Dogs,95%,1,4
3,Golden Retriever,Brightest Dogs,95%,1,4
4,Doberman Pinscher,Brightest Dogs,95%,1,4


In [19]:
dog_intel[dog_intel['obey'].isnull()]
# dog_intel['obey'].unique()


Unnamed: 0,Breed,Classification,obey,reps_lower,reps_upper
125,Shih Tzu,Lowest Degree of Working/Obedience Intelligence,,81,100
126,Basset Hound,Lowest Degree of Working/Obedience Intelligence,,81,100
127,Mastiff,Lowest Degree of Working/Obedience Intelligence,,81,100
128,Beagle,Lowest Degree of Working/Obedience Intelligence,,81,100
129,Pekingese,Lowest Degree of Working/Obedience Intelligence,,81,100
130,Bloodhound,Lowest Degree of Working/Obedience Intelligence,,81,100
131,Borzoi,Lowest Degree of Working/Obedience Intelligence,,81,100
132,Chow Chow,Lowest Degree of Working/Obedience Intelligence,,81,100
133,Bulldog,Lowest Degree of Working/Obedience Intelligence,,81,100
134,Basenji,Lowest Degree of Working/Obedience Intelligence,,81,100


In [20]:
dog_intel.shape

(136, 5)

In [21]:
dog_intel[dog_intel['Classification']=='Lowest Degree of Working/Obedience Intelligence']

Unnamed: 0,Breed,Classification,obey,reps_lower,reps_upper


In [22]:
for i in dog_intel['Classification'].unique():
    print(i)

Brightest Dogs
Excellent Working Dogs
Above Average Working Dogs
Average Working/Obedience Intelligence
Fair Working/Obedience Intelligence
Lowest Degree of Working/Obedience Intelligence 


In [23]:
# add avg reps computed column
dog_intel['avg_reps'] = (dog_intel['reps_lower'] + dog_intel['reps_upper']) / 2

In [24]:
# create the intersection of these two tables, merged on breed and only including those breeds which appear in both tables
dog_table = dog_intel.merge(breed_info, how = 'inner' ,indicator=False)
dog_table

Unnamed: 0,Breed,Classification,obey,reps_lower,reps_upper,avg_reps,height_low_inches,height_high_inches,weight_low_lbs,weight_high_lbs,avg_height,avg_weight
0,Border Collie,Brightest Dogs,95%,1,4,2.5,19.0,21.0,40,40,20.0,40.0
1,Golden Retriever,Brightest Dogs,95%,1,4,2.5,21.0,24.0,55,75,22.5,65.0
2,Doberman Pinscher,Brightest Dogs,95%,1,4,2.5,26.0,28.0,60,100,27.0,80.0
3,Labrador Retriever,Brightest Dogs,95%,1,4,2.5,21.0,24.0,55,80,22.5,67.5
4,Papillon,Brightest Dogs,95%,1,4,2.5,8.0,11.0,5,10,9.5,7.5
...,...,...,...,...,...,...,...,...,...,...,...,...
99,Bloodhound,Lowest Degree of Working/Obedience Intelligence,,81,100,90.5,24.0,26.0,80,120,25.0,100.0
100,Borzoi,Lowest Degree of Working/Obedience Intelligence,,81,100,90.5,26.0,28.0,70,100,27.0,85.0
101,Chow Chow,Lowest Degree of Working/Obedience Intelligence,,81,100,90.5,19.0,22.0,45,55,20.5,50.0
102,Basenji,Lowest Degree of Working/Obedience Intelligence,,81,100,90.5,17.0,17.0,20,22,17.0,21.0
