# Dog Breeds Analysis

This project analyzes the "Best in Show - Data About Dogs" dataset from Kaggle to visualize and understand characteristics of different dog breeds.

In [None]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import kagglehub

# Notebook-wide plotting defaults: seaborn "whitegrid" theme and a (12, 8) default figure size
sns.set_style(style="whitegrid")
plt.rcParams.update({"figure.figsize": (12, 8)})

## Data Loading

Loading the dog breeds dataset from Kaggle.

In [None]:
# Download the dataset through kagglehub and remember where it was stored
import os

path = kagglehub.dataset_download("paultimothymooney/best-in-show-data-about-dogs")
print(f"Dataset downloaded to: {path}")

# List available files (sorted, because os.listdir returns entries in arbitrary order)
for entry in sorted(os.listdir(path)):
    print(f"- {entry}")

# Load the dataset into the DataFrame used by every later cell
# NOTE(review): assumes "dog_breeds.csv" is one of the files listed above — confirm
df = pd.read_csv(os.path.join(path, "dog_breeds.csv"))
df.head()

## Top 10 Popular Breeds

Visualization of the most popular dog breeds based on available data.

In [None]:
# Make sure the folder that receives every saved figure exists
os.makedirs("dog_figures", exist_ok=True)

# Ten rows with the largest "popularity" values
# NOTE(review): assumes a larger "popularity" value means a more popular breed — confirm
top_popular = df.sort_values(by="popularity", ascending=False).iloc[:10]

# Bar chart of popularity per breed, saved to disk and then displayed
plt.figure(figsize=(12, 6))
sns.barplot(data=top_popular, x="popularity", y="breed")
plt.title("Top 10 Most Popular Dog Breeds", fontsize=16)
plt.tight_layout()
plt.savefig("dog_figures/top10_popular_breeds.png")
plt.show()

## Top 10 Most Expensive Breeds

Visualization of the most expensive dog breeds.

In [None]:
# Ten rows with the largest "price" values
top_expensive = df.sort_values(by="price", ascending=False).iloc[:10]

# Bar chart of price per breed, saved to disk and then displayed
plt.figure(figsize=(12, 6))
sns.barplot(data=top_expensive, x="price", y="breed")
plt.title("Top 10 Most Expensive Dog Breeds", fontsize=16)
plt.tight_layout()
plt.savefig("dog_figures/top10_expensive_breeds.png")
plt.show()

## Top 10 Most Intelligent Breeds

Visualization of the breeds with the highest intelligence rankings.

In [None]:
# Ten rows with the largest "intelligence" values
# NOTE(review): assumes a larger "intelligence" value means a smarter breed — confirm
top_intelligent = df.sort_values(by="intelligence", ascending=False).iloc[:10]

# Bar chart of intelligence per breed, saved to disk and then displayed
plt.figure(figsize=(12, 6))
sns.barplot(data=top_intelligent, x="intelligence", y="breed")
plt.title("Top 10 Most Intelligent Dog Breeds", fontsize=16)
plt.tight_layout()
plt.savefig("dog_figures/top10_intelligence_breeds.png")
plt.show()

## Lifespan vs. Weight Relationship

Analysis of how a dog's weight relates to its lifespan.

In [None]:
# Scatter plot with one point per row of df: weight on x, lifespan on y
plt.figure(figsize=(10, 8))
sns.scatterplot(data=df, x="weight", y="lifespan", alpha=0.7)

# Title and axis labels
plt.title("Relationship Between Dog Weight and Lifespan", fontsize=16)
plt.xlabel("Weight (kg)")
plt.ylabel("Lifespan (years)")

# Save to disk, then display
plt.tight_layout()
plt.savefig("dog_figures/weight_vs_lifespan.png")
plt.show()

## Breed Characteristics Comparison

A radar chart comparing selected characteristics of the top 5 most popular breeds.

In [None]:
# Create radar chart for top 5 breeds
from matplotlib.path import Path
from matplotlib.spines import Spine
from matplotlib.transforms import Affine2D
import matplotlib.patches as mpatches

def radar_factory(num_vars, frame='circle'):
    """Create a figure holding a single radar (spider) chart axes.

    A ``PolarAxes`` subclass is registered under the projection name
    ``'radar'``. Its ``plot`` and ``fill`` methods take only the per-spoke
    values; the evenly spaced spoke angles are supplied automatically and the
    first point is repeated at the end so the outline and the filled area
    close into a full polygon.

    Args:
        num_vars: Number of variables, i.e. spokes, on the chart.
        frame: Accepted so existing calls keep working; the value is not used
            and the default polar frame is always drawn.

    Returns:
        A ``(fig, ax)`` tuple: a new figure created with ``figsize=(9, 9)``
        and the radar axes added to it.
    """
    # register_projection lives in matplotlib.projections; import it from
    # there rather than relying on it being an attribute of pyplot.
    from matplotlib.projections import register_projection

    # Calculate evenly-spaced axis angles
    theta = np.linspace(0, 2 * np.pi, num_vars, endpoint=False)

    def close_loop(values):
        """Return (angles, radii) with the first point repeated one full turn later."""
        radii = np.asarray(values, dtype=float)
        angles = np.append(theta, theta[:1] + 2 * np.pi)
        return angles, np.append(radii, radii[:1])

    class RadarAxes(plt.PolarAxes):
        """Polar axes whose plot/fill take one 1-D value per spoke."""

        name = 'radar'

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # Put the first spoke at the top of the chart
            self.set_theta_zero_location('N')

        def fill(self, values, *args, **kwargs):
            """Shade the region between the centre and ``values`` around the full circle."""
            angles, radii = close_loop(values)
            return super().fill_between(angles, radii, *args, **kwargs)

        def plot(self, values, *args, **kwargs):
            """Draw the closed outline through ``values``, one value per spoke."""
            angles, radii = close_loop(values)
            return super().plot(angles, radii, *args, **kwargs)

        def set_varlabels(self, labels):
            """Label the spokes, in order, with ``labels``."""
            self.set_thetagrids(np.degrees(theta), labels)

    # Register the custom axes so projection='radar' resolves to RadarAxes
    register_projection(RadarAxes)

    # Create the figure and add a subplot that uses the registered axes
    fig = plt.figure(figsize=(9, 9))
    ax = fig.add_subplot(111, projection='radar')

    return fig, ax

# Get top 5 popular breeds
top5_breeds = df.sort_values('popularity', ascending=False).head(5)

# Select characteristics for comparison (modify as needed based on actual columns)
# NOTE(review): these column names are assumed to exist in df — confirm against df.columns
characteristics = ['intelligence', 'energy', 'trainability', 'barking', 'shedding']
N = len(characteristics)

# Min-max scale each characteristic to 0-1 using the bounds of the whole dataset.
# The bounds are computed once, outside the loop; a zero range (constant column)
# is replaced by 1 so that column maps to 0 instead of dividing by zero.
col_min = df[characteristics].min()
col_range = (df[characteristics].max() - col_min).replace(0, 1)
normalized = (top5_breeds[characteristics] - col_min) / col_range

# Create radar plot
fig, ax = radar_factory(N, frame='polygon')

# Plot each breed as an outline plus a lightly shaded area
for breed, values in zip(top5_breeds['breed'], normalized.to_numpy(dtype=float)):
    ax.plot(values, label=breed)
    ax.fill(values, alpha=0.1)

# Label each spoke through the standard polar-axes API (angles in degrees),
# so the labelling does not rely on custom helper methods of the axes class
spoke_angles = np.degrees(np.linspace(0, 2 * np.pi, N, endpoint=False))
ax.set_thetagrids(spoke_angles, characteristics)
plt.title('Characteristics of Top 5 Most Popular Dog Breeds', fontsize=15)
plt.legend(loc='upper right')

plt.tight_layout()
plt.savefig('dog_figures/top5_characteristics.png')
plt.show()

## Price vs. Popularity Analysis

Examination of whether a breed's price correlates with its popularity.

In [None]:
# Pearson correlation between the two columns (pandas' default method)
correlation = df["price"].corr(df["popularity"])

# Scatter plot with one point per row of df: price on x, popularity on y
plt.figure(figsize=(10, 8))
sns.scatterplot(data=df, x="price", y="popularity", alpha=0.7)
plt.title("Relationship Between Price and Popularity", fontsize=16)
plt.xlabel("Price ($)")
plt.ylabel("Popularity Score")

# Save to disk, then display
plt.tight_layout()
plt.savefig("dog_figures/price_vs_popularity.png")
plt.show()

# Report the coefficient rounded to two decimals
print(f"Correlation between price and popularity: {correlation:.2f}")

## Key Findings

- Labrador Retrievers, German Shepherds, and Golden Retrievers are consistently the most popular breeds
- French Bulldogs and Cavalier King Charles Spaniels tend to be among the most expensive breeds
- Smaller dogs generally have longer lifespans than larger breeds
- Each breed has a unique profile of characteristics that makes it suitable for different lifestyles
- There doesn't appear to be a strong correlation between a breed's price and its popularity