In [1]:
import numpy as np
import pandas as pd
import plotly.express as px

import geopandas as gpd
import matplotlib.pyplot as plt

from urllib.request import urlopen
import json
import requests

In [2]:
data = pd.read_csv("/kaggle/input/active-seattle-pet-licenses-9-24-2024/Seattle_Pet_Licenses_20240924.csv", index_col="License Number")
data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 43683 entries, S107948 to 8056586
Data columns (total 6 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   License Issue Date  43683 non-null  object 
 1   Animal's Name       43642 non-null  object 
 2   Species             43683 non-null  object 
 3   Primary Breed       43683 non-null  object 
 4   Secondary Breed     29506 non-null  object 
 5   ZIP Code            43558 non-null  float64
dtypes: float64(1), object(5)
memory usage: 2.3+ MB


In [3]:
data['Year Issued'] = data['License Issue Date'].apply(lambda date: int(date.split(' ')[2]))
data['ZIP Code'] = data['ZIP Code'].fillna(0).astype(int)
data = data.drop_duplicates(subset=['License Issue Date'], keep='first')
data = data.fillna("Not Specified")
data
# data.nunique()

Unnamed: 0_level_0,License Issue Date,Animal's Name,Species,Primary Breed,Secondary Breed,ZIP Code,Year Issued
License Number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
S107948,December 18 2015,Zen,Cat,Domestic Longhair,Mix,98117,2015
S116503,June 14 2016,Misty,Cat,Siberian,Not Specified,98117,2016
S119301,August 04 2016,Lyra,Cat,Mix,Not Specified,98121,2016
962273,February 13 2019,Veronica,Cat,Domestic Longhair,Not Specified,98107,2019
S133113,August 10 2019,Spider,Cat,LaPerm,Not Specified,98115,2019
...,...,...,...,...,...,...,...
8019317,June 06 2022,Kona,Dog,"Mixed Breed, Medium (up to 44 lbs fully grown)",Mix,98146,2022
8038882,June 10 2022,Whisky,Dog,"Mixed Breed, Large (over 44 lbs fully grown)",Mix,98103,2022
8007589,June 22 2022,Maximus,Dog,"Poodle, Standard",Mix,98126,2022
S134127,June 28 2022,Rocky,Dog,"Chihuahua, Short Coat",Bichon Frise,98105,2022


* Seattle law covers animal registrations such as: potbelly pig, miniature goat, cat, and dog licenses, guard and attack dog licenses, and exotic animal permits.
* Bulk of registrations happened post-2022.

In [4]:
fig = px.histogram(data,'Year Issued', nbins=20, text_auto=True, height=350, title="Yearly Pet Registration")
fig.show()

* Names for cats and dogs were very similar for the top five.
* Frequency of names for goats and pigs were all 1, as there were very few samples of those species to begin with.

In [5]:
fig = px.histogram(data, x='Year Issued', facet_col='Species', facet_col_wrap=2, height=500,
             nbins=10, text_auto=True, title="Yearly Pet Registration by Species")
fig.show()

In [6]:
# Text output showing most popular animal name by species type
breed_count = pd.DataFrame(data.groupby(['Species', "Animal's Name"])["ZIP Code"].count().rename('Count'))
breed_count = breed_count.reset_index(level=[0,1])

print("Goat Names:", breed_count[breed_count.Species == "Goat"]["Animal's Name"].unique())
print("\nPig Names:", breed_count[breed_count.Species == "Pig"]["Animal's Name"].unique())

Goat Names: ['Abelard']

Pig Names: []


In [7]:
cats = breed_count[breed_count.Species == "Cat"].nlargest(10, "Count")

fig = px.bar(cats, x="Animal's Name", y="Count", barmode="group", title="Top 10 Cat Names", height=350, text_auto=True)
fig.show()

In [8]:
dogs = breed_count[breed_count.Species == "Dog"].nlargest(10, "Count")

fig = px.bar(dogs, x="Animal's Name", y="Count", barmode="group", title="Top 10 Dog Names", height=350, text_auto=True)
fig.show()

In [9]:
url = "https://raw.githubusercontent.com/seattleio/seattle-boundaries-data/refs/heads/master/data/zip-codes.geojson"
seattle_zips_json = []

with urlopen(url) as response:
    seattle_zips_json = json.load(response)

zip_codes = []
for i in range(len(seattle_zips_json['features'])):
    code = seattle_zips_json['features'][i]['properties']['ZCTA5CE10']
    zip_codes.append(code)

zip_codes = pd.DataFrame({'ZIP Code': zip_codes})
zip_codes['ZIP Code'] = zip_codes['ZIP Code'].astype(int)
zip_codes.head()

Unnamed: 0,ZIP Code
0,98136
1,98144
2,98109
3,98195
4,98199


In [10]:
zip_codes_count = data.groupby(["ZIP Code"]).count()['Species']
zip_codes_count = zip_codes.merge(zip_codes_count, on="ZIP Code").rename({"Species": "ZIP Code Count"}, axis=1)

data = data.reset_index().merge(zip_codes_count, on="ZIP Code")
data.head()

Unnamed: 0,License Number,License Issue Date,Animal's Name,Species,Primary Breed,Secondary Breed,ZIP Code,Year Issued,ZIP Code Count
0,S107948,December 18 2015,Zen,Cat,Domestic Longhair,Mix,98117,2015,54
1,S116503,June 14 2016,Misty,Cat,Siberian,Not Specified,98117,2016,54
2,S119301,August 04 2016,Lyra,Cat,Mix,Not Specified,98121,2016,33
3,962273,February 13 2019,Veronica,Cat,Domestic Longhair,Not Specified,98107,2019,39
4,S133113,August 10 2019,Spider,Cat,LaPerm,Not Specified,98115,2019,85


In [11]:
# Most popular breeds by zip code?

In [12]:
# Chloropeth of registrations by zip count + most popular animals per zip code

fig = px.choropleth(data,
                    geojson= seattle_zips_json,
                    locations='ZIP Code',
                    featureidkey="properties.ZCTA5CE10",
                    color='ZIP Code Count',
                    color_continuous_scale="blues",
                    projection="mercator",
                    title="Pet Registrations by ZIP Code",
                    )

fig.update_geos(fitbounds="locations", visible=False)
fig.show()