# Data Analysis Project 5: Pet Shelter Analysis

Import Panda and CSVs

In [3]:
import pandas as pd

intakes_outcomes = pd.read_csv('archive/aac_intakes_outcomes.csv')
aac_intakes = pd.read_csv('archive/aac_intakes.csv')
aac_outcomes = pd.read_csv('archive/aac_outcomes.csv')

## 1. Is there an area where more pets are found?
   - Find the top 5 places where animals are found so the shelter can coordinate with local volunteers and animal control to monitor these areas.

Strategy
- Return a series "found_location", count how many times each value appears, print the top five.

In [6]:
top_areas = aac_intakes['found_location'].value_counts().head()

print(f"The top 5 places where animals are found are:")
for label, value in top_areas.items():
    print(f"- {label}{value}")

The top 5 places where animals are found are:
- Austin (TX)14443
- Outside Jurisdiction948
- Travis (TX)921
- 7201 Levander Loop in Austin (TX)517
- Del Valle (TX)411


## 2. What is the average number of pets found in a month in the year 2015?
   - Are there months where there is a higher number of animals found?
   - Knowing the number of pets the shelter might see in a month can help them gather enough resources and donations to care for the animals they receive.

Strategy
- Converted the datetime column to type datetime, filtered rows for 2015.
- Group the intakes by month and count them.
- Find the mean of the monthly intakes.
- Months with a higher intake number than the mean.


In [9]:
aac_intakes['datetime'] = pd.to_datetime(aac_intakes['datetime'])
intakes_2015 = aac_intakes[aac_intakes['datetime'].dt.year == 2015]

monthly_intakes = intakes_2015.groupby(intakes_2015['datetime'].dt.month).size()

mean_intakes = monthly_intakes.mean()

higher_months = monthly_intakes[monthly_intakes > mean_intakes]

print(f"The 2015 monthly mean was {mean_intakes:.2f} intakes.\n")

print(f"Months with intakes above the mean:")
for label, value in higher_months.items():
    print(f"- Month {label} had {value} intakes.")

The 2015 monthly mean was 1559.33 intakes.

Months with intakes above the mean:
- Month 5 had 2094 intakes.
- Month 6 had 2189 intakes.
- Month 7 had 1635 intakes.
- Month 8 had 1718 intakes.
- Month 9 had 1591 intakes.
- Month 10 had 1740 intakes.


## 3. What is the ratio of incoming pets vs. adopted pets?
   - This key metric helps the shelter know how they are doing

Strategy
- Find the total number intakes.
- Find the total number of adoptions
- Find the ratio of incoming pets vs adopted pets.

In [12]:
total_intakes = len(aac_intakes)

total_adoptions = aac_outcomes[aac_outcomes['outcome_type'] == 'Adoption'].shape[0]

ratio = total_adoptions / total_intakes

print(f"The ratio of incoming pets vs. adopted pets is: {ratio:.2f}")

The ratio of incoming pets vs. adopted pets is: 0.43


## 4. What is the distribution of the types of animals in the sheler?
   - Find the count of each type of animal in the shelter

Strategy
- Count the number occurences of each type in the animal type column.

In [15]:
animal_types = aac_intakes['animal_type'].value_counts()

print(f"The count of types of animals:")
for label, value in animal_types.items():
    print(f"- {label}'s: {value}")

The count of types of animals:
- Dog's: 45743
- Cat's: 29659
- Other's: 4434
- Bird's: 342
- Livestock's: 9


## 5. What are the adoption rates for specific breeds?
   - Find the top 5 dog breeds in the shelter (based on count)
   - Find the adoption percentage of each breed

Strategy
- Find total number of breeds, times they appear and return the top 5
- Filter for adoptions and count the number of adoptions per breed
- Filter 'adoptions_by_breed' by 'top_five_breed'
- Calculate percentage

In [18]:
top_five_breeds = aac_intakes['breed'].value_counts().head()

adoptions_by_breed = aac_outcomes[aac_outcomes['outcome_type'] == 'Adoption']['breed'].value_counts()

breed_filtered_adoptions = adoptions_by_breed[adoptions_by_breed.index.isin(top_five_breeds.index)]

breed_adoption_percentage = (breed_filtered_adoptions / top_five_breeds) * 100

print(f"The top 5 breeds adoption percentages are:")
for label, value in breed_adoption_percentage.items():
    print(f"- {label}'s: {value}")

The top 5 breeds adoption percentages are:
- Chihuahua Shorthair Mix's: 47.181069958847736
- Domestic Medium Hair Mix's: 45.65682498930252
- Domestic Shorthair Mix's: 43.067307283472935
- Labrador Retriever Mix's: 49.65916133030366
- Pit Bull Mix's: 37.323722970855535


## 6. What are the adoption rates for different colorings?
- Find the top 5 colorings in the shelter (based on count)
- Find the adoption percentage of each color

Strategy
- Find total number of colors and times they appear and return the top 5
- Filter for adoptions and count the number of adoptions per color
- Filter adoptions_by_color for top_five_colors
- Get percentage
- Order the values descending

In [21]:
top_five_colors = aac_intakes['color'].value_counts().head()

adoptions_by_color = aac_outcomes[aac_outcomes['outcome_type'] == 'Adoption']['color'].value_counts()

color_filtered_adoptions = adoptions_by_color[adoptions_by_color.index.isin(top_five_colors.index)]

color_adoption_percentage = ((color_filtered_adoptions / top_five_colors) * 100).sort_values(ascending=False)

print(f"The top 5 colors adoption percentages are:")
for label, value in color_adoption_percentage.items():
    print(f"- {label}'s: {value}")

The top 5 colors adoption percentages are:
- Black/White's: 45.73141486810551
- Brown Tabby's: 42.656563405393364
- Black's: 41.087928464977644
- White's: 37.97823797823798
- Brown's: 22.194582642343835


## 7. About how many animals are spayed/neutered each month?
   - This will help the shelter allocate resources and staff. Assume that all intact males and females will be spayed/neutered.

Strategy
- Convert 'datetime' column to datetime
- Create a 'month' column as a period object for monthly grouping
- Filter for Spayed Females, Intact Females, Neutered Males and Intact Males
- Concatenate all filtered outcomes into one DataFrame
- Group by month and count occurrences

In [24]:
aac_outcomes['datetime'] = pd.to_datetime(aac_outcomes['datetime'])

aac_outcomes['month'] = aac_outcomes['datetime'].dt.to_period('M')
 
spayed_female = aac_outcomes[aac_outcomes['sex_upon_outcome'] == 'Spayed Female']
intact_female = aac_outcomes[aac_outcomes['sex_upon_outcome'] == 'Intact Female']
neutered_male = aac_outcomes[aac_outcomes['sex_upon_outcome'] == 'Neutered Male']
intact_male = aac_outcomes[aac_outcomes['sex_upon_outcome'] == 'Intact Male']

monthly_spayed_neutered = pd.concat([spayed_female, intact_female, neutered_male, intact_male])

monthly_counts = monthly_spayed_neutered.groupby('month').size().mean()

print(f"Number of animals spayed/neutered each month: {monthly_counts}")

Number of animals spayed/neutered each month: 1343.0


## Extra Credit

## 1. How many animals in the shelter are repeats? Which animal was returned to the shelter the most?
   - This means the animal has been brought in more than once.

Strategy
- Find how many animals are repeats
- Find the top returned animal's id
- Search for the top animal id's name

In [28]:
repeat_animals = sum(aac_intakes['animal_id'].value_counts() > 1)
most_returned_animal_id = aac_intakes['animal_id'].value_counts().index[0]
animal_name = aac_intakes[aac_intakes['animal_id'] == most_returned_animal_id]['name'].iloc[0]

print(f"Number of repeat animals: {repeat_animals} \nMost returned animal's name: {animal_name}")

Number of repeat animals: 6154 
Most returned animal's name: Lil Bit


## 2. What are the adoption rates for the following age groups?
- baby: 4 months and less
- young: 5 months - 2 years
- adult: 3 years - 10 years
- senior 11+

Strategy
- Filter for pets adopted and their ages upon adoption
- Separate the months and years
- 

In [44]:
all_outcomes = aac_outcomes['age_upon_outcome']
adoption_outcomes = aac_outcomes[aac_outcomes['outcome_type'] == 'Adoption']['age_upon_outcome']

babies_total = []
young_total = []
adult_total = []
senior_total = []

babies_adopted = []
young_adopted = []
adult_adopted = []
senior_adopted = []


def group_total_outcomes(ages):
    for age in ages:
        if 'month' in str(age):
            if int(age.split(" ")[0]) <= 4:
                babies_total.append(age)
            elif int(age.split(" ")[0]) > 4:
                young_total.append(age)
        elif 'year' in str(age):
            if int(age.split(" ")[0]) <= 2:
                young_total.append(age)
            elif int(age.split(" ")[0]) > 2 and int(age.split(" ")[0]) <= 10:
                adult_total.append(age)
            elif int(age.split()[0]) > 11:
                senior_total.append(age)


def group_adoption_outcomes(ages):
    for age in ages:
        if 'month' in str(age):
            if int(age.split(" ")[0]) <= 4:
                babies_adopted.append(age)
            elif int(age.split(" ")[0]) > 4:
                young_adopted.append(age)
        elif 'year' in str(age):
            if int(age.split(" ")[0]) <= 2:
                young_adopted.append(age)
            elif int(age.split(" ")[0]) > 2 and int(age.split(" ")[0]) <= 10:
                adult_adopted.append(age)
            elif int(age.split()[0]) > 11:
                senior_adopted.append(age)


group_total_outcomes(all_outcomes)
group_adoption_outcomes(adoption_outcomes)


def adoption_perentage(adopted, total):
    return len(adopted) / len(total) * 100

calc_babies = adoption_perentage(babies_adopted, babies_total)





NameError: name 'babites_adopted' is not defined