In [1]:
## Initialising libraries
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [2]:
## Load the villagers table from the NookPlaza dataset into a DataFrame
villagers = pd.read_csv(
    filepath_or_buffer="/kaggle/input/animal-crossing-new-horizons-nookplaza-dataset/villagers.csv",
)

In [3]:
## Checking the dataset
## Prints the index range, each column's non-null count and dtype, and memory usage
villagers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 391 entries, 0 to 390
Data columns (total 17 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Name             391 non-null    object
 1   Species          391 non-null    object
 2   Gender           391 non-null    object
 3   Personality      391 non-null    object
 4   Hobby            391 non-null    object
 5   Birthday         391 non-null    object
 6   Catchphrase      391 non-null    object
 7   Favorite Song    391 non-null    object
 8   Style 1          391 non-null    object
 9   Style 2          391 non-null    object
 10  Color 1          391 non-null    object
 11  Color 2          391 non-null    object
 12  Wallpaper        391 non-null    object
 13  Flooring         391 non-null    object
 14  Furniture List   391 non-null    object
 15  Filename         391 non-null    object
 16  Unique Entry ID  391 non-null    object
dtypes: object(17)
memory usage: 52.1+ K

In [4]:
## Preview the first rows to see what the values in each column look like
villagers.head()

Unnamed: 0,Name,Species,Gender,Personality,Hobby,Birthday,Catchphrase,Favorite Song,Style 1,Style 2,Color 1,Color 2,Wallpaper,Flooring,Furniture List,Filename,Unique Entry ID
0,Admiral,Bird,Male,Cranky,Nature,27-Jan,aye aye,Steep Hill,Cool,Cool,Black,Blue,dirt-clod wall,tatami,717;1849;7047;2736;787;5970;3449;3622;3802;410...,brd06,B3RyfNEqwGmcccRC3
1,Agent S,Squirrel,Female,Peppy,Fitness,2-Jul,sidekick,Go K.K. Rider,Active,Simple,Blue,Black,concrete wall,colorful tile flooring,7845;7150;3468;4080;290;3971;3449;1708;4756;25...,squ05,SGMdki6dzpDZyXAw5
2,Agnes,Pig,Female,Big Sister,Play,21-Apr,snuffle,K.K. House,Simple,Elegant,Pink,White,gray molded-panel wall,arabesque flooring,4129;7236;7235;7802;896;3428;4027;7325;3958;71...,pig17,jzWCiDPm9MqtCfecP
3,Al,Gorilla,Male,Lazy,Fitness,18-Oct,ayyyeee,Go K.K. Rider,Active,Active,Red,White,concrete wall,green rubber flooring,1452;4078;4013;833;4116;3697;7845;3307;3946;39...,gor08,LBifxETQJGEaLhBjC
4,Alfonso,Alligator,Male,Lazy,Play,9-Jun,it'sa me,Forest Life,Simple,Simple,Red,Blue,yellow playroom wall,green honeycomb tile,4763;3205;3701;1557;3623;85;3208;3584;4761;121...,crd00,REpd8KxB8p9aGBRSE


## Villager species

In [5]:
## Getting counts of species (most common first)
## rename_axis pins the label column to 'Species' regardless of pandas version:
## releases before 2.0 leave the value_counts index unnamed, in which case
## reset_index would emit a column called 'index' and any later
## species_count['Species'] lookup would raise a KeyError.
## reset_index already returns a DataFrame, so no pd.DataFrame wrapper is needed.
species_count = (
    villagers['Species']
    .value_counts()
    .rename_axis('Species')
    .reset_index(name='Count')
)
species_count.head(10)

Unnamed: 0,Species,Count
0,Cat,23
1,Rabbit,20
2,Frog,18
3,Squirrel,18
4,Duck,17
5,Dog,16
6,Cub,16
7,Pig,15
8,Bear,15
9,Mouse,15


In [6]:
## Summary of the counts table: number of rows (one per species), columns and dtypes
species_count.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 35 entries, 0 to 34
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   Species  35 non-null     object
 1   Count    35 non-null     int64 
dtypes: int64(1), object(1)
memory usage: 688.0+ bytes


In [7]:
## Visualising villager species distribution
## Hover text shows the species in bold, then the count and the share on their own
## lines. Line breaks use <br>, the line-break tag of Plotly's text markup.
fig = go.Figure(
    data=[
        go.Pie(
            labels=species_count['Species'],
            values=species_count['Count'],
            direction='clockwise',
            hovertemplate='<b>%{label}</b> <br>Number of villagers: %{value} <br>% of villagers: %{percent}',
            name='',  # presumably blanked so no trace name shows next to the hover text
        )
    ]
)
## Keep slice labels inside the pie
fig.update_traces(textposition='inside')
## uniformtext settings: labels use one font size (minimum 12) and are hidden
## where they would not fit, per the Plotly uniformtext layout options
fig.update_layout(
    title='Villager species distribution',
    uniformtext_minsize=12,
    uniformtext_mode='hide',
    legend_title="Species",
)
fig.show()

## Villager personalities

In [8]:
## Creating a dataframe of personality counts (most common first)
## rename_axis guarantees the label column is called 'Personality' on every pandas
## version; before pandas 2.0 the value_counts index is unnamed and reset_index
## would produce a column named 'index' instead.
## reset_index already returns a DataFrame, so no pd.DataFrame wrapper is needed.
personality_count = (
    villagers['Personality']
    .value_counts()
    .rename_axis('Personality')
    .reset_index(name='Count')
)
personality_count

Unnamed: 0,Personality,Count
0,Lazy,60
1,Normal,59
2,Cranky,55
3,Snooty,55
4,Jock,55
5,Peppy,49
6,Smug,34
7,Big Sister,24


In [9]:
## Visualising villager personality distribution
## Hover text shows the personality in bold, then the count and the share on their
## own lines. Line breaks use <br>, the line-break tag of Plotly's text markup.
fig = go.Figure(
    data=[
        go.Pie(
            labels=personality_count['Personality'],
            values=personality_count['Count'],
            direction='clockwise',
            hovertemplate="<b>%{label}</b> <br>Number of villagers: %{value} <br>% of villagers: %{percent}",
            name='',  # presumably blanked so no trace name shows next to the hover text
        )
    ]
)
## Keep slice labels inside the pie
fig.update_traces(textposition='inside')
fig.update_layout(
    title='Villager personality distribution',
    legend_title='Personality',
)
fig.show()

In [10]:
## Count villagers for every (species, personality) pair, largest groups first
species_personality = (
    villagers
    .groupby(by=['Species', 'Personality'])
    .size()
    .sort_values(ascending=False)
    .reset_index(name='Count')
)
species_personality.head(10)

Unnamed: 0,Species,Personality,Count
0,Rabbit,Peppy,8
1,Dog,Lazy,6
2,Frog,Jock,5
3,Wolf,Cranky,5
4,Cat,Snooty,5
5,Cat,Peppy,5
6,Bear,Cranky,5
7,Squirrel,Normal,5
8,Mouse,Peppy,4
9,Rabbit,Lazy,4


In [11]:
## Stacked bar chart: one bar per species, segments coloured by personality
fig = px.bar(
    data_frame=species_personality,
    x='Species',
    y='Count',
    color='Personality',
    title='Personality type by species',
    labels=dict(Count="Number of villagers"),
    # Species is the hover title, so it is switched off in the hover rows
    hover_name='Species',
    hover_data=dict(Personality=True, Species=False, Count=True),
    # Fixed personality order for the colour sequence and legend
    category_orders=dict(
        Personality=['Lazy', 'Normal', 'Cranky', 'Snooty', 'Jock', 'Peppy', 'Smug', 'Big Sister'],
    ),
)
## Order the species on the x axis by their total bar height, tallest first
fig.update_xaxes(categoryorder='total descending')
fig.show()

## Villager hobbies

In [12]:
## Creating a dataframe of hobby counts (most common first)
## rename_axis guarantees the label column is called 'Hobby' on every pandas
## version; before pandas 2.0 the value_counts index is unnamed and reset_index
## would produce a column named 'index' instead.
## reset_index already returns a DataFrame, so no pd.DataFrame wrapper is needed.
hobby_count = (
    villagers['Hobby']
    .value_counts()
    .rename_axis('Hobby')
    .reset_index(name='Count')
)
hobby_count

Unnamed: 0,Hobby,Count
0,Nature,66
1,Fitness,66
2,Fashion,66
3,Play,65
4,Education,64
5,Music,64


In [13]:
## Visualising villager hobby distribution
## Hover text shows the hobby in bold, then the count and the share on their own
## lines. Line breaks use <br>, the line-break tag of Plotly's text markup.
fig = go.Figure(
    data=[
        go.Pie(
            labels=hobby_count['Hobby'],
            values=hobby_count['Count'],
            direction='clockwise',
            hovertemplate='<b>%{label}</b> <br>Number of villagers: %{value} <br>% of villagers: %{percent}',
            name='',  # presumably blanked so no trace name shows next to the hover text
        )
    ]
)
## Keep slice labels inside the pie
fig.update_traces(textposition='inside')
fig.update_layout(
    title='Villager hobby distribution',
    legend_title='Hobby',
)
fig.show()

In [14]:
## Count villagers for every (species, hobby) pair, largest groups first
species_hobby = (
    villagers
    .groupby(by=['Species', 'Hobby'])
    .size()
    .sort_values(ascending=False)
    .reset_index(name='Count')
)
species_hobby.head(10)

Unnamed: 0,Species,Hobby,Count
0,Gorilla,Fitness,9
1,Rabbit,Fashion,8
2,Cat,Music,7
3,Cub,Play,6
4,Sheep,Fashion,6
5,Frog,Fitness,6
6,Dog,Play,6
7,Duck,Fashion,6
8,Hippo,Education,5
9,Rabbit,Nature,5


In [15]:
## Stacked bar chart: one bar per species, segments coloured by hobby
fig = px.bar(
    data_frame=species_hobby,
    x='Species',
    y='Count',
    color='Hobby',
    title='Hobbies by species',
    labels=dict(Count="Number of villagers"),
    # Species is the hover title, so it is switched off in the hover rows
    hover_name='Species',
    hover_data=dict(Hobby=True, Species=False, Count=True),
    # Fixed hobby order for the colour sequence and legend
    category_orders=dict(
        Hobby=['Nature', 'Fitness', 'Fashion', 'Play', 'Education', 'Music'],
    ),
)
## Order the species on the x axis by their total bar height, tallest first
fig.update_xaxes(categoryorder='total descending')
fig.show()

## Hobby and personality combinations

In [16]:
## Count villagers for every (personality, hobby) pair, largest groups first
personality_hobby = (
    villagers
    .groupby(by=['Personality', 'Hobby'])
    .size()
    .sort_values(ascending=False)
    .reset_index(name='Count')
)
personality_hobby.head(10)

Unnamed: 0,Personality,Hobby,Count
0,Jock,Fitness,36
1,Snooty,Fashion,32
2,Lazy,Play,32
3,Peppy,Fashion,31
4,Normal,Nature,24
5,Lazy,Nature,23
6,Normal,Education,22
7,Cranky,Education,19
8,Jock,Play,15
9,Smug,Music,14


In [17]:
## Stacked bar chart: one bar per personality type, segments coloured by hobby
fig = px.bar(
    data_frame=personality_hobby,
    x='Personality',
    y='Count',
    color='Hobby',
    title='Hobbies by personality type',
    labels=dict(Count="Number of villagers"),
    # Personality is the hover title, so it is switched off in the hover rows
    hover_name='Personality',
    hover_data=dict(Hobby=True, Personality=False, Count=True),
    # Fixed hobby order for the colour sequence and legend
    category_orders=dict(
        Hobby=['Nature', 'Fitness', 'Fashion', 'Play', 'Education', 'Music'],
    ),
)
## Narrow the bars to half a category slot
fig.update_traces(width=0.5)
## Order the personalities on the x axis by total bar height, tallest first
fig.update_xaxes(categoryorder='total descending')
fig.show()

In [18]:
## Sunburst chart: inner ring is personality, outer ring is hobby, sized by count
fig = px.sunburst(
    data_frame=personality_hobby,
    path=['Personality', 'Hobby'],
    values='Count',
    title='Hobbies by personality type',
    color='Hobby',
    # One fixed colour per hobby; '(?)' presumably covers the personality
    # sectors, whose children do not share a single hobby - confirm against px docs
    color_discrete_map={
        '(?)': '#B6E880',
        'Nature': '#636EFA',
        'Fitness': '#EF553B',
        'Fashion': '#00CC96',
        'Play': '#AB63FA',
        'Education': '#FFA15A',
        'Music': '#19D3F3',
    },
)
## Radial labels inside the sectors and a compact "label: value" hover text
fig.update_traces(
    insidetextorientation='radial',
    hovertemplate='%{label}: %{value}',
    sort=True,
)
fig.show()

## Most common combinations

In [19]:
## Count villagers for every (species, personality, hobby) triple, largest groups first
combination = (
    villagers
    .groupby(by=['Species', 'Personality', 'Hobby'])
    .size()
    .sort_values(ascending=False)
    .reset_index(name='Count')
)
combination.head(10)

Unnamed: 0,Species,Personality,Hobby,Count
0,Rabbit,Peppy,Fashion,6
1,Dog,Lazy,Play,5
2,Cub,Lazy,Play,4
3,Duck,Snooty,Fashion,4
4,Frog,Jock,Fitness,4
5,Mouse,Jock,Fitness,3
6,Cat,Lazy,Play,3
7,Rabbit,Lazy,Nature,3
8,Cub,Normal,Nature,3
9,Sheep,Snooty,Fashion,3
