In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import missingno as msno
import re

# Getting data from Nookipedia API

In [None]:
import os
import requests

# Fetch every New Horizons villager (with the extra `nh_details` payload) from Nookipedia.
# The API key is read from the environment so it is never stored in the notebook;
# the key that used to be hard-coded here should be considered leaked and rotated.
api_key = os.environ.get("NOOKIPEDIA_API_KEY")
if not api_key:
    raise RuntimeError("Set the NOOKIPEDIA_API_KEY environment variable before running this cell.")

# All query parameters live in one place (the original sent `nhdetails` twice:
# once in the URL and once, as the Python bool True, in the params dict).
query = {"game": "nh", "nhdetails": "true"}
response = requests.get(
    "https://api.nookipedia.com/villagers",
    params=query,
    headers={"X-API-KEY": api_key},  # header auth keeps the key out of URLs and logs
    timeout=30,                       # fail instead of hanging the kernel forever
)
# Surface HTTP errors (bad key, rate limit, ...) here rather than as a confusing
# failure further down when the JSON error body is indexed like a villager list.
response.raise_for_status()
api_df = response.json()

In [None]:
# One row per villager, in the column order used when the DataFrame is built:
# name, gender, species, personality, sign, hobby, n_appearances.
# n_appearances is the length of the `appearances` list minus one.
anch = [
    [
        entry["name"],
        entry["gender"],
        entry["species"],
        entry["personality"],
        entry["sign"],
        entry["nh_details"]["hobby"],
        len(entry['appearances']) - 1,
    ]
    for entry in api_df
]

In [None]:
# Wrap the extracted rows in a DataFrame; `name` stays a regular column at this point.
anch = pd.DataFrame(
    data=anch,
    columns=[
        "name",
        "gender",
        "species",
        "personality",
        "sign",
        "hobby",
        "n_appearances",
    ],
)

In [None]:
# Rename the "Big sister" personality label to "Sisterly"; all other labels are untouched.
anch["personality"] = anch["personality"].replace("Big sister", "Sisterly")

# Computing the villagers' compatibility matrix (personality, sign and species)

In [None]:
# Load the three pairwise lookup tables (personality, star sign, species).
# NOTE(review): later cells index these as table[column_label][row_label], which
# presumably relies on the CSVs' first column becoming the index — confirm.
(
    personality_compatibility,
    sign_compatibility,
    species_compatibility,
) = map(
    pd.read_csv,
    (
        "data/compatibility_personality.csv",
        "data/compatibility_sign.csv",
        "data/compatibility_species.csv",
    ),
)

In [None]:
# Build the symbol matrix: for every ordered pair of villagers, concatenate the
# personality symbol, the sign symbol and the species symbol into one string.

# Look each villager's traits up once. The original re-filtered `anch` six times
# for every pair, i.e. 6 * n^2 boolean scans. drop_duplicates keeps the first row
# per name, which matches the original `.values[0]` lookup.
villager_traits = (
    anch.drop_duplicates("name")
    .set_index("name")[["personality", "sign", "species"]]
    .to_dict("index")
)
species_with_scores = set(species_compatibility.columns)

symbol_rows = []
for villager1 in anch["name"]:
    traits1 = villager_traits[villager1]
    symbol_row = []

    for villager2 in anch["name"]:
        traits2 = villager_traits[villager2]

        # personality + sign
        compatibility = (
            personality_compatibility[traits1["personality"]][traits2["personality"]]
            + sign_compatibility[traits1["sign"]][traits2["sign"]]
        )

        # species
        species1, species2 = traits1["species"], traits2["species"]
        if species1 not in species_with_scores or species2 not in species_with_scores:
            # At least one species has no column in the species table:
            # same species -> '♦', different species -> '♣'.
            compatibility += '♦' if species1 == species2 else '♣'
        else:
            # Fixed: the original looked up [species1][species1], so the second
            # villager's species never influenced the result.
            compatibility += species_compatibility[species1][species2]

        symbol_row.append(compatibility)

    symbol_rows.append(symbol_row)

# Build the frame in one go from the collected strings. The original started from
# a float matrix of zeros and overwrote it cell by cell with strings, which forces
# a dtype change on assignment and is much slower.
compatibility_matrix = pd.DataFrame(symbol_rows, index=anch["name"], columns=anch["name"])

In [None]:
def good_compatibility(s):
    """Return True when `s` contains both a '♥' and a '♦', otherwise False.

    `s` is a string of compatibility symbols; only membership of the two
    symbols is tested, their position and count do not matter.
    """
    # NOTE(review): the earlier nested form also tested '♣' and then '♦' again
    # under the '♦' check; the second test can never fail there, so the whole
    # thing reduces to this expression. Confirm whether a stricter rule
    # (e.g. requiring two '♦', or no '✖') was intended.
    return '♥' in s and '♦' in s

def _compatibility_level(symbols):
    """Turn one symbol string into a numeric level: 2 = good, 0 = bad, 1 = average.

    Good: at least two '♥', or `good_compatibility(symbols)` is true.
    Bad: not good, and at least two '✖'.
    Average: everything else.
    """
    if symbols.count('♥') >= 2 or good_compatibility(symbols):
        return 2
    if symbols.count('✖') >= 2:
        return 0
    return 1


# Classify every cell of the symbol matrix column by column. This replaces the
# nested loop of scalar `.loc` reads and writes (2 * n^2 label lookups) and gives
# a frame of plain integers instead of integers stored in an object-dtype copy.
villagers_compatibility = compatibility_matrix.apply(
    lambda column: column.map(_compatibility_level)
)

# Adding Tier/Rank data


In [None]:
# Load the tier/rank table from CSV.
tier_rank = pd.read_csv("data/acnh_villager_rank_data.csv")
# Plain assignment, no copy: `villagers` and `anch` now name the same DataFrame
# object, so in-place changes made through one name are visible through the other.
villagers = anch

In [None]:
# How many villagers in the tier_rank df are not represented in the villagers df?
(~tier_rank["name"].isin(villagers["name"])).sum()

In [None]:
# Correct the tier_rank names that do not match the spelling used in the villagers df.
# None of the corrected names is itself a key, so one mapping pass gives the same
# result as applying the replacements one after another.
tier_rank['name'] = tier_rank['name'].replace({
    'OHare': "O'Hare",
    'Buck(Brows)': "Buck",
    'Renee': "Renée",
    'WartJr': "Wart Jr.",
    'Crackle(Spork)': "Spork",
})
In [None]:
# Check that every villager in the villagers df has a row in the tier_rank df.
# The previous expression counted tier_rank rows whose name appears in villagers
# and compared that count with the number of villagers; duplicate names in
# tier_rank could make that comparison pass or fail for the wrong reason.
# Testing membership from the villagers side answers the question directly.
villagers["name"].isin(tier_rank["name"]).all()

True

In [None]:
# Drop the tier_rank rows whose villager does not exist in the villagers df.
# DataFrame.drop returns a new frame and leaves the original untouched, so the
# result must be assigned back; the original call discarded it and dropped nothing.
not_in_villagers = ~tier_rank["name"].isin(villagers["name"])
tier_rank = tier_rank.drop(index=tier_rank.index[not_in_villagers])

In [None]:
# Index both frames by villager name so that the join in the following cell
# aligns rows on the name.
# NOTE: `villagers` was bound to `anch` without a copy (`villagers = anch`), so the
# in-place set_index below also moves `name` into the index of `anch`.
# This cell is not re-runnable: after it has run there is no `name` column left.
villagers.set_index('name', drop=True, inplace=True)
tier_rank.set_index('name', drop=True, inplace=True)

In [None]:
# Attach the tier/rank columns to each villager, matching on the shared name index.
# A left join (the default) keeps every villager row, ranked or not.
villagers = villagers.join(tier_rank, how="left")

In [None]:
# Order villagers by tier, then by their rank within the tier, and overwrite
# `rank` with a consecutive 1..n position in that order.
villagers = villagers.sort_values(by=["tier", "rank"])
villagers["rank"] = np.arange(len(villagers)) + 1

# Saving to CSV

In [None]:
# Write the villager table, the symbol matrix and the numeric matrix to disk.
# NOTE(review): `villagers` (the frame carrying tier/rank) is not written anywhere
# in the visible notebook — confirm whether it, rather than `anch`, was meant
# to be saved as acnh_villager.csv.
for _frame, _csv_path in (
    (anch, "data/acnh_villager.csv"),
    (compatibility_matrix, "data/compatibility_villagers.csv"),
    (villagers_compatibility, "data/compatibility_matrix.csv"),
):
    _frame.to_csv(_csv_path)