In [10]:
import pandas as pd
import numpy as np

# Show up to 100 rows and 100 columns before pandas truncates the display.
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100

# Render floats in fixed-point notation (five decimals) instead of
# scientific notation, in both pandas and NumPy output.
pd.options.display.float_format = '{:.5f}'.format
np.set_printoptions(suppress=True)


In [11]:
# Load the villagers dataset and list its columns to see what is available.
villagers_csv_path = "./data/villagers.csv"
villagers_df = pd.read_csv(villagers_csv_path)
villagers_df.columns

Index(['Name', 'Species', 'Gender', 'Personality', 'Hobby', 'Birthday',
       'Catchphrase', 'Favorite Song', 'Style 1', 'Style 2', 'Color 1',
       'Color 2', 'Wallpaper', 'Flooring', 'Furniture List', 'Filename',
       'Unique Entry ID'],
      dtype='object')

In [12]:
# Count how many villagers belong to each species (most frequent first).
villagers_df["Species"].value_counts()

Species
Cat          23
Rabbit       20
Squirrel     18
Frog         18
Duck         17
Dog          16
Cub          16
Bear         15
Pig          15
Horse        15
Mouse        15
Bird         13
Penguin      13
Sheep        13
Wolf         11
Elephant     11
Deer         10
Ostrich      10
Gorilla       9
Chicken       9
Eagle         9
Koala         9
Goat          8
Kangaroo      8
Monkey        8
Hamster       8
Alligator     7
Lion          7
Anteater      7
Hippo         7
Tiger         7
Bull          6
Rhino         6
Cow           4
Octopus       3
Name: count, dtype: int64

In [13]:
# Count how many villagers have each personality type (most frequent first).
villagers_df["Personality"].value_counts()

Personality
Lazy          60
Normal        59
Snooty        55
Cranky        55
Jock          55
Peppy         49
Smug          34
Big Sister    24
Name: count, dtype: int64

In [14]:
# Build a two-column table: one row per personality type with the number of
# villagers that have it.
# NOTE: despite the variable name, these are personality counts, not species
# counts; the name is kept because later cells refer to it.
species_count_df = (
    villagers_df['Personality']
    .value_counts()
    .rename_axis('Personality')
    .reset_index(name='Amount')
)
species_count_df


Unnamed: 0,Personality,Amount
0,Lazy,60
1,Normal,59
2,Snooty,55
3,Cranky,55
4,Jock,55
5,Peppy,49
6,Smug,34
7,Big Sister,24


# extra data

In [15]:
# Load the reactions dataset; its 'Source' column holds personality names.
reactions_csv_path = "./data/reactions.csv"
reactions_df = pd.read_csv(reactions_csv_path)
reactions_df

Unnamed: 0,Name,Source,Source Notes,Internal ID,Unique Entry ID
0,Aggravation,Peppy,,,6suhKYAAhh5QMKt64
1,Agreement,Cranky,,,7smqwK9J7eFBxta28
2,Amazed,Snooty,,,vosig4bbr4kPq57AT
3,Apologetic,Big Sister,,,jBKsQYCAd7PtymQeB
4,Bashfulness,Lazy,,,wzvKbFrEJCseTohoz
5,Bewilderment,Cranky,,,WRSSRZMtM6rXfXBrR
6,Cold Chill,Big Sister,,,K62u2owKFDLmPCB3Q
7,Confident,Big Sister,Requires a high level of friendship,,TGifHTbyLBh4f3gDQ
8,Curiosity,Peppy,,,SskgipgnKzm5TsK7d
9,Daydreaming,Normal,Requires a high level of friendship,,JFrTBJCEaX5R4kABT


In [16]:
# Drop every column that contains only missing values (here that removes the
# empty 'Internal ID' column). dropna returns a new frame, so reactions_df
# itself is left untouched.
new_reactions_df = reactions_df.dropna(axis=1, how='all')

# Preview the cleaned frame as the cell's rich output rather than print(),
# so it renders as a table like the other DataFrames in this notebook.
new_reactions_df.head()

          Name      Source Source Notes    Unique Entry ID
0  Aggravation       Peppy          NaN  6suhKYAAhh5QMKt64
1    Agreement      Cranky          NaN  7smqwK9J7eFBxta28
2       Amazed      Snooty          NaN  vosig4bbr4kPq57AT
3   Apologetic  Big Sister          NaN  jBKsQYCAd7PtymQeB
4  Bashfulness        Lazy          NaN  wzvKbFrEJCseTohoz


In [17]:
# List the distinct 'Source' values so they can be compared with the
# personality types found in the villagers data.
new_reactions_df['Source'].unique()

array(['Peppy', 'Cranky', 'Snooty', 'Big Sister', 'Lazy', 'Normal', 'All',
       'Jock', 'Smug'], dtype=object)

## Match 'Source' in the reactions data with 'Personality' from the villagers df

In [18]:
# Rename 'Source' to 'Personality' so the reactions share a column name with
# the villager counts, and trim surrounding whitespace from the values so
# they compare equal to the villagers' personality labels.
new_reactions_df = (
    new_reactions_df
    .rename(columns={'Source': 'Personality'})
    .assign(Personality=lambda frame: frame['Personality'].str.strip())
)
new_reactions_df['Personality'].unique()

array(['Peppy', 'Cranky', 'Snooty', 'Big Sister', 'Lazy', 'Normal', 'All',
       'Jock', 'Smug'], dtype=object)

# Merge the DataFrames on the column they share by name

In [19]:
# Attach every reaction to the villager count of the personality that uses it.
# The join key and join type are spelled out instead of relying on pd.merge's
# defaults (all shared column names, inner join), so the result stays the same
# even if the two frames later gain another column with a common name.
# Note: because this is an inner join, reactions whose Personality is 'All'
# have no matching row in species_count_df and are left out of the result.
merged_df = pd.merge(
    species_count_df,
    new_reactions_df,
    on="Personality",
    how="inner",
)
# 'Name' in the reactions data is the reaction's name; make that explicit.
merged_df = merged_df.rename(columns={"Name" : "Reaction"})

merged_df

Unnamed: 0,Personality,Amount,Reaction,Source Notes,Unique Entry ID
0,Lazy,60,Bashfulness,,wzvKbFrEJCseTohoz
1,Lazy,60,Mistaken,,DPR7WWkmR36Bsf2LG
2,Lazy,60,Pride,Requires a high level of friendship,xKoQpdJMtocNFRckm
3,Lazy,60,Shyness,,9H7jt7AGKbh6mW4y8
4,Lazy,60,Sorrow,,W8nngMS98uwpHHW4m
5,Normal,59,Daydreaming,Requires a high level of friendship,JFrTBJCEaX5R4kABT
6,Normal,59,Fearful,,noKrWYCMSWcH4jwvi
7,Normal,59,Glee,,8nAwdhzZfotqpLC9R
8,Normal,59,Pleased,,Z3MuKfQMaH3D8vkPA
9,Normal,59,Sadness,,Wo8q6KhPtL2CYwat4
