In [4]:
from src.util_functions.retrieve_data_from_csv import read_data_from_csv
import pandas as pd

# Load the raw rows of the characters CSV through the project helper.
characters_filepath = "data/fifth_clean_up_data/stage_5_characters.csv"
characters_csv_data = read_data_from_csv(characters_filepath)

# The first element is used as the column names, the remaining elements as records.
header_row = characters_csv_data[0]
record_rows = characters_csv_data[1:]
characters_df = pd.DataFrame(record_rows, columns=header_row)

# Show the resulting column labels as the cell output.
characters_df.columns

Index(['given_name', 'middle_name', 'maiden_name', 'surname', 'alias',
       'nickname', 'title_prefix', 'title_suffix', 'name_order', 'full_name',
       'fandom', 'gender', 'race', 'rpf_or_fic'],
      dtype='object')

In [59]:
# all-characters gender tallies (all time, entire set)
# NOTE: despite the variable name, the values below are raw counts, not percentages.

# Number of non-null full_name entries, relabelled for display.
total_chars = (
    characters_df
    .get(["full_name"])
    .count()
    .rename(index={"full_name": "total_num_of_characters"})
)
print(total_chars)

# One row per gender label; "count" is the number of non-null full_name entries in it.
total_gender_percentages = (
    characters_df
    .get(["full_name", "gender"])
    .groupby("gender")
    .count()
    .rename(columns={"full_name": "count"})
)
print(total_gender_percentages) # TO VISUALISE



total_num_of_characters    922
dtype: int64
               count
gender              
Ambig             12
F                367
F | Other         11
M                520
M | F | Other      4
M | Other          5
Other              2


In [25]:
# all-characters race tallies (all time, entire set)
# NOTE: despite the variable name, the values below are raw counts, not percentages.

# One row per race label; "count" is the number of non-null full_name entries in it.
total_race_percentages = (
    characters_df
    .get(["full_name", "race"])
    .groupby("race")
    .count()
    .rename(columns={"full_name": "count"})
)
print(total_race_percentages) # TO VISUALISE



                          count
race                           
Af Lat                        4
Am Ind                        3
Am Ind / E Asian (Multi)      1
Ambig                        57
As Ind                        3
As Ind / S Asian (Multi)      1
Asian (Multi)                 3
Black                        13
Black (Multi)                 5
Central As                    1
E Asian                     163
E Asian (Multi)              13
Eu Ind (Multi)                2
Latin                        27
Latin (Multi)                 7
MENA                         11
MENA (Multi)                  2
Māori Ind                     1
Māori Ind (Multi)             3
N.H.                         35
Romani                        2
S Asian                       3
S Asian (Multi)               1
SE Asian                      9
SE Asian (Multi)              4
SE Eu                         1
SE Eu (Multi)                 1
Unknown                       5
White                       537
White (M

In [88]:
# find fandoms with lowest (aka no) racial diversity

# Number of distinct fandom values (210 on the last recorded run).
total_fandoms = (
    characters_df
    .get(["fandom"])
    .nunique()
    .rename(index={"fandom": "total_fandoms"})
)

# Character count per (fandom, race) pair; the result has a two-level (fandom, race) index.
racial_div_by_fandom = (
    characters_df
    .get(["full_name", "fandom", "race"])
    .groupby(["fandom", "race"])
    .count()
    .rename(columns={"full_name": "count"})
)

# Number of race rows each fandom has, keyed by the fandom level only.
fandom_level = racial_div_by_fandom.index.droplevel(1)
races_per_fandom = racial_div_by_fandom.groupby(fandom_level).count()

# Blank out every row whose fandom has more than one race row, then count what is left:
# that is the number of fandoms with a single racial group (123 on the last recorded run).
lowest_racial_div = (
    racial_div_by_fandom
    .where(races_per_fandom == 1)
    .count()
    .rename(index={"count": "fandoms_with_only_one_racial_group"})
)

# Three-row summary: total, single-group and multi-group fandom counts.
num_fandoms = total_fandoms["total_fandoms"]
num_monoracial = lowest_racial_div["fandoms_with_only_one_racial_group"]
plural_vs_monoracial_fandoms = pd.DataFrame(
    {"count": [num_fandoms, num_monoracial, num_fandoms - num_monoracial]},
    index=[
        "total_fandoms",
        "fandoms_with_only_one_racial_group",
        "fandoms_with_multiple_racial_groups",
    ],
)

print(plural_vs_monoracial_fandoms) # TO VISUALISE!


                                     count
total_fandoms                          210
fandoms_with_only_one_racial_group     123
fandoms_with_multiple_racial_groups     87


In [85]:
# find fandoms with highest racial diversity (Genshin would likely be in here)

# Number of race rows per fandom, keyed by the fandom level of the (fandom, race) index.
race_rows_per_fandom = racial_div_by_fandom.groupby(
    racial_div_by_fandom.index.droplevel(1)
).count()

# Blank out rows of fandoms with a single race row, drop the race level, discard the blanks.
multi_race_rows = (
    racial_div_by_fandom
    .where(race_rows_per_fandom > 1)
    .droplevel(1)
    .dropna()
)

# Rows remaining per fandom = number of racial groups in that fandom; keep the six largest.
# The cut-off of 6 was picked by inspecting the data: everything else was under 5 groups,
# so it needs revisiting whenever the underlying data changes.
highest_racial_div = (
    multi_race_rows
    .groupby(multi_race_rows.index)
    .count()
    .sort_values(by="count", ascending=False)
    .head(6)
)

print(highest_racial_div) # TO VISUALISE


                     count
fandom                    
Marvel                   8
The 100                  7
DC                       7
Star Wars                6
Teen Wolf                5
Genshin Impact | 原神      5


In [None]:
# TODO: how many male vs female vs other characters a fandom has on average

In [None]:
# TODO: how much racial diversity a fandom has on average
# (how many groups, what percentage)