In [1]:
from visualisation.vis_utils.read_csv_to_df import df_from_csv
import pandas as pd

# Load the stage-5 cleaned character and ship CSVs into dataframes,
# printing each frame's column index right after it is read.
characters_df = df_from_csv(
    "data/fifth_clean_up_data/stage_5_characters.csv"
)
print(characters_df.columns)

ships_df = df_from_csv(
    "data/fifth_clean_up_data/stage_5_ships.csv"
)
print(ships_df.columns)

Index(['given_name', 'middle_name', 'maiden_name', 'surname', 'alias',
       'nickname', 'title_prefix', 'title_suffix', 'name_order', 'full_name',
       'fandom', 'gender', 'race', 'rpf_or_fic'],
      dtype='object')
Index(['slash_ship', 'gen_ship', 'members_no', 'fandom', 'rpf_or_fic',
       'gender_combo', 'race_combo', 'member_1', 'member_2', 'member_3',
       'member_4'],
      dtype='object')


In [2]:
# Goal: a frame with one row per character per ship, so that rows can later
# be counted per character. It is built by joining the ship table to the
# character table once for each member slot and stacking the results.

# trim both tables down to the columns needed (for now)
ship_member_columns = ["member_1", "member_2", "member_3", "member_4"]
character_columns_df = characters_df[
    ["full_name", "fandom", "gender", "race"]
].copy()
ship_columns_df = ships_df[
    [
        "slash_ship", "members_no", "fandom",
        "rpf_or_fic", "gender_combo", "race_combo",
        *ship_member_columns,
    ]
].copy()

# attach the number of ships in each fandom *before* the character join,
# while there is still exactly one row per ship
ships_per_fandom_df = (
    ship_columns_df.groupby("fandom")[["slash_ship"]]
    .count()
    .rename(columns={"slash_ship": "no_of_ships"})
)
ship_columns_df = ship_columns_df.join(
    ships_per_fandom_df,
    on="fandom",
    lsuffix="_left",
    rsuffix="_right",
)

# index the characters by a "<fandom> - <full name>" tag (kept as a column
# too); the member_N columns are joined against this index below
character_columns_df = character_columns_df.assign(
    name_tag=character_columns_df["fandom"] + " - " + character_columns_df["full_name"]
).set_index("name_tag", drop=False)

# one joined frame per member slot; both tables have a "fandom" column, so
# the join yields "fandom_left" (ship) and "fandom_right" (character)
member_1_df, member_2_df, member_3_df, member_4_df = (
    ship_columns_df.join(
        character_columns_df,
        on=member_column,
        lsuffix="_left",
        rsuffix="_right",
    )
    for member_column in ship_member_columns
)

# stack the four frames, discard the member columns and the duplicate fandom
# column, then drop every row that still contains a missing value (e.g. the
# unmatched member_3 / member_4 slots)
full_character_df = (
    pd.concat([member_1_df, member_2_df, member_3_df, member_4_df])
    .drop(columns=[*ship_member_columns, "fandom_right"])
    .dropna()
    .rename(columns={"fandom_left": "fandom"})
    .sort_values(by="fandom")
)

full_character_df.tail()


Unnamed: 0,slash_ship,members_no,fandom,rpf_or_fic,gender_combo,race_combo,no_of_ships,full_name,gender,race,name_tag
597,Katsuki Yuuri x Victor Nikiforov,2,Yuri!!! on ICE | ユーリ!!! on ICE,fictional,M / M,E Asian / White,3,Victor Nikiforov,M,White,Yuri!!! on ICE | ユーリ!!! on ICE - Victor Nikiforov
597,Katsuki Yuuri x Victor Nikiforov,2,Yuri!!! on ICE | ユーリ!!! on ICE,fictional,M / M,E Asian / White,3,Katsuki Yuuri,M,E Asian,Yuri!!! on ICE | ユーリ!!! on ICE - Katsuki Yuuri
598,Mila Babicheva x Sara Crispino,2,Yuri!!! on ICE | ユーリ!!! on ICE,fictional,F / F,White / Latin,3,Mila Babicheva,F,White,Yuri!!! on ICE | ユーリ!!! on ICE - Mila Babicheva
599,Otabek Altin x Yuri Plisetsky,2,Yuri!!! on ICE | ユーリ!!! on ICE,fictional,M / M,Central As / White,3,Otabek Altin,M,Central As,Yuri!!! on ICE | ユーリ!!! on ICE - Otabek Altin
598,Mila Babicheva x Sara Crispino,2,Yuri!!! on ICE | ユーリ!!! on ICE,fictional,F / F,White / Latin,3,Sara Crispino,F,Latin,Yuri!!! on ICE | ユーリ!!! on ICE - Sara Crispino


In [39]:
# hottest character (character in most ships in fandom)
#
# Reads:    full_character_df (one row per character per ship).
# Produces: hottest_rank_df with one row per fandom per rank tier, holding the
#           (possibly tied) character names at that tier and their ship count.


def _ordinal(position):
    """Return the English ordinal label for a positive integer (1 -> '1st', 11 -> '11th')."""
    if 11 <= position % 100 <= 13:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(position % 10, "th")
    return f"{position}{suffix}"


# Fandoms whose characters are all tied on ship count, so ranking them says
# nothing. Found with a one-off check on the per-character counts (max == min,
# more than one character, count > 1), which reported only these two.
# NOTE: hard-coded -- re-run that check if the input data changes.
TIED_FANDOMS = ["Carmilla", "Amphibia"]

# only fandoms with more than one ship are considered
# (boolean indexing instead of .where(), which would leave all-NaN rows behind)
hottest_df = full_character_df.loc[
    full_character_df["no_of_ships"] > 1,
    ["fandom", "full_name", "slash_ship", "gender", "race"],
].copy()

# renaming the doctor & gender diff player characters as they are the same character
renaming_dict = {
    "The Eleventh Doctor": "The Doctor",
    "The Ninth Doctor": "The Doctor",
    "The Tenth Doctor": "The Doctor",
    "The Thirteenth Doctor": "The Doctor",
    "The Twelfth Doctor": "The Doctor",
    "Hawke (Female) | Player Character": "Hawke | Player Character",
    "Inquisitor (Female) | Player Character": "Inquisitor | Player Character",
    "Warden (Female) | Player Character": "Warden | Player Character",
    "Shepard (Female) | Player Character": "Shepard | Player Character",
    "Shepard (Male) | Player Character": "Shepard | Player Character",
}
reassigning_dict = {}  # not used in this cell; kept in case other cells reference it

# and setting their genders as ambig bc they vary but we want to count them as one here
# (must run BEFORE the rename, because it matches on the original names)
# NOTE(review): these are substring matches, so any other character whose name
# contains "Female", "Male" or " Doctor" is also marked Ambig -- confirm
# against the data.
is_merged_character = (
    hottest_df["full_name"].str.contains("Female", na=False)
    | hottest_df["full_name"].str.contains("Male", na=False)
    | hottest_df["full_name"].str.contains(" Doctor", na=False)
)
hottest_df["gender"] = hottest_df["gender"].mask(
    cond=is_merged_character,
    other="Ambig",
)
hottest_df["full_name"] = hottest_df["full_name"].replace(to_replace=renaming_dict)

# group by fandom, by characters, count the ships each character is in
hottest_df = hottest_df.groupby(
    ["fandom", "full_name", "gender", "race"]
).count().rename(
    columns={"slash_ship": "no_of_ships_they_in"}
).reset_index().sort_values(
    by=["fandom", "no_of_ships_they_in"],
    ascending=False,
)

# removing all chars that are in less than 3 ships, and any fandoms where all chars are tied
# (at time of writing this left 62 rows)
hottest_df = hottest_df.loc[
    (hottest_df["no_of_ships_they_in"] > 2)
    & ~hottest_df["fandom"].isin(TIED_FANDOMS)
]

# per fandom: {ship count: [characters in exactly that many ships]}
# The tiers come from the counts actually present in the data rather than a
# fixed list of numbers, so a character in any number of ships is kept.
unique_fandoms = hottest_df["fandom"].unique()
hottest_chars_by_ship_no_dict = {}
for fandom in unique_fandoms:
    fandom_group = hottest_df.loc[hottest_df["fandom"] == fandom]
    names_by_count = fandom_group.groupby("no_of_ships_they_in")["full_name"].apply(list)
    hottest_chars_by_ship_no_dict[fandom] = {
        int(ship_count): names for ship_count, names in names_by_count.items()
    }

# rows = ship counts (highest first), columns = fandoms, cells = name lists (NaN if no tier)
hottest_chars_by_ship_no = pd.DataFrame(hottest_chars_by_ship_no_dict).sort_index(ascending=False)

# per fandom: [[no of ships, [ppl tied at that number]], ...] -- first item is highest ranked!
hottest_chars_dict = {
    fandom: [
        [ship_count, names]
        for ship_count, names in hottest_chars_by_ship_no[fandom].dropna().items()
    ]
    for fandom in hottest_chars_by_ship_no.columns
}

# rank labels for as many tiers as any fandom needs (never fewer than 1st-4th)
max_tiers = max([len(tiers) for tiers in hottest_chars_dict.values()] + [4])
rank_lookup_dict = {
    position: _ordinal(position) for position in range(1, max_tiers + 1)
}

rankings = {}
for fandom, tiers in hottest_chars_dict.items():
    rankings[fandom] = {}
    for position, (no_of_ships, tied_names) in enumerate(tiers, start=1):
        names_str = " & ".join(sorted(tied_names))
        if len(tied_names) > 1:
            names_str += " (tied)"
        rankings[fandom][rank_lookup_dict[position]] = {
            "no_of_ships": no_of_ships,
            "names": names_str,
        }

# flatten to one record per fandom per rank
rankings_columns = ["fandom", "rank", "names", "no_of_ships"]
rankings_list = [
    [fandom, rank, details["names"], details["no_of_ships"]]
    for fandom, fandom_ranks in rankings.items()
    for rank, details in fandom_ranks.items()
]

# Records are already in rank order within each fandom, so a *stable* sort on
# fandom alone keeps ranks in numeric order. Sorting on the "rank" strings
# would put "10th" before "1st".
hottest_rank_df = pd.DataFrame(
    data=rankings_list,
    columns=rankings_columns,
).sort_values(by="fandom", kind="mergesort")
hottest_rank_df


Unnamed: 0,fandom,rank,names,no_of_ships
39,A Song of Ice and Fire / Game of Thrones Universe,1st,Sansa Stark,3
38,Adam Lambert,1st,Adam Lambert,3
35,Bangtan Boys / BTS,1st,Min Yoongi | Suga,5
36,Bangtan Boys / BTS,2nd,Jeon Jungkook | Jungkook & Park Jimin | Jimin ...,4
37,Bangtan Boys / BTS,3rd,Kim Namjoon | Rap Monster/RM & Kim Taehyung | ...,3
33,Buffy Universe,1st,Buffy Summers,4
34,Buffy Universe,2nd,Spike,3
31,DC,1st,Alexandra 'Alex' Danvers | Supergirl/Sentinel,5
32,DC,2nd,Clark Kent | Superman & Kara Danvers/Zor-El | ...,3
30,Doctor Who,1st,The Doctor,6


In [4]:
# plotly imports

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [60]:
# visualise hottest characters:
# render hottest_rank_df as a plotly table, one table column per frame column

fig = go.Figure(
    data=[
        go.Table(
            header={
                "values": list(hottest_rank_df.columns),
                "align": "left",
                "line_color": "slategrey",
                "fill_color": "skyblue",
            },
            cells={
                "values": [
                    hottest_rank_df[column]
                    for column in ["fandom", "rank", "names", "no_of_ships"]
                ],
                "align": "left",
                "line_color": "slategrey",
                "fill_color": "aliceblue",
            },
        )
    ]
)
fig.show()

# The whole table can't be saved from here.
# TODO: look into kaleido for exporting the figure
# (at which point all of this might as well move into python files)