In [None]:
import requests, json
from bs4 import BeautifulSoup
def filterDeckJson(deckJson):
    """Reduce one raw deck record to the handful of fields the analysis uses.

    Parameters
    ----------
    deckJson : dict
        A deck record. The function reads ``deckJson['main']`` and
        ``deckJson['extra']`` (iterables of entries exposing
        ``entry['card']['name']``), ``deckJson['engines']`` (entries exposing
        ``['name']``), ``deckJson['deckType']`` (a dict with ``'name'`` and an
        optional ``'tournamentPower'``), ``deckJson['srPrice']`` and
        ``deckJson['urPrice']``.

    Returns
    -------
    dict
        Keys ``main``, ``extra`` and ``fullDeck`` hold sorted lists of unique
        card names (``fullDeck`` is the union of the other two); ``engines``
        is the list of engine names; ``deckType`` is the deck type name;
        ``srPrice`` / ``urPrice`` are copied through; ``power`` is the
        tournament power, or 0 when it is missing or ``None``.

    Raises
    ------
    KeyError
        If any of the required keys listed above is absent.
    """
    filteredJson = {}
    for deckCategory in ('main', 'extra'):
        # Sorting makes the result reproducible; a bare list(set(...)) comes
        # back in an arbitrary order that can differ between interpreter runs.
        filteredJson[deckCategory] = sorted(
            {entry['card']['name'] for entry in deckJson[deckCategory]}
        )
    filteredJson['engines'] = [engine['name'] for engine in deckJson['engines']]

    deckTypeInfo = deckJson['deckType']
    filteredJson['deckType'] = deckTypeInfo['name']
    filteredJson['srPrice'] = deckJson['srPrice']
    filteredJson['urPrice'] = deckJson['urPrice']

    # .get() only covers an absent key; a key that is present but null would
    # otherwise put None into the numeric power values used downstream.
    tournamentPower = deckTypeInfo.get("tournamentPower", 0)
    filteredJson["power"] = 0 if tournamentPower is None else tournamentPower

    filteredJson['fullDeck'] = sorted(
        set(filteredJson['main']) | set(filteredJson['extra'])
    )

    return filteredJson

# Download the top-decks page and pull the deck list out of the JSON payload
# embedded in one of its <script> tags.
url = "https://www.masterduelmeta.com/top-decks"
# A timeout keeps Run All from hanging forever on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as if it were data.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')
script_tag = soup.find('script', {'sveltekit:data-url': '/api/v1/top-decks?created[$gte]=(days-14)&fields=-__v&limit=0', 'type': 'application/json'})
if script_tag is None:
    # Fail with an actionable message instead of "'NoneType' object has no attribute 'text'".
    raise RuntimeError(
        "Could not find the embedded top-decks JSON <script> tag on " + url
        + "; the page markup may have changed."
    )

# The tag's text is JSON whose 'body' field is itself a JSON-encoded string,
# hence the two decoding passes.
allDecksJson = json.loads(json.loads(script_tag.text)['body'])
filteredDecks = [filterDeckJson(deckJson) for deckJson in allDecksJson]

In [None]:
# Deck type name -> tournament power. When several decks share a type, the
# value from the last one in filteredDecks is the one that is kept.
deckTypePowerDict = {deck['deckType']: deck['power'] for deck in filteredDecks}

In [None]:
import itertools

# One de-duplicated card list per scraped deck, and the set of every card name
# that appears in at least one of them.
listOfDecklists = [list(set(filteredDeck['fullDeck'])) for filteredDeck in filteredDecks]
uniqueCardsSeenSet = set(itertools.chain(*listOfDecklists))

# Empty per-deck-type accumulators, created in the key order of deckTypePowerDict:
#   PCardGivenCardAndDeck[deckType][cardA][cardB] -> filled by the counting cell
#   cardCounter[deckType][card]                   -> filled by the counting cell
#   PCardGivenDeck[deckType][card]                -> filled by the counting cell
#   deckTypeCounter[deckType]                     -> starts at 0
PCardGivenCardAndDeck = {}
cardCounter = {}
PCardGivenDeck = {}
deckTypeCounter = {}
for deckTypeName in deckTypePowerDict:
    PCardGivenCardAndDeck[deckTypeName] = {}
    cardCounter[deckTypeName] = {}
    PCardGivenDeck[deckTypeName] = {}
    deckTypeCounter[deckTypeName] = 0

In [None]:
from tqdm import tqdm
# Rebuild the tallies from scratch. The second loop below overwrites the raw
# counts with probabilities in place, so without this reset a re-run of the
# cell would add fresh counts on top of already-normalised values.
PCardGivenCardAndDeck = {deckType: {} for deckType in deckTypePowerDict}
cardCounter = {deckType: {} for deckType in deckTypePowerDict}
PCardGivenDeck = {deckType: {} for deckType in deckTypePowerDict}
deckTypeCounter = {deckType: 0 for deckType in deckTypePowerDict}

# Pass 1 - raw counts per deck type:
#   deckTypeCounter[type]             number of decks of that type
#   cardCounter[type][card]           number of those decks containing `card`
#   PCardGivenCardAndDeck[type][a][b] number of those decks containing both a and b
for deckList in filteredDecks:
    deckTypeName = deckList['deckType']
    deckCards = deckList['fullDeck']
    deckTypeCounter[deckTypeName] += 1

    pairCounts = PCardGivenCardAndDeck[deckTypeName]
    singleCounts = cardCounter[deckTypeName]
    for conditionCard in deckCards:
        coCounts = pairCounts.setdefault(conditionCard, {})
        for givenConditionCard in deckCards:
            coCounts[givenConditionCard] = coCounts.get(givenConditionCard, 0) + 1
        singleCounts[conditionCard] = singleCounts.get(conditionCard, 0) + 1

# Pass 2 - turn the counts into relative frequencies:
#   PCardGivenDeck[type][a]           = decks of `type` containing a / decks of `type`
#   PCardGivenCardAndDeck[type][a][b] = decks containing a and b / decks containing a
for deckType in tqdm(PCardGivenCardAndDeck.keys()):
    for conditionCard, coCounts in PCardGivenCardAndDeck[deckType].items():
        conditionCount = cardCounter[deckType][conditionCard]
        PCardGivenDeck[deckType][conditionCard] = conditionCount / deckTypeCounter[deckType]
        # Only existing values are rewritten (no keys added), so mutating
        # while iterating is safe here.
        for givenConditionCard in coCounts:
            coCounts[givenConditionCard] /= conditionCount

In [None]:
import graphistry
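# Register your Graphistry API key here before running the plotting cells (load it from an environment variable rather than hard-coding it in the notebook).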

In [None]:
import pandas as pd

# Card co-occurrence graph for a single deck type.
deckType = "Voiceless Voice"
print(deckType)

deckPairProbs = PCardGivenCardAndDeck[deckType]
deckCardProbs = PCardGivenDeck[deckType]

# The frame is only used for its column labels, which become the node ids.
df = pd.DataFrame.from_dict(deckPairProbs, orient='index').fillna(0)
nodes = pd.DataFrame({'id': df.columns, 'name': df.columns})
nodes['node_weight'] = nodes['name'].apply(lambda cardName : deckCardProbs[cardName])

# One directed edge per ordered pair of distinct cards with a positive value.
edgeRecords = [
    (conditionCard, givenConditionCard, pairProb)
    for conditionCard, givenProbs in deckPairProbs.items()
    for givenConditionCard, pairProb in givenProbs.items()
    if pairProb > 0 and conditionCard != givenConditionCard
]
sources = [record[0] for record in edgeRecords]
targets = [record[1] for record in edgeRecords]
values = [record[2] for record in edgeRecords]

links = pd.DataFrame({'source' : sources, "target" : targets, "value" : values})

URL_PARAMS = {'play': 15000, 'edgeCurvature': 0.1, 'precisionVsSpeed': -3, "edgeOpacity" : 0.05, "pointsOfInterestMax" : 60}

# Column bindings handed to graphistry.bind().
columnBindings = dict(
    source="source",
    destination="target",
    edge_weight="value",
    node="id",
    point_size="node_weight",
    point_title="name",
    point_label="name",
)
g = (
    graphistry.bind(**columnBindings)
    .settings(url_params=URL_PARAMS)
    .edges(links)
    .nodes(nodes)
)
g.plot()

In [None]:
import numpy as np

# Prior over deck types, PDeck[deckType].
#
# Deck types with non-zero tournament power share TOURNEY_SHARE of the
# probability mass in proportion to their power; deck types with zero power
# split NON_TOURNEY_SHARE equally. The degenerate inputs that used to crash
# (no zero-power deck type -> 0.1 / 0) or yield NaN (total power of 0 ->
# 0.9 / 0) are handled by handing the whole mass to the group that exists.
TOURNEY_SHARE = 0.9
NON_TOURNEY_SHARE = 0.1

deckTourneyPowerArr = np.array(list(deckTypePowerDict.values()))

totalTourneyPower = deckTourneyPowerArr.sum()
numNonTourneyDecks = np.count_nonzero(deckTourneyPowerArr == 0)

if numNonTourneyDecks == 0:
    # Every deck type has tournament power: nothing to reserve.
    tourneyShare, nonTourneyShare = 1.0, 0.0
elif totalTourneyPower == 0:
    # No deck type has tournament power: fall back to a uniform prior.
    tourneyShare, nonTourneyShare = 0.0, 1.0
else:
    tourneyShare, nonTourneyShare = TOURNEY_SHARE, NON_TOURNEY_SHARE

if totalTourneyPower > 0:
    reweightedTourneyPowerArr = deckTourneyPowerArr * (tourneyShare / totalTourneyPower)
else:
    reweightedTourneyPowerArr = np.zeros(len(deckTourneyPowerArr))

if numNonTourneyDecks > 0:
    nonTourneyWeight = nonTourneyShare / numNonTourneyDecks
    reweightedTourneyPowerArr[reweightedTourneyPowerArr == 0] = nonTourneyWeight
else:
    nonTourneyWeight = 0.0

# zip() pairs positionally: the array was built from this same dict's values.
PDeck = dict(zip(deckTypePowerDict.keys(), reweightedTourneyPowerArr))


In [None]:
from tqdm import tqdm
# Marginal probability of each card: sum over deck types of
# PCardGivenDeck[deck][card] * PDeck[deck]. A card that a deck type never
# plays contributes 0 for that deck type.
PCard = {
    card: sum(PCardGivenDeck[deck].get(card, 0) * PDeck[deck] for deck in PDeck)
    for card in uniqueCardsSeenSet
}

# Bayes' rule: PDeckGivenCard[card][deck] =
#     PCardGivenDeck[deck][card] * PDeck[deck] / PCard[card]
PDeckGivenCard = {
    card: {
        deck: PCardGivenDeck[deck].get(card, 0) * PDeck[deck] / PCard[card]
        for deck in PDeck
    }
    for card in uniqueCardsSeenSet
}
        


In [None]:
# Placeholder only: the name is rebound to the vectorised result in a later cell.
PCardGivenCard = {}

# The string literal below holds the original pure-Python triple loop and is
# kept for reference; because it is a string, none of it is executed. For each
# ordered card pair it summed, over deck types,
#   PCardGivenCardAndDeck[deck][conditionCard][givenConditionCard]
#     * PDeckGivenCard[conditionCard][deck]
# skipping combinations missing from the nested dicts (the KeyError branch).
'''
Use the chatgpt optimized version in the cell below which somehow magically does the same thing but about 10x faster

for conditionCard in tqdm(uniqueCardsSeenSet):
    PCardGivenCard[conditionCard] = {}
    for givenConditionCard in uniqueCardsSeenSet:
        PCardGivenCard[conditionCard][givenConditionCard] = 0
        for deck in PDeck.keys():
            try:
                PCardGivenCard[conditionCard][givenConditionCard] += PCardGivenCardAndDeck[deck][conditionCard][givenConditionCard] * PDeckGivenCard[conditionCard][deck]
            except KeyError:
                pass'''

In [None]:
# Give every card and every deck type a fixed position so the nested dicts
# can be laid out as dense arrays.
card_to_index = dict(zip(uniqueCardsSeenSet, range(len(uniqueCardsSeenSet))))
index_to_card = dict(enumerate(uniqueCardsSeenSet))
deck_to_index = dict(zip(PDeck, range(len(PDeck))))

num_cards = len(uniqueCardsSeenSet)
num_decks = len(PDeck)

# Dense, zero-initialised containers:
#   PCardGivenCard_array[condition, given]
#   PDeckGivenCard_array[card, deck]
#   PCardGivenCardAndDeck_array[deck, condition, given]
PCardGivenCard_array = np.zeros((num_cards, num_cards))
PDeckGivenCard_array = np.zeros((num_cards, num_decks))
PCardGivenCardAndDeck_array = np.zeros((num_decks, num_cards, num_cards))

# Copy PDeckGivenCard into its array, one card row at a time.
for card, deck_probs in PDeckGivenCard.items():
    card_row = PDeckGivenCard_array[card_to_index[card]]  # view: writes land in the array
    for deck, prob in deck_probs.items():
        card_row[deck_to_index[deck]] = prob

# Copy PCardGivenCardAndDeck into its array; pairs absent from the dicts stay 0.
for deck, condition_probs in PCardGivenCardAndDeck.items():
    deck_slab = PCardGivenCardAndDeck_array[deck_to_index[deck]]
    for condition_card, given_probs in condition_probs.items():
        condition_row = deck_slab[card_to_index[condition_card]]
        for given_card, value in given_probs.items():
            condition_row[card_to_index[given_card]] = value

# For each condition card, weight every deck type's row by
# PDeckGivenCard[condition, deck] and add the rows up over deck types.
for condition_idx in tqdm(range(num_cards)):
    rows_per_deck = PCardGivenCardAndDeck_array[:, condition_idx, :]
    deck_weights = PDeckGivenCard_array[condition_idx, :, np.newaxis]
    PCardGivenCard_array[condition_idx, :] = (rows_per_deck * deck_weights).sum(axis=0)

# Back to a nested dict keyed by card name: result[condition][given].
PCardGivenCard_dict = {}
for condition_idx in range(num_cards):
    PCardGivenCard_dict[index_to_card[condition_idx]] = {
        index_to_card[given_idx]: PCardGivenCard_array[condition_idx, given_idx]
        for given_idx in range(num_cards)
    }

PCardGivenCard_dict

In [None]:
# Rebind the name the plotting cell reads to the vectorised result (same dict
# object, not a copy).
PCardGivenCard = PCardGivenCard_dict

In [None]:
import pandas as pd

# Graph over every card seen: node size is PCard, edge weight is PCardGivenCard.
uniqueCardsList = list(uniqueCardsSeenSet)

nodes = pd.DataFrame({'id': uniqueCardsList, 'name': uniqueCardsList})
nodes['node_weight'] = nodes['name'].apply(lambda cardName : PCard[cardName])

# One directed edge per ordered pair of distinct cards with a positive value.
edgeRecords = []
for conditionCard in tqdm(uniqueCardsSeenSet):
    conditionalRow = PCardGivenCard[conditionCard]
    for givenConditionCard in uniqueCardsSeenSet:
        value = conditionalRow[givenConditionCard]
        if conditionCard != givenConditionCard and value > 0:
            edgeRecords.append((conditionCard, givenConditionCard, value))

sources = [record[0] for record in edgeRecords]
targets = [record[1] for record in edgeRecords]
values = [record[2] for record in edgeRecords]

links = pd.DataFrame({'source' : sources, "target" : targets, "value" : values})

# Mean incoming edge value per target card, attached to every edge.
targetMeans = links.drop(columns=["source"]).groupby("target").mean()['value']
d = dict(targetMeans)
links["target_mean"] = links['target'].apply(lambda cardName : d[cardName])

# Edge colour: deviation from the target's mean, shifted so the minimum is 0
# and then divided by the maximum.
deviation = links['value'] - links['target_mean']
shifted = deviation - deviation.min()
links['my_color'] = shifted / shifted.max()

URL_PARAMS = {'play': 15000, 'edgeCurvature': 0.1, 'precisionVsSpeed': -3, "edgeOpacity" : 0.02, "pointsOfInterestMax" : 5}

columnBindings = dict(
    source="source",
    destination="target",
    edge_weight="value",
    node="id",
    point_size="node_weight",
    point_title="name",
    point_label="name",
)
g = graphistry.bind(**columnBindings)
# The colour encoding, settings, edges and nodes are applied only for this
# plot; `g` itself keeps just the column bindings.
(
    g.encode_edge_color('my_color', palette=['#39FF14', 'gray'], as_continuous=True)
    .settings(url_params=URL_PARAMS)
    .edges(links)
    .nodes(nodes)
    .plot()
)

In [None]:
import pandas as pd
import networkx as nx
from cdlib import algorithms
# Build a directed graph from the edge table, keeping 'value' as an edge attribute.
G = nx.from_pandas_edgelist(
    links,
    source='source',
    target='target',
    edge_attr='value',
    create_using=nx.DiGraph,
)

# Run Leiden community detection through cdlib, weighted by 'value'.
communities = algorithms.leiden(G, weights='value')

# communities.communities is iterated as a sequence of node collections;
# flatten it into node id -> community index. If a node appeared in more than
# one collection, the last index would be the one kept.
community_mapping = {
    node: idx
    for idx, community in enumerate(communities.communities)
    for node in community
}

# Attach the label to each node row; ids missing from the mapping become NaN.
nodes['my_community'] = nodes['id'].map(community_mapping)

# Optional cap on the number of distinct labels (left disabled):
#nodes["my_community"] = np.minimum(nodes['my_community'], 17)

In [None]:
import seaborn as sns

def rgb_to_hex(rgb_tuple):
    """Convert an RGB triple with channels in the range 0-1 to ``#RRGGBB``.

    Each channel is scaled to 0-255, rounded to the nearest integer and
    clamped to that range. Rounding rather than truncating matters because
    ``255 * x`` can land a hair below the intended integer in floating point,
    which truncation would turn into an off-by-one channel value. Clamping
    keeps an out-of-range input from producing a malformed hex string.

    Parameters
    ----------
    rgb_tuple : iterable of three numbers
        Red, green and blue channels, nominally within [0, 1].

    Returns
    -------
    str
        Upper-case hex colour string such as ``"#FF8000"``.
    """
    rgb_255 = tuple(min(255, max(0, round(255 * x))) for x in rgb_tuple)
    return "#{:02X}{:02X}{:02X}".format(*rgb_255)

# One colour per community index 0..max. The int() cast matters: if any node
# was left without a community, the column holds NaN and is float-typed, so
# max() + 1 would be a float and range() below would raise TypeError.
numberColors = int(nodes['my_community'].max()) + 1
palette = sns.color_palette("bright", n_colors=numberColors)

# Community index (as a string key) -> hex colour.
paletteDict = dict(zip(
    [str(k) for k in range(numberColors)],
    [rgb_to_hex(rgb_tuple) for rgb_tuple in palette]))
paletteDict

In [None]:
# Colour points by the 'my_community' column, using paletteDict as the
# category -> colour mapping.
# NOTE(review): the edge-colour encoding used for the earlier plot was never
# assigned back to `g`, so presumably it is not carried into g2 - confirm
# whether that is intended.
g2 = g.encode_point_color("my_community", categorical_mapping = paletteDict)
# Re-attach the settings and the current links / nodes tables, then render.
g2.settings(url_params=URL_PARAMS).edges(links).nodes(nodes).plot()

In [None]:
# Add 0.1 to every node weight.
# NOTE(review): not idempotent - each re-run of this cell adds another 0.1,
# and plots rendered in earlier cells are unaffected unless re-plotted.
nodes["node_weight"] = nodes['node_weight'] + 0.1

In [None]:
# Display the node table built above (id, name, node_weight, my_community).
nodes