In [1]:
import re
from PIL import Image
import pytesseract
import pandas as pd
from collections import Counter
import numpy as np
from PIL import ImageGrab, Image

In [None]:
# Grab whatever is currently on the system clipboard and, if it is a bitmap,
# persist it as a PNG so the later cells can read it from disk.
# NOTE(review): the splitting cell further down reads clipboard_image.png,
# clipboard_image_2.png and clipboard_image_3.png, so presumably this file
# name is edited by hand between captures — confirm.
clipboard_output_path = "clipboard_image_3.png"
clipboard_image = ImageGrab.grabclipboard()

# Use the public PIL.Image.Image class instead of reaching through
# ImageGrab's own module namespace.
if isinstance(clipboard_image, Image.Image):
    clipboard_image.save(clipboard_output_path, "PNG")
    # Report the file that was actually written (the message used to name a
    # different, non-existent file).
    print(f"Image saved as '{clipboard_output_path}'")
else:
    # Anything that is not an image object ends up here.
    print("No image found in the clipboard!")

In [None]:
def cut_image(image_path, left_name, right_name):
    """Split an image vertically down the middle and save both halves as PNG.

    Parameters
    ----------
    image_path : str
        Path of the image to split.
    left_name, right_name : str
        Output file names *without* extension; ``.png`` is appended, so the
        halves are written to ``f'{left_name}.png'`` and
        ``f'{right_name}.png'`` relative to the working directory.

    Returns
    -------
    tuple
        The return values of the two ``save`` calls (left, right), kept so
        existing callers see the same 2-tuple as before.
    """
    # The context manager closes the underlying file once both halves have
    # been written; previously the handle was left open.
    with Image.open(image_path) as image:
        width, height = image.size
        middle = width // 2  # for odd widths the right half is 1 px wider

        # Crop boxes are (left, upper, right, lower).
        left_image = image.crop((0, 0, middle, height))
        right_image = image.crop((middle, 0, width, height))

        return (
            left_image.save(f'{left_name}.png'),
            right_image.save(f'{right_name}.png'),
        )

# Split each captured screenshot into its two columns.
# Each tuple is (source image, name for the left half, name for the right half);
# note that the right halves are also written under "left_N" names.
for source_path, left_half_name, right_half_name in (
    ('clipboard_image.png', 'left_1', 'left_2'),
    ('clipboard_image_2.png', 'left_3', 'left_4'),
    ('clipboard_image_3.png', 'left_5', 'left_6'),
):
    cut_image(source_path, left_half_name, right_half_name)

In [4]:
# Hero roster grouped by role.  Names are upper-case so they can be compared
# directly with the all-caps tokens extracted from the OCR text.
marvel_rivals_characters = {
    "Vanguards": [
        "BRUCE BANNER", "CAPTAIN AMERICA", "DOCTOR STRANGE", "GROOT",
        "MAGNETO", "PENI PARKER", "THOR", "VENOM",
    ],
    "Duelists": [
        "BLACK PANTHER", "BLACK WIDOW", "HAWKEYE", "HELA",
        "IRON FIST", "IRON MAN", "MAGIK", "MISTER FANTASTIC",
        "MOON KNIGHT", "NAMOR", "PSYLOCKE", "THE PUNISHER",
        "SCARLET WITCH", "SPIDER-MAN", "SQUIRREL GIRL", "STAR-LORD",
        "STORM", "WINTER SOLDIER", "WOLVERINE",
    ],
    "Strategists": [
        "ADAM WARLOCK", "CLOAK & DAGGER", "INVISIBLE WOMAN",
        "JEFF THE LAND SHARK", "LOKI", "LUNA SNOW", "MANTIS",
        "ROCKET RACCOON",
    ],
}

In [5]:
def get_text_from_image(image_path: str) -> str:
    """Run Tesseract OCR on the image at ``image_path`` and return its text.

    The image is opened in a ``with`` block so the file handle is released as
    soon as OCR has finished instead of lingering until garbage collection.
    """
    with Image.open(image_path) as image:
        return pytesseract.image_to_string(image)

def parse_challenge_data(text: str) -> dict:
    """Extract the fields of one challenge from a chunk of OCR text.

    Parameters
    ----------
    text : str
        One chunk of OCR output.

    Returns
    -------
    dict
        ``{"time_remaining": str | None,
           "objective": {"verb", "number", "type"},
           "progress": {"current", "total"},
           "heroes": list[str]}``
        Every scalar is either the matched substring (numbers stay strings)
        or ``None`` when the corresponding pattern is not found.  ``heroes``
        holds every all-caps token that is not in the exclusion list; a token
        may contain ``" "``, ``"-"``, ``"&"`` or ``"\\n"`` between its words.
    """
    # Time remaining, e.g. "13D 4H".
    # NOTE: if Tesseract splits "13D" into "13" + "D" this will not match.
    time_match = re.search(r'\b(\d+D\s*\d+H)\b', text)
    time_remaining = time_match.group(1) if time_match else None

    # Objective, e.g. "Deal 15000 Damage" -> verb, number, type.  One extra
    # word is tolerated between the verb and the number; matching is
    # case-insensitive.
    objective_pattern = r'\b([A-Za-z]+)(?:\s+[a-zA-Z]+)?\s+(\d+)\s+(Damage|Enemies|Assists|KO Streak|Health)\b'
    objective_match = re.search(objective_pattern, text, re.IGNORECASE)
    if objective_match:
        objective_verb, objective_number, objective_type = objective_match.groups()
    else:
        objective_verb = objective_number = objective_type = None

    # Progress, e.g. "8457 /15000" -> current, total.
    progress_pattern = r'\b(\d+)\s*/\s*(\d+)\b'
    progress_match = re.search(progress_pattern, text)
    if progress_match:
        progress_current, progress_total = progress_match.groups()
    else:
        progress_current = progress_total = None

    # Hero names: runs of upper-case words, optionally joined by a space,
    # "-", "&" or a line break.  This search is case-sensitive, so ordinary
    # capitalised words such as "Deal" are not picked up.
    heroes_pattern = r"\b[A-Z]+(?:[ & ]?[-&]?[ ]?[\n]?[A-Z]+)*\b"
    all_caps_words = re.findall(heroes_pattern, text)

    # Upper-case tokens that belong to the challenge wording, not to a hero.
    exclusions = {"DEAL", "DAMAGE", "AS", "OR",
                  "ENEMIES", "ASSIST", "KO", "NS", "INFLICT"}
    heroes = [word for word in all_caps_words if word not in exclusions]

    return {
        "time_remaining": time_remaining,
        "objective": {
            "verb": objective_verb,
            "number": objective_number,
            "type": objective_type
        },
        "progress": {
            "current": progress_current,
            "total": progress_total
        },
        "heroes": heroes
    }

def get_role(hero):
    """Return the role whose roster in ``marvel_rivals_characters`` lists ``hero``.

    Roles are checked in the dictionary's order; ``None`` is returned when no
    roster contains the name.
    """
    matching_roles = (
        role_name
        for role_name, roster in marvel_rivals_characters.items()
        if hero in roster
    )
    return next(matching_roles, None)

# Challenge verbs associated with each role.  Values are a list of verbs or,
# for a single verb, a bare string — get_mission() handles both forms.
role_mission_index = {
    'Vanguards': ['Take', 'Inflict'],
    'Duelists': ['Inflict', 'Defeat'],
    'Strategists': 'Heal',
}

def get_mission(hero):
    """Return the challenge verbs tied to ``hero``'s role as one string.

    The role is found by scanning ``role_mission_index`` and checking the
    matching roster in ``marvel_rivals_characters``.  A list of verbs is
    joined with ``', '``; a non-list value is returned unchanged.  ``None`` is
    returned when the hero is in none of the rosters.
    """
    for role_name, verbs in role_mission_index.items():
        if hero not in marvel_rivals_characters[role_name]:
            continue
        return ', '.join(verbs) if isinstance(verbs, list) else verbs
    return None

def sum_mission_count(mission):
    """Count the distinct quest verbs in ``quest_counter`` found in ``mission``.

    Parameters
    ----------
    mission : str | list | None
        Typically the string produced by ``get_mission`` (e.g.
        ``"Take, Inflict"``).  Membership is tested with ``in``, so for a
        string this is a substring check.

    Returns
    -------
    int
        Number of keys of the module-level ``quest_counter`` contained in
        ``mission``.  Each matching key contributes 1 regardless of its count.
        ``0`` is returned for a missing mission (``None`` / NaN), which is what
        ``get_mission`` yields for names that are not known heroes; previously
        that case raised ``TypeError``.

    NOTE(review): the name suggests the counts themselves might have been meant
    to be summed (``quest_counter[quest]``) — confirm the intended weighting.
    """
    # None, or a float such as NaN, cannot contain anything.
    if mission is None or isinstance(mission, float):
        return 0

    # Skip a stray None key so a string ``mission`` never sees ``None in str``.
    return sum(
        1 for quest in quest_counter
        if quest is not None and quest in mission
    )

In [None]:
# OCR every column image and concatenate the recognised text.
# NOTE(review): the splitting step also writes left_6.png, which is not read
# here — confirm the omission is intentional.
column_files = ["left_1.png", "left_2.png", "left_3.png", "left_4.png", "left_5.png"]

column_texts = []
for column in column_files:
    column_text = get_text_from_image(column)
    column_texts.append(column_text)
    # Show only what this column contributed; printing the running total
    # repeated all earlier columns on every iteration.
    print(f"Text ({column}): {column_text}")

texts = "".join(column_texts)

# Every "ee" is removed (presumably a recurring OCR artifact — note this also
# affects legitimate words containing "ee"), then the text is split on blank
# lines into chunks.
text = texts.replace("ee", "").split("\n\n")

In [None]:
# Visual sanity check: dump the list of OCR chunks (the text split on blank lines).
print(text)

In [8]:
# Parse every OCR chunk; keys are the chunk positions in ``text``.
result = {i: parse_challenge_data(chunk) for i, chunk in enumerate(text)}

for i, entry in result.items():
    # A name matched across a line break carries an embedded newline.
    heroes = [hero.replace("\n", ' ') for hero in entry['heroes']]

    # A lone "THE" is taken to be THE PUNISHER and a lone "PUNISHER" is
    # replaced by NaN (presumably so the split name is counted only once and
    # the placeholder can be dropped downstream — confirm).
    heroes = ['THE PUNISHER' if hero == 'THE' else hero for hero in heroes]
    heroes = [np.nan if hero == 'PUNISHER' else hero for hero in heroes]

    # De-duplicate names within one challenge.
    entry['heroes'] = set(heroes)

    # A chunk with neither a time nor an objective is treated as a fragment of
    # the preceding challenge: strip it down to its progress only.
    is_fragment = (entry['time_remaining'] is None
                   and entry['objective']['verb'] is None)
    if is_fragment:
        entry.pop('time_remaining')
        entry.pop('heroes')
        entry.pop('objective')

        # Hand the progress to the previous chunk.  The first chunk has no
        # predecessor (this used to be hidden by a bare ``except``), and a
        # fragment without any progress must not wipe a value that the
        # previous chunk already has.
        if i > 0 and entry['progress']['current'] is not None:
            result[i - 1]['progress'] = entry['progress']

# Drop the fragments, i.e. entries that were reduced to just "progress".
filtered_result = {k: v for k, v in result.items() if set(v.keys()) != {"progress"}}

In [None]:
hero_counter = Counter()    # hero name -> number of challenges naming it
quest_counter = Counter()   # objective verb -> number of hero-less challenges

for entry in filtered_result.values():
    heroes = entry['heroes']
    hero_counter.update(heroes)
    # Only challenges that name no hero are tallied by their verb.
    if not heroes:
        quest_counter[entry['objective']['verb']] += 1

# One row per hero, most frequent first.  Building the frame from
# (hero, count) pairs also works when no hero was found at all; dropna()
# removes the NaN placeholder row, if any.
filtered_result_df = (
    pd.DataFrame(hero_counter.most_common(), columns=['hero', 'count'])
    .dropna()
)

# Challenges whose verb could not be parsed are counted under None; discard
# that bucket.  The default avoids a KeyError when there is no such bucket.
quest_counter.pop(None, None)

quest_counter

In [None]:
# Display the per-hero count table (columns: hero, count).
filtered_result_df

In [None]:
# Enrich the per-hero table and rank it:
#   role          - role roster the hero belongs to
#   mission       - challenge verbs associated with that role
#   mission_count - result of sum_mission_count for those verbs
#   priority      - hero count plus mission_count
filtered_result_df = (
    filtered_result_df
    .assign(
        role=lambda frame: frame['hero'].apply(get_role),
        mission=lambda frame: frame['hero'].apply(get_mission),
        mission_count=lambda frame: frame['mission'].apply(sum_mission_count),
        priority=lambda frame: frame['count'] + frame['mission_count'],
    )
    .sort_values('priority', ascending=False)
)

filtered_result_df.head(10)