# Card Classification via Perceptual Hashing

In this notebook, we explore using perceptual hashing to classify Pokémon cards.

Prerequisite: download the card JSON files and images into `datasets/pokemon` before running the cells below.

In [None]:
from fastai.vision.all import *
import os
import json

In [26]:
# Root of the local checkout; kept as a plain string because a later cell
# interpolates it into an f-string.
repo_location = 'C:/Code/React/CollectiblesApp'
# Directory that holds the per-set card JSON files.
path = Path(repo_location) / 'src' / 'ai_dev' / 'datasets' / 'pokemon' / 'data'

In [27]:
# os.listdir returns entries in arbitrary order; sort them so the preview and
# the order in which the files are loaded are the same on every run.
pokemon_sets = sorted(os.listdir(path))
pokemon_sets[:5], len(pokemon_sets)

(['cards_2011bw.json',
  'cards_2012bw.json',
  'cards_2014xy.json',
  'cards_2015xy.json',
  'cards_2016xy.json'],
 187)

In [28]:
# Collect the cards from every cards_*.json file into one flat list.
all_cards = []
for pokemon_set in pokemon_sets:
    # Skip anything in the folder that is not named cards_*.json.
    if not (pokemon_set.startswith('cards_') and pokemon_set.endswith('.json')):
        continue
    # Join with pathlib rather than a hard-coded '\\' so the cell is not tied to
    # Windows, and read as UTF-8 explicitly instead of the platform default
    # (assumes the JSON files are UTF-8 encoded, as JSON normally is).
    with open(path / pokemon_set, 'r', encoding='utf-8') as file:
        json_data = json.load(file)
    all_cards.extend(json_data['cards'])

len(all_cards), all_cards[0]

(20111,
 {'category': 'Pokemon',
  'id': '2011bw-1',
  'illustrator': 'Ken Sugimori',
  'localId': '1',
  'name': 'Snivy',
  'rarity': 'None',
  'set': {'cardCount': {'official': 12, 'total': 12},
   'id': '2011bw',
   'name': "Macdonald's Collection 2011"},
  'variants': {'firstEdition': False,
   'holo': True,
   'normal': False,
   'reverse': False,
   'wPromo': False},
  'variants_detailed': [{'type': 'holo', 'size': 'standard'}],
  'hp': 60,
  'types': ['Grass'],
  'description': 'It is very intelligent and calm. Being exposed to lots of sunlight makes its movements swifter.',
  'stage': 'Basic',
  'attacks': [{'name': 'Slam',
    'effect': 'Flip 2 coins. This attack does 20 damage times the number of heads.',
    'damage': '20×'}],
  'legal': {'standard': False, 'expanded': False},
  'updated': '2025-08-16T20:39:55Z',
  'pricing': {'cardmarket': None,
   'tcgplayer': {'updated': '2025-11-19T20:05:32.000Z',
    'unit': 'USD',
    'holofoil': {'lowPrice': 2.19,
     'midPrice': 2.8

In [29]:
# Index every card by its id, and record the `high.jpg` image URL for the
# cards that carry an 'image' field.
id_to_url = {}
id_to_card = {}
for card in all_cards:
    card_id = card['id']
    id_to_card[card_id] = card
    if 'image' in card:
        # Outer double quotes: reusing the same quote character inside the
        # braces of an f-string is a SyntaxError before Python 3.12.
        id_to_url[card_id] = f"{card['image']}/high.jpg"

len(id_to_url), list(id_to_url.items())[0]

(19159, ('base1-1', 'https://assets.tcgdex.net/en/base/base1/1/high.jpg'))

In [30]:
# Derive the image folder from `path` instead of repeating the absolute
# Windows path, so the data location is configured in one place only.
fns = get_image_files(path / 'images')
fns

(#19159) [Path('C:/Code/React/CollectiblesApp/src/ai_dev/datasets/pokemon/data/images/base1/base1-1.jpg'),Path('C:/Code/React/CollectiblesApp/src/ai_dev/datasets/pokemon/data/images/base1/base1-10.jpg'),Path('C:/Code/React/CollectiblesApp/src/ai_dev/datasets/pokemon/data/images/base1/base1-100.jpg'),Path('C:/Code/React/CollectiblesApp/src/ai_dev/datasets/pokemon/data/images/base1/base1-101.jpg'),Path('C:/Code/React/CollectiblesApp/src/ai_dev/datasets/pokemon/data/images/base1/base1-102.jpg'),Path('C:/Code/React/CollectiblesApp/src/ai_dev/datasets/pokemon/data/images/base1/base1-11.jpg'),Path('C:/Code/React/CollectiblesApp/src/ai_dev/datasets/pokemon/data/images/base1/base1-12.jpg'),Path('C:/Code/React/CollectiblesApp/src/ai_dev/datasets/pokemon/data/images/base1/base1-13.jpg'),Path('C:/Code/React/CollectiblesApp/src/ai_dev/datasets/pokemon/data/images/base1/base1-14.jpg'),Path('C:/Code/React/CollectiblesApp/src/ai_dev/datasets/pokemon/data/images/base1/base1-15.jpg'),Path('C:/Code/Reac

## Create Hashes

We use the `imagehash` library and generate difference hashes for the cards.

In [31]:
import imagehash

In [32]:
# Forwarded to imagehash.dhash as its hash_size argument.
hash_size = 16
# Filled in by create_hashes_inner: image id -> hash string.
hashes = {}

In [33]:
def get_hashes(img):
    """Return the difference hash of `img` as a string.

    The hash is computed by `imagehash.dhash` with the module-level `hash_size`.
    """
    return str(imagehash.dhash(img, hash_size))

def create_hashes_inner(fn):
    """Hash one image file and store the result in the module-level `hashes` dict.

    The dictionary key is the file name without its directory or extension.

    Args:
        fn: Path of the image file to hash.
    """
    # basename/splitext follow the platform's path rules, unlike splitting on a
    # literal '\\', which only works for Windows-style paths.
    card_id = os.path.splitext(os.path.basename(fn))[0]
    # Use the image as a context manager so the underlying file is closed as
    # soon as the hash has been computed.
    with Image.open(fn) as img:
        hashes[card_id] = get_hashes(img)

def create_hashes(fns=None, n_workers=32):
    """Hash every image in `fns` using a thread pool.

    Each file is handed to `create_hashes_inner`, which stores its result;
    this function itself returns nothing.

    Args:
        fns: Collection of image file paths to hash.
        n_workers: Number of workers passed to `parallel` (defaults to 32).

    Raises:
        ValueError: If `fns` is None.
    """
    if fns is None:
        # Raising a bare string is itself a TypeError in Python 3, so raise a
        # real exception carrying the intended message.
        raise ValueError('No images to hash')
    parallel(create_hashes_inner, fns, n_workers=n_workers, threadpool=True)

In [34]:
# Fills the module-level `hashes` dict as a side effect; nothing is returned.
create_hashes(fns)

In [35]:
# Pair each image hash with its card metadata, keyed by card id.
# A comprehension keeps the loop variables local, so the builtins `id` and
# `hash` are not rebound in the notebook namespace by this cell.
# A hash whose id has no entry in id_to_card still raises KeyError.
card_data = {
    card_id: {'hash': card_hash, 'card': id_to_card[card_id]}
    for card_id, card_hash in hashes.items()
}

In [36]:
# Remove cardmarket pricing data
# Note: this edits the card dicts in place; they are the same objects that
# id_to_card refers to.
for info in card_data.values():
    pricing = info['card'].get('pricing')
    # Tolerate cards whose pricing is missing, is None, or has no 'cardmarket'
    # key. Falsy entries (e.g. None) are left untouched, exactly as before.
    if isinstance(pricing, dict) and pricing.get('cardmarket'):
        del pricing['cardmarket']

# json.dump escapes non-ASCII characters by default; the explicit encoding just
# makes the written file independent of the platform default.
with open(f'{repo_location}/public/card_data.json', 'w', encoding='utf-8') as f:
    json.dump(card_data, f)