# Sudoku Search for Training Data

In [34]:
import os
import uuid
import hashlib
import torch
import ray
from fastbook import *

In [35]:
imageDir = "./train/images"

In [36]:
hashCache = []

if ray.is_initialized() == False:
    ray.init()

In [37]:
urls = search_images_ddg('sudoku', max_images=300)
len(urls),urls[0]

(300,
 'https://kinderbilder.download/wp-content/uploads/2020/06/kombinatorik-ein-sudoku-benotigt-mindestens-17-vorgaben-bestimmt-fur-sudoku-kostenlos-drucken-schwer.jpg')

In [38]:
def calculateMd5(filename):
    runningMd5 = hashlib.md5()
    with open(filename, "rb") as f:
        for chunk in iter(lambda: f.read(4096), b""):
            runningMd5.update(chunk)
    return runningMd5.hexdigest()

In [39]:
def doesContentAlreadyExist(filename, dir):
    newFileHash = calculateMd5(filename)

    if (len(hashCache) == 0):
        for f in os.listdir(dir):
            absFile = os.path.join(dir, f)
            if os.path.isfile(absFile):
                hash = calculateMd5(absFile)
                hashCache.append(hash)

    for hash in hashCache:
        if (hash == newFileHash):
            return True
            
    return False

In [40]:
for url in urls:
    url = url.split('?')[0]
    print ("Processing URL: " + url)
    e = re.findall('\.[a-z]+$', url)
    if (len(e)) == 1:
        fileExtension = e[0]
        randomName = str(uuid.uuid4().hex)
        filename = randomName + fileExtension
        tempFilename = "/tmp/" + filename
        
        download_url(url, tempFilename)

        if doesContentAlreadyExist(tempFilename, imageDir) == True:
            print ("Found: " + tempFilename)
            os.remove(tempFilename)
        else:
            print ("NEW: " + tempFilename)
            os.replace(tempFilename, imageDir + "/" + filename)

    else:
        print ("Unable to process image due to unexpected extension: " + url)

Processing URL: https://kinderbilder.download/wp-content/uploads/2020/06/kombinatorik-ein-sudoku-benotigt-mindestens-17-vorgaben-bestimmt-fur-sudoku-kostenlos-drucken-schwer.jpg


Found: /tmp/9faefb02ad3344628bae52cc17f156f9.jpg
Processing URL: https://creagame.com/shop/613/sudoku-9x9-niveau-facile-pack-n-1-de-10-grilles.jpg


Found: /tmp/1352140a86564c16b73151ae3ccb8d88.jpg
Processing URL: http://observer.case.edu/wp-content/uploads/2013/08/Sudoku-Answers-12-941x1024.jpg


Found: /tmp/25b25ab7da6646d1ba28332e9936fb58.jpg
Processing URL: http://3.bp.blogspot.com/_Kh0CZuWd0T8/Sn_2OoqPKwI/AAAAAAAAFEg/z_vhmFkfOws/s400/PRINTABLE+SUDOKU+PUZZLES+(3).gif


Found: /tmp/c783a2b6749f4ddfb9120bf2e3f1516d.gif
Processing URL: https://printable-crosswordpuzzles.com/wp-content/uploads/2019/06/sudoku-for-all-ages-plus-lots-of-other-printable-activities-for-kids-sudoku-puzzles-printable-6x6.png


Found: /tmp/4a6ab01666e04012acc784ba031f406e.png
Processing URL: https://i.pinimg.com/originals/70/7c/60/707c607157135366a4ddc5376ef1bac9.gif


Found: /tmp/2123a3ce47094e81ab51a6723a9574aa.gif
Processing URL: https://i.pinimg.com/736x/01/9f/d0/019fd0723a6e7c366ec439e3ea096757--mind-puzzles-sudoku-puzzles.jpg


Found: /tmp/2420b68f1545431ea0ad72a641819c2b.jpg
Processing URL: https://i1.wp.com/www.printablee.com/postpic/2011/01/x-16-printable-sudoku-puzzles_370898.jpg


Found: /tmp/3ca7fe8bd6594b06875d3a928cd23434.jpg
Processing URL: http://www.printactivities.com/Kid_Sudoku_Puzzles/Kid_Sudoku_Puzzle_6x6_01.gif


Found: /tmp/b410a33064ec4458bfe68dcaea4d9391.gif
Processing URL: http://baylorlariat.com/wp-content/uploads/2011/10/20111013pzsud-s.jpg


Found: /tmp/376dd3065bc34f82a3ccd08a4f8c8dde.jpg
Processing URL: http://thetripclip.com/tc/images/pins/sudoku-for-all-ages-4x4-6x6-9x9.png


Found: /tmp/6760a67613c8422b85c43117e9ec07f8.png
Processing URL: https://printable-crosswordpuzzles.com/wp-content/uploads/2019/06/easy-sudoku-printable-kids-activities-printable-sudoku-puzzles-8-per-page.jpg


NEW: /tmp/a19a7c6885ec4fc99e5186d045c9bc58.jpg
Processing URL: http://s3.amazonaws.com/hashcube/images/sudokusolver.net/images/macsudoku_47layers_complete.gif


Found: /tmp/bb9a91c6cbbd4ea6b4bbcff6453df6e7.gif
Processing URL: https://printablesudokufree.com/wp-content/uploads/2019/05/free-sudoku-for-your-local-publications-sudoku-of-the-day-large-printable-sudoku-grid.png


Found: /tmp/85ab82bfd7644795830e64d2092b8edf.png
Processing URL: https://i.pinimg.com/originals/d5/bb/31/d5bb31daef45ff8511d8213ca99ac426.jpg


Found: /tmp/01a0d8887f7542e69cf90e62b23f4e4c.jpg
Processing URL: https://kinderbilder.download/wp-content/uploads/2020/06/sudoku-vorlagen-fur-kinder-6x6-kostenlos-herunterladen-und-innen-sudoku-schwer-drucken-2048x1479.jpg


Found: /tmp/2241795a95a94e08bf1cbe5ff91e0f78.jpg
Processing URL: https://www.thegreatapps.com/application/upload/Apps/2020/07/sudoku-28.png


Found: /tmp/56d850f1d37c41d49ba3f2423623872c.png
Processing URL: http://www.filebuzz.com/software_screenshot/full/sudoku_x-481262.png


Found: /tmp/b92aebe7aec344e5a4b8e38044dbbb0b.png
Processing URL: https://printablesudokufree.com/wp-content/uploads/2019/05/printable-sudoku-printable-sudoku-five-squares.png


Found: /tmp/5fce37942b3f4d96a493af211c63bc33.png
Processing URL: https://printablesudokufree.com/wp-content/uploads/2019/05/9x9-sudoku-7-printable-sudoku-9x9.jpg


Found: /tmp/cce5bad4e34545a9be9379da990640ff.jpg
Processing URL: https://malvorlagen-seite.de/wp-content/uploads/2018/12/sudoku-6x6-4.jpg


Found: /tmp/84f1e4c50d2a47e3a464240cd784df31.jpg
Processing URL: http://photos1.blogger.com/blogger/546/1536/1024/500sudokus-acrobat_Page_017.jpg


Found: /tmp/2c429f0f0b234224a230d92d627ceabe.jpg
Processing URL: https://www.sudoku.4thewww.com/other/16x16-350.gif


Found: /tmp/188cd1ab7cdc4a3ba189427f74a19359.gif
Processing URL: https://miro.medium.com/max/4000/1*z6Qli8oi_nOPQCyqo3rRVg.png


Found: /tmp/1ff9e2c89fc442adbece266949031ea3.png
Processing URL: https://moinhat.net/wp-content/uploads/2023/12/sudoku-zen-puzzle-game.jpg


Found: /tmp/58a3ee98bf8c4567bf8255e3949f5b7f.jpg
Processing URL: https://4freeprintable.com/wp-content/uploads/2019/07/observer-killer-sudoku-life-and-style-the-guardian-killer-sudoku-free-printable-1.jpg


Found: /tmp/9d3d51ca9ce54b9694ff2cb65351f69f.jpg
Processing URL: https://primanyc.net/wp-content/uploads/2020/04/sudoku-moyen-pour-sudoku-grande-section-1.jpg


Found: /tmp/2675de934c61484da706443cb1223d72.jpg
Processing URL: https://i.pinimg.com/736x/c0/30/00/c030003c5d9c2a412a7a1a5bc4613bef--sudoku-crossword.jpg


Found: /tmp/b9430f222a6749c79e2d73c51f4496c6.jpg
Processing URL: https://printablesudokufree.com/wp-content/uploads/2019/05/double-harakiri-sudoku-x-printable-super-sudoku.png


Found: /tmp/83d00ba6f147499489784533dbd75728.png
Processing URL: https://sudokuprintables.com/wp-content/uploads/2020/04/hard-sudoku-printable-6-per-page-printabler-38.jpg


Found: /tmp/5f3cb34c5e734f1d9e88467a3479673d.jpg
Processing URL: http://oppidanlibrary.com/wp-content/uploads/2017/05/Free-Printable-Sudoku-Puzzles.jpg


Found: /tmp/57230dc022cd4bc8ae525dfa37947063.jpg
Processing URL: https://sudokuprintables.com/wp-content/uploads/2020/04/printable-sudoku-free-printable-easy-sudoku-puzzle-to-print-9.jpg


Found: /tmp/6e1c0f3010ef46c69fdfbf4dd481ca41.jpg
Processing URL: https://i.etsystatic.com/21728163/r/il/b7d550/2149138849/il_1588xN.2149138849_2swl.jpg


Found: /tmp/2e0f92594896480da9b34e9f27332bac.jpg
Processing URL: http://delphiforfun.org/programs/images/Sudoku1.jpg


Found: /tmp/f09055ab789648efa208c726243ca4f3.jpg
Processing URL: http://www.filebuzz.com/software_screenshot/full/25972-analytical_sudoku.gif


Found: /tmp/b4f8d506a8e94672b7d6a189038bc531.gif
Processing URL: https://my-sudoku.com/images/snap_banner.jpg


Found: /tmp/3a7ba8fe48fa474eb921b7d7a0a16dbf.jpg
Processing URL: https://printablesudokufree.com/wp-content/uploads/2019/05/printable-sudoku-samurai-give-these-puzzles-a-try-and-youll-be-printable-sudoku-krazydad-puzzles.jpg


NEW: /tmp/569e7b6bceb346d1a948a682c4e116f8.jpg
Processing URL: http://3.bp.blogspot.com/-PRDdqeCjWMk/Tj7xNe2hj7I/AAAAAAAAELU/OEEcDi5GGFs/s1600/100-sudoku-facil_Page_009-728747.jpg


NEW: /tmp/fd609527c82948389a966f2ae5d5390f.jpg
Processing URL: https://2.bp.blogspot.com/_IbbWPS5nN5w/TRK9NBFKZ7I/AAAAAAAAAuY/NmlY2TID8jc/s1600/jigsaw.png


Found: /tmp/c077289ec5b6491a82f09cf4125f5f46.png
Processing URL: https://4.bp.blogspot.com/-2TPwKAa-NfY/XFmIgwLNNwI/AAAAAAAAAUY/muQ7_sggJFETjReCtTsc-AouyUYYVl1ewCLcBGAs/s1600/1200px-Sudoku-by-L2G-20050714.svg.png


Found: /tmp/167589bbde4f4ee89d19dd3400a36f90.png
Processing URL: https://3.bp.blogspot.com/-ggiGD61URG0/Tj7qXsjIj_I/AAAAAAAAEKE/AXYlw0w7YGc/s1600/100-sudoku-facil_Page_001.jpg


Found: /tmp/17dff9fd723f4911971590357878d17d.jpg
Processing URL: https://printablesudokufree.com/wp-content/uploads/2019/05/large-print-sudoku-christmas-180-easy-to-hard-puzzles-etsy-printable-sudoku-2-per-page-blank.jpg


Found: /tmp/ea7633771adf44c081f53352d2bae5ec.jpg
Processing URL: https://i.pinimg.com/736x/6a/25/d2/6a25d21726fc55a8bc730e89f5fc6660--sudoku-stress.jpg


Found: /tmp/804d63a4379048beb1006d07a0542c5e.jpg
Processing URL: https://free-printablehq.com/wp-content/uploads/2019/07/printable-sudoku-samurai-give-these-puzzles-a-try-and-youll-be-free-printable-sudoku-6-per-page-809x1024.jpg


Found: /tmp/805e52e65a124de3abf0ef97bb56eeee.jpg
Processing URL: https://sudokuprintables.com/wp-content/uploads/2020/03/sudoku-blank-print-calendar-sudoku-dying-my-hair-1024x1022.png


NEW: /tmp/90d00b9efdc3496193d836c4e31e23c4.png
Processing URL: http://www.phy6.org/outreach/edu/Sudokufigs/SudokuK2.gif


Found: /tmp/2e1d09a60c1e48168a63dc1e7998b0bc.gif
Processing URL: https://www.freeprintabletm.com/wp-content/uploads/2021/05/daily-sudoku-printable-printable-template-free.png


Found: /tmp/0369c6c5cc484f4494ec508117a0c875.png
Processing URL: https://kinderbilder.download/wp-content/uploads/2020/06/sudoku-online-losen-mit-dem-sudoku-solver-fur-sudoku-zum-ausdrucken-sehr-schwer-scaled.jpg


Found: /tmp/8adbe141912a466a8e12280cc1139d4c.jpg
Processing URL: https://3.bp.blogspot.com/-WpXnBAjKNF0/ThmKiAD_TsI/AAAAAAAAAX4/zlvGhDlXY1U/s1600/21.jpg


Found: /tmp/35c8f5029f01403b8d8aaf376f2ab025.jpg
Processing URL: https://printablesudokufree.com/wp-content/uploads/2019/05/hard-sudoku-printable-canas-bergdorfbib-co-printable-sudoku-fiendish.jpg


Found: /tmp/4d8403f5473345259fe03606e363465e.jpg
Processing URL: https://4.bp.blogspot.com/-484h80c-Sjc/UX5FCGV89jI/AAAAAAAAAbw/AHxptGoZeT8/s1600/SUDOKU.jpg


Found: /tmp/3d9a3cd6c83e4a6db07d13d9cc9d3a28.jpg
Processing URL: https://sudokuprintables.com/wp-content/uploads/2020/04/sudoku-puzzles.jpg


Found: /tmp/7344459d9a314614ab6ed940ab06b135.jpg
Processing URL: https://printablesudokufree.com/wp-content/uploads/2019/05/double-harakiri-sudoku-x-printable-multi-sudoku-puzzles.png


Found: /tmp/e278d0c11b67484688c6bf18b663108c.png
Processing URL: https://www.lactudecamille.com/wp-content/uploads/2017/04/imagessudoku-10.jpg


Found: /tmp/ceb33bf0267e49c488eb1bb0dac7b274.jpg
Processing URL: https://www.fossguru.com/wp-content/uploads/2021/04/Sudoku-Kingdom-696x586.jpg


Found: /tmp/400a3aca8bbd4979b7be4416a8005111.jpg
Processing URL: https://crosswordpuzzles-printable.com/wp-content/uploads/2019/06/sudoku-for-all-ages-plus-lots-of-other-printable-activities-for-kids-printable-sudoku-puzzles-9x9.png


NEW: /tmp/1f10f5e7d44b4ec2ba64043cbac05c76.png
Processing URL: https://i5.walmartimages.com/asr/80426d5b-f91f-4f1d-9d76-7eac44d225ea_1.e3e66566ead52a7d68b199d3c0bdacda.jpeg


Found: /tmp/0ca07a5abccb4143b213af7ada78a584.jpeg
Processing URL: https://cdn.formtemplate.org/images/482/blank-sudoku-grid.png


Found: /tmp/cac5adf622334118a78cb9fe17be5bb0.png
Processing URL: https://printablesudokufree.com/wp-content/uploads/2019/05/super-challenger-sudoku-printable-puzzles-printable-sudoku-super-challenger.jpg


NEW: /tmp/de5ab724700641fdbbef9b1974d521e5.jpg
Processing URL: http://photos1.blogger.com/blogger/546/1536/1024/500sudokus-acrobat_Page_035.jpg


NEW: /tmp/e7e3ecc7a6d44991b68e48babcd398ac.jpg
Processing URL: https://i.pinimg.com/originals/5c/be/c3/5cbec304dabf40b449f3047e9176d414.png


Found: /tmp/373f2278dbca49d083d217015bb73c8a.png
Processing URL: http://www.memory-improvement-tips.com/images/hard-1b-4.jpg


NEW: /tmp/871c1c9a2fcd48c38644d7440167080a.jpg
Processing URL: https://www.gridgit.com/cdn/2019/09/simple-medium-printable-sudoku-puzzles-4-per-page.jpg


Found: /tmp/e2207a63727e4df5afe1e598cd50fa04.jpg
Processing URL: https://oppidanlibrary.com/wp-content/uploads/2017/05/Easy-Sudoku-Puzzles-Download.jpg


Found: /tmp/53bfaf4ff3a0445a9f06233de11bcaea.jpg
Processing URL: https://thumbs.dreamstime.com/z/sudoku-solution-free-to-use-9264088.jpg


Found: /tmp/6708889d6d3c4ab2909af4c282e88746.jpg
Processing URL: https://uploads.guim.co.uk/2018/11/15/Sudoku_4221_easy.jpg


Found: /tmp/cd42334cb2854d878711dd9a4b59e576.jpg
Processing URL: http://photos1.blogger.com/blogger/7180/2224/1600/Sudoku_001.jpg


NEW: /tmp/2cd606aceef14a1cbb38e992e65aa616.jpg
Processing URL: https://kinderbilder.download/wp-content/uploads/2020/06/sudoku-vorlagen-fur-kinder-6x6-kostenlos-herunterladen-und-ganzes-sudoku-zum-ausdrucken-sehr-schwer-scaled.jpg


Found: /tmp/ad41459feb914287b19e516f1b20cf88.jpg
Processing URL: http://sudoku-instructions.com/EnglishSudokuCourse/wpimages/Sudoku%20Instructions%20Program%20-%20printing%20of%20sudoku%20puzzles.jpg


Found: /tmp/e99a77d108f947cb8d1a91e6a8a313c3.jpg
Processing URL: https://images0.persgroep.net/rcs/8tBMR78sgCPuq_wZQBgKvLeRNNg/diocontent/169235734/_fill/900/900/
Unable to process image due to unexpected extension: https://images0.persgroep.net/rcs/8tBMR78sgCPuq_wZQBgKvLeRNNg/diocontent/169235734/_fill/900/900/
Processing URL: https://i.pinimg.com/originals/ce/48/56/ce4856cd7ec5e4aaf10ac3985c5a4067.jpg


Found: /tmp/bbf10fcacbab44e79af0e131ffd0f46e.jpg
Processing URL: http://www.gamesavenue.net/gallery/mathematics-puzzle-games/sudoku_puzzle_game-copy.jpg


URLError: <urlopen error [Errno 8] nodename nor servname provided, or not known>

In [None]:
ds = ray.data.read_images(imageDir)
ds.size_bytes()
