In [1]:
MIN_RESOLUTION = 400

In [2]:
url = input()

In [3]:
from urllib.parse import urljoin


def convert_image_to_jpg(src):
	import requests
	from PIL import Image
	from io import BytesIO
	response = requests.get(urljoin(url, src))
	img = Image.open(BytesIO(response.content))
	
	img = img.resize(img.size)

	if img.size[0] < MIN_RESOLUTION or img.size[1] < MIN_RESOLUTION:
		ratio = max(MIN_RESOLUTION / img.size[0], MIN_RESOLUTION / img.size[1])
		new_size = (int(img.size[0] * ratio), int(img.size[1] * ratio))
		img = img.resize(new_size, Image.ANTIALIAS)

	return img

In [4]:
def find_all_images(url):
	import requests
	from bs4 import BeautifulSoup
	response = requests.get(url)
	soup = BeautifulSoup(response.text, 'html.parser')
	images = soup.find_all(['img', 'picture'])

	print(response.text)

	return [convert_image_to_jpg(image['src'] or image['source']) for image in images if 'src' in image.attrs or 'source' in image.attrs]

In [5]:
def pack_zip_file(images):
	import os
	import zipfile
	import io
	zip_file = io.BytesIO()
	with zipfile.ZipFile(zip_file, 'w') as z:
		for i, image in enumerate(images):
			image.save(f'img{i}.jpg', 'JPEG')
			z.write(f'img{i}.jpg')
			os.remove(f'img{i}.jpg')

	zip_file.seek(0)

	with open('images.zip', 'wb') as f:
		f.write(zip_file.getvalue())

In [21]:
def zip_to_yolov8_directory(zip_file):
	import zipfile, io
	import os
	import glob

	from sklearn.model_selection import train_test_split
	zip_file = zipfile.ZipFile(io.BytesIO(zip_file.read()))

	os.makedirs('train', exist_ok=True)
	os.makedirs('val', exist_ok=True)

	existing_files = glob.glob('train/*.jpg') + glob.glob('val/*.jpg')
	for file in existing_files:
		os.remove(file)

	
	image_files = [f for f in zip_file.namelist() if f.endswith('.jpg')]
	train_files, val_files = train_test_split(image_files, test_size=0.2)

	for file in train_files:
		with zip_file.open(file) as source, open(os.path.join('train', file), 'wb') as target:
			target.write(source.read())

	for file in val_files:
		with zip_file.open(file) as source, open(os.path.join('val', file), 'wb') as target:
			target.write(source.read())

In [7]:
pack_zip_file(find_all_images(url))

<!DOCTYPE html>
<html lang="en" dir="ltr">
  <head>
    <meta charset="utf-8">
      <title>(e:ltr cn&gt;=452) or (e:ltc cn&gt;=411) · Scryfall Magic The Gathering Search</title>
    
<meta name="viewport" content="width=device-width, initial-scale=1.0" />

<link rel="preconnect" href="https://cards.scryfall.io" crossorigin="true" />
<link rel="dns-prefetch" href="https://cards.scryfall.io" />

<meta name="description" content="550 Magic cards found where (the set is “ltr” and the collector number &gt;= 452) or (the set is “ltc” and the collector number &gt;= 411)" />




<link rel="alternate" type="application/atom+xml" title="ATOM" href="https://scryfall.com/blog/feed" />

<link rel="search" type="application/opensearchdescription+xml" title="Scryfall" href="https://scryfall.com/opensearch.xml" />

<meta property="og:site_name" content="Scryfall Magic The Gathering Search" />
<meta property="og:type" content="website" />
<meta property="og:title" content="(e:ltr cn&gt;=452) or (e:ltc

In [33]:
zip_to_yolov8_directory(open('images.zip', 'rb'))