In [1]:
import json
import re
from typing import TypedDict, cast

import utils
from rich.progress import track


def get_maker_urls():
    soup = utils.soup("https://www.onlinecarparts.co.uk/car-brands.html")
    return [utils.get_link(el) for el in soup.select(".top-auto .top-auto-item")]


def parse_car_permalink(permalink: str) -> str:
    r = re.match(
        r"https://www.onlinecarparts.co.uk/car-brands/spare-parts-(.+?)/(.+).html",
        permalink,
    )
    maker, model = r.groups()
    return f"{maker}/{model}"


def get_cars(maker_url: str):
    soup = utils.soup(maker_url)
    return [
        (
            utils.get_link(el),
            cast(str, el.select_one("img")["src"]),
        )
        for el in soup.select(".top-auto .top-auto-item")
    ]


def get_car_permalinks(car_url: str):
    soup = utils.soup(car_url)
    links = [utils.get_link(el) for el in soup.select(".vehicle-list .vehicle-list__link")]
    return [parse_car_permalink(link) for link in links]


maker_urls = get_maker_urls()
cars = utils.flatten([get_cars(maker_url) for maker_url in track(maker_urls, "Getting cars")])


class Record(TypedDict):
    permalink: str
    image: str


records = utils.flatten(
    [
        [Record(permalink=permalink, image=img) for permalink in get_car_permalinks(url)]
        for url, img in track(cars, "Getting permalinks")
    ]
)

data = utils.mkpath("data/ocp_car_images")
record_file = data / "records.json"

with record_file.open("w") as f:
    json.dump(records, f, indent=4)

Output()

Output()

OcpClientProcessError: Command '['php', '../../artisan', 'ocp:curl', 'https://www.onlinecarparts.co.uk/car-brands/spare-parts-toyota-fj.html']' returned non-zero exit status 1.

STDERR:
b''

STDOUT:
b'\r\n   App\\Services\\Bots\\OcpClientException \r\n\r\n  Http request failed with status code 500.\nUrl: https://www.onlinecarparts.co.uk/car-brands/spare-parts-toyota-fj.html\nResponse: <!DOCTYPE html>\n<html lang="en">\n<head>\n    <meta name="viewport"\n          content="width=device-width, initial-scale=1, maximum-scale=1.0, user-scalable=no, shrink-to-fit=no">\n    <meta charset="utf-8">\n    <meta http-equiv="x-ua-compatible" content="ie=edge">\n    <title> 500 - www.onlinecarparts.co.uk</title>\n    <style>\n        * {\n            margin: 0px auto;\n            padding: 0px;\n            text-align: center;\n        }\n\n        body {\n            background-color: #f4f4f8;\n        }\n\n        .cont_principal {\n            position: absolute;\n            width: 100%;\n            height: 100%;\n            overflow: hidden;\n        }\n\n        .cont_error {\n            position: absolute;\n            width: 100%;\n            height: 300px;\n            top: 50%;\n            margin-top: -150px;\n        }\n\n        .cont_error > h1 {\n            font-family: \'Lato\', sans-serif;\n            font-weight: 400;\n            font-size: 50px;\n            color: #20548e;\n            position: relative;\n            left: -100%;\n            transition: all 0.5s;\n        }\n\n\n        .cont_error > p {\n            font-family: \'Lato\', sans-serif;\n            font-weight: 300;\n            font-size: 24px;\n            letter-spacing: 5px;\n            color: #344357;\n            position: relative;\n            left: 100%;\n            transition: all 0.5s;\n            transition-delay: 0.5s;\n            -webkit-transition: all 0.5s;\n            -webkit-transition-delay: 0.5s;\n        }\n\n        @media (max-width: 576px) {\n            .cont_aura_1 {\n                right: -350px;\n            }\n        }\n\n        .cont_aura_1 {\n            position: absolute;\n            width: 300px;\n            height: 120%;\n            top: 25px;\n            right: -340px;\n            background-color: #344357;\n            box-shadow: 0px 0px 60px 20px rgba(52, 67, 87, 0.5);\n            -webkit-transition: all 0.5s;\n            transition: all 0.5s;\n        }\n\n        .cont_aura_2 {\n            position: absolute;\n            width: 100%;\n            height: 300px;\n            right: -10%;\n            bottom: -301px;\n            background-color: #344357;\n            box-shadow: 0px 0px 60px 10px rgba(52, 67, 87, 0.5), 0px 0px 20px 0px rgba(0, 0, 0, 0.1);\n            z-index: 5;\n            transition: all 0.5s;\n            -webkit-transition: all 0.5s;\n        }\n\n\n\n        .cont_error_active > .cont_error > h1 {\n            left: 0%;\n        }\n\n        .cont_error_active > .cont_error > p {\n            left: 0%;\n        }\n\n        .cont_error_active > .cont_aura_2 {\n            animation-name: animation_error_2;\n            animation-duration: 4s;\n            animation-timing-function: linear;\n            animation-iteration-count: infinite;\n            animation-direction: alternate;\n            transform: rotate(-20deg);\n        }\n\n        .cont_error_active > .cont_aura_1 {\n            transform: rotate(20deg);\n            right: -170px;\n            animation-name: animation_error_1;\n            animation-duration: 4s;\n            animation-timing-function: linear;\n            animation-iteration-count: infinite;\n            animation-direction: alternate;\n        }\n\n        @-webkit-keyframes animation_error_1 {\n            from {\n                -webkit-transform: rotate(20deg);\n                transform: rotate(20deg);\n            }\n            to {\n                -webkit-transform: rotate(25deg);\n                transform: rotate(25deg);\n            }\n        }\n\n        @-o-keyframes animation_error_1 {\n            from {\n                -webkit-transform: rotate(20deg);\n                transform: rotate(20deg);\n            }\n            to {\n                -webkit-transform: rotate(25deg);\n                transform: rotate(25deg);\n            }\n\n        }\n\n        @-moz-keyframes animation_error_1 {\n            from {\n                -webkit-transform: rotate(20deg);\n                transform: rotate(20deg);\n            }\n            to {\n                -webkit-transform: rotate(25deg);\n                transform: rotate(25deg);\n            }\n\n        }\n\n        @keyframes  animation_error_1 {\n            from {\n                -webkit-transform: rotate(20deg);\n                transform: rotate(20deg);\n            }\n            to {\n                -webkit-transform: rotate(25deg);\n                transform: rotate(25deg);\n            }\n        }\n\n\n        @-webkit-keyframes animation_error_2 {\n            from {\n                -webkit-transform: rotate(-15deg);\n                transform: rotate(-15deg);\n            }\n            to {\n                -webkit-transform: rotate(-20deg);\n                transform: rotate(-20deg);\n            }\n        }\n\n        @-o-keyframes animation_error_2 {\n            from {\n                -webkit-transform: rotate(-15deg);\n                transform: rotate(-15deg);\n            }\n            to {\n                -webkit-transform: rotate(-20deg);\n                transform: rotate(-20deg);\n            }\n        }\n\n        }\n        @-moz-keyframes animation_error_2 {\n            from {\n                -webkit-transform: rotate(-15deg);\n                transform: rotate(-15deg);\n            }\n            to {\n                -webkit-transform: rotate(-20deg);\n                transform: rotate(-20deg);\n            }\n        }\n\n        @keyframes  animation_error_2 {\n            from {\n                -webkit-transform: rotate(-15deg);\n                transform: rotate(-15deg);\n            }\n            to {\n                -webkit-transform: rotate(-20deg);\n                transform: rotate(-20deg);\n            }\n        }\n\n\n    </style>\n</head>\n<body>\n<div class="cont_principal">\n    <div class="cont_error">\n        <h1>Updating our website...</h1>\n        <p>In a few moments, it will be even more awesome!</p>\n    </div>\n</div>\n<script>\n    window.onload = function () {\n        document.querySelector(\'.cont_principal\').className = "cont_principal cont_error_active";\n    }\n</script>\n<script>(function(){function c(){var b=a.contentDocument||a.contentWindow.document;if(b){var d=b.createElement(\'script\');d.innerHTML="window.__CF$cv$params={r:\'891918c31b9d696b\',t:\'MTcxODAxOTczMy4wMDAwMDA=\'};var a=document.createElement(\'script\');a.nonce=\'\';a.src=\'/cdn-cgi/challenge-platform/scripts/jsd/main.js\';document.getElementsByTagName(\'head\')[0].appendChild(a);";b.getElementsByTagName(\'head\')[0].appendChild(d)}}if(document.body){var a=document.createElement(\'iframe\');a.height=1;a.width=1;a.style.position=\'absolute\';a.style.top=0;a.style.left=0;a.style.border=\'none\';a.style.visibility=\'hidden\';document.body.appendChild(a);if(\'loading\'!==document.readyState)c();else if(window.addEventListener)document.addEventListener(\'DOMContentLoaded\',c);else{var e=document.onreadystatechange||function(){};document.onreadystatechange=function(b){e(b);\'loading\'!==document.readyState&&(document.onreadystatechange=e,c())}}}})();</script></body>\n</html>\r\n\r\n  at C:\\Users\\Semih\\PhpstormProjects\\modelotoparca\\app\\Services\\Bots\\OcpClient.php:31\r\n     27\xe2\x96\x95             throw new \\Exception(\'Response blocked by cloudflare.\');\r\n     28\xe2\x96\x95         }\r\n     29\xe2\x96\x95 \r\n     30\xe2\x96\x95         if (! ($httpStatusCode >= 200 && $httpStatusCode < 300)) {\r\n  \xe2\x9e\x9c  31\xe2\x96\x95             throw new OcpClientException($httpStatusCode, $url, $response);\r\n     32\xe2\x96\x95         }\r\n     33\xe2\x96\x95 \r\n     34\xe2\x96\x95         return $response;\r\n     35\xe2\x96\x95     }\r\n\r\n  1   C:\\Users\\Semih\\PhpstormProjects\\modelotoparca\\app\\Console\\Commands\\OcpCurlCommand.php:17\r\n      App\\Services\\Bots\\OcpClient::request("https://www.onlinecarparts.co.uk/car-brands/spare-parts-toyota-fj.html")\r\n\r\n  2   C:\\Users\\Semih\\PhpstormProjects\\modelotoparca\\vendor\\laravel\\framework\\src\\Illuminate\\Container\\BoundMethod.php:36\r\n      App\\Console\\Commands\\OcpCurlCommand::handle()\r\n\r\n'

----

In [None]:
import json
from typing import TypedDict, cast
import utils
import re


class Record(TypedDict):
    permalink: str
    image: str


data = utils.mkpath("data/ocp_car_images")
record_file = data / "records.json"


with record_file.open("r") as f:
    records: list[Record] = json.load(f)


def get_image_id(url: str):
    return re.match(r"https://scdn.autoteiledirekt.de/groups/170x100/(\d+).png", url).group(1)

In [None]:

import utils
from concurrent_download import download_concurrent

image_path = utils.mkpath(data / "images")


def get_img_path(url: str):
    name = get_image_id(url)
    return image_path / f"{name}.png"


image_set = {r["image"] for r in records}
await download_concurrent(image_set, 20, get_img_path)

In [None]:
from db import Session, Image, Car
from sqlalchemy import column, insert, select, table
from rich.progress import track

path_template = "images/cars/{}.png"
with Session() as session:
    for record in track(records):
        path = path_template.format(get_image_id(record["image"]))
        permalink = record["permalink"]
        cars = session.scalars(select(Car).where(Car.permalink.contains(permalink))).all()
        images = [dict(path=path, imageable_id=car.id, imageable_type=r"\App\Models\Car") for car in cars]
        session.execute(insert(Image).values(images).prefix_with("IGNORE"))
    session.commit()