In [None]:
# Notebook bootstrap: point Django at the dev settings, initialise it, then
# look up the two pages referenced by id below.
from os import environ as env
# Settings module that django.setup() will load.
env['DJANGO_SETTINGS_MODULE'] = 'ov_wag.settings.dev'
# Django's documented opt-out of its async-safety check, so synchronous ORM
# calls are permitted from the notebook kernel's event loop.
env["DJANGO_ALLOW_ASYNC_UNSAFE"] = "true"
# Echo the configured database name; raises KeyError if OV_DB_NAME is unset.
print(env['OV_DB_NAME'])
import django
django.setup()
# Imported only after django.setup(), since models need the app registry.
from wagtail.models import Page

# NOTE(review): ids are hard-coded for one particular database; presumably
# `ov` is the Open Vault root page — confirm.
ov = Page.objects.get(id=3)
# `aapb` is used further down as the parent that receives the imported
# collection pages.
aapb = Page.objects.get(id=59)  

In [None]:
# from home.models import HomePage
# aa = HomePage(title="AAPB")
# aa.save()

In [None]:
from cmless.models import Collection
import json
# Read the exported collections JSON; the file handle is only needed for
# the json.load call itself.
with open('/home/harpo/gbh/aapb/AAPB2/collections.json') as f:
    data = json.load(f)

# Each JSON entry is expanded into keyword arguments for a Collection.
collections = [Collection(**item) for item in data]
# collections[0].resources

In [None]:
from aapb_collections.models import AAPBCollection, AAPBRecordsBlock
from cmless.parse import markdownify


In [None]:
# collection = OpenVaultCollection(title='test', content=[('text', 'programatic creation'), ('text', 'with streamfield data!!!'), ('text', markdownify(collections[0].resources))])
# aapb.add_child(instance=collection)
# c = collections[0]
# markdownify(c.featured)

In [None]:
import re
from typing import List, Dict

def parse_cmless_thumbnail(markdown_string: str) -> List[Dict[str, str]]:
    """
    Parse a markdown string containing cmless images into a list of objects.

    Every ``![title](url)`` occurrence yields one entry. An optional markdown
    link title after the URL (``![title](url "hover text")``) is discarded,
    whatever whitespace separates it from the URL.

    Args:
        markdown_string (str): The markdown string to parse. Empty or None
            input yields an empty list.

    Returns:
        List[Dict[str, str]]: List of dictionaries with 'title' and 'url' keys,
        in document order. 'url' is an empty string for ``![title]()``.
    """
    if not markdown_string:
        return []

    # Pattern to match ![title](url)
    pattern = r'\!\[([^\]]*)\]\(([^\)]*)\)'

    cmless_images = []
    for title, target in re.findall(pattern, markdown_string):
        # split(None, 1) splits on any whitespace run (space, tab, newline),
        # so the URL is isolated from a trailing link title; an empty target
        # produces no tokens at all, hence the fallback to ''.
        tokens = target.split(None, 1)
        cmless_images.append({
            'title': title.strip(),
            'url': tokens[0] if tokens else '',
        })

    return cmless_images

def parse_featured_markdown(markdown_string: str) -> List[Dict[str, str]]:
    """
    Parse a markdown string containing featured items into a list of objects.
    
    Expected format: [![title](image_url)](link_url)
    
    Args:
        markdown_string (str): The markdown string to parse
        
    Returns:
        List[Dict[str, str]]: List of dictionaries with 'title', 'image_url', and 'link_url' keys
    """
    # Pattern to match [![title](image_url)](link_url)
    pattern = r'\[\!\[([^\]]*)\]\(([^\)]*)\)\]\(([^\)]*)\)'
    
    matches = re.findall(pattern, markdown_string)
    
    featured_items = []
    for match in matches:
        title, image_url, link_url = match
        guid = link_url.split('/')[-1].split('#')[0]  # Extract guid from link_url
        start_time = link_url.split('#at_')[-1] if '#at_' in link_url else None
        featured_items.append({
            'title': title.strip(),
            # 'thumbnail': image_url.strip(),
            'guids': guid.strip(),
            'start_time': start_time.strip().replace('_s', '') if start_time else None
        })
    
    return featured_items


# parse_featured_markdown("[![Ethic for Broadcasting](https://s3.amazonaws.com/americanarchive.org/special-collections/Minow_TVtile.jpg)](/catalog/cpb-aacip_500-kw57jd6j)\n[![The Carnegie Commission](https://s3.amazonaws.com/americanarchive.org/special-collections/Minow_TVtile.jpg)](/catalog/cpb-aacip_507-707wm14c2g)\n[![North Carolina Now](https://s3.amazonaws.com/americanarchive.org/special-collections/Minow_TVtile.jpg)](/catalog/cpb-aacip_129-09j3v33q#at_1173.159414_s)\n[![Public Television Hearings](https://s3.amazonaws.com/americanarchive.org/special-collections/Minow_TVtile.jpg)](/catalog/cpb-aacip_15-07tmpp7v)\n[![New Vistas for Television](https://s3.amazonaws.com/americanarchive.org/special-collections/Minow_TVtile.jpg)](/catalog/cpb-aacip_15-451g1x4f)\n[![Medal of Freedom, NewsHour](https://s3.amazonaws.com/americanarchive.org/special-collections/Minow_TVtile.jpg)](/catalog/cpb-aacip_525-gx44q7rv3h#at_3150.040405_s)")

In [None]:
from wagtail.images.models import Image

def download_image(url: str, title: str | None = None, timeout: float = 30.0) -> Image | None:
    """
    Download an image over HTTP and save it as a Wagtail image.

    Args:
        url (str): Address of the image to fetch.
        title (str | None): Title for the saved image. Defaults to the last
            '/'-separated segment of ``url``.
        timeout (float): Seconds to wait for the server before giving up.

    Returns:
        The saved image instance, or None when the request fails or the
        response status is not 200 (a message is printed in either case).
    """
    import posixpath
    from urllib.parse import urlparse

    import requests
    from django.core.files.base import ContentFile
    from wagtail.images import get_image_model

    # Use the project's configured image model rather than assuming the
    # built-in one, so a custom image model is honoured.
    ImageModel = get_image_model()

    if not title:
        title = url.split("/")[-1]

    try:
        # Without a timeout a stalled server would hang the notebook forever.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to download image from {url}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Failed to download image from {url}")
        return None

    # Name the stored file after the URL's basename so it keeps its extension;
    # a human-readable title such as "Ford Foundation" has none. Fall back to
    # the title when the URL path has no basename.
    file_name = posixpath.basename(urlparse(url).path) or title
    image = ImageModel(file=ContentFile(response.content, name=file_name), title=title)
    image.save()
    return image
# i = Image(title="Example Image", file="Screenshot from 2025-07-19 22-13-28.png")
# i = Image(title="Example Image", file="original_images/WeBelongHere.png")
# https://s3.amazonaws.com/americanarchive.org/special-collections/WeBelongHere.png
# i = download_image("https://s3.amazonaws.com/americanarchive.org/special-collections/WeBelongHere.png")


In [None]:
from aapb_collections.models import AAPBCollection

def create_collection_page(collection):
    """
    Build an unsaved AAPBCollection page from a parsed cmless collection.

    Reads these attributes of ``collection``: funders, background, help,
    resources, terms, timeline, sort, featured, title, thumbnail and summary.
    Funder logos and the thumbnail are downloaded via ``download_image`` as a
    side effect. The returned page is not saved or attached to a parent.

    Args:
        collection: Source collection object (a cmless ``Collection`` in this
            notebook).

    Returns:
        AAPBCollection: The populated page instance.
    """
    from html import escape

    # Funders: one image embed per logo that downloads successfully.
    funders_html = ''
    if collection.funders:
        for funder in parse_cmless_thumbnail(collection.funders.strip()):
            logo = download_image(funder['url'], title=funder['title'])
            if logo:
                # Escape the alt text so quotes or angle brackets in a funder
                # name cannot break the embed markup.
                alt = escape(funder['title'], quote=True)
                funders_html += f'<embed alt="{alt}" embedtype="image" format="fullwidth" id="{logo.id}"/>'

    # Text sections, in display order; empty sections are omitted.
    sections = (
        ('background', collection.background),
        ('help', collection.help),
        ('resources', collection.resources),
        ('terms', collection.terms),
        ('timeline', collection.timeline),
    )
    content = [(block_type, markdownify(text)) for block_type, text in sections if text]
    # Only add a funders block when at least one logo was embedded.
    if funders_html:
        content.append(('funders', funders_html))

    # Sort spec looks like "<field>+<order>"; map cmless field names onto the
    # names used by the page model.
    sort_by = sort_order = None
    if collection.sort:
        sort_parts = collection.sort.split('+')
        sort_by = {'asset_date': 'date', 'asset_title': 'title'}.get(sort_parts[0], sort_parts[0])
        sort_order = sort_parts[1] if len(sort_parts) > 1 else None

    featured_items = None
    if collection.featured:
        featured_items = [('records', record) for record in parse_featured_markdown(collection.featured)]

    # A title containing emphasis markup keeps its formatted form as the
    # display title, while the plain page title has the markup removed.
    raw_title = collection.title
    has_markup = '<em>' in raw_title or '*' in raw_title
    if has_markup:
        title = raw_title.replace('<em>', '').replace('</em>', '').replace('*', '')
        display_title = markdownify(raw_title)
    else:
        title = raw_title
        display_title = None

    hero_image = None
    if collection.thumbnail:
        thumbnails = parse_cmless_thumbnail(collection.thumbnail)
        # The thumbnail field may hold text with no image in it; skip the hero
        # image then instead of failing on an empty list.
        if thumbnails:
            first = thumbnails[0]
            # The image is titled with the cleaned page title only when the
            # title had markup; otherwise the image's own alt text is used.
            hero_image = download_image(first['url'], title=title if has_markup else first['title'])

    # return content
    page = AAPBCollection(
        title=title,
        display_title=display_title,
        introduction=markdownify(collection.summary),
        content=content,
        featured_items=featured_items,
        sort_by=sort_by,
        sort_order=sort_order,
        hero_image=hero_image,
    )
    return page

# create_collection_page(collections[0])

In [None]:
# Build a page for every parsed collection and attach it beneath the `aapb`
# parent page.
# NOTE(review): nothing here checks for already-imported pages, so re-running
# this cell looks like it would add the same collections again — confirm
# before re-executing.
for collection in collections:
    page = create_collection_page(collection)
    print(f"Creating collection: {page}")
    # add_child is called on the parent with the new, unsaved page instance.
    aapb.add_child(instance=page)
    print(f"Added collection: {collection.title}")

In [None]:
# with open('/home/harpo/gbh/aapb/AAPB2/exhibits.json') as f:
#     exhibits = json.load(f)
# exhibits

In [None]:
# for name, exhibit in exhibits.items():
    