# Import from pyvideo.org 

Convert presentations hosted at pyvideo.org into a JSON document suitable for importing into the `Presentation` model with `manage.py loaddata`. Pyvideo publishes machine-readable data at https://github.com/pyvideo/data/, so this is fairly easy to do. There's no single entry point for all talks by a given speaker, but it's easy enough to spin through everything and pick out the right talks. For speed, this reads from a local checkout of that repository; create one with `hub clone --depth=1 pyvideo/data pyvideo-data` (a shallow clone, also for speed).

In [35]:
import json
import time
import datetime
import pathlib
from django.utils.text import slugify

In [41]:
# Name matched against each video's "speakers" list (exact, case-sensitive).
SPEAKER = 'Jacob Kaplan-Moss'
# Root of the local pyvideo data checkout; each sub-directory is one conference.
DATA_DIR = pathlib.Path("/mnt/c/Users/jacob/c/pyvideo-data")

# Every video record featuring SPEAKER, annotated with two extra keys:
#   "_slug"       -- the video's file name without the ".json" suffix
#   "_conference" -- the parsed category.json of the containing directory
talks = []
for conference_dir in DATA_DIR.iterdir():
    # Skip plain files and hidden directories (names starting with ".").
    if not conference_dir.is_dir() or conference_dir.name.startswith('.'):
        continue
    for vid_file in conference_dir.glob('videos/*.json'):
        # JSON is UTF-8 by specification; decode explicitly instead of
        # relying on the platform's locale-dependent default encoding.
        vid_data = json.loads(vid_file.read_text(encoding="utf-8"))
        # `or []` also covers an explicit `"speakers": null`, which would
        # otherwise make the `in` test raise TypeError.
        if SPEAKER not in (vid_data.get('speakers') or []):
            continue
        vid_data["_slug"] = vid_file.stem
        vid_data['_conference'] = json.loads(
            (conference_dir / "category.json").read_text(encoding="utf-8")
        )
        talks.append(vid_data)

In [42]:
# Show the first matching talk to eyeball the shape of a record.
talks[0]

{'description': 'Django: Under The Hood: http://djangounderthehood.com/\n\nDjango: Under The Hood is an annual Django conference for experienced Django developers. Come and learn about the internals of Django, and help to shape its future.',
 'recorded': '2016-11-03',
 'speakers': ['Tim Graham', 'Jacob Kaplan-Moss'],
 'thumbnail_url': 'https://i.ytimg.com/vi/MgpMh2aXzWM/hqdefault.jpg',
 'title': 'Tim Graham & Jacob Kaplan-Moss about Intro to sprints at Django: Under The Hood 2016',
 'videos': [{'type': 'youtube',
   'url': 'https://www.youtube.com/watch?v=MgpMh2aXzWM'}],
 '_slug': 'tim-graham-jacob-kaplan-moss-about-intro-to-sprints-at-django-under-the-hood-2016',
 '_conference': {'title': 'Django Under the Hood 2016'}}

In [48]:
# Build fixture records for `manage.py loaddata`: one
# speaking_portfolio.Presentation entry per collected talk, with primary
# keys numbered from 1 in the order the talks were collected.
fixtures = [
    {
        "pk": pk,
        "model": "speaking_portfolio.Presentation",
        "fields": {
            "title": entry["title"],
            # Keep at most 50 characters of the slug and drop any hyphen
            # the cut leaves dangling at the end.
            "slug": entry["_slug"][:50].rstrip('-'),
            "date": entry["recorded"],
            "description": entry["description"],
            "conference_title": entry["_conference"]["title"],
            # Only the first listed video's URL is used.
            "video_link": entry["videos"][0]["url"],
        },
    }
    for pk, entry in enumerate(talks, start=1)
]

In [49]:
# Show the first generated fixture record as a sanity check.
fixtures[0]

{'pk': 1,
 'model': 'speaking_portfolio.Presentation',
 'fields': {'title': 'Tim Graham & Jacob Kaplan-Moss about Intro to sprints at Django: Under The Hood 2016',
  'slug': 'tim-graham-jacob-kaplan-moss-about-intro-to-sprint',
  'date': '2016-11-03',
  'description': 'Django: Under The Hood: http://djangounderthehood.com/\n\nDjango: Under The Hood is an annual Django conference for experienced Django developers. Come and learn about the internals of Django, and help to shape its future.',
  'conference_title': 'Django Under the Hood 2016',
  'video_link': 'https://www.youtube.com/watch?v=MgpMh2aXzWM'}}

In [50]:
# Write the fixture list to disk as pretty-printed (2-space indented) JSON.
with open('/tmp/speaking.json', 'w') as out_file:
    serialized = json.dumps(fixtures, indent=2)
    out_file.write(serialized)