## The Walks of Nathan Lowell
A look at Nate's path to the back gate.

The journey begins [#tommw](https://www.nathanlowell.com/tommw/)

Archive File Structure

``` python
{'meta': {'ids': [
                '12345',
                '67890',
                ...],
          'last_updated': 'UTC Timestamp',
         },
 'data': [
     {'link': URL string,
      'condition': string,
      ...}
         ]
}
 

```


[#tommw](https://www.nathanlowell.com/tommw/) is a cross-channel social media campaign architected and executed by science fiction author Nathan Lowell.

There are 4 planks to Nathan's campaign:
1. A blog hosted at https://www.nathanlowell.com/tommw/
2. A photo stream hosted on [Flickr](https://www.flickr.com/photos/nlowell/)
3. A podcast feed via [RSS](http://www.nathanlowell.com/tommw/feed/)
4. Posts to Twitter with the #tommw hashtag on [@nlowell on Twitter](https://twitter.com/nlowell)

In [1]:
from io import BytesIO
import json
from PIL import Image
import shutil
import sys

import arrow
from bs4 import BeautifulSoup
import requests


import photo as pto
import podcast as pc
import twitter as twit

# Functions to manage the archive file

def get_archive():
    '''Load the walk archive from archive.json in the working directory.

    Returns:
        The parsed JSON content of archive.json when the file can be
        read and decoded. Otherwise a newly built, empty archive: a dict
        with a 'meta' entry (empty 'ids', 'audio' and 'tw_ids' lists and
        'last_updated' set to 'New File') and an empty 'data' list.
    '''

    try:
        with open('archive.json', 'r') as file:
            archive = json.load(file)

    except (OSError, ValueError):
        # OSError covers a missing or unreadable file; ValueError covers
        # invalid JSON (json.JSONDecodeError) and undecodable bytes.
        # Anything else (e.g. KeyboardInterrupt) is allowed to propagate
        # instead of being mistaken for "no archive yet".
        archive = {'meta': {'ids': [],
                            'audio': [],
                            'tw_ids': [],
                            'last_updated': 'New File',
                            },
                   'data': [],
                   }
        print('No archived file, building new archive.json')

    else:
        print('Archive loaded.')

    return archive

def save_archive(archive):
    '''Stamp the archive with the current time and write it to archive.json.

    Args:
        archive: Archive dict with a 'meta' dict. Its 'last_updated'
            entry is overwritten in place with str(arrow.now()).

    Returns:
        True once the file has been written.
    '''

    archive['meta']['last_updated'] = str(arrow.now())

    # Serialize before opening the file: open(..., 'w') truncates
    # immediately, so a failure inside json.dumps would otherwise leave
    # an empty archive.json behind and lose the previous archive.
    payload = json.dumps(archive, indent=4)

    with open('archive.json', 'w') as file:
        file.write(payload)

    return True


def get_tweet_archive(record='tweets'):
    '''Load a saved tweet archive from "<record>.json".

    Args:
        record: File name without the '.json' extension.

    Returns:
        The parsed JSON content of the file, or None when the file is
        missing, unreadable, or not valid JSON. In that case a hint is
        printed instead of raising.
    '''

    filename = record + '.json'

    try:
        with open(filename, 'r') as file:
            archive = json.load(file)

    except (OSError, ValueError):
        # OSError: missing/unreadable file; ValueError: invalid JSON.
        print('No archived tweet file: ', filename,
              ' build a new file with build_twitter_archive function')
        # Return explicitly: there is no archive to hand back, and
        # falling through to a shared return would reference an
        # unassigned name.
        return None

    print('tweets_loaded.')
    return archive



def backfill_dates(archive):
    '''Add a 'date' entry to every walk record in the archive.

    The date is the part of each record's 'timestamp' string that comes
    before the first space. Date is used to match audio files to photos.
    The records are updated in place and the same archive is returned.
    '''

    for entry in archive['data']:
        # Keep only the leading date portion of "<date> <time...>".
        entry['date'] = entry['timestamp'].split(' ', 1)[0]

    return archive

    

if __name__ == "__main__":
    # Update run: scrape the blog home page for the latest Flickr and
    # podcast links, fold new photos, audio and tweets into the archive,
    # then write the archive back to archive.json.

    # Path to temp directory
    latest = 'latest/'

    # Flickr album that is checked on every run and used for backfilling.
    album_url = 'https://www.flickr.com/photos/nlowell/albums/72157626736309035'

    # Import the archive from disk
    archive = get_archive()
    # NOTE(review): this value is replaced by the filtered Twitter search
    # result below before it is ever read -- confirm whether the saved
    # walk file is still needed here.
    walks = get_tweet_archive('walk')

    # Scrape the blog for links to the other media components
    blog_page = requests.get("https://www.nathanlowell.com/tommw/")
    soup = BeautifulSoup(blog_page.text, 'html.parser')

    # Find a flickr link
    flickr = soup.find_all(href=True, attrs={'data-flickr-embed': 'true'})

    # Find a podcast link. soup.find returns None when nothing matches,
    # so test for that directly instead of catching every exception.
    podcast_tag = soup.find(class_="powerpress_link_pinw")

    if podcast_tag is None:
        print('No new walk posts found.')
        podcast_link = None

    else:
        podcast_link = podcast_tag.get('href')

    if not flickr:
        print('No flickr links found on blog home page.')
        flickr_link = None

    else:
        flickr_link = flickr[0]['href']

    if not podcast_link and not flickr_link:
        print('No recent walks found on blog.')

    else:

        # Only one of the two links may be present; each step runs only
        # when its own link was found so the helpers never receive None.
        if flickr_link:
            # Process the photostream
            stream_url = pto.get_stream_url(flickr_link)
            stream = pto.get_photostream(stream_url)
            archive = pto.process_photo_stream_page(stream, archive)

        if podcast_link:
            # Save the audio file
            mp3_file, archive = pc.get_mp3(podcast_link, archive)

        # A small archive triggers a backfill from the photostream and
        # the album.
        if len(archive['meta']['ids']) < 400:
            if flickr_link:
                archive = pto.build_flickr_archive(flickr_link, archive, album=False)
            archive = pto.build_flickr_archive(album_url, archive, album=True)

    # Check the album page
    stream = pto.get_photostream(album_url, album=True)
    archive = pto.process_photo_stream_page(stream, archive)

    # Check twitter
    tweets = twit.new_tweets(archive)
    filtered = twit.filter_twitter_search(tweets)
    walks = filtered['walks']

    if walks:
        archive = twit.merge_tw_walks_into_photo_walks(archive, walks)
        # len() returns an int; convert it before concatenating.
        print(str(len(walks)) + ' walks added to archive.')

    else:
        print('No walks added to archive.')

    # Save the data to archive.json
    save_archive(archive)


Archive loaded.
tweets_loaded.
No new walk posts found.
No flickr links found on blog home page.
No recent walks found on blog.


INFO: queries: ['#tommw since:2019-02-12 until:2019-02-13']
INFO: Querying #tommw since:2019-02-12 until:2019-02-13
INFO: Got 0 tweets for %23tommw%20since%3A2019-02-12%20until%3A2019-02-13.
INFO: Got 0 tweets (0 new).


No walks added to archive.
