In [461]:
import os
import numpy as np
import pandas as pd
from random import randint
from random import choice
import requests

import tracery
from tracery.modifiers import base_english

##  Process the OpenStreetMap Data

In [5]:
places = pd.read_csv("data/shops_and_tourism.tsv", sep="\t")

In [6]:
canalsdf = pd.read_csv("data/canals_roads_venice.tsv", sep="\t")

In [7]:
canalsdf.head()

Unnamed: 0,bicycle,highway,name,note,surface,visible_name,boat,canoe,motorboat,waterway,...,motor_vehicle,segregated,smoothness,step,ramp,ramp:bicycle,ramp:stroller,ramp:wheelchair,name_1,noexit
0,no,pedestrian,Calle del Teatro o de la Commedia,high dop,,,,,,,...,,,,,,,,,,
1,no,pedestrian,Fondamenta dei Tolentini,,paving_stones,Fondamenta di Santa Chiara,,,,,...,,,,,,,,,,
2,,,Rio della Croce,,,,yes,yes,yes,canal,...,,,,,,,,,,
3,,,Rio de Ognisanti,,,,,,yes,canal,...,,,,,,,,,,
4,,,Rio di Noale,,,,,yes,yes,canal,...,,,,,,,,,,


In [8]:
canalsdf[canalsdf['name'].str.contains("Rio")].dropna(axis=1, how='all').columns

Index(['bicycle', 'highway', 'name', 'note', 'surface', 'visible_name', 'boat',
       'canoe', 'motorboat', 'waterway', 'canoe:conditional', 'wikidata',
       'bridge', 'layer', 'ship', 'width', 'area', 'lit', 'wikipedia',
       'tunnel', 'covered', 'alt_name', 'foot', 'loc_name', 'oneway',
       'old_name', 'canoa:conditional', 'fixme', 'sidewalk', 'maxwidth',
       'natural'],
      dtype='object')

In [9]:
canalsdf[pd.notna(canalsdf['highway'])].dropna(axis=1, how='all')['surface'].value_counts()

paving_stones    353
paved             25
sett              15
wood               9
asphalt            6
concrete           1
Name: surface, dtype: int64

In [10]:
places.head()

Unnamed: 0,amenity,name,operator,addr:housenumber,addr:neighbourhood,addr:place,tourism,brand,layer,ref:mise,...,smoothness,surface,addr:number,boat:parts,name:cs,facebook,isced:level,operator:type,addr:suburb,guest_house
0,ferry_terminal,"San Zaccaria (Pietà) ""A""",ACTV,,,,,,,,...,,,,,,,,,,
1,,Albergo Locanda Silva,,4423.0,Castello,Castello,guest_house,,,,...,,,,,,,,,,
2,fuel,Agip,Iclera S.N.C. di Cancellara Michele & C.,,,,,Agip Eni,1.0,31659.0,...,,,,,,,,,,
3,ferry_terminal,San Basilio,ACTV,,,,,,,,...,,,,,,,,,,
4,ferry_terminal,Palanca,ACTV,,,,,,,,...,,,,,,,,,,


In [11]:
# Create the list of strings to choose for places
shops_text = []

def get_a_an(text):
    vowels = ['a', 'e', 'i', 'o', 'u', 'y']
    if text[0] in vowels:
        return f" an {text}"
    else:
        return f" a {text}"

for i, row in places.iterrows():
    if not pd.isna(row['tourism']):
        shops_text.append("the " + row['name'] + "," + get_a_an(row['tourism'].replace('_', ' ')))
        shops_text.append(f"the {row['tourism'].replace('_', ' ')}, {row['name']}")
    if not pd.isna(row['amenity']):
        if row['amenity'] == "drinking_water":
            shops_text.append(f"the {row['name']}, where you can get a drink of water")
            shops_text.append(f"the drinking fountain at {row['name']}")
        elif row['amenity'] == "fast_food":
            shops_text.append(f"the {row['name']}, a fast food restaurant")
            shops_text.append(f"the fast food restaurant, {row['name']}")
        elif row['amenity'] == "ice_cream":
            shops_text.append(f"the {row['name']}, an ice cream place")
            shops_text.append(f"the ice cream place, {row['name']}")
        else:                 
            shops_text.append(f"the {row['name']}," + get_a_an(row['amenity'].replace('_', ' ')))
            shops_text.append(f"the {row['amenity'].replace('_', ' ')}, {row['name']}")


In [116]:
# Create the list of strings for canals and 'roads' too

canals = []
walks = []

for i, row in canalsdf.iterrows():
    if not pd.isna(row['waterway']):
        string = f"the {row['name']}"
        if string not in canals:
            canals.append(f"the {row['name']}")
    elif not pd.isna(row['highway']):
        string = f"the {row['name']}"
        if string not in walks:
            walks.append(string)

In [None]:
shops_text

In [None]:
canals

In [None]:
walks

In [119]:
# Subset out the squares as important features 

squares = [walk for walk in walks if 'Campo' in walk]

In [None]:
squares

## Code to Run Special Functions

In [462]:
# Used for post-processing GPT2 output.  Not super successful.

from nltk.tokenize import sent_tokenize

def remove_dupe_sents(text):
    sent_tokenize_list = list(set(sent_tokenize(text)))
    return sent_tokenize_list

In [430]:
# Functions used in conjunction with Tracery, as code modifiers.



## This function calls GPT2 and does a bunch to try to rationalize the output.  It often fails with the small model.
def call_model(text, prefix=None, length=100):
    punct_without_period = '!"#$%&\'()*+,-/:;<=>?@[\\]^_`{|}~“'
    # these params don't seem to work quite right?  the prefix still shows up.
    req = requests.post('http://0.0.0.0:8080',
                    json={'length': length, 'temperature': 0.7, 
                        'prefix': text, 'top_k':10, 'include_prefix': 'False'})
    res = req.json()['text']
    res = res.replace(text, '') # does this work right? try to remove the prompt text from the string.
    # remove random punctuation which is common from the small model.
    res = res.translate(str.maketrans('', '', punct_without_period))
    # An artifact of the data training using Gutenberg.
    if 'Illustration' in res:
        res = res.replace('Illustration', ' ')
    res = res.replace('\n', ' ')
    
    # For the output, if you want to specify the actual prepended text - which may differ from the prompt!
    if prefix:
        res = prefix + ' ' + res
    # Try to get rid of repeated sentences.
    splits = remove_dupe_sents(res)
    # Only keep sentences that are longer than junk.  This also often fails.  GPT2 tiny doesn't have sentences.
    splits = [spl for spl in splits if (len(spl.split(' ')) > 1) and (len(spl) > 2)]
    final = ' '.join(splits)
    if final:
        return final
    else:
        return ' '

##  Functions that take text and call GPT2.
def get_stage(text, *params):
    final = call_model(text, prefix="You see ")
    return final.rstrip(".")

def get_views_desc(text, *params):
    final = call_model(f"You see beautiful Venetian views of {text} and", prefix=f"views of {text} ", length=50)
    return final.rstrip(".")

def get_atmosphere(text, *params):
    global sentences
    
    prefix = choice(['\nA tip: ', '\nAside: ', '\nReminder: ', '\nSetting the scene: ', '\nThoughts: ', '\nBy the way: ', 
                    '\nIncidentally: ', '\nSo you know: ', '\nRandom fact: ', '\nTake a note: '])
    if sentences and len(sentences) > 1:
        safesents = [sent for sent in sentences if "<" not in sent]  # get rid of the links
        if safesents:
            inputtext = ' '.join(safesents[-2:])
            if inputtext:
                return call_model(inputtext, prefix=prefix, length=50).rstrip('.')
    else:
        return "\nIn point of fact: It's a cold day in Venice"

def get_square_desc(text, *params):
    final = call_model(f"After winding your way through the alleys of Venice, you enter the square {text}.", prefix=text + ' ', length=50)
    return (text + ' ' + final).rstrip(".")

In [465]:
## Code to make little crops of the Venice jpg map randomly

from PIL import Image

# Set up the info used by the map code here 
img = Image.open("venice-tourist-map_smaller.jpg")
output = "images/"

def get_image_crop(img, output, filenumber):
    width = randint(100, 200)
    height = randint(100, 200)
    imgwidth = img.size[0]
    imgheight = img.size[1]
    left = randint(0, imgwidth-width)
    right = randint(0, imgheight-height)
    box = (left, right, left+width, right+width)
    im1 = img.crop(box)
    #im1.show()
    filename = output + "image_" + str(filenumber) + ".jpg"
    im1.save(filename)
    return filename
    
def get_map(text, *params):
    global maps
    global img
    global output
    
    filename = get_image_crop(img, output, maps)
    maps += 1
    return f"\n<img src='{filename}'>\n"

In [458]:
# Code to get prompt text for GPT2 model based on how far we are thru the total length. Did not work super well.
# Partly because I made the final 'novel' in chunks due to need to check for bugs and stuff.

def get_prefix(text):
    global counter
    global length
    
    fraction = counter/length
    prefixes = {
        'a': ["You've arrived in Venice and you look around you. You see", "You're in Venice now! Everywhere you can see"],
        'b': ["You've been walking a while in Venice and have reached the square. You see",
             "You're at a huge square now, and you see", "In the Venetian square, you see"],
        'c': ["After turning left at the church, you reach the canal. It is now dark. Night in Venice is beautiful. You see",
             "The lights of Venice shine across the water. It's beautiful. You can hear and see"],
        'd': ["The water is high in Venice, often a problem. It has been raining and the canals are full. The water overflows onto the pavement. You see",
             "You wish you had an umbrella. Venice is wet and dark. You see"],
        'e': ["You hurry through the streets of Venice, feeling lost. You've been walking a long time. Is someone following you? You see",
             "The alleys of Venice are narrow. Be careful as you run. You hear behind you"]
    }
    if fraction < .15:
        return choice(prefixes['a'])
    elif fraction < .25:
        return choice(prefixes['b'])
    elif fraction < .5:
        return choice(prefixes['c'])
    elif fraction < .75:
        return choice(prefixes['d'])
    elif fraction <= 1:
        return choice(prefixes['e'])

### Utilities for Tracery to Generate Inputs

In [None]:
def get_time(text, *params):
    return str(randint(1, 20))

def get_distance(text, *params):
    return str(randint(1, 500))

### Flickr Image Search and Return

In [459]:
## Code to get search strings for flickr pics. Again, not perfect because the final output was done in chunks.

def get_photo(text, *params):
    import os
    global counter
    global length
    
    flickrkey = os.environ['FLICKR']
    
    fraction = counter/length
    prefixes = {
        'a': ["Venice square", "Italian church", "Venice fountain", "Venice houses", "Venice cat", "gondola", "Venice bridge"],
        'b': ["Venice dog", "Venice bridge", "Italian church", "Venice cafe", "Venice cats", "Venice restaurant", "Venice woman", "Venice shop"],
        'c': ["Venice night", "Venice lights", "Venice reflection", "Venice water", "Venice sunset"],
        'd': ["Venice floods", "Venice rain", "Venice night", "Venice bar", "Venice evening", "Venice storm"],
        'e': ["Venice alley", "Venice fishing", "Venice gondola night", "Venice darkness", "Venice clouds", "Venice boat"]
    }
    
    if fraction < .15:
        tags = choice(prefixes['a'])
    elif fraction < .25:
        tags = choice(prefixes['b'])
    elif fraction < .5:
        tags = choice(prefixes['c'])
    elif fraction < .75:
        tags = choice(prefixes['d'])
    elif fraction <= 1:
        tags = choice(prefixes['e'])
    
    title = tags
    tags = ','.join(title.split(' '))
    url = get_url(tags, flickrkey.replace("'","")) # ridiculous need to remove single quotes?
    result = requests.get(url)
    res = result.json()
    if 'photos' in res:
        return get_flickr_pic(res)
    else:
        return f"\n##{title}"

### Code to look up flickr images based on search

Re-used from old js code of mine. Ideally the author and photo are credited. I'll add that after the deadline as alt text.

In [460]:
flickrkey = os.environ['FLICKR']

def get_url(tags, key):
    # tags needs to be comma sep string
    baseUrl = f"https://api.flickr.com/services/rest/?method=flickr.photos.search&api_key={key}&per_page=50&format=json&nojsoncallback=1&safe_search=1&content_type=1&tag_mode=all"
    search = f"&tags={tags}"
    return baseUrl + search

def get_flickr_pic(result):
    from random import choice
    if 'photos' in result:
        photo = choice(result['photos']['photo'])
        farmId = photo['farm']
        serverId = photo['server']
        picid = photo['id']
        secret = photo['secret']
        # _z is a size attribute, see https://www.flickr.com/services/api/misc.urls.html
        newurl = f"\n<img src='http://farm{farmId}.staticflickr.com/{serverId}/{picid}_{secret}_m.jpg'>\n"
        return newurl
    else:
        return ' '

### Create the Utility Functions for Tracery Use

In [431]:
functions = {
    'get_time': get_time,
    'get_distance': get_distance,
    'get_stage': get_stage,
    'get_prefix': get_prefix,
    'view_desc': get_views_desc,
    'get_atmo': get_atmosphere,
    'get_square': get_square_desc,
    'get_photo': get_photo,
    'get_map': get_map
}

### Tracery Main Grammar

Ideally I'd have this grammar mutate as you go thru the 'book' - it was too hard and I ran out of time.  Some of the functions above that look at length and how far you are to choose prompts for GPT2 are proxying that.  It was not a good way to go.

In [427]:
rules = {
    "origin": ["#story#"],
    "story": ["Now #direction#.", "#direction.capitalize#.", "You will need to #direction#.", "Then, #direction#.", 
              "Now, #direction#.", "#arrive.capitalize#.", "#arrive.capitalize#.", "#boat.capitalize#.", 
              "#boat.capitalize#.",
              "#atmosphere#.", "#atmosphere#.", "#photo#", "#map#", "#newpara#"],
    "direction": ["#dir# #where#", "#dir# #howlong#", "#boat#", "#take#"],
    "dir": ["turn #compass#", "go #compass#", "take a #compass#", "veer #compass#", "#straight#"],
    "take": ["take the #nth# #compass#", "take the #nth# #compass#", "take the #nth# turn #compass#",
            "after the #nth# turn, #dir#", "pass the #nth# alleyway", "go by the #nth# door and #dir#"],
    "nth": ['first', 'second', 'third', 'fourth', 'fifth', 'sixth', 'seventh', 'eighth', 'ninth', 'tenth'],
    "compass": ["left #place#", "right #place#",],
    "straight": ["go straight ahead", "go ahead", "go straight on for #time.get_time# minutes", 
                 "walk without turning around"],
    "howlong": ["after #modifier# #distance.get_distance# meters", "and go for #modifier# #time.get_time# minutes", 
                "for #modifier# #time.get_time# minutes, #how#"],
    "how": ["walking quickly", "while meandering to admire the views of #views of.view_desc#",
            "striding swiftly", "fighting through the crowds here", "where you can smell #smells of#",
            "trying not to think of #smells of#", "enjoying the views of #views of.view_desc#"],
    "where": ["over the bridge", "past #shop#", "beyond #shop#, where a cat watches from the window #catdetail#",
              "through the tour group", "and just past the cat who sits there #catdetail#", 
              "and go past the dogs, #dogdetail#", "where you can smell #smells of#",
             "past #squares#, #square_desc#", "through #squares# without stopping"],
    "modifier": ["exactly", "about", "approximately", "no less than", "at least", "precisely", "around"],
    "boat": ["take a boat at the #canals#. #boatstuff#", "take a gondola at #canals#. #boatstuff#", 
             "hire a boat at #canals#. #boatstuff#", "look for a water taxi when you reach #canals#"],
    "arrive": ["you will come to #shop#. #setting.get_stage#", "you will see #shop#. #setting.get_stage#", 
               "now you will arrive at #canals#. #setting.get_stage#", "you will find #roads#. #setting.get_stage#", 
               "#atmosphere.capitalize#", "you will enter #squares.get_square#",
               "here is a nice big square, #squares.get_square#",
               "you will find yourself in #squares.get_square#",
              "you find yourself in #squares#, #square_desc#", 
               "here is a nice big square, #squares#, #square_desc#"],
    "views of": ["the church", "the canal", "the piazza", "the children playing", "the little gondolas", "the islands",
                "the people in the cafes", "the colorful houses", "the old doors", "the ancient signs"],
    "smells of": ["the ocean", "fish", "frying foods", "pasta", "rain", "mold"],
    "place": ["on #canals#", "onto #roads#", "after the passageway #roads#"],
    "catdetail": ["(is this a cat you have seen before?)", "(this cat seems familiar)", "(this cat is #color#)",
                  "(this #color# cat reminds you of another cat)", "(is this cat following you?)"],
    "dogdetail": ["without petting them", "--ok these are ones you can pet", "which might bark at you", 
                  "who might follow you for a while", "who are playing with the children", 
                  "which are not dangerous"],
    "color": ["orange", "tortoise-shell", "gray", "black", "striped", "white"],
    "boatstuff": ["Tell your pilot to take you to the stop closest to #shop#", "Show them your money, first", 
                  "While you relax in the boat, admire #views of.view_desc#", 
                  "Get off the boat after #time.get_time# minutes", "Look for the cheaper water taxis first",
                 "Get out when you reach the #roads#", "Ask to be let off at #shop#"],
    "square_desc": ["where there is a covered stone well", "where there are empty benches", 
                    "where there is fountain running into a plastic bucket", "where there is laundry hanging overhead",
                   "where children are playing", "where old men are arguing", "where tourists sit at cafes",
                   "where there are signs for #roads#", "where there is #shop#"],
    "shop": shops_text,
    "canals": canals,
    "roads": walks,
    "squares": squares,
    "photo": ["#photo_tag.get_photo#"],
    "photo_tag": ["PHOTO: "],
    "setting": ["#hi.get_prefix#"],
    "atmosphere": ["#hi.get_atmo#"],
    "map": ["#hi.get_map#"],
    "newpara": ['\n\n']
}

In [468]:
# Main function.  Some globals here that need to be seen and modified by other functions and I didn't have time for
# refactoring well.

def generate_sentences(rules, otherlibs=[base_english, functions], howmany=10):
    global sentences  # needs to be seen by some of the code looking for context input to GPT2 model
    global counter  # used to choose clauses in some modifier functions
    global length
    
    length = howmany
    sentences = []
    wordcount = 0
    
    grammar = tracery.Grammar(rules)
    for lib in otherlibs:
        grammar.add_modifiers(lib)
     
    for i in range(0, howmany):
        counter = i
        sent = grammar.flatten("#origin#")
        wordcount += len(sent.split(' '))
        print(sent)
        sentences.append(sent)
    print("words", wordcount)
    return sentences

In [469]:
# Run the loop over and over and over...  and write the sentences to the file.

maps = 0  # only run this if you start fresh - the number is needed for the filenames in the output.
sentences = generate_sentences(rules, howmany=5)

Here is a nice big square, the Campo dei Miracoli The Castle of the Princesses  There is a castle at Venice called the Castle of the Princess the Campo dei Miracoli  There you will find a large house with all the furniture furniture and furniture of a great city.
Now, take the second right on the Rio di Sant'Angelo.
Then, veer left on the Rio Novo and go for at least 5 minutes.

Thoughts:                                     The next time you run across a hill you must turn right and go.
You will find the Corte Pugliese. You see  and you know that if you go near the sea you may be caught and you shall be saved.
words 154


In [470]:
len(sentences)

5

In [472]:
# global vars for state/links

maps, counter, length

(220, 4, 5)

## Save the Results

In [394]:
# Append or Write to a Fresh File

with open("results_raw.md", "w") as handle:
    for sent in sentences:
        handle.write(sent + "\n")

In [395]:
!wc -w results_raw.md

   59680 results_raw.md


### Final Paragraphs

In [443]:
endrules = {
    "origin": ["""
        \nAre you there yet, you wonder?\n\nFinally, after a few miles of walking, you enter #squares.get_square#.\n#decorate#\nYou have definitely seen the cat here, who purrs a greeting from the top of the old well. On your left is #shop#.  The prices are reasonable and it's a little harder to find, which is why I recommend it.\n\nI hope you enjoy your stay in Venice! (You can take these directions with you.)\n#decorate#
        """],
    "shop": shops_text,
    "decorate": ["#photo#", "#map#", "#newpara#"],
    "squares": squares,
    "photo": ["#photo_tag.get_photo#"],
    "photo_tag": ["PHOTO: "],
    "map": ["#hi.get_map#"],
    "newpara": ['\n\n']
                }
                

In [445]:
# let's just pick the best one for the end, frankly.

sentences = generate_sentences(endrules, howmany=5)


        
Are you there yet, you wonder?

Finally, after a few miles of walking, you enter the Campo dei Mori the Campo dei Mori  It has a little garden on the roof and a small cottage on the other side of it. From the house of the famous poet who was a member of the Court of the Italian Court you will soon reach the place of.

<img src='images/image_217.jpg'>

You have definitely seen the cat here, who purrs a greeting from the top of the old well. On your left is the ferry terminal, Tre Archi.  The prices are reasonable and it's a little harder to find, which is why I recommend it.

I hope you enjoy your stay in Venice! (You can take these directions with you.)

<img src='http://farm66.staticflickr.com/65535/48721479088_4fa39cabc5_m.jpg'>

        

        
Are you there yet, you wonder?

Finally, after a few miles of walking, you enter the Campo Sant'Agnese This is the palace of the Emperor of the Italian Empire. In it in front of the Palace of the Emperor sits the Campo Sant'Agnes

In [448]:
sentences[3]

"\n        \nAre you there yet, you wonder?\n\nFinally, after a few miles of walking, you enter the Campo San Zulian The door is in the middle of the street and you pass through the door of a room of beautiful marble. There is a the Campo San Zulian   You pass through a passage of marble and you pass through a door of a great roof.\n\n<img src='http://farm66.staticflickr.com/65535/48989760408_b861644fdd_m.jpg'>\n\nYou have definitely seen the cat here, who purrs a greeting from the top of the old well. On your left is the A Beccafico Arte, a restaurant.  The prices are reasonable and it's a little harder to find, which is why I recommend it.\n\nI hope you enjoy your stay in Venice! (You can take these directions with you.)\n\n<img src='http://farm66.staticflickr.com/65535/49079183322_8acae59688_m.jpg'>\n\n        "

In [449]:
with open("results_raw.md", "a") as handle:
    handle.write(sentences[3] + "\n")

In [None]:
# ship it.