* Collect a file, capture <filename>
* render the image as html with a form
* submit album, location, caption, tags
* create <filename1>, which is <filename> + N, where N is the number of times <filename> has been seen
* capture width, height
* save file to all-photos/<filename1>
* create JSON:
    
    {filename: filename1, width: width, height: height, caption: caption, tags: tags, location: location, album: album}

In [1]:
import json
import os
class JSON(object):
    """A minimal record store backed by a single JSON file holding a list.

    Every read re-loads the file and every ``update`` rewrites it, so the
    file on disk is always the source of truth.

    Args:
        path: Location of the JSON file containing the list of records.
        dirname: Directory that ``to_mongo_data`` writes one-file-per-record
            exports into.
    """

    def __init__(self, path='data/info.json', dirname='data/records'):
        self.path = path
        self.dirname = dirname
        if not os.path.exists(self.path):
            self._make_new()

    def _make_new(self):
        """Create the backing file (and its parent directory) with an empty list."""
        parent = os.path.dirname(self.path)
        # ``parent`` is '' for a bare filename; os.makedirs('') would raise.
        if parent:
            os.makedirs(parent, exist_ok=True)
        with open(self.path, 'w') as f:
            json.dump([], f)

    def update(self, value):
        """Append ``value`` to the stored list and rewrite the file."""
        obj = self.get_data()
        obj.append(value)
        with open(self.path, 'w') as f:
            json.dump(obj, f)

    def get_data(self):
        """Return the full list of records currently stored in the file."""
        with open(self.path, 'r') as f:
            return json.load(f)

    def to_mongo_data(self):
        """Write each record to ``<dirname>/<image_id>.json``.

        Raises:
            KeyError: If a record has no ``'image_id'`` key.
        """
        # The export directory may not exist yet on a fresh checkout.
        os.makedirs(self.dirname, exist_ok=True)
        for doc in self.get_data():
            ID = doc['image_id']
            with open(os.path.join(self.dirname, "{}.json".format(ID)), 'w') as f:
                json.dump(doc, f)

    def get_values(self, field):
        """Return ``field`` from every record, using ``None`` where it is absent."""
        return [i.get(field, None) for i in self.get_data()]

In [2]:
# from ipywidgets import SelectMultiple, Checkbox, Layout, Checkbox
# g = SelectMultiple(options=[1,2,3], layout=Layout(width='200px', height='{}px'.format(20 * 3)))
# #g = Checkbox(description='use last')
# #for i in set(data.get_values('type')):
# #    display(Checkbox(description=i))
# display(g)

In [3]:
import json, os
from ipywidgets import Text, Image, HBox, VBox, Textarea, Layout, Checkbox, SelectMultiple
from IPython.display import display, clear_output
from PIL import Image as pil
from ipywidgets import Button
from ipywidgets import HTML as iHTML

def get_image_data(filename):
    """Return the entire contents of ``filename`` as bytes."""
    with open(filename, mode='rb') as handle:
        return handle.read()

def get_image_dim(filename):
    """Open the image at ``filename`` with PIL and return its ``size`` (width, height)."""
    picture = pil.open(filename)
    with picture:
        dimensions = picture.size
    return dimensions
    
def get_image_widget(filename, width=300, height=200):
    """Return an ``Image`` widget built from the bytes of ``filename``.

    ``width`` and ``height`` are passed straight through to the widget.
    """
    raw_bytes = get_image_data(filename)
    return Image(value=raw_bytes, width=width, height=height)

def get_video_widget(path):
    """Return an HTML widget containing a ``<video>`` player whose source is ``path``.

    ``path`` is inserted into the markup as-is (no escaping).
    """
    # ``{}`` is the slot for the video source.
    template = """
    <video width="320" height="240" controls>
      <source src="{}">
    Your browser does not support the video tag.
    </video>
    """
    return iHTML(value=template.format(path))

def flatten(L):
    """Concatenate the items of every iterable in ``L`` into one flat list."""
    flat = []
    for sublist in L:
        flat.extend(sublist)
    return flat

class form(object):
    """Interactive notebook form for annotating queued files one at a time.

    For each path pulled from ``paths`` the form shows the image/video next to
    caption, location, tag and album inputs. Submitting copies the file into
    ``target_image_dir`` and appends a metadata record to ``data``; skipping
    (or any non-media file) appends a placeholder record with
    ``display: False`` so the path is not offered again.

    Args:
        paths: An iterator (e.g. a generator) of file paths; it is advanced
            with ``next()``, so a plain list will not work.
        data: The record store. It must provide ``get_data()``,
            ``get_values(field)``, ``update(record)`` and a ``path`` attribute.
    """

    # Lower-case extensions (without the dot) recognised by ``get_type``.
    VIDEO_EXTENSIONS = ('mov', 'm4v')
    IMAGE_EXTENSIONS = ('jpg', 'png', 'jpg-3')

    def __init__(self, paths, data):
        """
        paths needs to be a generator
        """
        self.data = data
        self.paths = paths
        self.target_image_dir = 'public/photos/all-photos'
        self.messages = ""
        self.last = {}
        self.use_last = Checkbox(value=False, description="Use last")
        self.make_new_form()

    @property
    def seen(self):
        """Source paths of every record already in the store (re-read on each access)."""
        data = self.data.get_data()
        return [i['src'] for i in data]

    def next(self):
        """Advance ``self.path`` to the next unprocessed file.

        Returns:
            True if ``self.path`` now points at a file that has no record yet,
            False if the iterator is exhausted.
        """
        # Read the store once instead of once per skipped file, and loop
        # rather than recurse so a long run of processed files cannot hit
        # the recursion limit.
        already_seen = set(self.seen)
        while True:
            try:
                self.path = next(self.paths)
            except StopIteration:
                print("All photos processed! Exiting")
                return False
            if self.path not in already_seen:
                return True
            self.messages += "skipping {}, already processed\n".format(self.path)

    def get_object_data(self):
        """Return the raw bytes of the current file."""
        with open(self.path, 'rb') as f:
            return f.read()

    def scrape_album_entries(self):
        """Return the selected albums plus any new comma-separated album names."""
        results = list(self.pick_album.value)
        if self.new_album.value:
            for entry in self.new_album.value.split(","):
                entry = entry.strip()
                # A stray or trailing comma must not create an empty album.
                if entry:
                    results.append(entry)
        return results

    def get_seen_albums(self):
        """Return the set of album names used by any stored record."""
        albums = set()
        for entry in self.data.get_values('album'):
            # Records without an 'album' key come back as None.
            if entry is None:
                continue
            # Skipped records store '' rather than a list; treat a string as
            # a single name instead of iterating over its characters.
            if isinstance(entry, str):
                entry = [entry]
            albums.update(name for name in entry if name not in (None, ""))
        return albums

    def make_new_form(self):
        """Clear the cell output and show the form for the next unprocessed file."""
        clear_output()
        # Built before ``next()`` runs, so it shows the messages accumulated
        # by the previous save; skip messages appear on the following form.
        self.messagebox = Textarea(self.messages)
        self.messages = ""
        if not self.next():
            return
        self.type = self.get_type()
        self.object_data = self.get_object_data()
        print("Currently on {}".format(self.path))
        if self.type in ('video', 'image'):
            self._build_media_form()
        else:
            self._build_other_form()

    def _build_media_form(self):
        """Create and display the annotation widgets for an image or video."""
        self.use_last = Checkbox(value=self.last.get('use_last', False), description="Use last")
        self.caption = Text(value=self.last.get('caption', ""), description='caption')
        self.location = Text(value=self.last.get('location', ""), description='location')
        self.tags = Text(value=self.last.get('tags', ""), description='tags')
        # Sorted so the option order is stable between forms.
        albums = sorted(self.get_seen_albums())
        self.pick_album = SelectMultiple(
            options=albums,
            layout=Layout(width='200px', height='{}px'.format(20 * len(albums)))
        )
        self.pick_album.value = self.last.get('pick_album', tuple())
        self.new_album = Text("", description='Add new album')
        self.skip = Button(description="skip")
        self.skip.on_click(self.save_other)
        self.submit = Button(description="submit")
        self.build_last()
        if self.type == 'image':
            self.image = get_image_widget(self.path)
            self.submit.on_click(self.save_image)
        elif self.type == 'video':
            self.image = get_video_widget(self.path)
            self.submit.on_click(self.save_video)

        self.button_box = HBox([self.submit, self.skip])
        self.input_form = VBox([
            self.use_last,
            self.caption,
            self.location,
            self.tags,
            self.pick_album,
            self.new_album,
            self.button_box])
        self.imagebox = VBox([self.image, self.messagebox])
        self.final = HBox([self.input_form, self.imagebox])
        display(self.final)

    def _build_other_form(self):
        """Display a message and a single button for a file that is not media."""
        self.messages += "skipping {}, not an image or video\n".format(self.path)
        self.submit = Button(description="submit")
        self.submit.on_click(self.save_other)
        self.messagebox = Textarea(self.messages)
        display(self.messagebox)
        display(self.submit)
        self.build_last()

    def build_last(self):
        """Remember the current inputs when "Use last" is ticked, else only the flag."""
        if self.use_last.value is True:
            self.last = {
                'caption': self.caption.value,
                'location': self.location.value,
                'tags': self.tags.value,
                'use_last': self.use_last.value,
                'pick_album': self.pick_album.value
            }
        else:
            self.last = {
                'use_last': self.use_last.value
            }

    def make_name(self):
        """Return the stored filename / image id for the current path."""
        return self.prep_filename(self.parse_filename(self.path))

    def parse_filename(self, path):
        """Return the final component of ``path``."""
        return os.path.basename(path)

    def prep_filename(self, filename):
        """Return ``img-<N>.<ext>`` where N is the number of records stored so far.

        ``ext`` is the lower-cased text after the last dot of ``filename``.
        """
        ext = filename.lower().split(".")[-1]
        n = len(self.seen)
        return "img-{0}.{1}".format(n, ext)

    def _copy_to_target(self, label):
        """Copy the current file into ``target_image_dir`` and log it as ``label``."""
        os.makedirs(self.target_image_dir, exist_ok=True)
        target = os.path.join(self.target_image_dir, self.make_name())
        with open(target, 'wb') as target_file:
            target_file.write(self.get_object_data())
        self.messages += "saved {} to {} \n".format(label, target)

    def save_image_object(self):
        """Copy the current image into the target directory."""
        self._copy_to_target("image")

    def save_video_object(self):
        """Copy the current video into the target directory."""
        self._copy_to_target("video")

    def save_image_form(self):
        """Append the image's metadata record to the store."""
        self.data.update(self.image_to_form())
        self.messages += "updated metadata to {} \n".format(self.data.path)

    def save_video_form(self):
        """Append the video's metadata record to the store."""
        self.data.update(self.video_to_form())
        self.messages += "updated metadata to {} \n".format(self.data.path)

    def save_other_form(self):
        """Append a placeholder record marking the current file as skipped."""
        self.data.update(self.skipped_to_form())
        self.messages += "updated metadata with skipped entry to {} \n".format(self.data.path)

    def save_image(self, b):
        """Button callback: store the image and its metadata, then move on."""
        self.build_last()
        self.save_image_object()
        self.save_image_form()
        self.make_new_form()

    def save_other(self, b):
        """Button callback: record the file as skipped, then move on."""
        self.build_last()
        self.save_other_form()
        self.make_new_form()

    def save_video(self, b):
        """Button callback: store the video and its metadata, then move on."""
        self.build_last()
        self.save_video_object()
        self.save_video_form()
        self.make_new_form()

    def skipped_to_form(self):
        """Return the placeholder record for a skipped or non-media file."""
        return {
            "display": False,
            "caption": "",
            "tags": "",
            "location": "",
            'image_id': self.make_name(),
            'src': self.path,
            'type': "",
            'width': 0,
            'height': 0,
            'album': "",
        }

    def image_to_form(self):
        """Return the metadata record for the current image, including its size."""
        width, height = get_image_dim(self.path)
        return {
            "display": True,
            "caption": self.caption.value,
            "tags": self.tags.value,
            "location": self.location.value,
            'image_id': self.make_name(),
            'src': self.path,
            'type': 'image',
            'width': width,
            'height': height,
            'album': self.scrape_album_entries()
        }

    def video_to_form(self):
        """Return the metadata record for the current video (size recorded as 0x0)."""
        return {
            "display": True,
            "caption": self.caption.value,
            "tags": self.tags.value,
            "location": self.location.value,
            'image_id': self.make_name(),
            'src': self.path,
            'type': 'video',
            'width': 0,
            'height': 0,
            'album': self.scrape_album_entries()
        }

    def get_type(self):
        """Classify the current path as 'video', 'image' or 'other' by its extension."""
        ext = self.path.split(".")[-1].lower()
        if ext in self.VIDEO_EXTENSIONS:
            return 'video'
        if ext in self.IMAGE_EXTENSIONS:
            return 'image'
        return 'other'

In [4]:
def yield_files(directory):
    """Yield the path of every file below ``directory``, recursively.

    Files whose name contains ``.DS_Store`` and files located in any
    directory whose path contains ``all-photos`` are left out.
    """
    # os.walk already descends into sub-directories, so no explicit
    # recursion is needed.
    for dirpath, _dirnames, filenames in os.walk(directory):
        if 'all-photos' in dirpath:
            continue
        for fname in filenames:
            if '.DS_Store' not in fname:
                yield os.path.join(dirpath, fname)

In [5]:
# Open the metadata store at its default path (an empty one is created if the
# file does not exist), then start the interactive form over every file found
# under the upload queue directory.
data = JSON()
f = form(yield_files('public/photos/queue'), data)

Currently on public/photos/queue/IMG_0269.jpg


In [6]:
# Export every stored record to its own "<image_id>.json" file under data.dirname.
data.to_mongo_data()

In [8]:
import os
try:
    from pymongo import MongoClient
except ModuleNotFoundError:
    !conda install -y pymongo
    from pymongo import MongoClient
with open('.mongo/.credentials') as f:
    lines = f.readlines()
    db_url = list(filter(lambda x: 'db_url' in x,lines))[0].split("=")[1].strip()
client = MongoClient(db_url)
db = client['kira-photos']
collection = db.get_collection('photos')
records = [i for i in collection.find({})]

json_filenames = list(os.walk(data.dirname))[0][2]
list_of_stored_image_ids = [i['image_id'] for i in records]
files_to_write = []
for filename in json_filenames:
    filepath = os.path.join(data.dirname, filename)
    with open(filepath) as f:
        json_file = json.load(f)
    
    if json_file['image_id'] not in list_of_stored_image_ids:
        files_to_write.append(json_file)
collection.insert_many(files_to_write)

Fetching package metadata .............
Solving package specifications: .

Package plan for installation in environment /opt/conda:

The following NEW packages will be INSTALLED:

    pymongo: 3.4.0-py36_0 defaults

pymongo-3.4.0- 100% |################################| Time: 0:00:00   4.88 MB/s


<pymongo.results.InsertManyResult at 0x7f47c0037cf0>

In [361]:
# Load the full list of records from the metadata file into memory.
obj = data.get_data()

In [363]:
# Overwrite the metadata file with the in-memory list `obj`
# (presumably after editing it by hand in another cell — TODO confirm).
with open(data.path, 'w') as f:
    json.dump(obj, f)

In [349]:
# Keep a copy of the metadata file before modifying it.
!cp data/info.json data/info-backup.json 

In [None]:

# Import each data/<name>.json file into a collection called <name>.
for file in data/*.json; do
    # With no matches the glob stays literal; skip it.
    [ -e "$file" ] || continue
    # Strip the directory and the .json suffix to get the collection name.
    col=$(basename "$file" .json)
    mongoimport -d db_name -c "$col" < "$file"
done

In [None]:
# Import every per-record JSON file into the hosted "photos" collection.
# Credentials are read from the MONGO_USER / MONGO_PASSWORD environment
# variables so they are never stored in the notebook. The password that was
# previously written here has been exposed and should be rotated.
if cd data/records; then
    for fname in *.json; do
        # With no matches the glob stays literal; skip it.
        [ -e "$fname" ] || continue
        mongoimport -h ds113826.mlab.com:13826 -d kira-photos -c photos \
            -u "$MONGO_USER" -p "$MONGO_PASSWORD" --file "$fname"
    done
fi