# Export metadata to a Django fixture

In [1]:
import os, sys
import pandas as pd
import json
from datetime import datetime as dt

In [2]:
sys.path.append('../src')
import utils


In [3]:
def create_django_datetimestamp(dt_object=None, naive_utc_offset='+01:00'):
    """
    Format a datetime as a timestamp string for a django datetime field.

    dt_object: datetime to format; if None the current local time
    (datetime.now()) is used.
    naive_utc_offset: UTC offset text appended when the datetime carries no
    timezone information. Defaults to '+01:00'. The naive wall-clock time is
    only labelled with this offset, it is not converted, so pass a
    timezone-aware datetime (or another offset) when '+01:00' does not match
    the clock the time was read from.

    Timezone-aware datetimes are written with their own UTC offset instead
    of the fixed default.

    returns: str such as "2020-05-26T11:40:56+01:00"
    """
    created_time = dt.now() if dt_object is None else dt_object

    # getattr keeps objects without a utcoffset method (e.g. plain dates)
    # working exactly as before: they get the naive offset.
    utcoffset = getattr(created_time, 'utcoffset', None)
    offset = utcoffset() if callable(utcoffset) else None

    if offset is None:
        offset_str = naive_utc_offset
    else:
        # build "+HH:MM" / "-HH:MM" from the datetime's own offset
        total_minutes = int(offset.total_seconds() // 60)
        sign = '-' if total_minutes < 0 else '+'
        hours, minutes = divmod(abs(total_minutes), 60)
        offset_str = '{}{:02d}:{:02d}'.format(sign, hours, minutes)

    # for django, timefield must be in format YYYY-MM-DD HH:MM[:ss[.uuuuuu]][TZ]
    # e.g. "2020-05-26T11:40:56+01:00"
    return created_time.strftime('%Y-%m-%dT%H:%M:%S') + offset_str

In [4]:
def df_to_json_fixture(df,
                       app_name,
                       model_name,
                       file_name_modifier='',
                       output_folder=None,
                       use_df_index_as_pk=False,
                       pk_start_num=1000,
                       create_datetimefield_name=None,
                       created_by_field_name=None,
                       created_by_value=1):

    """
    Convert a dataframe to a django JSON fixture file to populate a database.
    Each row becomes one record, each column becomes a field of that record.

    df: dataframe to export. NOTE: when create_datetimefield_name or
    created_by_field_name is set, that column is added to (or overwritten
    in) df itself, so the caller's dataframe is modified.
    app_name: app name in django
    model_name: model name in django
    file_name_modifier: text appended to model_name to build the file name
    "<model_name><file_name_modifier>.json"
    output_folder: destination folder to output the file to, created if it
    does not exist. Defaults to '../data/processed/fixtures'.
    use_df_index_as_pk: if True the values of df.index become the primary
    keys of the records. No checks (e.g. uniqueness) are performed.
    pk_start_num: if use_df_index_as_pk is False, primary keys are the row
    position plus this number.
    create_datetimefield_name: set to the name of the datetimefield for
    recording when a record is created; every record gets the same
    timestamp from create_django_datetimestamp().
    created_by_field_name: set to the name of the field that records who
    created the record; every record gets created_by_value.
    created_by_value: value written to created_by_field_name.

    returns: the list of fixture records (dicts with the keys 'model', 'pk'
    and 'fields') that was written to the file.

    raises: FileExistsError (an Exception subclass) if the output file
    already exists; nothing is overwritten.

    Missing values (NaN) are written as the token NaN, which is what
    json.dumps emits by default; clean them beforehand if the target field
    cannot take them.
    """

    model = "{}.{}".format(app_name, model_name)

    if create_datetimefield_name:
        df[create_datetimefield_name] = create_django_datetimestamp()

    if created_by_field_name:
        df[created_by_field_name] = created_by_value

    fname = model_name+'{}.json'.format(file_name_modifier)
    if output_folder is None:
        output_folder = '../data/processed/fixtures'
    fpath = os.path.join(output_folder, fname)

    # refuse to overwrite before doing any of the conversion work
    if os.path.exists(fpath):
        raise FileExistsError('did not save, file already exists: {}'.format(fpath))

    # Read the data column by column: tolist() returns plain Python scalars
    # and keeps each column's own dtype, whereas iterating rows upcasts
    # mixed numeric columns and can leave numpy scalars json cannot encode.
    columns = df.columns.tolist()
    column_values = [df.iloc[:, col_pos].tolist() for col_pos in range(len(columns))]

    if use_df_index_as_pk:
        # taken straight from the index, so it works whatever the index is named
        pks = df.index.tolist()
    else:
        pks = [row_pos + pk_start_num for row_pos in range(len(df))]

    fixture_lst = [
        {'model': model,
         'pk': pk,
         'fields': {col: values[row_pos]
                    for col, values in zip(columns, column_values)}}
        for row_pos, pk in enumerate(pks)
    ]

    # Serialise first so that a value json cannot encode does not leave a
    # half-written file behind that would block the next attempt.
    payload = json.dumps(fixture_lst,
                         skipkeys=False,
                         sort_keys=False)

    os.makedirs(output_folder, exist_ok=True)

    # 'x' fails instead of overwriting if the file appeared in the meantime
    with open(fpath, 'x') as f:
        f.write(payload)

    return fixture_lst

In [7]:
# Read the interim CSV, turn object_id back into a regular column, sort by it,
# and rename img_url to the field name image_url.
fpath = os.path.join('..', 'data', 'interim', 'graphik_portal_202101182225.csv')
df = (
    pd.read_csv(fpath, index_col='object_id')
    .reset_index()
    .sort_values(by=['object_id'])
    .rename(columns={'img_url': 'image_url'})
)

# Expected column layout of the frame that gets exported; checked by an assert further down.
cols = ['object_id', 'title', 'image_url', 'detail_url', 'detail_description']


In [11]:
# Base URL substituted for the local '../data/processed/' prefix when building image_url.
amazon_base_url = "https://grs-thumbnails.s3.eu-central-1.amazonaws.com/"

In [15]:
# Build image_url from the local image path by swapping the local folder for
# the bucket base URL, then drop the local path column.
# regex=False: the text to replace contains '.' characters that must be
# matched literally; the default of `regex` differs between pandas versions.
# NOTE: this cell consumes img_path, so it cannot be re-run without reloading df.
df['image_url'] = df['img_path'].str.replace("../data/processed/",
                                             amazon_base_url,
                                             regex=False)
df = df.drop(columns=['img_path'])

In [16]:
# Preview the first rows to check the rewritten image_url values.
df.head()

Unnamed: 0,object_id,title,image_url,detail_url,detail_description
0,3,Marcus Curtius stürzt sich in die Erdspalte,https://grs-thumbnails.s3.eu-central-1.amazona...,https://www.graphikportal.org/document/gpo0021...,Monogrammist IB [Nagler III 1950] (Erwähnt um ...
1,18,Die Philister bringen die Bundeslade in den Te...,https://grs-thumbnails.s3.eu-central-1.amazona...,https://www.graphikportal.org/document/gpo0021...,"Battista Franco (Um 1510 - 1561), Um 1525 - 1561"
2,19,Der grosse Saal im Schloss in Prag [Linke Bild...,https://grs-thumbnails.s3.eu-central-1.amazona...,https://www.graphikportal.org/document/gpo0021...,Egidius Sadeler (der Jüngere) (Um 1570 - 1629)...
3,33,Die schöne Försterin,https://grs-thumbnails.s3.eu-central-1.amazona...,https://www.graphikportal.org/document/gpo0021...,"Henry Wyatt (1794 - 1840), nach, 1835, Francis..."
4,52,Stigmatisation des heiligen Franziskus,https://grs-thumbnails.s3.eu-central-1.amazona...,https://www.graphikportal.org/document/gpo0021...,"Agostino Carracci (1557 - 1602), Ca. 1583"


In [18]:
# Guard: the frame must hold exactly the expected columns, in the expected order.
assert df.columns.tolist() == cols


In [20]:
# Write the ImageSearch.ImageMetadata fixture to the default output folder.
# Arguments left out keep the defaults of df_to_json_fixture. The call returns
# the list of written records and raises if the fixture file already exists.
fixture_dict = df_to_json_fixture(
    df,
    app_name='ImageSearch',
    model_name='ImageMetadata',
    pk_start_num=1000,
    create_datetimefield_name='created_date',
)

In [10]:
# Inspect the frame after the export: df_to_json_fixture added the created_date column to df in place.
# NOTE(review): the saved output of this cell has a lower execution count than the cells above
# and still shows image_url values from before the rewrite; re-run the notebook top to bottom to refresh it.
df.head()

Unnamed: 0,object_id,title,image_url,detail_url,detail_description,created_date
0,3,Marcus Curtius stürzt sich in die Erdspalte,https://www.e-gs.ethz.ch/eMP/eMuseumPlus?servi...,https://www.graphikportal.org/document/gpo0021...,Monogrammist IB [Nagler III 1950] (Erwähnt um ...,2021-01-19T09:00:21+01:00
1,18,Die Philister bringen die Bundeslade in den Te...,https://www.e-gs.ethz.ch/eMP/eMuseumPlus?servi...,https://www.graphikportal.org/document/gpo0021...,"Battista Franco (Um 1510 - 1561), Um 1525 - 1561",2021-01-19T09:00:21+01:00
2,19,Der grosse Saal im Schloss in Prag [Linke Bild...,https://www.e-gs.ethz.ch/eMP/eMuseumPlus?servi...,https://www.graphikportal.org/document/gpo0021...,Egidius Sadeler (der Jüngere) (Um 1570 - 1629)...,2021-01-19T09:00:21+01:00
3,33,Die schöne Försterin,https://www.e-gs.ethz.ch/eMP/eMuseumPlus?servi...,https://www.graphikportal.org/document/gpo0021...,"Henry Wyatt (1794 - 1840), nach, 1835, Francis...",2021-01-19T09:00:21+01:00
4,52,Stigmatisation des heiligen Franziskus,https://www.e-gs.ethz.ch/eMP/eMuseumPlus?servi...,https://www.graphikportal.org/document/gpo0021...,"Agostino Carracci (1557 - 1602), Ca. 1583",2021-01-19T09:00:21+01:00
