In [1]:
import os
import sys
from datetime import date
import ast


def get_conf(spark_context, application_conf_key='app_conf'):
    """Read one Spark configuration entry and parse it as a Python literal.

    The value stored under ``application_conf_key`` in
    ``spark_context.getConf()`` is converted with ``str`` and then evaluated
    with ``ast.literal_eval``.

    :param spark_context: object exposing ``getConf()``; the returned
        configuration must provide ``get(key)``.
    :param application_conf_key: name of the configuration entry to read.
    :return: the Python literal encoded by the entry's text.
    """
    spark_conf = spark_context.getConf()
    raw_value = spark_conf.get(application_conf_key)
    # str() means a lookup that yields None becomes the text 'None',
    # which literal_eval parses back to None.
    return ast.literal_eval(str(raw_value))


def read_configuration():
    """Return the configuration produced by ``build_dev_conf``.

    The current process arguments (``sys.argv``) are forwarded unchanged.
    """
    cli_args = sys.argv
    dev_conf = build_dev_conf(cli_args)
    return dev_conf

def build_dev_conf(argv):
    """Build the development configuration by delegating to ``build_conf``.

    :param argv: accepted for interface compatibility; it is not read here.
    :return: the value returned by ``build_conf`` for the settings below.
    """
    # First splice the date directives into the path template, then let
    # strftime substitute today's year/month/day.
    prods_path_pattern = 'dbfs:/mnt/tonja/cf/similar-titles/%s' % ('%Y/%m/%d')
    todays_prods_path = date.today().strftime(prods_path_pattern)

    dev_settings = {
        'forceUserRecommendations': True,
        'numIterations': 10,
        'program_feed': '/dbfs/mnt/tonja/similartitles-program-feed/similartitles-program-feed.json',
        'raw_data': '/mnt/tonja/mtgapi-recommendation/Prod/input-viewing-data-iq/*/*/*/*/*',
        'recommendForUsers': True,
        'write_prods_path': todays_prods_path,
        'write_users_path': 'dbfs:/mnt/tonja/cf/recommendations/',
    }
    return build_conf(**dev_settings)


def build_conf(
        raw_data=None,
        forceUserRecommendations=None,
        minSeriesViewings=1,
        numIterations=50,
        program_feed=None,
        recommendForUsers=None,
        write_prods_path=None,
        write_users_path=None):
    """Assemble the nested configuration dictionary.

    Every argument is copied verbatim into the result; all remaining entries
    are fixed constants defined in this function.

    :param raw_data: stored as ``conf['read']['rawData']``.
    :param forceUserRecommendations: stored as ``conf['forceUserRecommendations']``.
    :param minSeriesViewings: stored as ``conf['cleaning']['minSeriesViewings']``.
    :param numIterations: stored as ``conf['hyperparams']['als']['numIterations']``.
    :param program_feed: stored as ``conf['read']['programFeed']``.
    :param recommendForUsers: stored as ``conf['recommendForUsers']``.
    :param write_prods_path: stored as ``conf['write']['prods']``.
    :param write_users_path: stored as ``conf['write']['users']``.
    :return: a newly built ``dict`` (nothing is shared between calls).
    """
    input_paths = {
        'rawData': raw_data,
        'programFeed': program_feed,
    }
    output_paths = {
        'prods': write_prods_path,
        'users': write_users_path,
    }

    als_params = {
        'rank': 30,
        'numIterations': numIterations,
        'alpha': 60.0,
        'lambda': 0.01,
        'decayMin': 0.5,
        'minDays': 180,
    }
    similarity_weights = {
        'cosine': 0.97,
        'eucl': 0.14,
        'chebychev': 0.10,
        'jsdistance': 0.84,
    }

    # Tag ids, in order: kidSeriesTagId, kidMoviesTagId, kidsTagId
    children_tag_ids = ['98856437', '99368295', '21079080386']
    dataset_rules = {
        'children': {
            'suffix': '_kids',
            'tagIds': children_tag_ids,
        },
        'regular': {
            'suffix': '',
            # None equals "the rest"
            'tagIds': None,
        },
    }
    cleaning_rules = {
        'minSeriesViewings': minSeriesViewings,
        'maxDays': 180,
        'datasets': dataset_rules,
    }

    # Keys are inserted in the same order as the published layout.
    conf = {}
    conf['read'] = input_paths
    conf['write'] = output_paths
    conf['numberRecommended'] = 2000
    conf['numberThreads'] = 100
    conf['successFile'] = '_SUCCESS'
    conf['recommendForUsers'] = recommendForUsers
    conf['forceUserRecommendations'] = forceUserRecommendations
    conf['hyperparams'] = {
        'als': als_params,
        'similarityWeights': similarity_weights,
    }
    conf['cleaning'] = cleaning_rules
    return conf

In [2]:
%sh
# List the contents of /dbfs/databricks/cf/recommendations.
ls /dbfs/databricks/cf/recommendations