### Library Functions

In [5]:
import re
import json
import spacy
import sys, os, unicodedata
import pandas as pd
from datetime import datetime, timedelta
from pandas.io.json import json_normalize

In [6]:
# File Path Constants
URL_IGNORE_LIST_PATH              = './configuration/URL Ignore List.json'
KEYWORD_IGNORE_LIST_PATH          = './configuration/Keyword Ignore List.txt'
USER_SEGMENT_LIST_PATH            = './configuration/part-00000-2aa20d63-3e3a-47d2-8bed-d199cef5b814-c000.json'
DATERANGE_CONFIGURATION_PATH      = './configuration/daterange.txt'
AMAZON_WEB_SERVICE_E3_BASE_FOLDER = r'C:\Users\liferay\Documents\analytics data\export'

In [8]:
MINIMUM_TOPIC_OF_INTEREST_THRESHOLD_SCORE = 1
DECAY_MULTIPLIER_BASE = .90

# https://stackoverflow.com/questions/11066400/remove-punctuation-from-unicode-formatted-strings/11066687#11066687
PUNCTUATION_UNICODE_TABLE = dict.fromkeys(i for i in range(sys.maxunicode) if unicodedata.category(chr(i)).startswith('P'))
NON_ENGLISH_URL_REGEX = re.compile('\/zh(_CN)?\/'
                                   '|\/fr(_FR)?\/'
                                   '|\/de(_DE)?\/'
                                   '|\/it(_IT)?\/'
                                   '|\/ja(_JP|-JP)?\/'
                                   '|\/pt(-br|_BR|_PT)?\/'
                                   '|\/es(-es|_ES)?\/'
                                   '|\/ru\/')
WWW_OR_CUSTOMER_LIFERAY_URL_REGEX = re.compile(r'^https://www\.liferay|^https://customer\.liferay')
BOT_AND_CRAWLER_REGEX = re.compile('((.*)(bot|Bot)(.*)'
                                   '|(.*)spider(.*)'
                                   '|(.*)crawler(.*)'
                                   '|HubSpot'
                                   '|CloudFlare\-AlwaysOnline'
                                   '|WkHTMLtoPDF)')
PARENTHESIS_REGEX = re.compile(u'\(.*?\)')
BANNED_KEYWORDS_LIST = []
INTEREST_CALCULATION_WINDOW_TIMEDELTA = timedelta(30)

DATE_RANGE_OPTIONS = {
    'day'   : timedelta(1),
    'week'  : timedelta(7),
    'month' : timedelta(30)
}

UTM_PARAMETERS = ['utm_source', 'utm_medium', 'utm_campaign', 'utm_term', 'utm_content']
HUBSPOT_PARAMETERS = ['_hsenc', '_hsmi', '__hstc', '__hssc', '__hsfp']
GOOGLE_ANALYTICS_PARAMETERS = ['_ga', '_gac']
URL_REDIRECT_PARAMETERS = ['redirect', '_3_WAR_osbknowledgebaseportlet_redirect']
ALL_OPTIONAL_URL_PARAMETERS = UTM_PARAMETERS + HUBSPOT_PARAMETERS + GOOGLE_ANALYTICS_PARAMETERS + URL_REDIRECT_PARAMETERS

with open(KEYWORD_IGNORE_LIST_PATH, 'r', encoding='utf-8') as f:
    for line in f:
        BANNED_KEYWORDS_LIST.append(line.strip())

nlp = spacy.load('en')

# Populate URL Ignore List
URL_IGNORE_LIST_MATCH = []
URL_IGNORE_LIST_CONTAINS = []

with open(URL_IGNORE_LIST_PATH, 'r', encoding='utf-8') as f:
    for line in f:
        json_result = json.loads(line)      
        comparison_type = json_result['Type']
        
        if comparison_type == 'match':
            URL_IGNORE_LIST_MATCH = json_result['URLs']
        elif comparison_type == 'contains':
            URL_IGNORE_LIST_CONTAINS = json_result['URLs']
        else:
            print("UNEXPECTED TYPE: {}".format(comparison_type))

START_DATE_STRING = 0
END_DATE_STRING = 0
START_DATE_DATETIME = 0
END_DATE_DATETIME = 0
CALCULATE_YESTERDAY_ONLY = False

In [45]:
# Read configuration file for start/end dates
with open(DATERANGE_CONFIGURATION_PATH, 'r', encoding='utf-8') as f:
    
    # First parameter is to calculate 'all' or only 'yesterday' topics of interest
    for line in f:
        # Ignore lines starting with a pound-sign
        if line.startswith('#'):
            continue
        else:
            if line.strip() == 'yesterday':
                CALCULATE_YESTERDAY_ONLY = True
            break
                
    # Second parameter is the start date
    for line in f:
        # Ignore lines starting with a pound-sign
        if line.startswith('#'):
            continue
        else:
            START_DATE_STRING = line.strip()
            START_DATE_DATETIME = datetime.strptime(line.strip(), '%Y%m%d')
            break
            
    # Third parameter is for end date
    for line in f:
        # Ignore lines starting with a pound-sign
        if line.startswith('#'):
            continue
        else:
            if line == 'yesterday':
                END_DATE_DATETIME = (datetime.today() - timedelta(1))
                END_DATE_STRING = END_DATE_DATETIME.strftime('%Y%m%d')
            else:
                END_DATE_STRING = line.strip()
                END_DATE_DATETIME = datetime.strptime(line.strip(), '%Y%m%d')

In [26]:
if False:
    print(START_DATE_STRING)
    print(END_DATE_STRING)

    print(START_DATE_DATETIME)
    print(END_DATE_DATETIME)

    print(CALCULATE_YESTERDAY_ONLY)

20180121
20180514
2018-01-21 00:00:00
2018-05-14 00:12:57.733937
True


#### Augment Tokenizer
The tokenizer fails on many hypenated words, so I wanted to augment it to work better.
Examples: 

* State-of-the-art collaboration platform targets quality patient care.
* Share files with a simple drag-and-drop. Liferay Sync transforms the Liferay platform into a central and secure easy-to-use document sharing service.
* Importing/Exporting Pages and Content - portal - Knowledge | "Liferay

In [28]:
import spacy
from spacy.attrs import *

#from spacy.symbols import ORTH, POS, TAG

# Source: https://github.com/explosion/spaCy/issues/396


nlp = spacy.load('en')
nlp.tokenizer.add_special_case(u'state-of-the-art', [{ORTH: 'state-of-the-art',
                                                      LEMMA: 'state-of-the-art', 
                                                      LOWER: 'state-of-the-art',
                                                      SHAPE: 'xxxxxxxxxxxxxxxx',
                                                      POS: 'ADJ', 
                                                      TAG: 'JJ'}])
nlp.tokenizer.add_special_case(u'State-of-the-art', [{ORTH: 'State-of-the-art',
                                                      LEMMA: 'state-of-the-art', 
                                                      LOWER: 'state-of-the-art',
                                                      SHAPE: 'xxxxxxxxxxxxxxxx',
                                                      POS: 'ADJ', 
                                                      TAG: 'JJ'}])
nlp.tokenizer.add_special_case(u'drag-and-drop', [{ORTH: 'drag-and-drop',
                                                      LEMMA: 'drag-and-drop', 
                                                      LOWER: 'drag-and-drop',
                                                      SHAPE: 'xxxxxxxxxxxxx',
                                                      POS: 'ADJ', 
                                                      TAG: 'JJ'}])

In [29]:
# Library Functions

import re
import langdetect
import string
from collections import OrderedDict
from langdetect.lang_detect_exception import ErrorCode, LangDetectException
from string import printable


def playFinishedSound():
    """
    This is for alerting me that something has finished executing.
    This will play a sound.
    """
    from pygame import mixer
    mixer.init()
    mixer.music.load('./configuration/finished.mp3')
    mixer.music.play()

def replace_punctuation(text):
    """
    The purpose of this function is to replace non-ASCII punctuation with its equivalent.
    """
    return text.replace("’", "'")

def segmentWordsIntoKeyWordPhraseList(words, debug=False):

    phrase_list = []
    
    if debug: print("\nOriginal Sentence: {}".format(words))
    # First segment the words by '|' or '-'
    split_words = re.split(r'[\|]| \- ', words)
    split_words = [s.strip() for s in split_words]
    cleaned_up_and_split_sentences = []
    
    # Search for instances of acronymns surrounded in parenthesis. Ex: (DXP)
    # Remove those, and add it automatically to the phrase list
    for sentence in split_words:
        terms_within_parenthesis = [term[1:-1] for term in re.findall(PARENTHESIS_REGEX, sentence)]
        phrase_list += terms_within_parenthesis
        if debug: print(terms_within_parenthesis)
            
        remaining_text = ''.join(re.split(PARENTHESIS_REGEX, sentence))
        cleaned_up_and_split_sentences.append(remaining_text)
        if debug: print(remaining_text)
        
    for sentence in cleaned_up_and_split_sentences:
        if debug: print("Sentence: {}".format(sentence))
        doc = nlp(sentence)
        for chunk in doc.noun_chunks:
            if debug: print("\tText: {} \n\tRoot: {} \n\tRoot Dependency: {} \n\tRoot Head: {}".format(chunk.text, chunk.root.text, chunk.root.dep_, chunk.root.head.text))
            
            text = chunk.text
            if debug:
                print(text)
                print("\tPOS: {}".format(chunk[0].pos_))
                print("\tTag: {}".format(chunk[0].tag_))
                print("\tChunk[0]: {}".format(chunk[0]))
                
            # Skip keywords that contain CD (Cardinal Numbers) for now
            if 'CD' in [c.tag_ for c in chunk]:
                print("Skipping, contains CD")
                continue
            
            # Skip URLs
            url_found = False
            for token in chunk:
                if debug: print(token)
                if token.like_url:
                    url_found = True
                    print("Skipping, URL Detected! ({})".format(text))
                    break
                    
            if url_found:
                continue
            
            # We'll skip a phrase for now if it contains a number
            # E.g. Free download: Gartner's evaluation of 21 digital 
            # experience platform (DXP) providers based on their completeness of vision and ability to execute
            
            # CD - [5 Critical Things] Nobody Tells You About Building a Journey Map
            # Recursively remove until no more? - These six customer experience trends will shape business in 2018
            if chunk[0].tag_ in ['DT', 'PRP$', 'WP', 'PRP', 'WRB', 'CD', ':']:
                if debug: print("Starting 'ignore word' found in: {}".format(chunk))
                #text = ' '.join(s.text for s in chunk[1:])
                
                unwanted_text = chunk[0].text
                if debug: print("Unwanted text: {}".format(unwanted_text))
                text = chunk[1:].text
                
                # If we shrunk it down to nothing
                if not text:
                    continue
            
            # Removes invisible characters
            printable_string = ''.join(char for char in text.strip() if char in printable)
            
            # Converts string to lower case; if matches criteria
            # Note: Keep acroynmns the same, check if 2 or more letters, and all caps
            printable_string = modifyCapitalizationOfWords(printable_string)
            
            #if 'blog' in printable_string:
            #    print("Original Sentence: [{}]".format(words))
            #    print("Blog Word: [{}]".format(printable_string))
            
            if text == chunk.root.text:
                phrase_list.append(printable_string)
            else:
                phrase_list.append(printable_string)
                #phrase_list.append(chunk.root.text.lower())
            
    if debug: print("Final list: {}".format(phrase_list))
    return phrase_list
    
def modifyCapitalizationOfWords(text):
    """
    This function will take the given noun phrase, and adjust captialization as necessary.
    Currently it only retains acronymn capitalization.
    I should ventually add a proper noun list as well.
    """
    
    updated_text = [word if (len(word) >=2) and (word.upper() == word) else word.lower() for word in text.split()]
    
    return ' '.join(updated_text)
    
def isEnglish(text, debug=False):
    
    # Empty String
    if not text.strip():
        return False
    
    try:
        text.encode(encoding='utf-8').decode('ascii')
    except UnicodeDecodeError:
        if debug:
            print("Failed Unicode Detector")
        return False

    try:
        possible_language_list = langdetect.detect_langs(text)
        
        if debug:
            print(possible_language_list)
        
        for entry in possible_language_list:
            if ((entry.lang == 'en') and (entry.prob > .50)):
                return True
    
        return False

    except LangDetectException:
        print("**** Language Exception caught!")
        display("Original Text: [{}]".format(text))
    
    return True

In [43]:
def get_list_of_date_folders(start_date='20180227', end_date='20180326'):
    start_date = datetime.strptime(start_date, '%Y%m%d')
    end_date = datetime.strptime(end_date, '%Y%m%d')
    step = timedelta(days=1)

    list_of_date_folder_names = []

    while start_date <= end_date:
        date_string = start_date.date().strftime('%Y%m%d')
        list_of_date_folder_names.append(date_string)
        start_date += step

    return list_of_date_folder_names

def read_json_as_list(full_file_path):
    all_web_browsing_history = []

    with open(full_file_path, 'r', encoding='utf-8') as f:
        for counter, line in enumerate(f):
            dict_entry = json.loads(line)       
            all_web_browsing_history.append(dict_entry)
                
    return all_web_browsing_history
                

def convert_string_of_json_to_df(list_of_json):
    start_time = datetime.now()
    df = json_normalize(list_of_json)
    print("\tExecution Time: {}".format(datetime.now() - start_time))
    return df

### Populate Segment Info

In [31]:
# Populate Segment Information
segment_lookup_df = pd.DataFrame()
json_list = read_json_as_list(USER_SEGMENT_LIST_PATH)
segment_lookup_df = json_normalize(json_list)
display(segment_lookup_df)
segment_lookup_df = segment_lookup_df.set_index(['identifier', 'datasource', 'datasourceindividualpk'])['segmentnames'].apply(pd.Series).stack()
segment_lookup_df = pd.DataFrame(segment_lookup_df)
segment_lookup_df = segment_lookup_df.reset_index().rename(columns={0 : 'segmentName'})

# Switch order of columns
segment_lookup_df = segment_lookup_df[['segmentName', 'identifier', 'datasource', 'datasourceindividualpk']]


if False:
    display(temp_df)
    for index, row in temp_df.groupby('segmentName'):
        print("index")
        display(index)
        print("row")
        display(row)
        print("identifier")
        display(row['identifier'])
        break
        


Unnamed: 0,datasource,datasourceindividualpk,datecreated,datemodified,demographicvaluesmap.additionalName,demographicvaluesmap.address,demographicvaluesmap.addressRegion,demographicvaluesmap.age,demographicvaluesmap.birthDate,demographicvaluesmap.browserInfo,...,demographicvaluesmap.salary,demographicvaluesmap.sfCampaign,demographicvaluesmap.sfCampaignId,demographicvaluesmap.telephone,demographicvaluesmap.whatAreYouBuilding,demographicvaluesmap.whatBusinessChallengesAreYouFacing,demographicvaluesmap.whatWouldYouLikeToTalkAbout,demographicvaluesmap.worksFor,identifier,segmentnames
0,AWI_0f2_q_9uZvuIRBN3,AWMcP1kSgcT3bCtqg0B0,2018-03-29T04:31:07.026Z,2018-05-04T16:12:43.025Z,,,[California],,"[Jan 1, 1970]",[Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11;...,...,,,,[323-555-1212],,,[hello from production!],"[Liferay, Inc.]",AWJEyFZzAxvlVqtdUejP,"[test-country-6, test-country-5, test-country-..."
1,AWI_0f2_q_9uZvuIRBN3,AWMrshzkYcRwJ2HMl9L,2018-03-21T18:19:58.147Z,2018-05-04T15:46:13.251Z,,,,,,,...,,,,,,,,,AWJJygFDNu2tno9FNhiS,"[LRDCOM UAT, test-country-8, test-country-5, t..."
2,AWJJqhlLEfU1zWepDky,ce900bd2-cff6-408e-9f31-2fd8df69aaf2,2018-03-20T19:42:55.167Z,2018-05-04T15:02:28.774Z,,,,,"[Jan 1, 1970]",,...,,,,,,,,,AWJE75a_XXGJek5_6fgG,"[LRDCOM UAT, test-country-8, test-country-5, t..."
3,AWI_0f2_q_9uZvuIRBN3,AWMEF885_nPDtHhujxtt,2018-03-29T04:31:06.833Z,2018-05-04T18:29:23.267Z,,,[null],,"[Jan 1, 1970]",[Mozilla/5.0 (iPhone; CPU iPhone OS 11_2 like ...,...,,,,[859-992-9397],,[Knowledge Management],[Testing form],[Liferay],AWJE6fWZXXGJek5_6dee,"[test-country-6, test-country-5, test-country-..."
4,AWI_0f2_q_9uZvuIRBN3,AWJmhhegDxZZoLqfGlvt,2018-03-20T19:05:29.835Z,2018-05-04T09:39:39.803Z,,,[null],,"[Jan 1, 1970]",[Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_...,...,,,,,[Call Center Application],,,[Liferay],AWJEzVPrAxvlVqtdUg5I,"[test-country-6, test-country-5, test-country-..."
5,AWI_0f2_q_9uZvuIRBN3,AWMV_xFj_nPDtHhuj3Xu,2018-03-20T18:59:34.384Z,2018-05-04T18:16:57.836Z,,,,,"[Jan 1, 1970]",,...,,,,,,,,,AWJExdwAxvlVqtdUeMD,"[test-country-8, test-country-5, test-country-..."
6,AWI_0f2_q_9uZvuIRBN3,AWMfjwNOgcT3bCtqg166,2018-03-28T00:06:48.880Z,2018-05-04T16:39:38.839Z,,[Thaltej],,,"[Jan 1, 1970]",,...,,,,[9825500947],,,,,AWJE8CAv526STMIypds6,"[test-country-6, test-country-5, LRDCOM UAT, t..."
7,AWI_0f2_q_9uZvuIRBN3,AWLieIKmsCV4XVL0I1xJ,2018-03-29T04:30:54.581Z,2018-05-04T09:57:15.658Z,,,[null],,"[Jan 1, 1970]",[Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_...,...,,[70170000001Letn],[70170000001Letn],[669943712],,,[Test sobre nuevo formulario],[Liferay],AWJEyQt2U_NIucVEYIP3,"[test-country-6, test-country-5, test-country-..."
8,AWJESY7Tq_9uZvuIRDlC,1084893,2018-03-20T19:04:16.309Z,2018-05-04T11:14:13.447Z,,,,,"[Jan 1, 1970]",,...,,,,,,,,,AWJEzDS1U_NIucVEYJcz,"[test-country-8, test-country-5, test-country-..."
9,AWJESY7Tq_9uZvuIRDlC,204980,2018-03-20T19:05:29.835Z,2018-05-04T09:39:39.803Z,,,[null],,"[Jan 1, 1970]",[Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_...,...,,,,,[Call Center Application],,,[Liferay],AWJEzVPrAxvlVqtdUg5I,"[test-country-6, test-country-5, test-country-..."


In [39]:
if False: display(segment_lookup_df)

### ETL Functions

In [33]:
import re
import numpy as np
import pandas as pd
from furl import furl

def show_dataframe_length_before_and_after(f, df):
    print("\tBefore: {}".format(len(df)))
    df = f(df)
    print("\tAfter: {}".format(len(df)))
    return df

def keep_only_unload_events(df):
    df = df[df['eventid'] == 'unload']
    return df

def remove_all_bots(df):
    df = df[~df['context.crawler'].str.contains('True', na=False)]
    df = df[~df['context.userAgent'].str.match(BOT_AND_CRAWLER_REGEX, na=False)]
    return df

def remove_non_english_urls(df):
    df = df[~df['context.url'].str.contains(NON_ENGLISH_URL_REGEX, na=False)]
    return df

def populate_url_ignore_list(df):
    
    URL_IGNORE_LIST_MATCH_REGEX_STRING    = '|'.join(['^{}$'.format(s.strip()) for s in URL_IGNORE_LIST_MATCH])
    URL_IGNORE_LIST_CONTAINS_REGEX_STRING = '|'.join(URL_IGNORE_LIST_CONTAINS)

    # TODO: Maybe use 'normalized_url' only?
    df['Ignore URL'] = df['context.url'].str.match(URL_IGNORE_LIST_MATCH_REGEX_STRING) \
                     | df['context.og:url'].str.match(URL_IGNORE_LIST_MATCH_REGEX_STRING) \
                     | df['context.url'].str.match(URL_IGNORE_LIST_CONTAINS_REGEX_STRING) \
                     | df['context.og:url'].str.match(URL_IGNORE_LIST_CONTAINS_REGEX_STRING)
    return df

def remove_non_customer_www_lr_urls(df):
    df = df[df['context.url'].str.contains(WWW_OR_CUSTOMER_LIFERAY_URL_REGEX, na=False)]
    return df

def remove_empty_user_id_entries(df):
    df['userid'].replace('', np.nan, inplace=True)
    df.dropna(subset=['userid'], inplace=True)
    return df

def __removeUrlParameters(url, parameter_list):  
    f = furl(url)
    remaining_parameters = { k: f.args[k] for k in f.args if k not in parameter_list }
    f.args = remaining_parameters    
    return f.url

def populateNormalizedUrlField(df):
    df['normalized_url'] = df['context.og:url'].fillna(df['context.url'])
    df['normalized_url'] = df['normalized_url'].apply(lambda x: __removeUrlParameters(x, ALL_OPTIONAL_URL_PARAMETERS))
    return df

def replaceBlankSpacesWithNan(df):
    # '\s+' is 1 or more
    df.replace(r'^\s*$', np.nan, regex=True, inplace=True)
    return df

def filterUnwantedColumns(df):
    wanted_columns_list = ['eventdate', 
                           'analyticskey', 
                           'userid', 
                           'eventid', 
                           'Ignore URL',
                           'normalized_url',
                           'context.url', 'context.og:url', 
                           'context.title', 'context.og:title', 
                           'context.description', 'context.og:description', 
                           'context.keywords', 
                           'eventproperties.scrollDepth', 
                           'eventproperties.viewDuration', 
                           'context.userAgent', 
                           'context.platformName', 
                           'context.browserName', 
                           'context.country', 
                           'context.region', 
                           'context.city', 
                           'clientip']
    df = df[wanted_columns_list]
    return df

def convertColumnsToAppropriateDataTypes(df):
    print("Converting eventdate to datetime objects")
    df['eventdate'] = pd.to_datetime(df['eventdate'])
    return df

### Read JSON files and save as DataFrame

In [46]:
# Plan go through list of directories, and parse in all the relevant JSON files.

start_date = START_DATE_STRING
end_date = END_DATE_STRING

list_of_date_folder_names = get_list_of_date_folders(start_date=start_date, end_date=end_date)
full_df = pd.DataFrame()

start_time = datetime.now()

for sub_folder_name in list_of_date_folder_names:
    directory_name = os.path.join(AMAZON_WEB_SERVICE_E3_BASE_FOLDER, sub_folder_name)
    #print(directory_name)
    
    for filename in os.listdir(directory_name):
        full_directory_and_file_name = os.path.join(directory_name, filename)
        
        if filename.endswith(".json"): 
            try:
                print("\n{}".format(full_directory_and_file_name))
                json_list = read_json_as_list(full_directory_and_file_name)
                print("\tEntries: {}".format(len(json_list)))
                df = convert_string_of_json_to_df(json_list)

                # XXX: Workaround to improve memory usage
                df = keep_only_unload_events(df)

                full_df = full_df.append(df, ignore_index=True)
            except:
                print("Unexpected error detected!")

full_run_time = datetime.now() - start_time
print("\n\nFull Run-Time: {}".format(full_run_time))   


C:\Users\liferay\Documents\analytics data\export\20180504\part-00000-02c5cfd8-23fb-4974-b11c-ec168805abb7-c000.json
	Entries: 117202
	Execution Time: 0:00:46.890825

C:\Users\liferay\Documents\analytics data\export\20180505\part-00000-cf5559d0-19e7-42a4-8308-def2d5a0f664-c000.json
	Entries: 73083
	Execution Time: 0:00:28.045837

C:\Users\liferay\Documents\analytics data\export\20180506\part-00000-9ca544dc-c0be-401d-b198-5a3a08d0a86d-c000.json
	Entries: 99712
	Execution Time: 0:00:39.812080

C:\Users\liferay\Documents\analytics data\export\20180507\part-00000-9c97efcb-5c94-4a11-ac30-a60c0c034dfa-c000.json
	Entries: 19029
	Execution Time: 0:00:07.124155

C:\Users\liferay\Documents\analytics data\export\20180508\part-00000-8425e705-0f73-45fd-900a-94029e6a8f2e-c000.json
	Entries: 0
	Execution Time: 0:00:00.001003
Unexpected error detected!

C:\Users\liferay\Documents\analytics data\export\20180509\part-00000-4ce17fc1-82ce-42f6-a106-4cc79faf918d-c000.json
	Entries: 66601
	Execution Time: 0

AttributeError: type object 'datetime.datetime' has no attribute 'datetime'

In [47]:
import warnings

# Surpress Warning Messages from "removing non-English URLs"
warnings.filterwarnings("ignore", 'This pattern has match groups')

print("Keeping only UNLOAD events")
etl_df = show_dataframe_length_before_and_after(keep_only_unload_events, full_df)

print("Removing Bots")
etl_df = show_dataframe_length_before_and_after(remove_all_bots, etl_df)

print("Removing Non-English URLs")
etl_df = show_dataframe_length_before_and_after(remove_non_english_urls, etl_df)

print("Removing non-customer, non-www URLs")
etl_df = show_dataframe_length_before_and_after(remove_non_customer_www_lr_urls, etl_df)

print("Removing empty userid entries")
etl_df = show_dataframe_length_before_and_after(remove_empty_user_id_entries, etl_df)

print("Populating normalized_url field")
etl_df = populateNormalizedUrlField(etl_df)

print("Populating URL Ignore List")
etl_df = show_dataframe_length_before_and_after(populate_url_ignore_list, etl_df)
print("Ignoring {} URLs".format(len(etl_df[etl_df['Ignore URL'] == True])))

print("Removing unwanted columns")
etl_df = filterUnwantedColumns(etl_df)

print("Converting columns to appropriate data types")
etl_df = convertColumnsToAppropriateDataTypes(etl_df)

print("Replacing blank spaces with NaN")
etl_df = replaceBlankSpacesWithNan(etl_df)

Keeping only UNLOAD events
	Before: 50549
	After: 50549
Removing Bots
	Before: 50549
	After: 40833
Removing Non-English URLs
	Before: 40833
	After: 31280
Removing non-customer, non-www URLs
	Before: 31280
	After: 25775
Removing empty userid entries
	Before: 25775
	After: 25775
Populating normalized_url field
Populating URL Ignore List
	Before: 25775
	After: 25775
Ignoring 6246 URLs
Removing unwanted columns
Converting columns to appropriate data types
Converting eventdate to datetime objects
Replacing blank spaces with NaN


In [48]:
# Make a copy, and use it
clean_df = etl_df.copy()
display("Length: {}".format(len(clean_df)))

'Length: 25775'

### Save URLs for Web Scraping

In [49]:
# Disable for production (for now)
if False:
    url_s = pd.Series(clean_df['normalized_url'].unique()).sort_values()
    print("Number of URLs: {}".format(len(url_s)))
    url_s.to_csv('./output/Unique Visitor URLs.csv', index=False)

### Create DataFrame: URL Lookup Information
This will be the centralized URL to information Data Frame.

In [50]:
import os.path
from collections import defaultdict

url_to_title          = clean_df.groupby(['normalized_url'])['context.title'].apply(set)
url_to_og_title       = clean_df.groupby(['normalized_url'])['context.og:title'].apply(set)
url_to_description    = clean_df.groupby(['normalized_url'])['context.description'].apply(set)
url_to_og_description = clean_df.groupby(['normalized_url'])['context.og:description'].apply(set)
url_to_keywords       = clean_df.groupby(['normalized_url'])['context.keywords'].apply(set)

In [51]:
def createUrlToKeywordDf():
    columns = ['normalized_url',
           'analyticsclient.merged_title', 
           'analyticsclient.merged_description', 
           'analyticsclient.merged_keywords',
           'analyticsclient.generated_keywords']

    url_to_keyword_df = pd.DataFrame(columns=columns)
    url_to_keyword_df['normalized_url'] = clean_df['normalized_url'].unique()
    #display(url_to_keyword_df)
    return url_to_keyword_df

def generateKeywordsFromTitleDescriptionKeywords(title, og_title, description, og_description, keywords, debug=False):
    merged_title = title.union(og_title)
    merged_description = description.union(og_description)
    
    keywords_from_title = set()
    keywords_from_description = set()
    keywords_from_keywords = set()
    
    only_english_titles = set()
    only_english_descriptions = set()
    only_english_keyword_set = set()
    
    title_description_to_keyword_cache = defaultdict(int)

    for entry in merged_title:

        # Skip empty strings       
        if pd.isnull(entry):
            continue
            
        # remove weird HTML punct
        entry = replace_punctuation(entry)
        
        cached_result = title_description_to_keyword_cache[entry]
        
        if cached_result != 0:
            keywords_from_title.update(cached_result)
            only_english_titles.update([entry])
        elif isEnglish(entry):
            #print("isEnglish() passed")
            #print("entry: ", entry)
            keyword_phrase_list = segmentWordsIntoKeyWordPhraseList(entry, debug=False)
            keywords_from_title.update(keyword_phrase_list)
            only_english_titles.update([entry])
            #print("entry: {}".format(entry))
            #print("only_english_titles: {}".format(only_english_titles))
            
            # Update Cache:
            title_description_to_keyword_cache[entry] = keyword_phrase_list
        else:
            print("Non-English detected: [{}]".format(entry))
            title_description_to_keyword_cache[entry] = []
            continue
    
    for entry in merged_description:        
        # Skip empty strings
        if pd.isnull(entry):
            continue
            
        # remove punct
        entry = replace_punctuation(entry)
        
        cached_result = title_description_to_keyword_cache[entry]
        
        if cached_result != 0:
            keywords_from_description.update(cached_result)
            only_english_descriptions.update([entry])
        elif isEnglish(entry):
            keyword_phrase_list = segmentWordsIntoKeyWordPhraseList(entry)
            keywords_from_description.update(keyword_phrase_list)
            only_english_descriptions.update([entry])
            
            # Update Cache:
            title_description_to_keyword_cache[entry] = keyword_phrase_list
        else:
            print("Non-English detected: [{}]".format(entry))
            title_description_to_keyword_cache[entry] = []
            continue
        
    for entry in keywords:
        
        # Skip empty strings
        if pd.isnull(entry):
            continue
            
        if isEnglish(entry):
            split_list = [s.strip() for s in entry.split(',')]
            keywords_from_keywords.update(set(split_list if split_list else []))
            only_english_keyword_set.update(set(split_list if split_list else []))
        else:
            print("Non-English detected: [{}]".format(entry))
            continue
    
    # Debugging
    if debug:
        print("\n\tMerged Title: {} => {}".format(only_english_titles, keywords_from_title))
        print("\tMerged Descr: {} => {}".format(only_english_descriptions, keywords_from_description))
        print("\tKeywords:     {} => {}".format(only_english_keyword_set, keywords_from_keywords))
        
    # merge all sets together
    all_keywords_merged = keywords_from_keywords.union(keywords_from_title, keywords_from_description)
    if debug: print("\tAll Keywords: {}".format(all_keywords_merged))

    # We return the English list of inputs we processed, and the final keyword output
    return list(only_english_titles), list(only_english_descriptions), list(only_english_keyword_set), list(all_keywords_merged)

def populateUrlToKeywordDf(url_to_keyword_df, debug=False):
    unique_url_list = url_to_keyword_df['normalized_url'].unique()

    for counter, url in enumerate(unique_url_list):       
        title = url_to_title.get(url, set())
        og_title = url_to_og_title.get(url, set())
        description = url_to_description.get(url, set())
        og_description = url_to_og_description.get(url, set())
        keywords_set = url_to_keywords.get(url, set())

        if debug: 
            print('\n{} / {}'.format(counter, len(unique_url_list)))
            print('{}'.format(url))
        merged_title_list, merged_description_list, merged_keyword_list, generated_keyword_list = generateKeywordsFromTitleDescriptionKeywords(title, og_title, description, og_description, keywords_set)

        # Populate url_to_keyword_df, with keywords
        index = url_to_keyword_df.loc[url_to_keyword_df['normalized_url'] == url]
        if len(index.index.values) > 1:
            print("ERROR: There shouldn't be more than 1 entry for the URL list!")
            print("index: {}".format(index))
            print("index.index.values: {}".format(index.index.values))
            break

        if len(merged_title_list) > 0: 
            url_to_keyword_df.at[index.index.values[0], 'analyticsclient.merged_title'] = merged_title_list

        if len(merged_description_list) > 0: 
            url_to_keyword_df.at[index.index.values[0], 'analyticsclient.merged_description'] = merged_description_list

        if len(merged_keyword_list) > 0: 
            url_to_keyword_df.at[index.index.values[0], 'analyticsclient.merged_keywords'] = merged_keyword_list

        url_to_keyword_df.at[index.index.values[0], 'analyticsclient.generated_keywords'] = generated_keyword_list
        
        if counter % 100 == 0:
            print("{} / {}".format(counter, len(unique_url_list)))
        
    return url_to_keyword_df

def addKeywordBoosting(df, debug=True):
    www_lr_manual_keywords = pd.read_csv('./manually generated keywords/www-lr-manual-keywords.csv')
    customer_lr_manual_keywords = pd.read_csv('./manually generated keywords/customer-lr-manual-keywords.csv')
    all_lr_manual_keywords = www_lr_manual_keywords.append(customer_lr_manual_keywords, ignore_index=True)
    all_lr_manual_keywords = all_lr_manual_keywords[['URL', 'Keywords']]
    all_lr_manual_keywords = all_lr_manual_keywords.dropna(how='any')
    all_lr_manual_keywords['Keywords'] = all_lr_manual_keywords['Keywords'].apply(lambda x: 
                                               [modifyCapitalizationOfWords(s.strip()) for s in x.split(',') if s.strip()])

    # Populate existing url-to-keyword lookup dataframe
    temp_df = pd.merge(df, all_lr_manual_keywords, how='left', left_on='normalized_url', right_on='URL')

    # Rename the "Keywords" column to "manual.keywords"
    temp_df.rename(columns={'Keywords' : 'manual.keywords'}, inplace=True)

    # Rearrange order of columns
    temp_df = temp_df[['normalized_url',
                       'analyticsclient.generated_keywords',
                       'manual.keywords',
                       'analyticsclient.merged_title',
                       'analyticsclient.merged_description', 
                       'analyticsclient.merged_keywords']]

    # Replace analyticsclient.generated_keywords [] with NaN
    temp_df.loc[temp_df['analyticsclient.generated_keywords'].str.len() == 0, 'analyticsclient.generated_keywords'] = np.nan

    # Filter out URLs where the "Automatically Generated Keywords" or "Manually generated Keywords" are missing
    if debug:
        print("Removing entries where both auto & manually generated keywords are missing")
        print("Before: {}".format(len(temp_df)))
    
    with pd.option_context('display.max_rows', 200, 'display.max_columns', None, 'display.max_colwidth', 50):
        display(temp_df)

    temp_df = temp_df[(~temp_df['analyticsclient.generated_keywords'].isnull()) | (~temp_df['manual.keywords'].isnull())]

    #with pd.option_context('display.max_rows', 200, 'display.max_columns', None, 'display.max_colwidth', 50):
    #    display(temp_df)

    if debug:
        print("After: {}".format(len(temp_df)))

    return temp_df

def generateUrlToKeywordDict(df, keyword_types=[''], use_banned_word_list=True, debug=True):
    """
    TODO:
    There will be multiple options for what type of keywords you can select from
    * manual - these are the tags manually added (there aren't that many of these)
    * title_description_keyword - these are the tags provided by the metadata
    * web_scraping - these are the tags generated by web scraping
    """
    import numpy
    
    # Add new empty column to df, for storing the combined keywords
    df['combined keywords'] = np.nan
    df['combined keywords'] = df['combined keywords'].astype(object)
    
    url_s = df['normalized_url'].unique()
    url_lookup_cache = dict()
    no_keywords_urls = []
    
    for counter, url in enumerate(url_s):
        
        if debug: 
            print("\n{} / {} - {}".format(counter, len(url_s), url))
        generated_keyword_list          = df.loc[df['normalized_url'] == url]['analyticsclient.generated_keywords'].values.tolist()
        manually_populated_keyword_list = df.loc[df['normalized_url'] == url]['manual.keywords'].values.tolist()

        # Filter [nan] scenarios
        if numpy.nan in generated_keyword_list:
            generated_keyword_list = []
        elif len(generated_keyword_list) >= 1:
            generated_keyword_list = generated_keyword_list[0]

        if numpy.nan in manually_populated_keyword_list:
            manually_populated_keyword_list = []
        elif len(manually_populated_keyword_list) >= 1:
            manually_populated_keyword_list = manually_populated_keyword_list[0]

        aggregate_keyword_list = list(set(generated_keyword_list).union(set(manually_populated_keyword_list)))

        if use_banned_word_list:
            aggregate_keyword_list = [w for w in aggregate_keyword_list if w.lower() not in BANNED_KEYWORDS_LIST]

        # Cache result
        url_lookup_cache[url] = aggregate_keyword_list
        if debug:
            print("\t{}".format(aggregate_keyword_list))
        
        if not aggregate_keyword_list:
            print("\tWarning: [{}] has 0 entries!".format(url))
            no_keywords_urls.append(url)
            
        # Add the entry back to the dataframe     
        index = df.loc[df['normalized_url'] == url]
        df.at[index.index.values[0], 'combined keywords'] = aggregate_keyword_list
            
    return url_lookup_cache, df, no_keywords_urls

# For Debugging
def lookUpKeywordBreakdownBasedOnUrl(url):
    title = url_to_title.get(url, set())
    og_title = url_to_og_title.get(url, set())
    description = url_to_description.get(url, set())
    og_description = url_to_og_description.get(url, set())
    keywords_set = url_to_keywords.get(url, set())
    
    print("Title: {}".format(title))
    print("og_title: {}".format(og_title))
    print("description: {}".format(description))
    print("og_description: {}".format(og_description))
    print("keywords: {}".format(keywords_set))
    
def generateKeywordToIndividualKeywordList(url_to_keyword_df):
    url_to_keyword_df = url_to_keyword_df[['normalized_url', 'combined keywords']]

    # Expand each normalized_url, into its own keyword row
    expanded_keywords_df = url_to_keyword_df['combined keywords'].apply(lambda x: pd.Series(x))

    url_to_unique_keyword_df = pd.DataFrame()

    for index, row in expanded_keywords_df.iterrows():
        row_df = row.dropna().to_frame(name='unique keyword')
        row_df['normalized_url'] = url_to_keyword_df['normalized_url'].loc[index]   
        url_to_unique_keyword_df = url_to_unique_keyword_df.append(row_df, ignore_index=True)

        if index % 500 == 0:
            print("{} / {}".format(index, len(expanded_keywords_df)))
    
    return url_to_unique_keyword_df

### Populate URL to Information Dataframe


I don't know why this is so resource intensive...
Maybe because of remove punctuation function?

In [None]:
url_to_keyword_df = createUrlToKeywordDf()
url_to_keyword_df = populateUrlToKeywordDf(url_to_keyword_df)

In [53]:
url_to_keyword_df = addKeywordBoosting(url_to_keyword_df)
url_lookup_cache, url_to_keyword_df, urls_without_keywords_list = generateUrlToKeywordDict(url_to_keyword_df)
url_to_unique_keyword_df = generateKeywordToIndividualKeywordList(url_to_keyword_df)

Removing entries where both auto & manually generated keywords are missing
Before: 1967


Unnamed: 0,normalized_url,analyticsclient.generated_keywords,manual.keywords,analyticsclient.merged_title,analyticsclient.merged_description,analyticsclient.merged_keywords
0,https://www.liferay.com/solutions/industries/b...,"[amp, innovation, liferay, website software, s...",,"[Banks &amp; Financial Services Intranet, Port...",[Liferay portal & intranet solutions helps ban...,
1,https://www.liferay.com/,"[most flexible platform, liferay, connected ex...","[liferay, DXP]",[Liferay: Digital experience software tailored...,"[Build modern portals, intranets, websites and...",
2,https://customer.liferay.com/c/portal/layout?p...,"[deployment, ""liferay, knowledge, introduction...",,"[Search - Knowledge | ""Liferay, Introduction t...",,
3,https://www.liferay.com/blog/en-us/digital-str...,"[digital strategy, liferay blogs, digital busi...",,[Maximizing the Value of Design | Digital Stra...,[How to streamline your design process & impro...,
4,https://www.liferay.com/company/our-story,"[digital experiences, liferay, software, web, ...","[liferay company, history]",[About Us | Liferay],[Liferay makes software that helps companies c...,
5,https://www.liferay.com/transform-your-busines...,"[leading digital experience platform, liferay]",,[Leading Digital Experience Platform | Liferay],,
6,https://www.liferay.com/locations,"[offices, liferay, locations, Locations, one]",,[Locations | Liferay],[Liferay has offices in over 15 countries. Fin...,[Locations]
7,https://customer.liferay.com/documentation/7.0...,"[portal, deployment, ""liferay, LCS connection,...",,[Troubleshooting Your LCS Connection - deploym...,,
8,https://www.liferay.com/events,"[networking, liferay, learning, upcoming event...",,[Learning and Networking Events | Liferay],"[Get connected with Liferay, join us at these ...",
9,https://www.liferay.com/events/liferay-confere...,"[cutting-edge technology, liferay, liferay's t...",,[Liferay Conferences | Liferay],"[Gather with Liferay's team, partners, and com...",


After: 1822

0 / 1822 - https://www.liferay.com/solutions/industries/banking
	['innovation', 'website software', 'intranet', 'website', 'wealth management', 'banks', 'financial services intranet', 'portal', 'financial services', 'improved customer experience', 'financial firms', 'liferay portal & intranet solutions']

1 / 1822 - https://www.liferay.com/
	['DXP', 'connected experiences', 'websites', 'digital experience software', 'modern portals', 'intranets']

2 / 1822 - https://customer.liferay.com/c/portal/layout?p_v_l_s_g_id=0&p_l_id=82890
	['deployment', 'portal', 'liferay DXP deployment', 'documentation']

3 / 1822 - https://www.liferay.com/blog/en-us/digital-strategy/maximizing-the-value-of-design
	['digital strategy', 'liferay blogs', 'digital business strategies', 'design', 'value', 'design process']

4 / 1822 - https://www.liferay.com/company/our-story
	['liferay company', 'digital experiences', 'web, mobile and connected devices']

5 / 1822 - https://www.liferay.com/transform

41 / 1822 - https://www.liferay.com/downloads/thanks-for-downloading
	['liferay portal', 'downloading liferay', 'liferay community']

42 / 1822 - https://customer.liferay.com/documentation/knowledge-base/-/kb/309969
	['knowledge base', 'SP', 'SLO', 'liferay SAML', 'portal', 'liferay DXP', 'SSO', 'idp']

43 / 1822 - https://customer.liferay.com/documentation/search?_1_WAR_osbknowledgebaseportlet_delta=20&p_p_id=1_WAR_osbknowledgebaseportlet&p_p_mode=view&_1_WAR_osbknowledgebaseportlet_assetCategoryIds=80679&p_p_lifecycle=0&_1_WAR_osbknowledgebaseportlet_cur=1&_1_WAR_osbknowledgebaseportlet_mvcPath=/search/view.jsp&p_p_state=normal
	['portal']

44 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&p_p_mode=view&_1_WAR_osbknowledgebaseportlet_assetCategoryIds=80679&p_p_lifecycle=0&_1_WAR_osbknowledgebaseportlet_keywords=feeds&_1_WAR_osbknowledgebaseportlet_mvcPath=/search/view.jsp&p_p_state=normal
	['portal']

45 / 1822 - https://www.liferay.co

85 / 1822 - https://www.liferay.com/en_AU/solutions/industries/government
	['mission-critical legacy systems', 'digital-first operations', 'intranet', 'software platform', 'portal open source software platform', 'government CMS', 'liferay intranet & portal software', 'local, state and federal government', 'portal open source']

86 / 1822 - https://www.liferay.com/company/gartner/magic-quadrant-digital-experience-platforms
	["gartner's evaluation", 'ability', 'DXP', 'free download', 'completeness', 'vision']

87 / 1822 - https://www.liferay.com/en_AU/resource?title=abc-education
	['ABC education']

88 / 1822 - https://customer.liferay.com/documentation/knowledge-base/-/kb/14494
	['knowledge base', 'java certificate', 'web server', 'tomcat', 'portal', 'HTTPS']

89 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=repository&p_p_state=normal
	['portal']

90 / 1822 - https://www.liferay.com/careers?p=job/

127 / 1822 - https://customer.liferay.com/documentation/7.0/develop/tutorials/-/official_documentation/tutorials/fundamentals
	['fundamentals', 'tutorials']

128 / 1822 - https://www.liferay.com/resources?17989categoryIds=1849769
	["liferay's resource library", 'business', 'ebooks', 'IT and business leaders', 'case studies', 'IT', 'leadership resources', 'whitepapers']

129 / 1822 - https://www.liferay.com/resource?title=tpf
	['main public transport company', 'transports publics', 'freiburg', 'swiss canton', 'TPF', 'SA']

130 / 1822 - https://www.liferay.com/events/web-events
	['things', 'webinars', 'savvy', 'liferay LIVE webinar series']

131 / 1822 - https://customer.liferay.com/documentation/7.0/deploy/-/official_documentation/deployment/preparing-for-install
	['portal', 'deployment', 'install']

132 / 1822 - https://www.liferay.com/blog/en-us/digital-strategy/what-you-need-to-know-about-gdpr
	['digital strategy', 'liferay blogs', 'liferay blog', 'practice', 'europe', 'privacy laws'

170 / 1822 - https://www.liferay.com/en/web/events-ldsf-uk/venue
	['charing cross', 'venue', 'iconic waldorf hilton hotel', 'holborn', 'walking distance', 'liferay digital solutions forum', 'Liferay Digital Solutions Forum 2018', 'LDSF venue']

171 / 1822 - https://www.liferay.com/en/web/events-ldsf-uk/
	['home']

172 / 1822 - https://www.liferay.com/web/events-ldsf-uk/agenda
	['collaborate', 'Liferay Digital Solutions Forum LDSF', 'agenda', 'share experience', 'delegates', 'action', 'change agent', 'liferay digital solutions forum', 'ideas']

173 / 1822 - https://www.liferay.com/web/events-ldsf-uk/
	['home']

174 / 1822 - https://www.liferay.com/group/control_panel?doAsGroupId=231869941&p_p_id=15&controlPanelCategory=current_site.content&refererPlid=231869987
	['web content']

175 / 1822 - https://www.liferay.com/group/control_panel/manage?p_p_id=1_WAR_osbwwwmarketingeventsportlet&doAsGroupId=231869941&p_p_lifecycle=0&controlPanelCategory=current_site.content&p_p_state=maximized&p_p_m


180 / 1822 - https://www.liferay.com/group/control_panel/manage?_1_WAR_osbwwwmarketingeventsportlet_tabs1=users&p_p_state=maximized&doAsGroupId=231869941&_1_WAR_osbwwwmarketingeventsportlet_mvcPath=/admin/edit_marketing_event.jsp&p_p_lifecycle=0&_1_WAR_osbwwwmarketingeventsportlet_advancedSearch=false&_1_WAR_osbwwwmarketingeventsportlet_redirect=/group/control_panel/manage?controlPanelCategory=current_site.content%26doAsGroupId=231869941%26p_p_id=1_WAR_osbwwwmarketingeventsportlet%26p_p_lifecycle=0%26p_p_mode=view%26p_p_state=maximized%26refererPlid=231869987&_1_WAR_osbwwwmarketingeventsportlet_delta=20&_1_WAR_osbwwwmarketingeventsportlet_keywords=&refererPlid=231869987&p_p_mode=view&_1_WAR_osbwwwmarketingeventsportlet_andOperator=true&p_p_id=1_WAR_osbwwwmarketingeventsportlet&_1_WAR_osbwwwmarketingeventsportlet_marketingEventId=231868441&_1_WAR_osbwwwmarketingeventsportlet_orderByCol=last-name&_1_WAR_osbwwwmarketingeventsportlet_cur=1&controlPanelCategory=current_site.content&_1_WAR_


183 / 1822 - https://www.liferay.com/group/control_panel/manage?_1_WAR_osbwwwmarketingeventsportlet_tabs1=sessions&p_p_id=1_WAR_osbwwwmarketingeventsportlet&doAsGroupId=231869941&_1_WAR_osbwwwmarketingeventsportlet_mvcPath=/admin/edit_marketing_event.jsp&p_p_lifecycle=0&_1_WAR_osbwwwmarketingeventsportlet_redirect=/group/control_panel/manage?controlPanelCategory=current_site.content%26doAsGroupId=231869941%26p_p_id=1_WAR_osbwwwmarketingeventsportlet%26p_p_lifecycle=0%26p_p_mode=view%26p_p_state=maximized%26refererPlid=231869987&_1_WAR_osbwwwmarketingeventsportlet_marketingEventId=231868441&controlPanelCategory=current_site.content&p_p_state=maximized&p_p_mode=view&refererPlid=231869987
	[]

184 / 1822 - https://www.liferay.com/group/control_panel/manage?p_p_id=1_WAR_osbwwwmarketingeventsportlet&_1_WAR_osbwwwmarketingeventsportlet_marketingEventSessionId=231902859&doAsGroupId=231869941&_1_WAR_osbwwwmarketingeventsportlet_mvcPath=/admin/edit_marketing_event_session.jsp&p_p_lifecycle=0&_

187 / 1822 - https://customer.liferay.com/documentation/knowledge-base/-/kb/12778
	['knowledge base', 'portal', 'LESA ticket', 'portal-ext.properties']

188 / 1822 - https://www.liferay.com/web/events-ldsf-uk/venue
	['charing cross', 'venue', 'iconic waldorf hilton hotel', 'holborn', 'walking distance', 'liferay digital solutions forum', 'Liferay Digital Solutions Forum 2018', 'LDSF venue']

189 / 1822 - https://www.liferay.com/web/events-ldsf-uk/home
	['Liferay Digital Solutions Forum', 'next steps', 'liferay digital solutions forum', 'digital innovation', 'home']

190 / 1822 - https://www.liferay.com/web/events-ldsf-uk/home?_15_groupId=231869941&p_p_col_id=column-1&p_p_state=maximized&_15_version=1.3&p_p_lifecycle=0&_15_folderId=0&p_p_mode=view&p_p_auth=HL1HzDYu&p_p_id=15&_15_articleId=231899705&_15_redirect=https://www.liferay.com:443/web/events-ldsf-uk/home?p_p_id=56_INSTANCE_nBekuwnc0gv2%26p_p_lifecycle=0%26p_p_state=normal%26p_p_mode=view%26p_p_col_id=column-1%26p_p_col_pos=1%26p

226 / 1822 - https://www.liferay.com/services/training/topics/front-end-developer
	['front end developer']

227 / 1822 - https://www.liferay.com/subscription-services/end-of-life/liferay-digital-enterprise
	['DXP', 'liferay digital experience platform', 'service life policy', 'service life', 'end', 'digital enterprise end']

228 / 1822 - https://www.liferay.com/resource?folderId=3292406&title=Enterprise+Subscription+Benefits
	['ways', 'reliability', "liferay's way", 'security', 'industry-leading software', 'high demands', 'expert guidance', 'overview', 'many benefits', 'enterprise subscription', 'digital business', 'legal assurance', "today's environment", 'maintenance', 'new technology solutions', 'assurances', 'digital change', 'conjunction', 'risks', 'support', 'expert resources']

229 / 1822 - https://www.liferay.com/consulting
	['architects', 'professional services', 'liferay global services packages', 'consulting', 'ROI', 'extensive product', 'liferay expert developers', 'liferay

265 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=liferay+sync&p_p_state=normal
	['portal']

266 / 1822 - https://www.liferay.com/web/guest/downloads/additional
	['connected experiences', 'websites', 'digital experience software', 'modern portals', 'intranets']

267 / 1822 - https://www.liferay.com/supporting-products/liferay-connected-services
	['liferay connected services', 'liferay projects', 'services', 'customers', 'alerts', 'portal management tools', 'pack management', 'performance monitoring', 'tools', 'set']

268 / 1822 - https://www.liferay.com/services/training/public-training
	['core classes', 'liferay training', 'small teams', 'public training', 'project', 'most important topics', 'best experience']

269 / 1822 - https://www.liferay.com/downloads/
	['liferay portal', 'download liferay DXP', 'liferay downloads']

270 / 1822 - https://www.liferay.com/home?p_p_lifecycle=0&p_p_id=1_WAR_goo

306 / 1822 - https://www.liferay.com/home?p_p_lifecycle=0&p_p_id=1_WAR_googlesearchapplianceportlet_INSTANCE_0000&p_p_state=maximized&_1_WAR_googlesearchapplianceportlet_INSTANCE_0000_keywords=activation+key
	['connected experiences', 'websites', 'digital experience software', 'modern portals', 'intranets']

307 / 1822 - https://www.liferay.com/careers?p=job/oLSz5fw1/applyConfirmation
	['liferay jobs', 'careers']

308 / 1822 - https://customer.liferay.com/documentation/knowledge-base/-/kb/50336
	['knowledge base', 'portal']

309 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=User+0+is+not+allowed+to+access+UR&p_p_state=normal
	['portal']

310 / 1822 - https://customer.liferay.com/documentation/knowledge-base/-/kb/146812
	['knowledge base', 'portal']

311 / 1822 - https://www.liferay.com/solutions/industries/insurance?doAsUserId=tnefbpccegmg/-/message_boards/message/6332552
	['innovation', 'insuranc

333 / 1822 - https://customer.liferay.com/documentation/7.0/deploy/-/official_documentation/deployment/configuring-remote-staging-in-a-clustered-environment
	['remote staging', 'deployment', 'portal', 'clustered environment']

334 / 1822 - https://www.liferay.com/transform-your-business-operations?ads_cmpid=1341043413&ads_targetid=kwd-1832712893&ads_creative=261552602338&ads_network=g&ads_adid=50569534701&ads_matchtype=e&gclid=EAIaIQobChMIh9iujKTs2gIVyJ6zCh09DQ5IEAAYASAAEgIZ2fD_BwE&ttv=2
	['leading digital experience platform']

335 / 1822 - https://customer.liferay.com/documentation/7.0/deploy/-/official_documentation/deployment/configuring-elasticsearch-for-liferay-0
	['deployment', 'elasticsearch', 'remote', 'portal', 'cluster', 'embedded']

336 / 1822 - https://www.liferay.com/en_AU/resource?title=advanced-energy
	['supplier communications', 'key system integration', 'plasma manufacturing', 'leader', 'advanced energy']

337 / 1822 - https://www.liferay.com/lp-thank-you
	[]

338 / 1

356 / 1822 - https://www.liferay.com/resource?title=britam
	['britam']

357 / 1822 - https://www.liferay.com/resource?title=haitione
	['ngos', 'haiti', 'new network', 'relief efforts']

358 / 1822 - https://www.liferay.com/group/control_panel?doAsGroupId=10182&p_p_id=15&controlPanelCategory=current_site.content&refererPlid=231354466
	['web content']

359 / 1822 - https://www.liferay.com/resource?title=carrefour-bank
	['competitors', 'multichannel self-service portal', 'carrefour bank']

360 / 1822 - https://www.liferay.com/group/control_panel/manage?_1_WAR_osbwwwmarketingeventsportlet_tabs1=marketing&p_p_state=maximized&_1_WAR_osbwwwmarketingeventsportlet_startDateGTDay=4&_1_WAR_osbwwwmarketingeventsportlet_title=&_1_WAR_osbwwwmarketingeventsportlet_advancedSearch=false&_1_WAR_osbwwwmarketingeventsportlet_anytime=true&p_p_id=1_WAR_osbwwwmarketingeventsportlet&_1_WAR_osbwwwmarketingeventsportlet_summary=&_1_WAR_osbwwwmarketingeventsportlet_startDateLTMonth=4&_1_WAR_osbwwwmarketingevents

365 / 1822 - https://www.liferay.com/group/control_panel/manage?_1_WAR_osbwwwmarketingeventsportlet_tabs1=details&p_p_id=1_WAR_osbwwwmarketingeventsportlet&doAsGroupId=10182&_1_WAR_osbwwwmarketingeventsportlet_mvcPath=/admin/edit_marketing_event.jsp&p_p_lifecycle=0&_1_WAR_osbwwwmarketingeventsportlet_redirect=https://www.liferay.com:443/group/control_panel/manage?p_p_id=1_WAR_osbwwwmarketingeventsportlet%26p_p_lifecycle=0%26p_p_state=maximized%26p_p_mode=view%26doAsGroupId=10182%26refererPlid=231354466%26controlPanelCategory=current_site.content%26_1_WAR_osbwwwmarketingeventsportlet_mvcPath=%252Fadmin%252Fedit_marketing_event.jsp%26_1_WAR_osbwwwmarketingeventsportlet_tabs1=details%26_1_WAR_osbwwwmarketingeventsportlet_redirect=https%253A%252F%252Fwww.liferay.com%253A443%252Fgroup%252Fcontrol_panel%252Fmanage%253Fp_p_id%253D1_WAR_osbwwwmarketingeventsportlet%2526p_p_lifecycle%253D0%2526p_p_state%253Dmaximized%2526p_p_mode%253Dview%2526doAsGroupId%253D10182%2526refererPlid%253D231354466%


368 / 1822 - https://www.liferay.com/group/control_panel/manage?_1_WAR_osbwwwmarketingeventsportlet_tabs1=marketing&p_p_state=maximized&_1_WAR_osbwwwmarketingeventsportlet_startDateGTDay=4&_1_WAR_osbwwwmarketingeventsportlet_title=&_1_WAR_osbwwwmarketingeventsportlet_advancedSearch=false&_1_WAR_osbwwwmarketingeventsportlet_anytime=true&p_p_id=1_WAR_osbwwwmarketingeventsportlet&_1_WAR_osbwwwmarketingeventsportlet_summary=&_1_WAR_osbwwwmarketingeventsportlet_startDateLTMonth=4&_1_WAR_osbwwwmarketingeventsportlet_marketingEventsSearchContainerPrimaryKeys=231827684,231814102,231870749,231822040,231868441,231814104,231834059,231894670,231864150,231910994,231896135,231907611,231882874,231908622,231864147,231894145,231783562,231870847,231905240,231864144&_1_WAR_osbwwwmarketingeventsportlet_andOperator=true&controlPanelCategory=current_site.content&_1_WAR_osbwwwmarketingeventsportlet_=04/04/2018&_1_WAR_osbwwwmarketingeventsportlet_startDateGTYear=2018&_1_WAR_osbwwwmarketingeventsportlet_formD

375 / 1822 - https://www.liferay.com/group/control_panel?controlPanelCategory=current_site.users&p_p_id=174&doAsGroupId=231815926&refererPlid=231815930
	['site memberships']

376 / 1822 - https://www.liferay.com/group/control_panel/manage?refererPlid=231815930&p_p_id=174&doAsGroupId=231815926&_174_tabs1=users&p_p_lifecycle=0&_174_groupId=231815926&_174_struts_action=/site_memberships_admin/edit_site_assignments&controlPanelCategory=current_site.users&p_p_state=maximized&p_p_mode=view&p_p_auth=7whrRfGp
	['site memberships']

377 / 1822 - https://www.liferay.com/group/control_panel/manage?p_p_state=maximized&doAsGroupId=231815926&_174_tabs1=users&p_p_lifecycle=0&refererPlid=231815930&p_p_mode=view&p_p_auth=7whrRfGp&_174_struts_action=/site_memberships_admin/edit_site_assignments&p_p_id=174&_174_tabs2=available&controlPanelCategory=current_site.users&_174_redirect=https://www.liferay.com:443/group/control_panel/manage?p_p_auth=7whrRfGp%26p_p_id=174%26p_p_lifecycle=0%26p_p_state=maximized%

391 / 1822 - https://www.liferay.com/resource?folderId=1645493&title=Liferay+in+the+Cloud+-+A+Guide+to+Deploying+Liferay+in+the+Cloud+With+Bitnami
	['growing lists', 'liferay portal', 'cloud', 'market', 'leading platforms', "bitnami's cloud hosting environment", 'benefits', 'open source applications', 'document', 'bitnami cloud hosting', 'characteristics', 'amazon cloud', 'AWS', 'dozens', 'deploying liferay', 'development environments', 'guide', 'apps', 'bitnami']

392 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=saml&p_p_state=normal
	['knowledge base', 'portal']

393 / 1822 - https://www.liferay.com/resource?title=grdf
	['genuine customer experiences', 'modern and mobile websites']

394 / 1822 - https://www.liferay.com/en/resource?folderId=13811&title=Data+Protection+for+Liferay+Services+and+Software
	['such as the most recent GDPR in Europe', 'data privacy', 'compliance', 'liferay software pro

	['usable digital experiences customers', 'DXP', 'liferay digital experience platform', 'single, consolidated platform']

420 / 1822 - https://customer.liferay.com/documentation/search?_1_WAR_osbknowledgebaseportlet_delta=20&p_p_id=1_WAR_osbknowledgebaseportlet&p_p_mode=view&p_p_lifecycle=0&_1_WAR_osbknowledgebaseportlet_cur=1&_1_WAR_osbknowledgebaseportlet_keywords=saml+dxp&_1_WAR_osbknowledgebaseportlet_mvcPath=/search/view.jsp&p_p_state=normal
	['portal']

421 / 1822 - https://www.liferay.com/company/our-story?p_p_lifecycle=0&p_p_id=1_WAR_googlesearchapplianceportlet_INSTANCE_0000&p_p_state=maximized&_1_WAR_googlesearchapplianceportlet_INSTANCE_0000_keywords=leadership
	['web, mobile and connected devices', 'digital experiences']

422 / 1822 - https://www.liferay.com/en_AU/downloads
	['liferay portal', 'download liferay DXP', 'liferay downloads']

423 / 1822 - https://www.liferay.com/resource?folderId=3292406&title=Liferay+DXP+Datasheet
	['document', 'liferay digital experience plat

457 / 1822 - https://www.liferay.com/resources/l?ads_cmpid=871293813&ads_targetid=aud-342451739983:kwd-297933282645&ads_matchtype=b&title=portals&ads_network=g&ads_adid=52338392364&ads_creative=254384372545&gclid=EAIaIQobChMInteRuvbs2gIVREwNCh2JpAoaEAMYASAAEgInFPD_BwE&ttv=2
	['discover', 'self service', 'common problems', 'portals', 'that solve enterprise problems', 'partner portals', 'internal service', 'social collaboration', 'leading enterprises', 'digital business']

458 / 1822 - https://www.liferay.com/home?p_p_lifecycle=0&p_p_id=1_WAR_googlesearchapplianceportlet_INSTANCE_0000&p_p_state=maximized&_1_WAR_googlesearchapplianceportlet_INSTANCE_0000_keywords=how+to+check+license
	['connected experiences', 'websites', 'digital experience software', 'modern portals', 'intranets']

459 / 1822 - https://www.liferay.com/leading-open-source-enterprise-platform
	['leading open source enterprise platform']

460 / 1822 - https://customer.liferay.com/documentation/knowledge-base/-/kb/400574
	[

490 / 1822 - https://www.liferay.com/group/control_panel/manage?p_p_id=1_WAR_osbwwwmarketingeventsportlet&doAsGroupId=10182&_1_WAR_osbwwwmarketingeventsportlet_mvcPath=/admin/edit_marketing_event.jsp&p_p_lifecycle=0&_1_WAR_osbwwwmarketingeventsportlet_redirect=https://www.liferay.com:443/group/control_panel/manage?p_p_id=1_WAR_osbwwwmarketingeventsportlet%26p_p_lifecycle=0%26p_p_state=maximized%26p_p_mode=view%26doAsGroupId=10182%26refererPlid=231354466%26controlPanelCategory=current_site.content%26_1_WAR_osbwwwmarketingeventsportlet_tabs1=marketing%26_1_WAR_osbwwwmarketingeventsportlet_mvcPath=%252Fadmin%252Fview.jsp%26_1_WAR_osbwwwmarketingeventsportlet_cur=1%26_1_WAR_osbwwwmarketingeventsportlet_delta=20%26_1_WAR_osbwwwmarketingeventsportlet_keywords=%26_1_WAR_osbwwwmarketingeventsportlet_advancedSearch=false%26_1_WAR_osbwwwmarketingeventsportlet_andOperator=true%26_1_WAR_osbwwwmarketingeventsportlet_orderByCol=start-date%26_1_WAR_osbwwwmarketingeventsportlet_orderByType=desc&_1_WAR

493 / 1822 - https://www.liferay.com/group/control_panel/manage?p_p_id=1_WAR_osbwwwmarketingeventsportlet&doAsGroupId=10182&_1_WAR_osbwwwmarketingeventsportlet_mvcPath=/admin/edit_marketing_event.jsp&p_p_lifecycle=0&_1_WAR_osbwwwmarketingeventsportlet_redirect=https://www.liferay.com:443/group/control_panel/manage?p_p_id=1_WAR_osbwwwmarketingeventsportlet%26p_p_lifecycle=0%26p_p_state=maximized%26p_p_mode=view%26doAsGroupId=10182%26refererPlid=231354466%26controlPanelCategory=current_site.content%26_1_WAR_osbwwwmarketingeventsportlet_tabs1=marketing%26_1_WAR_osbwwwmarketingeventsportlet_mvcPath=%252Fadmin%252Fview.jsp%26_1_WAR_osbwwwmarketingeventsportlet_cur=1%26_1_WAR_osbwwwmarketingeventsportlet_delta=20%26_1_WAR_osbwwwmarketingeventsportlet_keywords=roadshow%26_1_WAR_osbwwwmarketingeventsportlet_advancedSearch=false%26_1_WAR_osbwwwmarketingeventsportlet_andOperator=true%26_1_WAR_osbwwwmarketingeventsportlet_orderByCol=start-date%26_1_WAR_osbwwwmarketingeventsportlet_orderByType=des


499 / 1822 - https://www.liferay.com/group/control_panel/manage?_1_WAR_osbwwwmarketingeventsportlet_tabs1=marketing&_1_WAR_osbwwwmarketingeventsportlet_advancedSearch=true&_1_WAR_osbwwwmarketingeventsportlet_title=&_1_WAR_osbwwwmarketingeventsportlet_startDateGTDay=4&p_p_state=maximized&_1_WAR_osbwwwmarketingeventsportlet_anytime=true&_1_WAR_osbwwwmarketingeventsportlet_types=4&p_p_id=1_WAR_osbwwwmarketingeventsportlet&_1_WAR_osbwwwmarketingeventsportlet_summary=&_1_WAR_osbwwwmarketingeventsportlet_startDateLTMonth=4&_1_WAR_osbwwwmarketingeventsportlet_marketingEventsSearchContainerPrimaryKeys=231827684,231814102,231821565,231870749,231822040,231868441,231814104,231834059,231819473,231912625,231894670,231887124,231864150,231910994,231896135,231907611,231882874,231813674,231908622,231864147&_1_WAR_osbwwwmarketingeventsportlet_andOperator=true&controlPanelCategory=current_site.content&_1_WAR_osbwwwmarketingeventsportlet_=04/04/2018&_1_WAR_osbwwwmarketingeventsportlet_startDateGTYear=201

508 / 1822 - https://www.liferay.com/group/control_panel/manage?p_p_id=20&doAsGroupId=10182&_20_folderId=13727&_20_struts_action=/document_library/view&_20_redirect=https://www.liferay.com:443/group/control_panel/manage?p_p_id=20%26p_p_lifecycle=0%26p_p_state=maximized%26p_p_mode=view%26doAsGroupId=10182%26refererPlid=231354466%26controlPanelCategory=current_site.content&p_p_lifecycle=0&controlPanelCategory=current_site.content&p_p_state=maximized&p_p_mode=view&refererPlid=231354466
	['SERVICES', 'media', 'documents']

509 / 1822 - https://www.liferay.com/en/web/events-symposium-north-america
	['register today', 'liferay symposium north america', 'innovative solutions', 'industry leaders', 'liferay symposium', 'key engineers', "liferay's key developers", 'world', 'few opportunities', 'new orleans', 'practical sessions', 'best practices', 'fall', 'experts', 'workshops', 'LA', 'business challenges']

510 / 1822 - https://www.liferay.com/en/group/control_panel?controlPanelCategory=current

525 / 1822 - https://customer.liferay.com/documentation/7.0/deploy/-/official_documentation/deployment/installing-patches-on-the-liferay-de-war
	['portal', 'deployment', 'liferay digital enterprise', 'patches']

526 / 1822 - https://customer.liferay.com/documentation/7.0/deploy/-/official_documentation/deployment/monitoring-garbage-collection-and-jvm
	['portal', 'deployment', 'JVM', 'garbage collection']

527 / 1822 - https://www.liferay.com/services/partners?countryA2Codes=PA
	['partners']

528 / 1822 - https://www.liferay.com/en_AU/events
	['upcoming events', 'learning', 'networking', 'events']

529 / 1822 - https://www.liferay.com/en_AU/login
	[]

530 / 1822 - https://www.liferay.com/login
	[]

531 / 1822 - https://www.liferay.com/resources/l?ads_cmpid=871293813&ads_targetid=kwd-296456588306&ads_matchtype=b&title=intranets&ads_network=g&ads_adid=50691642360&ads_creative=254360189505&gclid=EAIaIQobChMI4Om4xLXt2gIVSjyBCh383AKiEAAYBCAAEgJYlPD_BwE&ttv=2
	['guidelines', 'employee product

566 / 1822 - https://www.liferay.com/resources/l?hsa_ad=261030737506&hsa_net=adwords&hsa_mt=b&hsa_tgt=kwd-324521545143&hsa_grp=52338392364&hsa_kw=what's+a+web+portal&hsa_cam=871293813&hsa_ver=3&title=portals&gclid=EAIaIQobChMI497bwoHw2gIVllYNCh0kdAinEAAYAyAAEgLOBPD_BwE&hsa_src=s&hsa_acc=9733166788
	['discover', 'self service', 'common problems', 'portals', 'that solve enterprise problems', 'partner portals', 'internal service', 'social collaboration', 'leading enterprises', 'digital business']

567 / 1822 - https://www.liferay.com/resources/l?ads_cmpid=871293813&ads_targetid=kwd-298881187386&ads_matchtype=b&title=portals&ads_network=s&ads_adid=52338392364&ads_creative=254384372545&gclid=Cj0KCQjwrLXXBRCXARIsAIttmROwKhQP-5QlIuClOxeqxco_oC2mHNmS8cBT3B5ep8x4XxeuW4ZPIbwaAuDZEALw_wcB&ttv=2
	['discover', 'self service', 'common problems', 'portals', 'that solve enterprise problems', 'partner portals', 'internal service', 'social collaboration', 'leading enterprises', 'digital business']

568 

	['connected experiences', 'websites', 'digital experience software', 'modern portals', 'intranets']

607 / 1822 - https://www.liferay.com/web/discover/magic-quadrant-digital-experience-platforms?hsa_ver=3&hsa_ad=258215791133&hsa_cam=1270442147&hsa_tgt=kwd-0&hsa_acc=9733166788&hsa_mt=&gclid=EAIaIQobChMIkfXmorPx2gIVgxmBCh2rsgRDEAEYASAAEgKd6PD_BwE&hsa_net=adwords&hsa_src=d&hsa_grp=58660936932&hsa_kw=
	["gartner's evaluation", 'ability', 'DXP', 'free download', 'completeness', 'vision']

608 / 1822 - https://www.liferay.com/services/training/6.2/topics/developer-1
	['liferay platform']

609 / 1822 - https://www.liferay.com/careers?p=job/oIb46fwN/apply
	['liferay jobs', 'careers']

610 / 1822 - https://www.liferay.com/careers?p=job/oIb46fwN/applyConfirmation
	['liferay jobs', 'careers']

611 / 1822 - https://www.liferay.com/careers?p=apply
	['liferay jobs', 'careers']

612 / 1822 - https://customer.liferay.com/documentation/7.0/admin/-/official_documentation/portal/adding-and-managing-user

643 / 1822 - https://customer.liferay.com/documentation/knowledge-base/-/kb/2951285
	['knowledge base', 'control panel', 'liferay digital enterprise', 'portal', 'renamed marketplace applications', 'new version']

644 / 1822 - https://www.liferay.com/careers?p=job/omS16fw5
	['liferay jobs', 'careers']

645 / 1822 - https://www.liferay.com/en/downloads
	['liferay portal', 'download liferay DXP', 'liferay downloads']

646 / 1822 - https://www.liferay.com/careers?p=job/omS16fw5/apply
	['liferay jobs', 'careers']

647 / 1822 - https://www.liferay.com/home?p_p_lifecycle=0&p_p_id=1_WAR_googlesearchapplianceportlet_INSTANCE_0000&p_p_state=maximized&_1_WAR_googlesearchapplianceportlet_INSTANCE_0000_keywords=maven
	['connected experiences', 'websites', 'digital experience software', 'modern portals', 'intranets']

648 / 1822 - https://www.liferay.com/en_AU/company/gartner/magic-quadrant-digital-experience-platforms
	["gartner's evaluation", 'ability', 'DXP', 'free download', 'completeness', 'vis

	['portal']

686 / 1822 - https://www.liferay.com/en/digital-experience-platform
	['usable digital experiences customers', 'DXP', 'liferay digital experience platform', 'single, consolidated platform']

687 / 1822 - https://www.liferay.com/en/product/features/platform
	['mobile experiences', 'same open source platform', 'websites', 'open source digital experience platform', 'power', 'liferay DXP', 'organizations', "world's best portals"]

688 / 1822 - https://customer.liferay.com/documentation/7.0/deploy/-/official_documentation/deployment/installing-liferay-dxp-on-websphere-8-5-5
	['deployment', 'liferay DXP', 'portal']

689 / 1822 - https://www.liferay.com/resource?folderId=1645493&title=Liferay+DXP+Roles+Quick+Start+Guide
	['groups', 'liferay DXP roles quick start guide', 'fine-grained permissioning system', 'large and very small scopes', 'guide', 'users', 'advantage', 'liferay DXP', 'liferay roles', 'basics', 'very powerful feature', 'roles', 'resources']

690 / 1822 - https://cust

707 / 1822 - https://www.liferay.com/group/control_panel/manage?p_p_state=maximized&_15_title=&_15_status=&_15_entryEnd=20&_15_searchFolderId=1721363&_15_entryStart=0&_15_content=&p_p_id=15&p_p_mode=view&_15_struts_action=/journal/search&_15_description=&controlPanelCategory=current_site.content&_15_historyKey=_15_categorization&_15_displayStyle=&doAsGroupId=10182&_15_showSearchInfo=1&_15_viewFolders=1&_15_folderStart=0&_15_keywords=iese&_15_folderId=1721363&refererPlid=11198&_15_advancedSearch=0&p_p_auth=ei8HR2Wm&p_p_lifecycle=0&_15_viewEntries=1&_15_type=&_15_searchType=1&_15_searchArticleId=&_15_folderEnd=20&_15_andOperator=
	['web content']

708 / 1822 - https://customer.liferay.com/documentation/7.0/admin/-/official_documentation/portal/workflow-forms
	['portal', 'workflow forms']

709 / 1822 - https://www.liferay.com/en_GB/community/forums/-/message_boards/message/22392288
	['connected experiences', 'websites', 'digital experience software', 'modern portals', 'intranets']

710 / 

736 / 1822 - https://www.liferay.com/resources/l?p_p_col_id=column-1&_15_groupId=10182&p_p_id=15&_15_articleId=231917890&_15_redirect=https://www.liferay.com/resources/l?title=financial-services&p_p_lifecycle=0&_15_struts_action=/journal/edit_article&p_p_col_count=1&p_p_state=maximized&p_p_auth=pRlukInj
	['landing page']

737 / 1822 - https://www.liferay.com/blog/en-us/customer-experience/how-banking-security-is-changing-the-customer-experience
	['authentication', 'liferay blogs', 'customer experience', 'banking security', 'new demands', 'user experience']

738 / 1822 - https://www.liferay.com/resources/l?p_p_col_id=column-1&_15_groupId=10182&p_p_id=15&_15_articleId=231917890&_15_redirect=https://www.liferay.com/resources/l?title=financial-services&p_p_lifecycle=0&_15_struts_action=/journal/edit_article&p_p_col_count=1&p_p_state=maximized&p_p_auth=HAMdzx87
	['landing page']

739 / 1822 - https://www.liferay.com/resources/l?p_p_col_id=column-1&_15_groupId=10182&p_p_id=15&_15_articleId=2

765 / 1822 - https://www.liferay.com/web/discover/magic-quadrant-digital-experience-platforms?hsa_ver=3&hsa_ad=258215791133&hsa_cam=1270442147&hsa_tgt=kwd-0&hsa_acc=9733166788&hsa_mt=&gclid=EAIaIQobChMIpubj8In52gIVU4BpCh1BXAdxEAEYASAAEgLEuvD_BwE&hsa_net=adwords&hsa_src=d&hsa_grp=58660936932&hsa_kw=
	["gartner's evaluation", 'ability', 'DXP', 'free download', 'completeness', 'vision']

766 / 1822 - https://customer.liferay.com/documentation/knowledge-base/-/kb/12785
	['knowledge base', 'portal', 'LESA ticket', 'portal-ext.properties']

767 / 1822 - https://www.liferay.com/en_AU/resources/l?title=portals
	['discover', 'self service', 'common problems', 'portals', 'that solve enterprise problems', 'partner portals', 'internal service', 'social collaboration', 'leading enterprises', 'digital business']

768 / 1822 - https://customer.liferay.com/documentation/7.0/deploy/-/official_documentation/deployment/patching-tool
	['deployment', 'service pack', 'patching tool', 'service pack(s)', 'por

799 / 1822 - https://www.liferay.com/product/features/assets?p_p_lifecycle=0&p_p_id=1_WAR_googlesearchapplianceportlet_INSTANCE_0000&p_p_state=maximized&_1_WAR_googlesearchapplianceportlet_INSTANCE_0000_keywords=smith+%26+nephew
	['DAM', 'digital assets', 'documents', 'digital asset management', 'liferay DXP', 'social content']

800 / 1822 - https://www.liferay.com/product/features/assets?p_p_lifecycle=0&p_p_id=1_WAR_googlesearchapplianceportlet_INSTANCE_0000&p_p_state=maximized&_1_WAR_googlesearchapplianceportlet_INSTANCE_0000_keywords=healthcare
	['DAM', 'digital assets', 'documents', 'digital asset management', 'liferay DXP', 'social content']

801 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&p_p_mode=view&_1_WAR_osbknowledgebaseportlet_assetCategoryIds=106072&p_p_lifecycle=0&_1_WAR_osbknowledgebaseportlet_mvcPath=/search/view.jsp&p_p_state=normal
	['knowledge base', 'portal']

802 / 1822 - https://www.liferay.com/resources/l?ads_cm

829 / 1822 - https://www.liferay.com/resources/l?_15_groupId=10182&p_p_id=15&_15_redirect=https://www.liferay.com:443/resources/l?p_p_auth=NZTSd0QD%26p_p_id=15%26p_p_lifecycle=0%26p_p_state=maximized%26p_p_mode=view%26_15_groupId=10182%26_15_groupId=10182%26_15_redirect=https%253A%252F%252Fwww.liferay.com%252Fresources%252Fl%253Ftitle%253Dfinancial-services%26_15_redirect=https%253A%252F%252Fwww.liferay.com%252Fresources%252Fl%253Ftitle%253Dfinancial-services%26_15_struts_action=%252Fjournal%252Fedit_article%26_15_struts_action=%252Fjournal%252Fedit_article%26_15_articleId=231918019%26_15_articleId=231918019&p_p_lifecycle=0&_15_struts_action=/journal/view_article_history&_15_articleId=231918019&p_p_state=maximized&p_p_mode=view&p_p_auth=NZTSd0QD
	['landing page']

830 / 1822 - https://www.liferay.com/resources/l?_15_groupId=10182&p_p_id=15&p_p_lifecycle=0&_15_struts_action=/journal/view_article_history&_15_articleId=231918019&p_p_state=maximized&p_p_mode=view&p_p_auth=NZTSd0QD
	['landi

858 / 1822 - https://www.liferay.com/web/discover/magic-quadrant-digital-experience-platforms?hsa_ver=3&hsa_ad=258215791133&hsa_cam=1270442147&hsa_tgt=kwd-0&hsa_acc=9733166788&hsa_mt=&gclid=EAIaIQobChMImMe22tH52gIVR7lPCh0aLAQmEAEYASAAEgLJlfD_BwE&hsa_net=adwords&hsa_src=d&hsa_grp=58660936932&hsa_kw=
	["gartner's evaluation", 'ability', 'DXP', 'free download', 'completeness', 'vision']

859 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=lifecycle&p_p_state=normal
	['portal']

860 / 1822 - https://customer.liferay.com/documentation/7.0/develop/tutorials/-/official_documentation/tutorials/waiting-on-lifecycle-events
	['portal', 'tutorials', 'lifecycle events']

861 / 1822 - https://www.liferay.com/privacy-policy
	['privacy policy']

862 / 1822 - https://www.liferay.com/resources/l?p_p_col_id=column-1&_15_groupId=10182&p_p_id=15&_15_articleId=231652720&_15_redirect=https://www.liferay.com/resources/l?ti

890 / 1822 - https://www.liferay.com/careers?p=applyConfirmation
	['liferay jobs', 'careers']

891 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=portal-tools-target-platform-indexer-client&p_p_state=normal
	['portal']

892 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&p_p_mode=view&_1_WAR_osbknowledgebaseportlet_assetCategoryIds=80889&p_p_lifecycle=0&_1_WAR_osbknowledgebaseportlet_keywords=reindex&_1_WAR_osbknowledgebaseportlet_mvcPath=/search/view.jsp&p_p_state=normal
	['portal']

893 / 1822 - https://customer.liferay.com/documentation/knowledge-base/-/kb/3293434
	['knowledge base', 'portal']

894 / 1822 - https://www.liferay.com/solutions/industries/banking?tcid=J4K8R9
	['innovation', 'website software', 'intranet', 'website', 'wealth management', 'banks', 'financial services intranet', 'portal', 'financial services', 'improved customer experienc

927 / 1822 - https://www.liferay.com/home?p_p_lifecycle=0&p_p_id=1_WAR_googlesearchapplianceportlet_INSTANCE_0000&p_p_state=maximized&_1_WAR_googlesearchapplianceportlet_INSTANCE_0000_keywords=US+BANK
	['connected experiences', 'websites', 'digital experience software', 'modern portals', 'intranets']

928 / 1822 - https://www.liferay.com/legal/copyright
	['copyright infringement', 'claims']

929 / 1822 - https://www.liferay.com/evaluation-license-agreement-for-liferay-software
	['evaluation license agreement', 'liferay software']

930 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=password&p_p_state=normal
	['portal']

931 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&p_p_mode=view&_1_WAR_osbknowledgebaseportlet_assetCategoryIds=80889&p_p_lifecycle=0&_1_WAR_osbknowledgebaseportlet_keywords=password&_1_WAR_osbknowledgebaseportlet_mvcPath=/search/view

970 / 1822 - https://www.liferay.com/transform-your-business-operations?ads_cmpid=1341043413&ads_targetid=kwd-1832712893&ads_creative=261547660742&ads_network=g&ads_adid=50569534701&ads_matchtype=e&gclid=EAIaIQobChMIwcvBka_72gIVBBxpCh178wjOEAAYASAAEgIOg_D_BwE&ttv=2
	['leading digital experience platform']

971 / 1822 - https://www.liferay.com/resources/l?ads_cmpid=871293813&ads_targetid=kwd-322465177418&ads_matchtype=b&title=portals&ads_network=s&ads_adid=52338392364&ads_creative=254384372545&gclid=EAIaIQobChMI3Obvy6_72gIVgRlpCh0ybwOBEAAYAyAAEgJIa_D_BwE&ttv=2
	['discover', 'self service', 'common problems', 'portals', 'that solve enterprise problems', 'partner portals', 'internal service', 'social collaboration', 'leading enterprises', 'digital business']

972 / 1822 - https://www.liferay.com/web/events-ldsf-uk/venue?_15_groupId=231869941&p_p_col_id=column-3&p_p_state=maximized&_15_version=1.1&p_p_lifecycle=0&_15_folderId=0&p_p_mode=view&p_p_auth=LUAePU4S&p_p_id=15&_15_articleId=231903

999 / 1822 - https://www.liferay.com/en_AU/group/control_panel/manage?p_p_state=maximized&doAsGroupId=10182&_15_entryEnd=20&_15_viewFolders=1&_15_folderStart=0&_15_folderId=231917888&_15_entryStart=0&refererPlid=231654361&p_p_mode=view&p_p_auth=kDgTyGlo&p_p_lifecycle=0&p_p_id=15&_15_viewEntries=1&_15_action=browseFolder&_15_struts_action=/journal/view&_15_folderEnd=20&controlPanelCategory=current_site.content
	['web content']

1000 / 1822 - https://www.liferay.com/en_AU/group/control_panel?controlPanelCategory=current_site.content&p_p_id=15&doAsGroupId=10182&refererPlid=231654361
	['web content']

1001 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=remote+live&p_p_state=normal
	['portal']

1002 / 1822 - https://www.liferay.com/downloads?p_p_lifecycle=0&p_p_id=1_WAR_googlesearchapplianceportlet_INSTANCE_0000&p_p_state=maximized&_1_WAR_googlesearchapplianceportlet_INSTANCE_0000_keywords=liferay+EE+tr

1032 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=internal:+&p_p_state=normal
	['portal']

1033 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=internal+lcs&p_p_state=normal
	['portal']

1034 / 1822 - https://customer.liferay.com/documentation/knowledge-base/-/kb/125361
	['knowledge base', 'portal']

1035 / 1822 - https://www.liferay.com/careers?p=job/owT16fwg/apply
	['liferay jobs', 'careers']

1036 / 1822 - https://customer.liferay.com/documentation/knowledge-base/-/kb/20851
	['knowledge base', 'UDP', 'distributed caching', 'TCP', 'activation key', 'amazon S3', 'portal', 'clustering', 'ehcache']

1037 / 1822 - https://customer.liferay.com/documentation/knowledge-base/-/kb/151456
	['knowledge base', 'portal']

1038 / 1822 - https://www.liferay.com/solutions/portals?p_p_lifecycle=0&p_p_id=1_WAR_googlesearchapp

	['connected experiences', 'websites', 'digital experience software', 'modern portals', 'intranets']

1069 / 1822 - https://www.liferay.com/get-a-demo
	['connected experiences', 'websites', 'digital experience software', 'modern portals', 'intranets']

1070 / 1822 - https://www.liferay.com/careers?p=job/oFP83fwp
	['liferay jobs', 'careers']

1071 / 1822 - https://customer.liferay.com/documentation/knowledge-base/-/kb/2119411
	['knowledge base', 'portal']

1072 / 1822 - https://customer.liferay.com/documentation/knowledge-base/-/kb/457670
	['knowledge base', 'portal']

1073 / 1822 - https://www.liferay.com/transform-your-business-operations?ads_cmpid=1341043413&ads_targetid=kwd-1832712893&ads_creative=261547660742&ads_network=g&ads_adid=50569534701&ads_matchtype=e&gclid=EAIaIQobChMIxIuR9Yf82gIVCFSGCh096QFgEAAYASAAEgJFXPD_BwE&ttv=2
	['leading digital experience platform']

1074 / 1822 - https://customer.liferay.com/documentation/knowledge-base/-/kb/14239
	['knowledge base', 'portal']

10

1112 / 1822 - https://customer.liferay.com/en/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=compile&p_p_state=normal
	['portal']

1113 / 1822 - https://customer.liferay.com/en/documentation/knowledge-base/-/kb/18530
	['knowledge base', 'portal']

1114 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=Pre-Compiling&p_p_state=normal
	['portal']

1115 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=JASPER+JSPC+GRADLE+PLUGIN&p_p_state=normal
	['jasper JSPC gradle plugin', 'reference', 'portal']

1116 / 1822 - https://www.liferay.com/en_AU/solutions/portals
	['portal solutions', 'corporate internet portal solutions', 'enterprise web portal software', 'enterprise customers', 'liferay web portal software']

1117 / 1822 - https://www.liferay.com/en_AU/solutions/mobile
	['

1152 / 1822 - https://www.liferay.com/home?p_p_lifecycle=0&p_p_id=1_WAR_googlesearchapplianceportlet_INSTANCE_0000&p_p_state=maximized&_1_WAR_googlesearchapplianceportlet_INSTANCE_0000_keywords=liferay+show+vdo+mp4
	['connected experiences', 'websites', 'digital experience software', 'modern portals', 'intranets']

1153 / 1822 - https://www.liferay.com/group/control_panel/manage?_1_WAR_osbwwwmarketingeventsportlet_tabs1=marketing&p_p_state=maximized&_1_WAR_osbwwwmarketingeventsportlet_startDateGTDay=11&_1_WAR_osbwwwmarketingeventsportlet_title=&_1_WAR_osbwwwmarketingeventsportlet_advancedSearch=false&_1_WAR_osbwwwmarketingeventsportlet_anytime=true&p_p_id=1_WAR_osbwwwmarketingeventsportlet&_1_WAR_osbwwwmarketingeventsportlet_summary=&_1_WAR_osbwwwmarketingeventsportlet_startDateLTMonth=4&_1_WAR_osbwwwmarketingeventsportlet_marketingEventsSearchContainerPrimaryKeys=231913388,231827684,231814102,231821565,231870749,231822040,231868441,231814104,231834059,231819473,231912625,231894670,231


1157 / 1822 - https://www.liferay.com/group/control_panel/manage?p_p_id=1_WAR_osbwwwmarketingeventsportlet&doAsGroupId=10182&_1_WAR_osbwwwmarketingeventsportlet_mvcPath=/admin/edit_marketing_event.jsp&p_p_lifecycle=0&_1_WAR_osbwwwmarketingeventsportlet_redirect=https://www.liferay.com:443/group/control_panel/manage?p_p_id=1_WAR_osbwwwmarketingeventsportlet%26p_p_lifecycle=0%26p_p_state=maximized%26p_p_mode=view%26doAsGroupId=10182%26refererPlid=231354466%26controlPanelCategory=current_site.content%26_1_WAR_osbwwwmarketingeventsportlet_tabs1=marketing%26_1_WAR_osbwwwmarketingeventsportlet_mvcPath=%252Fadmin%252Fview.jsp%26_1_WAR_osbwwwmarketingeventsportlet_cur=2%26_1_WAR_osbwwwmarketingeventsportlet_delta=75%26_1_WAR_osbwwwmarketingeventsportlet_keywords=%26_1_WAR_osbwwwmarketingeventsportlet_advancedSearch=false%26_1_WAR_osbwwwmarketingeventsportlet_andOperator=true%26_1_WAR_osbwwwmarketingeventsportlet_orderByCol=start-date%26_1_WAR_osbwwwmarketingeventsportlet_orderByType=desc&_1_W

1173 / 1822 - https://www.liferay.com/blog/en-uk/industries/achieving-mobile-by-default-in-the-public-sector
	['mobile', 'sustainable service delivery']

1174 / 1822 - https://www.liferay.com/sv/web/discover/magic-quadrant-digital-experience-platforms
	["gartner's evaluation", 'ability', 'DXP', 'free download', 'completeness', 'vision']

1175 / 1822 - https://www.liferay.com/nb/web/discover/magic-quadrant-digital-experience-platforms
	["gartner's evaluation", 'ability', 'DXP', 'free download', 'completeness', 'vision']

1176 / 1822 - https://www.liferay.com/ge/web/discover/magic-quadrant-digital-experience-platforms
	['connected experiences', 'websites', 'digital experience software', 'modern portals', 'intranets']

1177 / 1822 - https://www.liferay.com/browser/liferay/browser.html?Type=
	['connected experiences', 'websites', 'digital experience software', 'modern portals', 'intranets']

1178 / 1822 - https://www.liferay.com/en/web/discover/magic-quadrant-digital-experience-platforms
	

1213 / 1822 - https://www.liferay.com/leading-open-source-enterprise-platform?ads_cmpid=150144763&ads_targetid=kwd-1832712893&ads_creative=254359982373&ads_network=g&ads_adid=9057368923&ads_matchtype=e&gclid=EAIaIQobChMI6vfUseb92gIVS7jACh0Afw7qEAAYASAAEgLOqvD_BwE&ttv=2
	['leading open source enterprise platform']

1214 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=audit.message.com.liferay.portal.kernel.model.Layout.VIEW&p_p_state=normal
	['portal']

1215 / 1822 - https://customer.liferay.com/documentation/7.0/admin/-/official_documentation/portal/auditing-users
	['update', 'reports', 'permissions', 'audit', 'portal', 'users', 'auditing users', 'events']

1216 / 1822 - https://www.liferay.com/blog/en-us/digital-strategy/portal-vs-website-when-to-use-each
	['digital strategy', 'liferay blogs', 'portal', 'company', 'CMS', 'read strategies', 'right web platform', 'website']

1217 / 1822 - https://cus

1240 / 1822 - https://www.liferay.com/product/features/platform?p_p_lifecycle=0&p_p_id=1_WAR_googlesearchapplianceportlet_INSTANCE_0000&p_p_state=maximized&_1_WAR_googlesearchapplianceportlet_INSTANCE_0000_keywords=tooling
	['mobile experiences', 'same open source platform', 'websites', 'open source digital experience platform', 'power', 'liferay DXP', 'organizations', "world's best portals"]

1241 / 1822 - https://customer.liferay.com/documentation/knowledge-base/-/kb/17945
	['knowledge base', 'portal']

1242 / 1822 - https://www.liferay.com/home?p_p_lifecycle=0&p_p_id=1_WAR_googlesearchapplianceportlet_INSTANCE_0000&p_p_state=maximized&_1_WAR_googlesearchapplianceportlet_INSTANCE_0000_keywords=social+office
	['connected experiences', 'websites', 'digital experience software', 'modern portals', 'intranets']

1243 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=ldap&p_p_state=normal
	['portal']

124

1275 / 1822 - https://www.liferay.com/home?p_p_lifecycle=0&p_p_id=1_WAR_googlesearchapplianceportlet_INSTANCE_0000&p_p_state=maximized&_1_WAR_googlesearchapplianceportlet_INSTANCE_0000_keywords=sync
	['connected experiences', 'websites', 'digital experience software', 'modern portals', 'intranets']

1276 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=database+connection+properties&p_p_state=normal
	['portal']

1277 / 1822 - https://customer.liferay.com/documentation/search?_1_WAR_osbknowledgebaseportlet_delta=20&p_p_id=1_WAR_osbknowledgebaseportlet&p_p_mode=view&p_p_lifecycle=0&_1_WAR_osbknowledgebaseportlet_cur=1&_1_WAR_osbknowledgebaseportlet_keywords=database+connection+properties&_1_WAR_osbknowledgebaseportlet_mvcPath=/search/view.jsp&p_p_state=normal
	['portal']

1278 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_

1313 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&p_p_mode=view&_1_WAR_osbknowledgebaseportlet_assetCategoryIds=80620&p_p_lifecycle=0&_1_WAR_osbknowledgebaseportlet_keywords=s3&_1_WAR_osbknowledgebaseportlet_mvcPath=/search/view.jsp&p_p_state=normal
	['portal', 'deployment', 'document repository configuration']

1314 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=upgrading+to+liferay+dxp&p_p_state=normal
	['portal']

1315 / 1822 - https://www.liferay.com/home?p_p_lifecycle=0&p_p_id=1_WAR_googlesearchapplianceportlet_INSTANCE_0000&p_p_state=maximized&_1_WAR_googlesearchapplianceportlet_INSTANCE_0000_keywords=ftp.liferay.com
	['connected experiences', 'websites', 'digital experience software', 'modern portals', 'intranets']

1316 / 1822 - https://www.liferay.com/blog/events/lsna/2018
	['liferay blogs', 'LSNA']

1317 / 1822 - https://www.liferay.com/e

1345 / 1822 - https://www.liferay.com/en/web/events-symposium-north-america/sponsors?p_p_col_id=column-1&_15_groupId=231815926&p_p_state=maximized&_15_version=1.0&p_p_lifecycle=0&_15_folderId=231909073&p_p_mode=view&p_p_auth=ZH78jQKz&p_p_id=15&_15_articleId=231909222&_15_redirect=https://www.liferay.com:443/en/web/events-symposium-north-america/sponsors?p_p_id=56_INSTANCE_ZJ4pzcIWmi0L%26p_p_lifecycle=0%26p_p_state=normal%26p_p_mode=view%26p_p_col_id=column-1%26p_p_col_pos=1%26p_p_col_count=2&_15_struts_action=/journal/edit_article&p_p_col_count=2&p_p_col_pos=1
	['sponsors']

1346 / 1822 - https://www.liferay.com/web/events-symposium-france/why-attend
	['bref']

1347 / 1822 - https://www.liferay.com/en/web/events-symposium-france/why-attend
	['bref']

1348 / 1822 - https://www.liferay.com/web/events-symposium-north-america/sponsors?p_p_col_id=column-1&_15_groupId=231815926&p_p_id=15&_15_portletResource=56_INSTANCE_P8lKEzacJNRc&_15_struts_action=/journal/edit_article&p_p_lifecycle=0&_15_

	['dynatrace APM', 'liferay application performance management', 'liferay portal', 'digital performance management']

1377 / 1822 - https://www.liferay.com/en_AU/blog/digital-strategy/en-us/five-intranet-examples-that-boost-productivity
	['digital strategy', 'liferay blogs', 'intranet', 'productivity', 'team']

1378 / 1822 - https://www.liferay.com/events/liferay-roadshows?p_p_lifecycle=0&p_p_id=1_WAR_googlesearchapplianceportlet_INSTANCE_0000&p_p_state=maximized&_1_WAR_googlesearchapplianceportlet_INSTANCE_0000_keywords=frankfurt+event
	['free solutions-focused seminars', 'roadshows']

1379 / 1822 - https://www.liferay.com/press-release?title=liferay-announces-100-year-over-year-growth
	['year']

1380 / 1822 - https://www.liferay.com/home?p_p_lifecycle=0&p_p_id=1_WAR_googlesearchapplianceportlet_INSTANCE_0000&p_p_state=maximized&_1_WAR_googlesearchapplianceportlet_INSTANCE_0000_keywords=liferay+IDE+downloads
	['connected experiences', 'websites', 'digital experience software', 'modern

1405 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=workflow&p_p_state=normal
	['portal']

1406 / 1822 - https://www.liferay.com/en/resource?folderId=13811&tcid=X9Q1G7&title=Omnichannel+More+Than+a+Buzzword+for+Banks
	['customers', 'whitepaper', 'what channel', 'banks', 'digital touchpoint and details strategies', 'omnichannel', 'buzzword', 'need', 'omnichannel goal', 'bank', "today's customers"]

1407 / 1822 - https://customer.liferay.com/documentation/6.1/admin/-/official_documentation/portal/creating-new-workflow-definitions
	['portal', 'new workflow definitions']

1408 / 1822 - https://customer.liferay.com/documentation/knowledge-base/-/kb/15699
	['knowledge base', 'portal', 'weblogic', 'patching tool']

1409 / 1822 - https://www.liferay.com/resources/l?ads_cmpid=871293813&ads_targetid=kwd-10480451&ads_matchtype=b&title=portals&ads_network=s&ads_adid=52338392364&ads_creative=254384372545&gclid=

1438 / 1822 - https://www.liferay.com/en_AU/group/control_panel/manage?_15_groupId=10182&p_p_state=maximized&doAsGroupId=10182&p_p_lifecycle=0&_15_folderId=244992&refererPlid=231354466&p_p_mode=view&p_p_auth=BrmPCJ6i&p_p_id=15&_15_structureId=2354658&_15_struts_action=/journal/edit_article&_15_redirect=https://www.liferay.com:443/en_AU/group/control_panel/manage?p_p_auth=BrmPCJ6i%26p_p_id=15%26p_p_lifecycle=0%26p_p_state=maximized%26p_p_mode=view%26doAsGroupId=10182%26refererPlid=231354466%26controlPanelCategory=current_site.content%26_15_entryEnd=20%26_15_displayStyle=%26_15_viewEntries=1%26_15_viewFolders=1%26_15_folderStart=0%26_15_action=browseFolder%26_15_struts_action=%252Fjournal%252Fview%26_15_folderEnd=20%26_15_entryStart=0%26_15_folderId=244992&controlPanelCategory=current_site.content
	['web content']

1439 / 1822 - https://www.liferay.com/support/holiday-calendar
	['liferay support', 'critical incidents', 'holiday calendar', 'support region', 'support', 'observed holidays']

1469 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=lcs+client&p_p_state=normal
	['portal']

1470 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=single+page+application&p_p_state=normal
	['portal']

1471 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&p_p_mode=view&_1_WAR_osbknowledgebaseportlet_assetCategoryIds=80889&p_p_lifecycle=0&_1_WAR_osbknowledgebaseportlet_keywords=single+page+application&_1_WAR_osbknowledgebaseportlet_mvcPath=/search/view.jsp&p_p_state=normal
	['portal']

1472 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=custom+workflow&p_p_state=normal
	['portal']

1473 / 1822 - https://www.liferay.com/resource?title=copperpoint
	['worker compensation insurance pro

	['connected experiences', 'websites', 'landing page admin', 'digital experience software', 'modern portals', 'intranets']

1512 / 1822 - https://www.liferay.com/c/document_library/get_file?amp&folderId=472678&p_l_id=745576&name=DLFE-384.pdf
	['connected experiences', 'websites', 'digital experience software', 'modern portals', 'intranets']

1513 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=Websphere+liferay&p_p_state=normal
	['portal']

1514 / 1822 - https://www.liferay.com/resources?93350categoryIds=93352&17989categoryIds=18045
	["liferay's resource library", 'business', 'ebooks', 'IT and business leaders', 'case studies', 'IT', 'leadership resources', 'whitepapers']

1515 / 1822 - https://customer.liferay.com/documentation/search?p_p_id=1_WAR_osbknowledgebaseportlet&_1_WAR_osbknowledgebaseportlet_keywords=logging&p_p_state=normal
	['portal']

1516 / 1822 - https://www.liferay.com/en_AU/events?

1537 / 1822 - https://www.liferay.com/group/control_panel/manage?p_p_id=1_WAR_osbwwwmarketingeventsportlet&doAsGroupId=231869941&_1_WAR_osbwwwmarketingeventsportlet_mvcPath=/admin/edit_marketing_event_sponsor.jsp&p_p_lifecycle=0&_1_WAR_osbwwwmarketingeventsportlet_redirect=https://www.liferay.com:443/group/control_panel/manage?p_p_id=1_WAR_osbwwwmarketingeventsportlet%26p_p_lifecycle=0%26p_p_state=maximized%26p_p_mode=view%26doAsGroupId=231869941%26refererPlid=231869987%26controlPanelCategory=current_site.content%26_1_WAR_osbwwwmarketingeventsportlet_mvcPath=%252Fadmin%252Fedit_marketing_event.jsp%26_1_WAR_osbwwwmarketingeventsportlet_tabs1=sponsors%26_1_WAR_osbwwwmarketingeventsportlet_redirect=%252Fgroup%252Fcontrol_panel%252Fmanage%253FcontrolPanelCategory%253Dcurrent_site.content%2526doAsGroupId%253D231869941%2526p_p_id%253D1_WAR_osbwwwmarketingeventsportlet%2526p_p_lifecycle%253D0%2526p_p_mode%253Dview%2526p_p_state%253Dmaximized%2526refererPlid%253D231869987%26_1_WAR_osbwwwmarket


1542 / 1822 - https://www.liferay.com/group/control_panel/manage?_1_WAR_osbwwwmarketingeventsportlet_tabs1=sponsors&p_p_state=maximized&doAsGroupId=231869941&_1_WAR_osbwwwmarketingeventsportlet_mvcPath=/admin/edit_marketing_event.jsp&p_p_lifecycle=0&_1_WAR_osbwwwmarketingeventsportlet_advancedSearch=false&_1_WAR_osbwwwmarketingeventsportlet_redirect=/group/control_panel/manage?controlPanelCategory=current_site.content%26doAsGroupId=231869941%26p_p_id=1_WAR_osbwwwmarketingeventsportlet%26p_p_lifecycle=0%26p_p_mode=view%26p_p_state=maximized%26refererPlid=231869987&_1_WAR_osbwwwmarketingeventsportlet_keywords=&refererPlid=231869987&p_p_mode=view&_1_WAR_osbwwwmarketingeventsportlet_andOperator=true&p_p_id=1_WAR_osbwwwmarketingeventsportlet&_1_WAR_osbwwwmarketingeventsportlet_delta1=20&_1_WAR_osbwwwmarketingeventsportlet_marketingEventId=231868441&controlPanelCategory=current_site.content&_1_WAR_osbwwwmarketingeventsportlet_cur1=1
	[]

1543 / 1822 - https://www.liferay.com/group/control_p

	['PARTNERS', 'media', 'documents']

1557 / 1822 - https://www.liferay.com/en/group/control_panel/manage?p_p_id=20&doAsGroupId=10182&_20_struts_action=/document_library/view_file_entry&_20_redirect=https://www.liferay.com:443/en/group/control_panel/manage?p_p_id=20%26p_p_lifecycle=0%26p_p_state=maximized%26p_p_mode=view%26doAsGroupId=10182%26refererPlid=231887831%26controlPanelCategory=current_site.content%26_20_entryEnd=20%26_20_viewEntries=1%26_20_viewFolders=1%26_20_folderStart=0%26_20_action=browseFolder%26_20_struts_action=%252Fdocument_library%252Fview%26_20_folderEnd=20%26_20_entryStart=0%26_20_folderId=13992&p_p_lifecycle=0&_20_fileEntryId=231922745&controlPanelCategory=current_site.content&p_p_state=maximized&p_p_mode=view&refererPlid=231887831
	['media', 'documents']

1558 / 1822 - https://www.liferay.com/web/events-symposium-spain/why-attend
	[]

1559 / 1822 - https://www.liferay.com/web/events-symposium-spain/venue
	['teatrogoya']

1560 / 1822 - https://www.liferay.com/en/g

1585 / 1822 - https://www.liferay.com/services/partners?countryA2Codes=PK,IN
	['partners']

1586 / 1822 - https://www.liferay.com/home?p_p_lifecycle=0&p_p_id=1_WAR_googlesearchapplianceportlet_INSTANCE_0000&p_p_state=maximized&_1_WAR_googlesearchapplianceportlet_INSTANCE_0000_keywords=render+url+link
	['connected experiences', 'websites', 'digital experience software', 'modern portals', 'intranets']

1587 / 1822 - https://www.liferay.com/events/liferay-conferences?countryIds=19&pastEvents=false&types=1
	['cutting-edge technology', 'partners', "liferay's team", 'community', 'innovative solutions', 'liferay conferences']

1588 / 1822 - https://customer.liferay.com/documentation/7.0/develop/tutorials/-/official_documentation/tutorials/writing-your-first-liferay-application
	['portal', 'first liferay DXP application', 'tutorials']

1589 / 1822 - https://customer.liferay.com/documentation/7.0/admin/-/official_documentation/portal/staging-content-for-publication
	['portal', 'publication', 's

1616 / 1822 - https://www.liferay.com/group/control_panel/manage?_15_groupId=10182&p_p_state=maximized&doAsGroupId=10182&p_p_lifecycle=0&_15_folderId=231918602&refererPlid=231354466&p_p_mode=view&p_p_auth=8dlDNs7D&p_p_id=15&_15_struts_action=/journal/edit_article&_15_redirect=https://www.liferay.com:443/group/control_panel/manage?p_p_auth=8dlDNs7D%26p_p_id=15%26p_p_lifecycle=0%26p_p_state=maximized%26p_p_mode=view%26doAsGroupId=10182%26refererPlid=231354466%26controlPanelCategory=current_site.content%26_15_advancedSearch=0%26_15_viewFolders=1%26_15_viewEntries=1%26_15_keywords=selfhelp%26_15_status=%26_15_folderStart=0%26_15_searchArticleId=%26_15_searchType=1%26_15_folderEnd=20%26_15_type=%26_15_andOperator=%26_15_folderId=159754595%26_15_entryStart=0%26_15_displayStyle=%26_15_entryEnd=20%26_15_content=%26_15_title=%26_15_description=%26_15_struts_action=%252Fjournal%252Fsearch%26_15_searchFolderId=159754595%26_15_showSearchInfo=1&_15_articleId=231918493&controlPanelCategory=current_s

1629 / 1822 - https://www.liferay.com/group/control_panel/manage?p_p_id=20&doAsGroupId=10182&_20_struts_action=/document_library/edit_file_entry&_20_redirect=https://www.liferay.com:443/group/control_panel/manage?p_p_id=20%26p_p_lifecycle=0%26p_p_state=maximized%26p_p_mode=view%26doAsGroupId=10182%26refererPlid=231354466%26controlPanelCategory=current_site.content%26_20_entryEnd=20%26_20_displayStyle=%26_20_viewEntries=1%26_20_viewFolders=1%26_20_folderStart=0%26_20_action=browseFolder%26_20_struts_action=%252Fdocument_library%252Fview%26_20_folderEnd=20%26_20_entryStart=0%26_20_folderId=231923218&p_p_lifecycle=0&_20_backURL=https://www.liferay.com:443/group/control_panel/manage?p_p_id=20%26p_p_lifecycle=0%26p_p_state=maximized%26p_p_mode=view%26doAsGroupId=10182%26refererPlid=231354466%26controlPanelCategory=current_site.content%26_20_entryEnd=20%26_20_displayStyle=%26_20_viewEntries=1%26_20_viewFolders=1%26_20_folderStart=0%26_20_action=browseFolder%26_20_struts_action=%252Fdocument_

1646 / 1822 - https://www.liferay.com/group/control_panel?controlPanelCategory=current_site.content&p_p_id=15&doAsGroupId=10182&refererPlid=231654361
	['web content']

1647 / 1822 - https://www.liferay.com/group/control_panel/manage?p_p_state=maximized&_15_displayStyle=&doAsGroupId=10182&_15_folderId=231918602&p_p_lifecycle=0&_15_folderStart=0&_15_viewEntries=1&_15_entryStart=0&refererPlid=231654361&p_p_mode=view&p_p_auth=8dlDNs7D&_15_viewFolders=1&p_p_id=15&_15_entryEnd=20&_15_action=browseFolder&_15_struts_action=/journal/view&_15_folderEnd=20&controlPanelCategory=current_site.content
	['web content']

1648 / 1822 - https://www.liferay.com/resources/l?p_p_col_id=column-1&_15_groupId=10182&p_p_id=15&_15_articleId=231918652&_15_redirect=https://www.liferay.com/resources/l?title=selfhelp-for-customers-and-partners&p_p_lifecycle=0&_15_struts_action=/journal/edit_article&p_p_col_count=1&p_p_state=maximized&p_p_auth=NfReI3fe
	['landing page']

1649 / 1822 - https://www.liferay.com/en/resou

	['portal', 'tutorials', 'liferay developer studio']

1671 / 1822 - https://www.liferay.com/training-refresh?p_p_col_id=column-1&_15_groupId=10182&p_p_state=maximized&_15_version=1.4&p_p_lifecycle=0&_15_folderId=0&p_p_mode=view&p_p_auth=zPp15U4Q&p_p_id=15&_15_articleId=231923395&_15_redirect=https://www.liferay.com:443/training-refresh?p_p_id=56_INSTANCE_CgQ319MF0NHt%26p_p_lifecycle=0%26p_p_state=normal%26p_p_mode=view%26p_p_col_id=column-1%26p_p_col_pos=1%26p_p_col_count=2&_15_struts_action=/journal/edit_article&p_p_col_count=2&p_p_col_pos=1
	['training refresh']

1672 / 1822 - https://www.liferay.com/training-refresh?p_p_col_id=column-1&_15_groupId=10182&p_p_state=maximized&_15_version=1.5&p_p_lifecycle=0&_15_folderId=0&p_p_mode=view&p_p_auth=zPp15U4Q&p_p_id=15&_15_articleId=231923395&_15_redirect=https://www.liferay.com:443/training-refresh?p_p_id=56_INSTANCE_CgQ319MF0NHt%26p_p_lifecycle=0%26p_p_state=normal%26p_p_mode=view%26p_p_col_id=column-1%26p_p_col_pos=1%26p_p_col_count=2&_15

1692 / 1822 - https://www.liferay.com/resources/l?p_p_col_id=column-1&_15_groupId=10182&p_p_id=15&_15_articleId=231918652&_15_redirect=https://www.liferay.com/resources/l?title=infosysfinacle&p_p_lifecycle=0&_15_struts_action=/journal/edit_article&p_p_col_count=1&p_p_state=maximized&p_p_auth=NfReI3fe
	['landing page']

1693 / 1822 - https://www.liferay.com/resources/l?p_p_col_id=column-1&_15_groupId=10182&p_p_id=15&_15_articleId=231918623&_15_redirect=https://www.liferay.com/resources/l?title=infosysfinacle&p_p_lifecycle=0&_15_struts_action=/journal/edit_article&p_p_col_count=1&p_p_state=maximized&p_p_auth=NfReI3fe
	['landing page']

1694 / 1822 - https://customer.liferay.com/documentation/7.0/develop/tutorials/-/official_documentation/tutorials/portlets
	['portal', 'portlets', 'tutorials']

1695 / 1822 - https://www.liferay.com/resources/l?p_p_col_id=column-1&_15_groupId=10182&p_p_id=15&_15_articleId=231918604&_15_redirect=https://www.liferay.com/resources/l?title=infosysfinacle&p_p_l

1722 / 1822 - https://www.liferay.com/group/control_panel/manage?p_p_state=maximized&doAsGroupId=231815926&_174_struts_action=/site_memberships_admin/edit_site_assignments&p_p_lifecycle=0&refererPlid=231815930&p_p_mode=view&p_p_auth=2uhButjZ&_174_tabs1=users&p_p_id=174&_174_groupId=231815926&_174_p_u_i_d=9153921&controlPanelCategory=current_site.users&_174_redirect=https://www.liferay.com:443/group/control_panel/manage?p_p_auth=2uhButjZ%26p_p_id=174%26p_p_lifecycle=0%26p_p_state=maximized%26p_p_mode=view%26doAsGroupId=231815926%26refererPlid=231815930%26controlPanelCategory=current_site.users%26_174_cur=0
	['site memberships']

1723 / 1822 - https://www.liferay.com/group/control_panel/manage?p_p_state=maximized&_174_cur=0&doAsGroupId=231815926&_174_tabs1=users&p_p_lifecycle=0&refererPlid=231815930&p_p_mode=view&p_p_auth=2uhButjZ&_174_struts_action=/site_memberships_admin/edit_site_assignments&p_p_id=174&_174_tabs2=current&_174_p_u_i_d=9153921&_174_groupId=231815926&controlPanelCategory

1745 / 1822 - https://www.liferay.com/group/control_panel/manage?_15_groupId=231815926&p_p_state=maximized&doAsGroupId=231815926&p_p_lifecycle=0&_15_folderId=0&refererPlid=231875596&p_p_mode=view&p_p_auth=Tu8iIiHS&p_p_id=15&_15_struts_action=/journal/edit_article&_15_redirect=https://www.liferay.com:443/group/control_panel/manage?p_p_auth=Tu8iIiHS%26p_p_id=15%26p_p_lifecycle=0%26p_p_state=maximized%26p_p_mode=view%26doAsGroupId=231815926%26refererPlid=231875596%26controlPanelCategory=current_site.content%26_15_advancedSearch=0%26_15_viewFolders=1%26_15_viewEntries=1%26_15_keywords=231921814%26_15_status=%26_15_folderStart=0%26_15_searchArticleId=%26_15_searchType=1%26_15_folderEnd=20%26_15_type=%26_15_andOperator=%26_15_folderId=0%26_15_entryStart=0%26_15_displayStyle=%26_15_entryEnd=20%26_15_content=%26_15_title=%26_15_description=%26_15_struts_action=%252Fjournal%252Fsearch%26_15_searchFolderId=0%26_15_showSearchInfo=1&_15_articleId=231921814&controlPanelCategory=current_site.content

1767 / 1822 - https://www.liferay.com/training-refresh?p_p_col_id=column-1&_15_groupId=10182&p_p_state=maximized&p_p_auth=ji1NcX7q&_15_version=1.0&p_p_lifecycle=0&_15_folderId=0&p_p_mode=view&_15_articleId=231924037&p_p_id=15&_15_redirect=https://www.liferay.com:443/training-refresh?p_p_id=56_INSTANCE_nxYGxIUEvqgP%26p_p_lifecycle=0%26p_p_state=normal%26p_p_mode=view%26p_p_col_id=column-1%26p_p_col_count=3&_15_struts_action=/journal/edit_article&p_p_col_count=3
	['training refresh']

1768 / 1822 - https://www.liferay.com/downloads?p_p_lifecycle=0&p_p_id=1_WAR_googlesearchapplianceportlet_INSTANCE_0000&p_p_state=maximized&_1_WAR_googlesearchapplianceportlet_INSTANCE_0000_keywords=download+dxp
	['liferay portal', 'download liferay DXP', 'liferay downloads']

1769 / 1822 - https://www.liferay.com/training-refresh?p_p_col_id=column-1&p_p_id=56_INSTANCE_nxYGxIUEvqgP&p_p_lifecycle=0&p_p_col_count=3&p_p_state=normal&p_p_mode=view
	['training refresh']

1770 / 1822 - https://www.liferay.com/dow

1789 / 1822 - https://www.liferay.com/training-refresh?p_p_col_id=column-1&_15_groupId=10182&p_p_state=maximized&p_p_auth=ji1NcX7q&_15_version=1.3&p_p_lifecycle=0&_15_folderId=0&p_p_mode=view&_15_articleId=231924037&p_p_id=15&_15_redirect=https://www.liferay.com:443/training-refresh?p_p_id=56_INSTANCE_nxYGxIUEvqgP%26p_p_lifecycle=0%26p_p_state=normal%26p_p_mode=view%26p_p_col_id=column-1%26p_p_col_count=3&_15_struts_action=/journal/edit_article&p_p_col_count=3
	['training refresh']

1790 / 1822 - https://www.liferay.com/training-refresh?p_p_col_id=column-1&_15_groupId=10182&p_p_state=maximized&p_p_auth=ji1NcX7q&_15_version=1.4&p_p_lifecycle=0&_15_folderId=0&p_p_mode=view&_15_articleId=231924037&p_p_id=15&_15_redirect=https://www.liferay.com:443/training-refresh?p_p_id=56_INSTANCE_nxYGxIUEvqgP%26p_p_lifecycle=0%26p_p_state=normal%26p_p_mode=view%26p_p_col_id=column-1%26p_p_col_count=3&_15_struts_action=/journal/edit_article&p_p_col_count=3
	['training refresh']

1791 / 1822 - https://ww

1812 / 1822 - https://www.liferay.com/company/gartner/magic-quadrant-digital-experience-platforms?p_p_col_id=column-1&_15_groupId=10182&p_p_id=15&_15_articleId=231769272&_15_redirect=https://www.liferay.com/company/gartner/magic-quadrant-digital-experience-platforms&p_p_lifecycle=0&_15_struts_action=/journal/edit_article&p_p_col_count=1&p_p_state=maximized&p_p_auth=YgoKdEpP
	["gartner's evaluation", 'ability', 'DXP', 'free download', 'completeness', 'vision']

1813 / 1822 - https://customer.liferay.com/documentation/7.0/develop/tutorials/-/official_documentation/tutorials/service-builder-persistence
	['service builder persistence', 'portal', 'tutorials']

1814 / 1822 - https://www.liferay.com/en_AU/solutions/industries/healthcare
	['healthcare', 'integrate systems', 'hospital', 'record and enhance collaboration', 'intranet', 'software platform', 'electronic health records', 'portal', 'hospital & healthcare organizations', 'healthcare intranet']

1815 / 1822 - https://customer.liferay.c

In [55]:
# Save URLs without any keywords
# This is meant to be a debugging output
with open('./output/URLs with NO keywords.txt', 'w', encoding='utf-8') as w:
    for counter, url in enumerate(sorted(urls_without_keywords_list)):
        w.write("{}) {}\n".format(counter, url))

In [56]:
from collections import defaultdict, Counter       

def compute_score_with_df(user_visits_df, global_visits_df, start_date, debug=False):
    """
    Description: This will take the sites that a user has visited, and perform TF-IDF calculations
    to obtain an output score. Note that we also factor in global visits as well.
    
    Input: 
    user_visits_df - This is the dataframe corresponding to an individual's activites
    glbal_visits_df - This is the dataframe for all user's activites
    
    Output:
    ranked_interest_df - Ranked interests. Format: Topic of Interest, Score, Corresponding URLs Visited
    user_visits_df - The user df, but added with keywords associated with the link
    
    """
    
    keyword_to_logscore = calculate_inverse_document_frequency(user_visits_df, global_visits_df, debug=False)
    
    columns = ['Topic of Interest', 'Score', 'Corresponding URLs Visited']
    ranked_interest_df = pd.DataFrame(columns=columns)

    # Iterate through all URLs the user has visited
    for index, entry in user_visits_df.iterrows():
        
        url = entry['normalized_url']        
        aggregate_keyword_list = url_lookup_cache.get(url, [])
        
        # Exponential Decay Factor - Calculate multiplier
        event_date = entry['eventdate']
        multiplier = calculateDecayMultiplier(event_date, start_date)
        
        # Iterate through the individual keywords extracted from the URL
        for keyword in aggregate_keyword_list:
            
            if not keyword:
                print("ERROR, EMPTY KEYWORD DETECTED!")
                print("URL: {}".format(url))
                print("aggregate_keyword_list: {}".format(aggregate_keyword_list))

            existing_row = ranked_interest_df[ranked_interest_df['Topic of Interest'] == keyword]

            if existing_row.empty:
                row = ranked_interest_df.shape[0]               
                ranked_interest_df.loc[row] = [keyword, (keyword_to_logscore[keyword] * multiplier), np.NaN]
                ranked_interest_df['Corresponding URLs Visited'] = ranked_interest_df['Corresponding URLs Visited'].astype(object)
                ranked_interest_df.at[row, 'Corresponding URLs Visited'] = [url]
            else:
                                
                index = ranked_interest_df.index[ranked_interest_df['Topic of Interest'] == keyword]
                column = ranked_interest_df.columns.get_loc('Score')
                updated_score = ranked_interest_df.iloc[index, column].values[0] + (keyword_to_logscore[keyword] * multiplier)
                ranked_interest_df.iloc[index, column] = updated_score
                
                column = ranked_interest_df.columns.get_loc('Corresponding URLs Visited')
                updated_urls = ranked_interest_df.iat[index.values[0], column]
                updated_urls.append(url)                
                ranked_interest_df.iat[index.values[0], column] = updated_urls

    # Sort by logscore before returning
    ranked_interest_df['Score'] = pd.to_numeric(ranked_interest_df['Score'])
    ranked_interest_df.sort_values(by=['Score'], ascending=False, inplace=True)
    
    #
    user_visits_df = pd.merge(user_visits_df, url_to_keyword_df, how='left', on='normalized_url', copy=True)
    user_visits_df = user_visits_df.drop(['analyticsclient.generated_keywords', \
                                          'manual.keywords', \
                                          'analyticsclient.merged_title', \
                                          'analyticsclient.merged_description', \
                                          'analyticsclient.merged_keywords'], axis=1)
        
    return ranked_interest_df, user_visits_df  

In [57]:
if False:
    temp_df = clean_df.sample(500)
    temp_df = temp_df[~temp_df['Ignore URL']]

    #df1, df2 = calculateTopicsOfInterestOnDfOfUsers(temp_df, clean_df, start_date, debug=False)

    df1, df2 = compute_score_with_df(temp_df, clean_df, start_date, debug=False)

    #df = pd.DataFrame(df)


    playFinishedSound()

    #display(type(df))

    with pd.option_context('display.max_rows', 200, 'display.max_columns', None, 'display.max_colwidth', 500):
        display(df2)
        display(df1)

In [58]:
# TODO: Future optimiziation, only count the user visited keywords
def calculate_inverse_document_frequency(user_visits_df, global_df, user_weight=1.0, global_weight=2.0, 
                                         debug=False, fast=True, save_results=False):
    import math
    import numpy
    import operator
    from collections import defaultdict
    from math import log
    
    label_document_count = defaultdict(float)
    label_document_idf = dict()
    document_count = (len(user_visits_df) * user_weight) + (len(global_df) * global_weight)
    
    user_keywords = set()

    if debug:
        print("Document Count: {}".format(document_count))
    
    keyword_to_weighted_frequency_per_document = defaultdict(float)
    
    ###############
    # User Counts #
    ###############
    
    # Iterate through URLs and extract keywords
    for index, user_visit_entry in user_visits_df.iterrows():
        
        # Skip "Ignore URL" entries
        if user_visit_entry['Ignore URL']:
            if debug:
                print("Ignoring URL, skipping: {}".format(user_visit_entry['normalized_url'] ))
            continue
        
        normalized_url = user_visit_entry['normalized_url']        
        aggregate_keyword_list = url_lookup_cache.get(normalized_url, [])
        
        # Iterate through list and update weights
        for keyword in aggregate_keyword_list:
            keyword_to_weighted_frequency_per_document[keyword] += user_weight
            user_keywords.add(keyword)
        
        if debug:
            if len(aggregate_keyword_list) == 0:
                print("[WARNING: User Counts] - 0 keywords detected for url: {}".format(normalized_url))
        
    if debug:
        print("User Counts:")
        for entry in sorted(keyword_to_weighted_frequency_per_document.items(), key=operator.itemgetter(1)):
            print("\t{} => {}".format(entry[0], entry[1]))
    
    #################
    # Global Counts #
    #################
    
    keyword_to_logscore = dict()
    
    counter = 1
    
    # XXX: maybe don't need to do unique, assume it's already unique?
    
    # Iterate through URLs and extract keywords
    for normalized_url in global_df['normalized_url'].unique():
        
        # Skip "Ignore URL" entries
        if user_visit_entry['Ignore URL']:
            if debug:
                print("Ignoring URL, skipping: {}".format(user_visit_entry['normalized_url'] ))
            continue
                
        #normalized_url = user_visit_entry['normalized_url']            
        aggregate_keyword_list = url_lookup_cache.get(normalized_url, [])
        if debug: print("Aggregate Keyword List: {}".format(aggregate_keyword_list))
        
        # Iterate through list and update weights
        for keyword in aggregate_keyword_list:
            keyword_to_weighted_frequency_per_document[keyword] += global_weight
            #print('Updating keyword count')
            #print("{} => {}".format(keyword, keyword_to_weighted_frequency_per_document[keyword]))
        
        if debug:
            if len(aggregate_keyword_list) == 0:
                print("[WARNING: Global Counts] - 0 keywords detected for url: {}".format(normalized_url))
        
        counter += 1
    
    if debug:
        print("Global Counts:")
        for entry in sorted(keyword_to_weighted_frequency_per_document.items(), key=operator.itemgetter(1)):
            print("\t{} => {}".format(entry[0], entry[1]))
                    
    # Convert to Inverse-Log-Scores
    if debug:
        print("Calculating Inverse Log Scores")

    for entry in sorted(keyword_to_weighted_frequency_per_document.items(), key=operator.itemgetter(1), reverse=True):
        
        if entry[0] in user_keywords:
            inverse_log_score = math.log((document_count + 1) / (entry[1] + 1))
            if debug:
                print("{} => {} ({})".format(entry[0], inverse_log_score, entry[1]))

            keyword_to_logscore[entry[0]] = inverse_log_score

    # This is if we want to write the results to an output file
    
    if save_results:
        with open('Inverse Document Frequency Results.txt', 'w', encoding='utf-8') as w: 
            sorted_x = sorted(keyword_to_logscore.items(), key=operator.itemgetter(1), reverse=True)
            
            for word, log_score in sorted_x:
                w.write("{:30} : {:>5.4f}\n".format(word, log_score))
        
        
    return keyword_to_logscore

In [59]:
def calculateTopicsOfInterestOnDfOfUsers(filter_grouped_user_df, global_df, start_date, debug=False):
    """
    Inputs:
        filter_grouped_user_df - This contains all the users who we're trying to calculate the topics of interest for.
                                 This should be pre-filtered by your own specified date range.
        global_df - This contains all user's viewing history, in the same time period as the filter_grouped_user_df
    
    Outputs:
        user_to_topics_of_interest_df - This is the list of (userid, analyticskey) to (Topics of Interest, scores)
        keyword_to_url_df - This is the user input with keyword list attached to it
    
    """
    
    counter = 1
    user_to_results = dict()
    columns = ['User ID', 'Analytics Key', 'Topic of Interest', 'Score', 'Corresponding URLs Visited']
    user_to_topics_of_interest_df = pd.DataFrame(columns=columns)
    all_keywords_to_url_df = pd.DataFrame()

    for userid_and_analytics_key_tuple, group in filter_grouped_user_df.groupby(['userid', 'analyticskey']):

        user_id = userid_and_analytics_key_tuple[0]
        analytics_key = userid_and_analytics_key_tuple[1]
        
        if debug: 
            print("\n{}) User ID: {} Analytics Key: {}".format(counter, user_id, analytics_key)) 
            
        score_df, user_with_keyword_df = compute_score_with_df(group, global_df, start_date)       
        score_df['User ID'] = user_id
        score_df['Analytics Key'] = analytics_key
        score_df = score_df[columns]
        user_to_topics_of_interest_df = user_to_topics_of_interest_df.append(score_df, ignore_index=True)

        if debug:
            display(user_with_keyword_df)
        
        all_keywords_to_url_df = all_keywords_to_url_df.append(user_with_keyword_df, ignore_index=True)
                
        if counter % 500 == 0:
            print('{} / {}'.format(counter, len(filter_grouped_user_df['userid'].unique())))

        counter += 1
    
    return user_to_topics_of_interest_df, all_keywords_to_url_df

In [60]:
from datetime import timedelta, datetime

def extractDateRange(df, start_date, date_range='day', debug=False):
    """
    Description:
    This takes in a dataframe, and extracts the rows where the eventdate field is within the date range specified.
    Note that the start_date is inclusive, so if you ask for start_date = Jan 1, and range='day', you get all the 
    data from only Jan 1.
    """
        
    end_date = start_date + DATE_RANGE_OPTIONS.get(date_range, date_range)
    
    if debug:
        print("Start Date: {}".format(start_date))
        print("Date Range: {}".format(date_range))
        print("End Date:   {}".format(end_date))
    
    df = df[(df['eventdate'] > start_date) & (df['eventdate'] < end_date)].sort_values(by='eventdate', ascending=True)
    
    if debug:
        print("Earliest Reported Date: {}".format(df.iloc[0]['eventdate']))
        print("Latest Reported Date:   {}".format(df.iloc[-1]['eventdate']))
    
    return df

# Testing code for function above:
#start_date = datetime(2018, 3, 14)
#end_date = datetime(2018, 4, 1)
#date_range = timedelta(30)

#temporary_df = extractDateRange(clean_df, start_date=start_date, date_range='week', debug=True)

#display(temporary_df)


def calculateDecayMultiplier(event_date, start_date, debug=False):
    day_difference = (start_date - event_date).days
    multiplier = DECAY_MULTIPLIER_BASE ** day_difference
    
    if debug:
        print("Start Date:   {}".format(start_date))
        print("Current Date: {}".format(event_date))
        print("Difference:   {}".format(day_difference))
        print("Multiplier:   {}".format(multiplier))
    
    return multiplier

In [61]:
def calculateInfoForAllIndividualUsers(user_df, global_df, start_date, end_date, time_period='day', debug=False):
    """
    This function will iterate through all the users from user_df, and return all the individual's scores
    """
    
    current_date = start_date
    all_users_to_topic_of_interest_df = pd.DataFrame()
    
    while current_date < end_date:
        print("current_date: {}".format(current_date))
        
        # We want to look 30-days back for calcuations
        date_range_filtered_user_df = extractDateRange(user_df, 
                                                       start_date=(current_date - INTEREST_CALCULATION_WINDOW_TIMEDELTA), 
                                                       date_range=(INTEREST_CALCULATION_WINDOW_TIMEDELTA + timedelta(1)), 
                                                       debug=False)
        date_range_filtered_global_df = extractDateRange(global_df, 
                                                         start_date=(current_date - INTEREST_CALCULATION_WINDOW_TIMEDELTA), 
                                                         date_range=(INTEREST_CALCULATION_WINDOW_TIMEDELTA + timedelta(1)), 
                                                         debug=False)
        
        # do Interest calculations for individuals
        user_to_topics_of_interest_df, user_keyword_subset_df = calculateTopicsOfInterestOnDfOfUsers(date_range_filtered_user_df, date_range_filtered_global_df, (current_date + timedelta(1)))        
        user_to_topics_of_interest_df['currdate'] = current_date

        # append to larger list
        all_users_to_topic_of_interest_df = all_users_to_topic_of_interest_df.append(user_to_topics_of_interest_df, ignore_index=True)

        current_date += timedelta(1)
        
    return all_users_to_topic_of_interest_df

def calculateInfoForAllIndividualUsersSaveToJSON(user_to_toi_and_score, file_location, debug=False):
    """
    
    
    """
    partition_key = datetime.today().strftime('%Y%m%d0000')
    output_file = open(file_location, 'w', encoding='utf-8')

    # Gameplan:
    # - Go through date/userid/analyticskey
    # - Go through each keyword & score
    # - Find all URLs & Counts that correspond to the keyword
    # - Save info as a JSON entry
    
    for curr_date, row in user_to_toi_and_score.groupby(['currdate']):
        
        partition_key = datetime.today().strftime('%Y%m%d0000')
        curr_date_string = curr_date.strftime("%Y-%m-%d")

        print("\ncurrdate: {}".format(curr_date_string))
        
        for userid_and_analytics_key, row2 in row.groupby(['User ID', 'Analytics Key']):
            user_id = userid_and_analytics_key[0]
            analytics_key = userid_and_analytics_key[1]
            
            if debug:
                print("User ID: {}".format(user_id))
                print("Analytics Key: {}".format(analytics_key))
                
            row2 = row2.sort_values(by=['Score'], ascending=False)
            
            user_to_keyword_info_list = []
                
            for toi_score, row3 in row2.groupby(['Topic of Interest', 'Score']):
                topic_of_interest = toi_score[0]
                score = toi_score[1]

                print("Topic of Interest: {}".format(topic_of_interest))
                print("Score: {}".format(score))
                
                # Generates [URL, View Count]
                url_to_view_count_df = row3['Corresponding URLs Visited'].apply(lambda x: pd.Series(x).value_counts()).T.reset_index()
                url_to_view_count_df.rename(columns={url_to_view_count_df.columns[0] : 'url', url_to_view_count_df.columns[1] : 'visitCount'}, inplace=True)
                
                if debug:
                    display(url_to_view_count_df)
                    
                # print(url_to_view_count_df.to_json(orient='records'))
                
                url_to_visit_count_list = []
                
                for index, url_visit_count in url_to_view_count_df.iterrows():
                    url = url_visit_count['url']
                    visit_count = url_visit_count['visitCount']
                    
                    if debug:
                        print("URL: {}".format(url))
                        print("visitCount: {}".format(visit_count))
                        
                    url_to_visit_count_list.append(OrderedDict([('url', url), 
                                                                ('visitCount', visit_count)]))


                user_to_keyword_info_list.append(OrderedDict([('name', topic_of_interest),
                                                              ('score', score), 
                                                              ('pagesVisited', url_to_visit_count_list)]))

            json_text = json.dumps(
                OrderedDict([('analyticsKey', analytics_key), 
                            ('partitionKey', partition_key),
                            ('userid', user_id),
                            ('Current Date', curr_date_string),
                            ('interests', user_to_keyword_info_list)]))

            output_file.write("{}\n".format(json_text))

In [62]:
def calculateInfoForAllSegmentsSaveToJSON(segment_to_toi_and_score_df, user_to_toi_and_score, score_threshold, file_location):
    
    partition_key = datetime.today().strftime('%Y%m%d0000')
    output_file = open(file_location, 'w', encoding='utf-8')
    
    for curr_date, row in segment_to_toi_and_score_df.groupby(['currdate']):
        curr_date_string = curr_date.strftime("%Y-%m-%d")
        print("\ncurrdate: {}".format(curr_date_string))       
        user_to_toi_and_score_filtered_by_date_df = user_to_toi_and_score[user_to_toi_and_score['currdate'] == curr_date]
        
        for segment_id, row2 in row.groupby(['segmentIdentifier']):
            print("\tsegmentIdentifier: {}".format(segment_id))
            user_to_toi_and_score_filtered_by_date_and_segment_id_df = getSegmentEntriesDf(user_to_toi_and_score_filtered_by_date_df, segment_id)
            #display(user_to_toi_and_score_filtered_by_date_and_segment_id_df)

            # Create veritcal list of [topic of interest, score]
            keyword_to_score_df = row2.drop(labels=['currdate', 'segmentIdentifier'], axis=1, inplace=False).T.reset_index().copy()
            keyword_to_score_df.columns.values[0] = 'Topic of Interest'
            keyword_to_score_df.columns.values[1] = 'Score'

            #with pd.option_context('display.max_rows', 1000, 'display.max_columns', None, 'display.max_colwidth', 2000):
            #    display(keyword_to_score_df)
                
            keyword_to_url_json_string_list = []

            # Iterate through the current date + segment users, to figure out corresponding URLs
            for index, row3 in keyword_to_score_df.iterrows():
                topic_of_interest = row3['Topic of Interest']
                score = row3['Score']

                

                # Skip NaN values
                if math.isnan(score):
                    #print("Skipping...")
                    continue
                    
                print("\t\t{} : {}".format(topic_of_interest, score))

                # Find corresponding users whose individual scores exceed the threshold
                # Need URL, uniqueVisitsCount
                url_to_counts = getUrlAndUniqueVisitsCount(user_to_toi_and_score_filtered_by_date_and_segment_id_df, topic_of_interest, score_threshold)

                #display(url_to_counts)
                url_to_view_count_list = []
    
                for url, view_count in url_to_counts.items():
                    url_to_view_count_entry = {
                        'url'        : url,
                        'visitCount' : view_count
                    }
                
                    url_to_view_count_list.append(url_to_view_count_entry)
                    
                keyword_entry = {
                    'name'         : topic_of_interest, 
                    'score'        : score, 
                    'pagesVisited' : url_to_view_count_list
                }
                
                keyword_to_url_json_string_list.append(keyword_entry)

            json_text = json.dumps({
                'partitionKey'          : partition_key,
                'segmentIdentifier'     : segment_id,
                'currdate'              : curr_date_string, 
                'interests'             : keyword_to_url_json_string_list
                })
            
            output_file.write("{}\n".format(json_text))
    
def getSegmentEntriesDf(df, segmentIdentifier):
    """
    This will only return rows that match the segmentIdentifier
    """
    
    only_segment_id_entries_df = pd.merge(segment_lookup_df, df, how='inner', left_on='datasourceindividualpk', right_on='User ID', sort=True)#.drop('value', 1)
    #display(only_segment_id_entries_df)
    
    return only_segment_id_entries_df
    
    
def getUrlAndUniqueVisitsCount(df, topic_of_interest, minimum_score_threshold, debug=False):
    
    url_to_unique_visits = OrderedDict()
    
    toi_df = df[(df['Topic of Interest'] == topic_of_interest) 
                & (df['Score'] >= minimum_score_threshold)]
    
    expanded_url_list = toi_df.set_index(['User ID'])['Corresponding URLs Visited'].apply(pd.Series).stack()
    expanded_url_list = pd.DataFrame(expanded_url_list).reset_index().drop(labels=['level_1'], axis=1, inplace=False)
    expanded_url_list.rename(columns={0 : 'Corresponding URLs Visited'}, inplace=True)
    # We are only getting unique: (userid, url) pairs
    no_duplicates_df = expanded_url_list.drop_duplicates(subset=['User ID', 'Corresponding URLs Visited'])
    count_url_visits = no_duplicates_df['Corresponding URLs Visited'].value_counts()
    
    if debug:
        display(toi_df)
        display(expanded_url_list)
        display(no_duplicates_df)
        display(count_url_visits)

    for url_count_tuple in count_url_visits.iteritems():
        url = url_count_tuple[0]
        count = url_count_tuple[1]
        url_to_unique_visits[url] = count
    
    return count_url_visits


In [63]:
def calculateSegmentWithDf(user_to_topic_of_interest_df, MINIMUM_SCORE_THRESHOLD):    
    user_to_toi_filtered_by_minimum_score = user_to_topic_of_interest_df[user_to_topic_of_interest_df['Score'] > MINIMUM_SCORE_THRESHOLD]
    keyword_to_count = user_to_topic_of_interest_df.groupby('Topic of Interest').count()
    keyword_to_count['Logscore'] = keyword_to_count['User ID'].apply(lambda x: math.log1p(x))
    keyword_to_count = keyword_to_count[['Logscore']]

    return keyword_to_count  

def calculateSegmentInfoFromIndividualDf(segment_name, user_to_toi_df, score_threshold, debug=False):
    """
    This will calculate the interest scores, and
    """
    
    user_to_toi_with_date_df = pd.DataFrame()
    
    # Filter by date
    for index, row in user_to_toi_df.groupby('currdate'):
        
        if debug:
            print("currdate: {}".format(index))
            display(row)
            
        segment_to_topic_of_interest_df = calculateSegmentWithDf(row, score_threshold)
        segment_to_topic_of_interest_transposed_df = segment_to_topic_of_interest_df.T
        segment_to_topic_of_interest_transposed_df['currdate'] = index
        user_to_toi_with_date_df = user_to_toi_with_date_df.append(segment_to_topic_of_interest_transposed_df, ignore_index=True)
        
        if debug:
            display(user_to_toi_with_date_df)

    # Move currdate column to front
    currdate_column = user_to_toi_with_date_df['currdate']
    user_to_toi_with_date_df.drop('currdate', axis=1, inplace=True)
    user_to_toi_with_date_df.insert(0, 'currdate', currdate_column)
    
    # Add Segment Name column
    user_to_toi_with_date_df.insert(1, 'segmentIdentifier', segment_name)
    
    return user_to_toi_with_date_df

def calculateAllSegmentInfo(user_to_toi_df, debug=False):
    """
    This function will return a DataFrame of all segments Topic of Interests and Scores
    """
    
    all_segment_info_df = pd.DataFrame()
    
    # Gameplan:
    # - Iterate through the list of segments
    #   * Filter user_to_toi_df so we only get the users from that segment
    # - Calculate segment toi & scores for that segment
    for segmentName, row in segment_lookup_df.groupby('segmentName'):
        display("segmentName: {}".format(segmentName))
        filtered_user_df = pd.merge(row, user_to_toi_df, how='inner', left_on='datasourceindividualpk', right_on='User ID')
        
        if debug:
            display(filtered_user_df)
            
        if filtered_user_df.shape[0] == 0:
            print("[WARNING] - Segment has 0 users! Skipping...")
            continue

        segment_toi_to_score_df = calculateSegmentInfoFromIndividualDf(segmentName, filtered_user_df, MINIMUM_TOPIC_OF_INTEREST_THRESHOLD_SCORE)
        all_segment_info_df = all_segment_info_df.append(segment_toi_to_score_df, ignore_index=True)
    
    
    
    # Move currdate & segmentIdentifier to front
    currdate_column = all_segment_info_df['currdate']
    segment_id_column = all_segment_info_df['segmentIdentifier']
    all_segment_info_df.drop('currdate', axis=1, inplace=True)
    all_segment_info_df.drop('segmentIdentifier', axis=1, inplace=True)
    all_segment_info_df.insert(0, 'currdate', currdate_column)
    all_segment_info_df.insert(1, 'segmentIdentifier', segment_id_column)
    
    return all_segment_info_df

## Pipeline with Output saved as JSON files

Steps:
* Filter out the group of users you want as a dataframe
* Pass in date range for calculations
* Write output files
 * Individual -> Topic of Interest (individual topics of interest.json)
 * Entire Segment -> Topic of Interest (segment topics of interest.json)
 * Segment URLs Contribution -> Topic of Interest (daily URL contribution to topics of interest.json)

In [None]:
%%time

start_date = START_DATE_DATETIME
end_date = END_DATE_DATETIME

temp_df = clean_df[(~clean_df['Ignore URL']) & (clean_df['eventdate'] >= (start_date - timedelta(1)))]

print("Calculating all Individual User's Info")
user_to_toi_and_score = calculateInfoForAllIndividualUsers(temp_df, clean_df, start_date, end_date, 'day', True)

print("Saving Individual Info to JSON file")
calculateInfoForAllIndividualUsersSaveToJSON(user_to_toi_and_score, './output/individual.json', debug=False)

print("Calculating all Segment Info")
all_segment_info_df = calculateAllSegmentInfo(user_to_toi_and_score, debug=False)

print("Saving Segment Info to JSON file")
calculateInfoForAllSegmentsSaveToJSON(all_segment_info_df, 
                                      user_to_toi_and_score, 
                                      MINIMUM_TOPIC_OF_INTEREST_THRESHOLD_SCORE,
                                     './output/segment.json')

playFinishedSound()

Calculating all Individual User's Info
current_date: 2018-05-04 00:00:00
500 / 887
current_date: 2018-05-05 00:00:00
500 / 1076
1000 / 1076
current_date: 2018-05-06 00:00:00
500 / 1261
1000 / 1261
current_date: 2018-05-07 00:00:00
500 / 1353
1000 / 1353
current_date: 2018-05-08 00:00:00
500 / 1462
1000 / 1462
current_date: 2018-05-09 00:00:00
500 / 1970
1000 / 1970
1500 / 1970
