# Challenge Description

Analyze user engagement and retention with respect to TripAdvisor links.
Are there particular features or pages that lead to higher engagement? 
Identify and visualize drop-off points or
areas where users might abandon the journey to TripAdvisor.

# Imports

In [1]:
import os
import pandas as pd
from collections import Counter

import plotly
import tldextract

# Functions

In [2]:
def get_retention(data: pd.DataFrame,
                  n_file: int = 0) -> pd.DataFrame:
    '''Flag, for each tripadvisor row, whether the next row is tripadvisor too.

    Rows are compared with the row immediately following them in the
    frame's current order. A row whose ``domain`` is ``'tripadvisor'``
    yields one output row:

    * ``link``: its ``targeturl`` passed through ``get_clean_link``
    * ``engagement``: ``True`` if the following row's ``domain`` is also
      ``'tripadvisor'``, otherwise ``False``

    The last row of ``data`` is never reported because it has no
    following row to compare against.

    Args:
        data: frame with ``domain`` and ``targeturl`` columns. It is not
            modified by this function.
        n_file: currently unused; kept so existing callers keep working.

    Returns:
        A DataFrame with the columns ``link`` and ``engagement``.

    Side effects:
        Writes the result to ``data/retention.parquet``, overwriting any
        previous file at that path.
    '''
    domains = data['domain'].tolist()
    urls = data['targeturl'].tolist()

    links = []
    engagement = []

    # zip() stops at the shortest input (domains[1:]), so the final row,
    # which has no successor, is skipped automatically.
    for url, domain, next_domain in zip(urls, domains, domains[1:]):
        if domain != 'tripadvisor':
            continue
        # Only the URLs that are actually reported get cleaned, so the
        # caller's frame is left untouched and unrelated rows with a
        # non-string URL cannot break the run.
        links.append(get_clean_link(url))
        engagement.append(next_domain == 'tripadvisor')

    retention = pd.DataFrame({'link': links, 'engagement': engagement})
    retention.to_parquet('data/retention.parquet')

    return retention

In [3]:
def get_clean_data(file: str) -> pd.DataFrame:
    '''Load the cached cleaned dataset stored at ``data/clean_{file}``.

    Only the cached-file path is implemented here: this function does
    not clean raw data and does not write anything.

    Args:
        file: file name of the dataset, e.g. ``'data_0.parquet'``.

    Returns:
        The DataFrame read from ``data/clean_{file}``.

    Raises:
        FileNotFoundError: if ``data/clean_{file}`` does not exist.
            Raising here replaces an implicit ``None`` return that made
            callers fail later with an unrelated error.
    '''
    clean_path = f'data/clean_{file}'
    if not os.path.exists(clean_path):
        raise FileNotFoundError(
            f'Cleaned data file not found: {clean_path}')
    return pd.read_parquet(clean_path)

In [4]:
def get_clean_link(url: str) -> str:
    '''Reduce a tripadvisor URL to a compact page identifier.

    Every fragment in the list below is removed wherever it occurs in
    the string, in list order. Because ``'/'`` is one of the fragments,
    path segments are joined together (``'/a/b'`` becomes ``'ab'``).

    Args:
        url: the URL to clean.

    Returns:
        ``url`` with all listed fragments removed.
    '''
    # Order matters: 'https:' has to be removed before 'http:',
    # otherwise an https URL would be left with a stray 's'.
    # 'http:' is included so plain-http URLs are cleaned the same way
    # as https ones instead of keeping an 'http:' prefix.
    for word in ['https//', 'www.', 'tripadvisor', '.com', '.co.uk', '/',
                 'https:', 'http:', '#']:
        url = url.replace(word, '')
    return url

# Data Import

In [5]:
# prepare and import all data.
# Each input file yields one retention frame; the frames are collected
# and concatenated once at the end. This avoids re-copying the growing
# result on every iteration and avoids concatenating onto an empty
# DataFrame. Row labels are kept as produced per file (no re-indexing).
retention_frames = []
for n in range(48):
    file = f'data_{n}.parquet'
    data = get_clean_data(file)
    new_retention = get_retention(data)
    retention_frames.append(new_retention)

retention = pd.concat(retention_frames)

# Split Data

In [6]:
# Display the combined retention table built in the import cell above
# (columns: link, engagement).
retention

Unnamed: 0,link,engagement
0,restaurant_review-g12909146-d4444636-reviews-t...,False
1,hotel_review-g44030-d295308-reviews-hotel_ches...,False
2,hotel_review-g44030-d295308-reviews-hotel_ches...,False
3,.well-knownchange-password,True
4,settings-cp,True
...,...,...
3124,ShowTopic-g147255-i208-k1017952-Medical_Insura...,False
3125,hotel_review-g319819-d8263739-reviews-the_lore...,False
3126,Attraction_Review-g45125-d12083987-Reviews-J_L...,False
3127,ShowUserReviews-g60713-d156469-r373115266-Gold...,False


In [7]:
# Keep the rows where the following row was also on tripadvisor,
# i.e. where engagement is True.
stayed_mask = retention['engagement'].eq(True)
higher_engagement = retention[stayed_mask]
higher_engagement

Unnamed: 0,link,engagement
3,.well-knownchange-password,True
4,settings-cp,True
5,forgotpassword-e__2f__settings__2d__cp,True
6,memberresetpassword?resetflowemail=REMOVED&tt=...,True
7,passwordreset-a_prq.g1f342387a3bd64a16982cd93d...,True
...,...,...
3107,Search?q=restaurants%20east%20asheville,True
3108,Restaurant_Review-g60742-d11045960-Reviews-Woo...,True
3118,Hotel_Review-g196508-d8274960-Reviews-Le_Casse...,True
3120,Restaurants-g187147-c8-zfn7236767-zfp6-Paris_I...,True


In [8]:
# Keep the rows where the following row was not on tripadvisor,
# i.e. where engagement is False.
left_mask = retention['engagement'].eq(False)
drop_off = retention[left_mask]
drop_off

Unnamed: 0,link,engagement
0,restaurant_review-g12909146-d4444636-reviews-t...,False
1,hotel_review-g44030-d295308-reviews-hotel_ches...,False
2,hotel_review-g44030-d295308-reviews-hotel_ches...,False
9,passwordreset,False
10,hotels-g61000-yosemite_national_park_californi...,False
...,...,...
3124,ShowTopic-g147255-i208-k1017952-Medical_Insura...,False
3125,hotel_review-g319819-d8263739-reviews-the_lore...,False
3126,Attraction_Review-g45125-d12083987-Reviews-J_L...,False
3127,ShowUserReviews-g60713-d156469-r373115266-Gold...,False


In [9]:
# Count how often each cleaned link appears among the engaged rows and
# keep the 25 most frequent as (link, count) pairs.
engaged_link_counts = Counter(higher_engagement['link'])
higher_engagement_patterns = engaged_link_counts.most_common(25)

In [10]:
# Count how often each cleaned link appears among the drop-off rows and
# keep the 25 most frequent as (link, count) pairs.
drop_off_link_counts = Counter(drop_off['link'])
drop_off_patterns = drop_off_link_counts.most_common(25)

# Conclusion

TripAdvisor engagement increases or drops off depending on the reviews and comments left by other people about the attraction/place the user is viewing: a good review increases engagement, while a bad review tends to lead to a drop-off.