# Challenge Description

Analyze user engagement and retention with respect to TripAdvisor links.
Are there particular features or pages that lead to higher engagement? 
Identify and visualize drop-off points or
areas where users might abandon the journey to TripAdvisor.

# Imports

In [1]:
import os
import pandas as pd
from collections import Counter

import plotly
import tldextract

# Functions

In [2]:
def get_retention(data: pd.DataFrame, 
                  n_file: int = 0) -> pd.DataFrame:
    '''Return, for every TripAdvisor row, whether the next row is also TripAdvisor.

    A row is treated as a TripAdvisor visit when its ``domain`` value equals
    ``'tripadvisor'``. For each such row (except the very last row of
    ``data``, which has no successor) the result holds the cleaned
    ``targeturl`` and an ``engagement`` flag that is True when the following
    row is on TripAdvisor too and False otherwise.

    Parameters
    ----------
    data : DataFrame with at least the columns ``domain`` and ``targeturl``.
        The frame is only read; it is not modified.
    n_file : currently unused; kept so existing callers keep working.

    Returns
    -------
    DataFrame with columns ``link`` and ``engagement``, one row per
    TripAdvisor visit, in the same order as ``data``.

    Side effects
    ------------
    Writes the result to ``data/retention.parquet``; each call overwrites
    the file written by the previous call.
    '''
    # One vectorised comparison instead of a Python-level .iloc lookup per row.
    on_tripadvisor = (data['domain'] == 'tripadvisor').to_numpy(dtype=bool)

    # Pair every row with its successor; the last row has none and is skipped.
    is_visit = on_tripadvisor[:-1]
    next_on_tripadvisor = on_tripadvisor[1:]

    # Clean only the links that end up in the result, working on a derived
    # Series so the caller's ``targeturl`` column is left untouched.
    visited = data['targeturl'].iloc[:-1][is_visit]
    links = visited.apply(get_clean_link).tolist()
    engagement = next_on_tripadvisor[is_visit]

    retention = pd.DataFrame({'link': links, 'engagement': engagement})
    # Kept from the original as a safeguard: drops rows where every column is
    # missing. ``engagement`` is always a bool, so normally nothing is dropped.
    retention = retention.dropna(how='all')
    retention.to_parquet('data/retention.parquet')

    return retention

In [3]:
def get_clean_data(file: str) -> pd.DataFrame:
    '''Load the previously cleaned version of ``file``.

    The cleaned data is expected at ``data/clean_<file>`` in parquet format.

    Parameters
    ----------
    file : name of the raw data file, e.g. ``'data_0.parquet'``.

    Returns
    -------
    The DataFrame read from ``data/clean_<file>``.

    Raises
    ------
    FileNotFoundError
        If the cleaned file does not exist. Previously the function fell
        through and returned ``None``, which only surfaced later as an
        unrelated error in the caller.
    '''
    clean_path = f'data/clean_{file}'
    if not os.path.exists(clean_path):
        raise FileNotFoundError(f'cleaned data file not found: {clean_path}')
    return pd.read_parquet(clean_path)

In [4]:
def get_clean_link(url: str) -> str:
    '''Strip scheme, host and separator fragments from a TripAdvisor URL.

    Every occurrence of each fragment below is deleted from ``url``, one
    fragment after another in the listed order, and what is left is returned.

    Notes on the current behaviour:
    * all ``/`` are removed, so path segments are joined together
      (``.well-known/change-password`` -> ``.well-knownchange-password``);
    * fragments are removed anywhere in the string, not only in the host part;
    * ``http:`` is not in the list, so it is left in place;
    * ``'https//'`` (no colon) does not match a regular ``https://`` prefix;
      that prefix is removed by the later ``'/'`` and ``'https:'`` entries.
    '''
    fragments = ('https//', 'www.', 'tripadvisor', '.com', '.co.uk', '/', 'https:', '#')
    cleaned = url
    for fragment in fragments:
        # Splitting on the fragment and re-joining deletes all its occurrences.
        cleaned = ''.join(cleaned.split(fragment))
    return cleaned

# Data Import

In [9]:
# Build one retention table per input file, then combine them.
# The per-file frames are collected first and concatenated once. This avoids
# seeding the result with an empty DataFrame (whose effect on the resulting
# dtypes depends on the pandas version) and avoids re-copying the
# accumulated rows on every iteration.
# NOTE: the original row labels are kept (no ignore_index), so index values
# repeat across files.
retention_parts = []
for n in range(2):
    file = f'data_{n}.parquet'
    data = get_clean_data(file)
    new_retention = get_retention(data)
    retention_parts.append(new_retention)
retention = pd.concat(retention_parts)

# Split Data

In [7]:
# Display the retention table built in the data-import step.
retention

Unnamed: 0,link,engagement,link.1,engagement.1,link.2,engagement.2,link.3,engagement.3,link.4,engagement.4,...,link.5,engagement.5,link.6,engagement.6,link.7,engagement.7,link.8,engagement.8,link.9,engagement.9
0,restaurant_review-g12909146-d4444636-reviews-t...,False,ShowUserReviews-g44535-d120915-r613305115-Home...,False,Attractions-g34941-Activities-Fayetteville_Geo...,True,Attractions-g34126-Activities-Cedar_Key_Florid...,False,,False,...,Attractions-g49022-Activities-Charlotte_North_...,False,Attractions-g60745-Activities-Boston_Massachus...,False,attraction_review-g33026-d12785369-reviews-hel...,False,showtopic-g28958-i237-k3226107-visiting_portla...,False,Restaurant_Review-g34227-d898374-Reviews-Smoke...,False
1,hotel_review-g44030-d295308-reviews-hotel_ches...,False,Hotel_Review-g38834-d92157-Reviews-Holiday_Inn...,False,Attraction_Review-g34941-d4502071-Reviews-Lake...,False,Attraction_Review-g34618-d117349-Reviews-The_R...,False,Attraction_Review-g35805-d12814690-Reviews-Ult...,False,...,Attractions-g49022-Activities-c42-t228-Charlot...,False,Restaurant_Review-g60805-d4371533-Reviews-Char...,False,attractions-g33026-activities-san_luis_obispo_...,False,showtopic-g60878-i74-k10014740-good_seafood_bo...,True,Restaurant_Review-g34372-d21273749-Reviews-Viv...,False
2,hotel_review-g44030-d295308-reviews-hotel_ches...,False,Restaurant_Review-g49092-d462083-Reviews-Devin...,False,Attractions-g35148-Activities-Newnan_Georgia.html,True,Attraction_Review-g34675-d2056326-Reviews-The_...,False,RestaurantsNear-g49022-d3975989-Spectrum_Cente...,False,...,Attraction_Review-g49022-d108892-Reviews-Disco...,False,locationphotodirectlink-g55711-d114946-i871163...,True,attractions-g32814-activities-oceano_san_luis_...,True,showtopic-g60878-i74-k10014740-o10-good_seafoo...,False,Attraction_Products-g187870-d191175-Piazza_San...,False
3,.well-knownchange-password,True,SmartDeals-g60763-zft6226-New_York_City_New_Yo...,True,Attraction_Review-g35148-d15142148-Reviews-Sha...,True,ShowUserReviews-g1066443-d13505910-r568087527-...,False,RestaurantsNear-g49022-d3975989-Spectrum_Cente...,False,...,Hotel_Review-g3200043-d9762283-Reviews-Nickelo...,False,locationphotodirectlink-g55711-d114946-i871163...,True,attraction_review-g32814-d6211798-reviews-stev...,False,showtopic-g143011-i6453-k9023041-capitol_reef_...,False,Attraction_Products-g187870-d191175-Piazza_San...,False
4,settings-cp,True,SmartDeals?gclid=Cj0KCQiA0oagBhDHARIsAI-BbgdX4...,True,Attraction_Review-g35148-d8651403-Reviews-Carl...,True,Restaurants-g35546-Nampa_Idaho.html,False,Restaurant_Review-g49673-d7206790-Reviews-Cevi...,False,...,RestaurantsNear-g60864-d89054-Hyatt_Centric_Fr...,True,locationphotodirectlink-g55711-d114946-i560120...,True,Restaurant_Review-g60740-d4816053-Reviews-Sher...,False,showtopic-g143031-i2563-k8945968-road_trip_fro...,False,Attraction_Products-g187870-t11908-zfg11872-a_...,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5045,,,,,,,,,,,...,,,,,,,,,,
5046,,,,,,,,,,,...,,,,,,,,,,
5047,,,,,,,,,,,...,,,,,,,,,,
5048,,,,,,,,,,,...,,,,,,,,,,


In [10]:
# Rows where the visit was followed by another TripAdvisor row.
higher_engagement = retention.loc[retention['engagement'].eq(True)]
higher_engagement

Unnamed: 0,link,engagement
3,.well-knownchange-password,True
4,settings-cp,True
5,forgotpassword-e__2f__settings__2d__cp,True
6,memberresetpassword?resetflowemail=REMOVED&tt=...,True
7,passwordreset-a_prq.g1f342387a3bd64a16982cd93d...,True
...,...,...
3097,Restaurant_Review-g776158-d9715523-Reviews-Coc...,True
3098,Restaurant_Review-g1202633-d2541274-Reviews-Ti...,True
3100,ShowTopic-g147327-i787-k13614073-How_is_the_we...,True
3103,Travel-g147327-s302Martinique:Caribbean:Gettin...,True


In [12]:
# Rows where the visit was NOT followed by another TripAdvisor row.
drop_off = retention.loc[retention['engagement'].eq(False)]
drop_off

Unnamed: 0,link,engagement
0,restaurant_review-g12909146-d4444636-reviews-t...,False
1,hotel_review-g44030-d295308-reviews-hotel_ches...,False
2,hotel_review-g44030-d295308-reviews-hotel_ches...,False
9,passwordreset,False
10,hotels-g61000-yosemite_national_park_californi...,False
...,...,...
3114,Attraction_Review-g42139-d272245-Reviews-Belle...,False
3115,Restaurants-g42139-Detroit_Michigan.html,False
3116,Restaurants-g29556-Ann_Arbor_Michigan.html,False
3117,Restaurant_Review-g42139-d629028-Reviews-Vicen...,False


In [13]:
# The 25 most frequent links among engaged visits, as (link, count) pairs.
higher_engagement_patterns = Counter(higher_engagement['link']).most_common(25)

In [14]:
# The 25 most frequent links among drop-off visits, as (link, count) pairs.
drop_off_patterns = Counter(drop_off['link']).most_common(25)

# Conclusion

TripAdvisor engagement increases or drops off depending on the reviews and comments from other people about the attraction/place the user is viewing: a good review increases engagement, while a bad review tends to lead to a drop-off.