# Challenge Description

Identify the longest journey a user
took before reaching TripAdvisor. \
Provide a comprehensive analysis of this journey, explaining what it
may reveal about the user's behavior and intentions.

# Imports

In [31]:
import pandas as pd
from collections import Counter

import plotly.express as px

# Functions

In [15]:
def get_longest_journey(user_journeys: pd.DataFrame) -> pd.Series:
    '''Return the column of ``user_journeys`` with the most non-null cells.

    Each column is treated as one journey, and its length is the number of
    non-null entries it holds. The selected column is returned unchanged,
    so any null padding it contains is still present.
    '''
    steps_per_journey = user_journeys.count()
    return user_journeys[steps_per_journey.idxmax()]

In [39]:
def get_pages_visited(user_journeys: pd.Series, exclude: tuple = ('tripadvisor',)) -> list:
    '''Return the pages visited in one journey, sorted alphabetically.

    Despite the plural parameter name, this expects a single journey: a
    Series of page names that may contain nulls. Repeated visits are kept,
    so a page appears once per visit.

    Args:
        user_journeys: The journey to inspect; null entries are dropped.
        exclude: Collection of page names to leave out of the result.
            Defaults to the destination page itself, ``'tripadvisor'``.
            Pass a tuple or list, not a bare string.

    Returns:
        The remaining page names in ascending order.
    '''
    return sorted(
        page for page in user_journeys.dropna() if page not in exclude
    )

# Data

In [40]:
# Load the user-journey table from the parquet file.
user_journeys = pd.read_parquet('data/complete_user_journeys.parquet')
# Keep only the column with the most non-null entries, i.e. the longest journey.
longest_journey = get_longest_journey(user_journeys)
# Sorted page names of that journey, with 'tripadvisor' itself removed.
pages_visited = get_pages_visited(longest_journey)

# Visualizations 

In [56]:
def plot_most_common_by_category(pages_visited: list, categories: 'list | None' = None) -> None:
    '''Show a bubble chart of the most frequent pages, colored by category.

    The (up to) 25 most common entries of ``pages_visited`` are plotted with
    the page on the x axis and its visit count on the y axis; the bubble
    size also encodes the count.

    Args:
        pages_visited: Page names, one entry per visit.
        categories: Optional category label for each plotted page, in
            descending-frequency order (the order of
            ``Counter.most_common``). When omitted, a built-in list of 25
            labels is used.

    Raises:
        ValueError: If fewer category labels than plotted pages are given.
    '''
    # Most frequent pages first, capped at 25 rows.
    top_pages = Counter(pages_visited).most_common(25)
    value_counts = pd.DataFrame(top_pages, columns=['Value', 'Count'])
    size = value_counts['Count'].tolist()

    if categories is None:
        # NOTE(review): these labels are matched to the pages purely by
        # position and look hand-assigned for one specific dataset;
        # re-check them whenever the input data changes.
        categories = [
            'search', 'travel', 'consumer / finance', 'travel', 'travel',
            'travel', 'travel', 'travel', 'travel', 'travel',
            'consumer / finance', 'travel', 'travel', 'consumer / finance',
            'social media', 'travel', 'social media', 'travel', 'travel',
            'travel', 'travel', 'travel', 'consumer / finance',
            'consumer / finance', 'travel',
        ]

    if len(categories) < len(value_counts):
        raise ValueError(
            f'expected at least {len(value_counts)} category labels, '
            f'got {len(categories)}'
        )
    # With fewer than 25 distinct pages, the label list must be trimmed to
    # the number of rows or plotly rejects the mismatched lengths.
    color = list(categories[:len(value_counts)])

    # Create a bubble chart using Plotly Express
    fig = px.scatter(value_counts,
                     x='Value',
                     y='Count',
                     size=size,
                     title='Value Frequencies by category',
                     color=color)
    fig.show()

In [55]:
# Draw the bubble chart for the pages of the longest journey.
plot_most_common_by_category(pages_visited)

# Conclusions

This longest journey covers everything needed to prepare for a trip:
from sorting out a visa and a driver's license, to buying supplies on Amazon and Costco, checking finances on Chase, reserving hotels, and using an American Express card. The user even visits airline sites to buy tickets to go scuba diving. This is how an Indian person would prepare for their vacation, using TripAdvisor's advice for the trip.