# Google Trends Visualization

In this notebook we interpret the Google Trends (GTrends) data collected for chess players and their countries.

In [4]:
# Imports

import pandas as pd
import numpy as np
import sqlite3

from sqlalchemy import create_engine, text
from IPython.display import Image

from lets_plot import *
LetsPlot.setup_html()

from plotnine import ggplot, aes, geom_line, geom_point, geom_vline, geom_text, theme, element_text, labs, theme_minimal, element_rect

In this first block we retrieve the SQL database tables containing the Google Trends data for both countries and players, together with the FIDE table.

In [11]:
# Location of the project's SQLite database file
db_name = "/files/ds105a-2024-project-fork_force/data/chess.db"

# Open a connection that the queries below share
conn = sqlite3.connect(db_name)

# Read each table in full into its own DataFrame
# (country trends, player trends, FIDE table — in that order)
df_country, df_player, df_fide = (
    pd.read_sql(query, conn)
    for query in (
        'SELECT * FROM country_gtrends',
        'SELECT * FROM players_gtrends',
        'SELECT * FROM fide',
    )
)



      Unnamed: 0  fide_id                 name federation  \
0              0  1503014     Carlsen, Magnus      Norway   
1              1  1503014     Carlsen, Magnus      Norway   
2              2  1503014     Carlsen, Magnus      Norway   
3              3  1503014     Carlsen, Magnus      Norway   
4              4  1503014     Carlsen, Magnus      Norway   
...          ...      ...                  ...        ...   
1779        1779  5000017  Anand, Viswanathan       India   
1780        1780  5000017  Anand, Viswanathan       India   
1781        1781  5000017  Anand, Viswanathan       India   
1782        1782  5000017  Anand, Viswanathan       India   
1783        1783  5000017  Anand, Viswanathan       India   

      world_rank_active_players      date  standard   rapid   blitz  
0                             1  2025-Jan      2831  2819.0  2889.0  
1                             1  2024-Dec      2831  2838.0  2890.0  
2                             1  2024-Nov      2831  2825

In [None]:
def album_trend_graph(artist_name):
    """Plot a Google Trends time series with release dates marked on it.

    The trend values are drawn as a black line. Every release gets a dashed
    vertical line and a dot on the x axis, both coloured by ``album_type``.
    Releases whose ``album_type`` is not ``'single'`` are additionally
    labelled with their name near the top of the plot.

    NOTE(review): ``SQL_to_DF`` is not defined in the visible part of this
    notebook, and the album/artist vocabulary does not match the chess tables
    loaded above — confirm this cell belongs here and that the helper exists.

    Parameters
    ----------
    artist_name : str
        Identifier passed to ``SQL_to_DF``. Underscores are replaced with
        spaces for the plot title, and the raw value is used in the caption.

    Returns
    -------
    plotnine.ggplot
        The assembled (not yet rendered) plot object.
    """
    # SQL_to_DF returns three frames; the first (tracks) is not used here.
    # Presumably the albums frame has 'release_date', 'name', 'album_type' and the
    # trends frame has 'date', 'trend_value' — those are the columns read below.
    _, df_albums, df_trends = SQL_to_DF(artist_name)

    # Parse dates and coerce trend values to numbers BEFORE dropping rows, so
    # entries that fail to parse (coerced to NaT/NaN) are removed as well.
    # .assign() builds new frames, which avoids writing into a filtered slice
    # (SettingWithCopyWarning).
    df_trends = df_trends.assign(
        date=pd.to_datetime(df_trends['date'], format='%b %Y', errors='coerce'),
        trend_value=pd.to_numeric(df_trends['trend_value'], errors='coerce'),
    ).dropna(subset=['date', 'trend_value'])

    df_albums = df_albums.assign(
        release_date=pd.to_datetime(df_albums['release_date'], errors='coerce'),
        name=df_albums['name'].fillna('Unknown Album').astype(str),
    ).dropna(subset=['release_date'])

    # Only releases that are not singles receive a text label
    df_albums_filtered = df_albums[df_albums['album_type'] != 'single']

    # Place labels near the top of the series: alternate between 90% and 95%
    # of the peak so neighbouring labels do not sit on the same baseline.
    # (The offset is proportional to the peak; a fixed 0.05 would be invisible
    # on a typical trends scale.)
    max_trend_value = df_trends['trend_value'].max()
    stagger = 0.05 * (np.arange(len(df_albums_filtered)) % 2)
    df_albums_filtered = df_albums_filtered.assign(
        y_position=max_trend_value * (0.9 + stagger)
    )

    # Takes the artist's file name and replaces "_" with " "
    graph_artist_name = artist_name.replace('_', ' ')

    # Create graph
    plot = (
        ggplot(df_trends, aes(x='date', y='trend_value'))
        + geom_line(size=1, color='black')  # Google Trends data
        + geom_vline(
            data=df_albums,
            mapping=aes(xintercept='release_date', color='album_type'),  # Marks release dates
            linetype='dashed',
            size=0.7
        )
        + geom_point(  # Adds a dot where each release date meets the x axis
            data=df_albums,
            mapping=aes(x='release_date', y=0, color='album_type'),
            size=2
        )
        + geom_text(  # Labels each non-single release with its title
            data=df_albums_filtered,
            mapping=aes(x='release_date', y='y_position', label='name'),
            color='black',
            size=8,
            angle=45,  # Rotate the text to match the x-axis labels
            ha='center'  # Center horizontally
        )
        + theme_minimal()
        + labs(  # Labels
            title= f"{graph_artist_name} Trend Analysis with Album Releases",
            subtitle="Analyzing trends over time with key album drops",
            x="Date",
            y="Google Trend Value",
            caption=f"Source: {artist_name}_data.db",
            color="Release Type"
        )
        + theme(
            axis_text_x=element_text(rotation=45, hjust=1),  # Rotate x-axis tick labels
            figure_size=(10, 6),
            panel_background=element_rect(fill='white'),  # White panel background
            plot_background=element_rect(fill='white')  # White overall plot background
        )
    )
    return plot