# Import Libraries and Setup

In [4]:
# plotting
import matplotlib.pyplot as plt

# manipulate data
import pandas as pd

# more plotting
import seaborn as sns

# custom functions
from gtrends import og_get_daily_trend as get_daily_trend

# library to access google trends
from pytrends.request import TrendReq

# make pandas types compatible with matplotlib
pd.plotting.register_matplotlib_converters()

# change pandas plotting backend: DataFrame.plot()/Series.plot() will render
# with plotly from here on
pd.options.plotting.backend = "plotly"

# create a dictionary to unpack to update plotly plots settings
# why can't it be as easy as with ggplot...
# NOTE(review): plotly_dict is not referenced by any code visible in this
# section; presumably it is unpacked into a plotly layout update elsewhere.
plotly_dict = dict(
    font_family="Yanone Kaffeesatz",
    template="none",
    title={"x": 0.05, "xanchor": "left"},
)

# seaborn theme: "darkgrid" style with overridden axes background and grid
# colours (grey levels given as strings), plus the "deep" colour palette
sns.set_style(style="darkgrid", rc={"axes.facecolor": "0.9", "grid.color": "0.8"})
sns.set_palette(palette="deep")
%matplotlib inline

# default size (width, height) and resolution for every matplotlib figure
plt.rcParams["figure.figsize"] = [12, 6]
plt.rcParams["figure.dpi"] = 100

# Create Functions

In [2]:
from typing import Any, List


def query_data(
    keyword: str, overlap: List[int], trendreq: Any, start: str, end: str
) -> pd.DataFrame:
    """Retrieve daily Google Trends data for a keyword, one column per overlap.

    ``get_daily_trend`` is called once for every entry of ``overlap``; its
    ``"overlap"`` column is dropped and what remains is stored under the
    column name ``"{keyword}_{window}"``.

    Args:
        keyword: Search term to query.
        overlap: Overlap window sizes to query, one request per entry.
        trendreq: Google Trends client forwarded to ``get_daily_trend``
            (a pytrends ``TrendReq`` in this notebook).
        start: Start of the query range, forwarded unchanged.
        end: End of the query range, forwarded unchanged.

    Returns:
        A DataFrame with one column per overlap window; empty when
        ``overlap`` is empty.
    """
    df = pd.DataFrame()

    for window in overlap:
        # presumably exactly one (keyword) column is left once "overlap" is
        # dropped, which is what a single-column assignment needs -- confirm
        # against gtrends.og_get_daily_trend
        df[f"{keyword}_{window}"] = get_daily_trend(
            trendreq, keyword=keyword, start=start, end=end, overlap=window
        ).drop(["overlap"], axis=1)

    # the frame used to be built and then discarded; hand it to the caller
    return df


def create_df(first_trend, second_trend, kw: str, lags=(30, 100)):
    """Create a single DataFrame from two Google Trends 'queries'.

    The ``"overlap"`` column is dropped from both inputs, the remainders are
    joined on their indexes (``pd.merge`` default, i.e. an inner join) and
    the two keyword columns are renamed to ``<slug>_<lag>``, where ``slug``
    is ``kw`` lower-cased with spaces replaced by underscores.

    Args:
        first_trend: DataFrame holding a ``kw`` column and an ``"overlap"``
            column.
        second_trend: DataFrame with the same layout as ``first_trend``.
        kw: Name of the keyword column shared by both inputs.
        lags: Two labels used as column suffixes, the first for
            ``first_trend`` and the second for ``second_trend``. Defaults to
            ``(30, 100)``, the values that used to be hard-coded.

    Returns:
        The merged DataFrame with the renamed keyword columns.

    Raises:
        ValueError: If ``lags`` does not hold exactly two labels.
    """
    first_label, second_label = lags
    slug = kw.lower().replace(" ", "_")

    merged = pd.merge(
        first_trend.drop(["overlap"], axis=1),
        second_trend.drop(["overlap"], axis=1),
        left_index=True,
        right_index=True,
    )

    # pd.merge tells the two same-named keyword columns apart with its
    # default "_x" / "_y" suffixes; swap those for the lag labels
    return merged.rename(
        columns={
            f"{kw}_x": f"{slug}_{first_label}",
            f"{kw}_y": f"{slug}_{second_label}",
        }
    )


def get_data(
    keyword: str,
    overlap: List[int],
    trendreq: Any,
    start: str,
    end: str,
    save_csv: bool = True,
):
    """Fetch a keyword's daily trend for two overlap windows and merge them.

    One ``get_daily_trend`` request is made per entry of ``overlap``; the two
    results are combined with ``create_df`` and optionally written to disk.

    Args:
        keyword: Search term to query.
        overlap: Exactly two overlap window sizes, in the order they should
            appear in the result.
        trendreq: Google Trends client forwarded to ``get_daily_trend``
            (a pytrends ``TrendReq`` in this notebook).
        start: Start of the query range, forwarded unchanged.
        end: End of the query range, forwarded unchanged.
        save_csv: When true, write the merged frame to
            ``./data/<keyword lower-cased, spaces as underscores>.csv``.

    Returns:
        The merged DataFrame produced by ``create_df``.

    Raises:
        ValueError: If ``overlap`` does not contain exactly two windows.
    """
    # create_df merges exactly two trends, so fail early with a clear message
    if len(overlap) != 2:
        raise ValueError(
            f"overlap must contain exactly two windows, got {len(overlap)}"
        )

    # raw trends keep their "overlap" column here: create_df drops it itself
    first_trend, second_trend = (
        get_daily_trend(
            trendreq, keyword=keyword, start=start, end=end, overlap=window
        )
        for window in overlap
    )

    # NOTE: by default create_df suffixes the columns with 30 and 100,
    # whatever the actual values in ``overlap`` are
    df = create_df(first_trend=first_trend, second_trend=second_trend, kw=keyword)

    if save_csv:
        df.to_csv(f"./data/{keyword.lower().replace(' ', '_')}.csv")

    return df


def make_comparison_plot(
    keyword: str,
    df: pd.DataFrame,
    lags: List[int],
    save_fig: bool = True,
):
    """Plot the first two columns of ``df`` as overlaid lines on one axes.

    Args:
        keyword: Keyword shown in the figure title and used to build the
            output file name.
        df: Data to plot; its index is the x axis and its first two columns
            are drawn as semi-transparent lines.
        lags: Lag values; ``lags[0]`` labels the first column and ``lags[1]``
            the second, both in the legend and in the title.
        save_fig: When true, save the figure to
            ``./figures/<keyword lower-cased, spaces as underscores>.png``.

    Returns:
        The ``(fig, ax)`` pair created by ``plt.subplots()``.

    Raises:
        IndexError: If ``df`` has fewer than two columns or ``lags`` fewer
            than two entries.
    """
    fig, ax = plt.subplots()

    # explicit indexing (rather than zip) so a too-short input still raises
    # instead of silently drawing a single line
    for position in range(2):
        sns.lineplot(
            x=df.index,
            y=df.columns[position],
            data=df,
            alpha=0.5,
            label=f"{lags[position]} days lag",
            ax=ax,
        )

    # the title follows the lags actually plotted instead of a fixed
    # "30 and 100"; for lags == [30, 100] the text is unchanged
    fig.suptitle(
        f"{keyword} Google Trend Volume, {lags[0]} and {lags[1]} days overlap"
    )

    if save_fig:
        fig.savefig(f"./figures/{keyword.lower().replace(' ', '_')}.png")

    return fig, ax

In [3]:
# Query parameters for the cells below
keyword = "Mario Draghi"  # search term
overlap = [30, 100]  # overlap window sizes to compare
pytrend = TrendReq()  # pytrends client, default settings
start = "2012-01-01"  # start of the query range
end = "2021-08-31"  # end of the query range

In [None]:
# run the query with the parameters defined in the cell above
query_data(keyword=keyword, overlap=overlap, trendreq=pytrend, start=start, end=end)