In [None]:
import base64
import os

import matplotlib.pyplot as plt
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from pytrends.request import TrendReq
from sendgrid import SendGridAPIClient
from sendgrid.helpers.mail import Mail, Attachment, FileContent, FileName, FileType, Disposition

# API key is taken from the SG_API_KEY environment variable (None when unset),
# so the secret never has to be written into the notebook.
key = os.environ.get("SG_API_KEY")

# Search terms whose interest-over-time series are fetched and checked below.
keywords_to_monitor = [
    "corduroy hat",
    "corduroy pants",
    "trail running pack",
]

In [None]:
def get_interest_trends_kw(keyword, pytrends=None):
    """Fetch the interest-over-time table for one or more search keywords.

    The request is built with ``cat=0``, ``timeframe='today 5-y'`` and
    ``geo='US'``.

    Parameters
    ----------
    keyword : str or iterable of str
        A single search term or a collection of terms. A bare string is
        wrapped in a one-element list before being handed to
        ``build_payload``.
    pytrends : TrendReq, optional
        Client to reuse. When omitted, a new ``TrendReq(hl='en-US')`` is
        created for this call. (Building it in the signature would create a
        single client at definition time and share it across every call.)

    Returns
    -------
    pandas.DataFrame
        The result of ``interest_over_time()`` with its index moved into a
        regular column via ``reset_index()``.
    """
    if pytrends is None:
        pytrends = TrendReq(hl='en-US')

    # Normalise the input: a lone string must not be passed as the keyword list.
    kw_list = [keyword] if isinstance(keyword, str) else list(keyword)

    pytrends.build_payload(kw_list, cat=0, timeframe='today 5-y', geo='US')
    df = pytrends.interest_over_time()

    # Return a new frame instead of mutating the one handed back by the client.
    return df.reset_index()

In [None]:
# Download the series for every monitored keyword, then preview the first rows.
trends = get_interest_trends_kw(keyword=keywords_to_monitor)
trends.head(n=5)

Unnamed: 0,date,corduroy hat,corduroy pants,trail running pack,isPartial
0,2018-11-18,2,48,0,False
1,2018-11-25,2,50,0,False
2,2018-12-02,2,45,0,False
3,2018-12-09,2,46,0,False
4,2018-12-16,2,38,2,False


In [None]:
# trends.describe()

Unnamed: 0,corduroy hat,corduroy pants,trail running pack
count,260.0,260.0,260.0
mean,4.0,32.038462,0.415385
std,2.331586,19.026259,0.538619
min,0.0,7.0,0.0
25%,2.0,17.75,0.0
50%,3.5,27.0,0.0
75%,6.0,44.0,1.0
max,11.0,100.0,2.0


In [None]:
# trends['corduroy hat'].quantile(0.75)

6.0

In [None]:
def get_anomalous_records(data, date_key : str, metric, quantile_threshold : float = 0.75):
    """Flag the rows whose ``metric`` value lies above a quantile of that column.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing at least the ``date_key`` and ``metric`` columns.
        It is not modified; all work happens on a copy.
    date_key : str
        Name of the column holding the dates; parsed with ``pd.to_datetime``.
    metric
        Name of the column to evaluate.
    quantile_threshold : float, default 0.75
        Quantile of ``metric`` used as the cut-off.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``date_key`` (most recent first) with two columns:
        ``metric`` and a boolean ``anomaly`` that is True where the value is
        strictly greater than the cut-off. Missing metric values are replaced
        with 0 after the flag has been computed.
    """
    # Copy the two needed columns so the caller's frame keeps its original dtypes.
    records = data[[date_key, metric]].copy()
    records[date_key] = pd.to_datetime(records[date_key])

    # Newest observation first, with the date as the index.
    records = records.sort_values(by=date_key, ascending=False).set_index(date_key)

    # Cut-off is computed before NaNs are filled so they do not skew the quantile.
    threshold = records[metric].quantile(quantile_threshold)

    # Vectorised comparison; NaN compares as False, so missing values are never flagged.
    records['anomaly'] = records[metric] > threshold

    # Replace NaN metrics with 0
    return records.fillna(0)

In [None]:
# df_anom = get_anomalous_records(trends, "date", "corduroy hat")
# df_anom

Unnamed: 0_level_0,corduroy hat,anomaly
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2023-11-05,10,True
2023-10-29,7,True
2023-10-22,11,True
2023-10-15,10,True
2023-10-08,9,True
...,...,...
2018-12-16,2,False
2018-12-09,2,False
2018-12-02,2,False
2018-11-25,2,False


In [None]:
def past_week_anomaly(df_anom):
    """Report whether the most recent row of ``df_anom`` is flagged as an anomaly.

    Parameters
    ----------
    df_anom : pandas.DataFrame
        Frame with an ``anomaly`` column; the row whose index equals
        ``df_anom.index.max()`` is inspected.

    Returns
    -------
    bool
        True when that row's ``anomaly`` value is truthy, otherwise False
        (including when the frame is empty).
    """
    if df_anom.empty:
        return False

    latest = df_anom.index.max()
    flags = df_anom.loc[df_anom.index == latest, "anomaly"]
    if flags.empty:
        # e.g. the maximum is NaT, which never compares equal to itself
        return False

    # .iloc is explicit positional access; a bare [0] on a non-integer index
    # relies on a deprecated positional fallback.
    return bool(flags.iloc[0])

In [None]:
# past_week_anomaly(df_anom)

True

In [None]:
def get_anomaly_trend(df,metric):
    """Plot ``metric`` over the frame's index and highlight the flagged rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index is used for the x axis and which contains the
        ``metric`` column plus an ``anomaly`` column.
    metric : str
        Column to plot; also used for the chart title and the output file name.

    Returns
    -------
    plotly figure
        The line chart with a red marker trace named ``anomaly``.

    Side effects
    ------------
    Writes the chart to ``anomaly_<metric with spaces replaced by _>.png`` in
    the current working directory via ``fig.write_image``.
    NOTE(review): static image export needs an image-export backend to be
    installed alongside plotly; confirm it is available in the run environment.
    """
    fig = px.line(
            df,
            x=df.index,
            y=metric,
            title=f"{metric.title()} Anomalies",
            template = 'plotly_dark')

    # Select all flagged rows in one pass; the resulting Series carries both the
    # dates (index) and the values, and stays correct even if a date repeats.
    outliers = df.loc[df['anomaly'] == True, metric]

    fig.add_trace(
        go.Scatter(
            x=outliers.index,
            y=outliers,
            mode = 'markers',
            name = 'anomaly',
            marker=dict(color='red',size=10)
            )
        )

    fig.write_image(f"anomaly_{metric.replace(' ','_')}.png")
    return fig

In [None]:
# get_anomaly_trend(df_anom,"corduroy hat")

In [None]:
def send_mail(_from : str, _to : str, kwd : str, api_key : str):
    """Email the saved anomaly chart for ``kwd`` through SendGrid.

    Parameters
    ----------
    _from : str
        Sender address.
    _to : str
        Recipient address.
    kwd : str
        Keyword whose chart is attached; the file
        ``./anomaly_<kwd with spaces replaced by _>.png`` must already exist.
    api_key : str
        SendGrid API key.

    Returns
    -------
    None
        The response status, body and headers are printed. Failures while
        sending are printed rather than raised.

    Raises
    ------
    OSError
        If the image file cannot be opened (this happens before the guarded
        send step).
    """
    image_path = f"./anomaly_{kwd.replace(' ','_')}.png"

    # Prepare the email content
    message = Mail(
        from_email=_from,
        to_emails=_to,
        subject='Weekly Keyword Search Volume Increase - Demand > 75th Percentile',
        html_content='<strong>Check out the trend visualization:</strong>')

    # Read the image file (the context manager closes it)
    with open(image_path, 'rb') as f:
        image_bytes = f.read()

    # The attachment content must be the base64-encoded file bytes,
    # not the file name.
    encoded_file = base64.b64encode(image_bytes).decode()
    message.attachment = Attachment(
        FileContent(encoded_file),
        FileName(os.path.basename(image_path)),
        FileType('image/png'),
        Disposition('attachment')
    )

    # Send the email via SendGrid
    try:
        sg = SendGridAPIClient(api_key)
        response = sg.send(message)
        print(response.status_code)
        print(response.body)
        print(response.headers)
    except Exception as e:
        # Best-effort delivery: report the failure and let the caller continue.
        print(str(e))
        # NOTE(review): HTTP errors from the client may carry a response body
        # with the reason for the rejection; print it when present.
        error_body = getattr(e, "body", None)
        if error_body:
            print(error_body)

In [None]:
# trends = get_interest_trends_kw(keywords_to_monitor)
# trends.head()

Unnamed: 0,date,corduroy hat,corduroy pants,trail running pack,isPartial
0,2018-11-18,2,48,0,False
1,2018-11-25,2,50,0,False
2,2018-12-02,2,45,0,False
3,2018-12-09,2,45,0,False
4,2018-12-16,2,38,2,False
...,...,...,...,...,...
255,2023-10-08,9,65,1,False
256,2023-10-15,10,66,0,False
257,2023-10-22,11,62,1,False
258,2023-10-29,7,69,0,False


In [None]:
# For each monitored keyword: flag anomalies, and only when the most recent
# row is flagged, render the chart and email it.
for kwd in keywords_to_monitor:
    df_anom = get_anomalous_records(trends, "date", kwd)

    if not past_week_anomaly(df_anom):
        continue

    get_anomaly_trend(df_anom, kwd)
    send_mail(
        _from="yy@gmail.com",
        _to="xx@gmail.com",
        kwd=kwd,
        api_key=os.getenv("SG_API_KEY"),
    )

202
b''
Server: nginx
Date: Mon, 13 Nov 2023 00:46:23 GMT
Content-Length: 0
Connection: close
X-Message-Id: zQNcBxDuSW-vmoGr97jWow
Access-Control-Allow-Origin: https://sendgrid.api-docs.io
Access-Control-Allow-Methods: POST
Access-Control-Allow-Headers: Authorization, Content-Type, On-behalf-of, x-sg-elas-acl
Access-Control-Max-Age: 600
X-No-CORS-Reason: https://sendgrid.com/docs/Classroom/Basics/API/cors.html
Strict-Transport-Security: max-age=600; includeSubDomains


HTTP Error 400: Bad Request
