In [5]:
from pmdarima import auto_arima
from pytrends.request import TrendReq
from statsmodels.tsa.statespace.sarimax import SARIMAX
import pandas as pd
import numpy as np

In [18]:
def _fit_sarima(train_data):
    """Pick SARIMA orders with auto_arima (seasonal, m=12) and fit SARIMAX on train_data."""
    auto_params = auto_arima(train_data, seasonal=True, m=12).get_params()
    model = SARIMAX(
        train_data,
        order=auto_params['order'],
        seasonal_order=auto_params['seasonal_order'],
        enforce_invertibility=False,
    )
    return model.fit()


def forecastGoogleTrends(kw_list, geo= 'NO',timeframe = '2014-01-01 2021-04-17', option = 'TestPrediction', csv_file_name = 'toto'):
    """Download Google Trends interest-over-time data and forecast each keyword with SARIMA.

    Parameters
    ----------
    kw_list : list of str
        Keywords sent to pytrends; one model is fitted per keyword.
    geo : str
        Region code forwarded to pytrends (use '' for worldwide).
    timeframe : str
        Timeframe string forwarded to pytrends.
    option : str
        'TestPrediction' holds out the last 20% of rows, fits on the rest and
        predicts the held-out span. 'Predict' fits on every row and forecasts
        past the last observation (int(0.2 * n_rows) + 1 steps, because the
        end position passed to ``predict`` is inclusive).
    csv_file_name : str
        Output file name without extension; '.csv' is appended.

    Returns
    -------
    pandas.DataFrame
        For each keyword, a column named after the keyword (observed values,
        extended with the forecast when option='Predict') and a
        '<keyword>_prediction' column. The same frame is written to CSV.

    Raises
    ------
    TypeError
        If ``option`` is not 'TestPrediction' or 'Predict'.
    ValueError
        If Google Trends returns no rows for the request.
    """
    # Validate before doing any network or model work so a typo fails immediately.
    if option not in ('TestPrediction', 'Predict'):
        raise TypeError("Invalid option provided")

    pytrends = TrendReq(hl='en-US', tz=360)
    # Create payload and capture API tokens. Only needed for interest_over_time(),
    # interest_by_region() & related_queries()
    pytrends.build_payload(kw_list=kw_list, timeframe=timeframe, geo=geo)

    # Interest Over Time
    interest_over_time_df = pytrends.interest_over_time()
    if interest_over_time_df.empty:
        raise ValueError("Google Trends returned no data for the given keywords/timeframe/geo")

    # Compare through str so the filter works whether isPartial holds booleans or
    # the strings 'True'/'False'; a boolean column compared to "False" matches nothing.
    is_complete = interest_over_time_df['isPartial'].astype(str) == 'False'
    # Non-inplace drop: the filtered frame is a new object, so nothing is mutated
    # through a slice (avoids SettingWithCopyWarning).
    interest_over_time_df = interest_over_time_df[is_complete].drop(columns=['isPartial'])
    # NOTE(review): assumes the returned index is month-start spaced; pandas raises
    # ValueError here if the dates do not conform to 'MS' - confirm for other timeframes.
    interest_over_time_df.index.freq = 'MS'

    n_rows = len(interest_over_time_df)
    horizon = int(n_rows * 0.2)
    split_at = n_rows - horizon

    columns = {}
    for keyword in kw_list:
        series = interest_over_time_df[keyword]
        if option == 'TestPrediction':
            train_data = series.iloc[:split_at]
            fitted = _fit_sarima(train_data)
            # Positions are relative to the training sample; end is inclusive,
            # so this covers exactly the held-out rows.
            predictions = fitted.predict(split_at, n_rows - 1)
            observed = series
        else:
            fitted = _fit_sarima(series)
            predictions = fitted.predict(n_rows, n_rows + horizon)
            # Series.append was removed in pandas 2.0; pd.concat is the replacement.
            observed = pd.concat([series, predictions])
        columns[keyword] = observed
        columns[keyword + "_prediction"] = predictions

    # Column-wise concat aligns on the date index, leaving NaN where a column has no value.
    result_df = pd.concat(columns, axis=1) if columns else pd.DataFrame()

    csv_file_name = csv_file_name + '.csv'
    result_df.to_csv(csv_file_name)
    return result_df

In [22]:
# Run the default hold-out evaluation for three keywords.
# An empty geo string requests worldwide data instead of a single country.
forecastGoogleTrends(
    kw_list=['bitcoin', 'equinor', 'aksjer'],
    geo='',
)

