In [113]:
import pandas as pd
import numpy as np
import pytrends
from pytrends.request import TrendReq
import matplotlib.pyplot as plt
import warnings 
import time as timer
warnings.filterwarnings('ignore')

# Establish connection to API
# NOTE(review): this rebinds the name `pytrends`, which the import cell bound to
# the module itself; after this line `pytrends` is the TrendReq client, and the
# helper functions later in the notebook rely on that global.
# hl is the host language; tz is presumably the timezone offset in minutes — TODO confirm.
pytrends = TrendReq(hl='en-US', tz=360)

# Keywords
kw_list = ["George Floyd"]

# Get information about the list of keywords from between today and 1 year ago
# geo='' applies no country filter (presumably worldwide — confirm against pytrends docs).
pytrends.build_payload(kw_list, cat=0, timeframe='today 12-m', geo='', gprop='')
# One row per date, one column per keyword, plus an "isPartial" column (see printed output).
df = pytrends.interest_over_time()
print(df)


            George Floyd  isPartial
date                               
2020-05-17             0      False
2020-05-24            79      False
2020-05-31           100      False
2020-06-07            33      False
2020-06-14             9      False
2020-06-21             5      False
2020-06-28             3      False
2020-07-05             2      False
2020-07-12             2      False
2020-07-19             2      False
2020-07-26             2      False
2020-08-02             4      False
2020-08-09             2      False
2020-08-16             2      False
2020-08-23             2      False
2020-08-30             1      False
2020-09-06             1      False
2020-09-13             1      False
2020-09-20             1      False
2020-09-27             1      False
2020-10-04             1      False
2020-10-11             1      False
2020-10-18             1      False
2020-10-25             1      False
2020-11-01             1      False
2020-11-08             1    

**Table 1.** An example of the time-indexed table returned by a PyTrends query. The "date" column identifies each week of data collection; the column named after the search term ("George Floyd") gives the relative search frequency for that week; the last column, "isPartial", was not needed for this investigation.

In [117]:

# Load the query inputs: the search terms to compare and the countries to query

search_terms = pd.read_csv("search_terms.csv")
country_codes = pd.read_csv("country_code_data.csv")
print(search_terms, country_codes, sep="\n")

# Flatten the relevant column of each table into a plain Python list
countries = list(country_codes.loc[:, "country_code"])
terms = list(search_terms.loc[:, "search_terms"])
print(countries, terms, sep="\n")

        search_terms
0   Election Results
1        Coronavirus
2           Stimulus
3       Unemployment
4               Iran
5          Elon Musk
6               Simp
7           Furlough
8           Among Us
9           Parasite
10     Black Panther
11        Tiger King
12             Ozark
13         Hurricane
14             Joker
15            Boomer
16            Brexit
     country  country_code
0          US           US
1      Brazil           BR
2      Mexico           MX
3          UK           GB
4      France           FR
5     Germany           DE
6      Russia           RU
7       Japan           JP
8   Australia           AU
9       Egypt           EG
10    Vietnam           VN
11      India           IN
12     Turkey           TR
13      China           CN
['US', 'BR', 'MX', 'GB', 'FR', 'DE', 'RU', 'JP', 'AU', 'EG', 'VN', 'IN', 'TR', 'CN']
['Election Results', 'Coronavirus', 'Stimulus', 'Unemployment', 'Iran', 'Elon Musk', 'Simp', 'Furlough', 'Among Us', 'Parasite', 'Bl

In [126]:
# Lift pandas' display truncation so printed frames show every row and column.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
def sum_of_square_diffs(list1, list2):
    """
    Add up the squared element-wise gaps between two sequences.

    Positions are driven by ``list1``: every index of ``list1`` is looked up
    in ``list2`` as well, so ``list2`` must be at least as long as ``list1``
    (extra trailing elements of ``list2`` are ignored).
    Returns 0 when ``list1`` is empty.
    """
    squared_gaps = ((list1[pos] - list2[pos]) ** 2 for pos in range(len(list1)))
    running = 0
    for gap in squared_gaps:
        running += gap
    return running

def avg_squared_diff(list1, list2):
    """
    Mean of the squared element-wise gaps between two sequences.

    The squared gaps are totalled by ``sum_of_square_diffs`` and the total is
    divided by the number of elements in ``list1``.
    """
    return sum_of_square_diffs(list1, list2) / len(list1)

def total_diffs(list1, list2):
    """
    Total of the absolute element-wise gaps between two sequences.

    If either argument compares equal to 0 (the "no data" marker), the
    result is None instead of a number. Otherwise every index of ``list1``
    is looked up in ``list2`` and the absolute differences are added up.
    """
    no_data = list2 == 0 or list1 == 0
    if no_data:
        return None
    gaps = (abs(list1[pos] - list2[pos]) for pos in range(len(list1)))
    running = 0
    for gap in gaps:
        running += gap
    return running

def avg_total_diffs(list1, list2):
    """
    Average of abs value differences between lists

    The total comes from ``total_diffs`` and is divided by the number of
    elements in ``list1``.

    Returns None when ``total_diffs`` returns None (one of the inputs is the
    0 "no data" marker), so a missing series yields a missing score instead
    of a TypeError from dividing None.
    """
    total = total_diffs(list1, list2)
    # Check the sentinel before touching len(list1): list1 itself may be the 0 marker.
    if total is None:
        return None
    return total / len(list1)


def retrieve_data(country, term, time="today 12-m", max_retries=None, wait_seconds=15):
    """
    Retrieves the Google search data on the term starting from time

    Parameters:
        country: region code passed to the query as ``geo``.
        term: the single search term to query.
        time: timeframe string passed to the query as ``timeframe``.
        max_retries: maximum number of retries after a failed request.
            None (the default) keeps retrying indefinitely.
        wait_seconds: pause between a failed request and the next attempt.

    Returns:
        Whatever ``pytrends.interest_over_time()`` returns for the query, or
        the integer 0 when the request raises KeyError (treated as "no data
        available for this term").

    Raises:
        The last request error once ``max_retries`` retries have been used up.
        KeyboardInterrupt / SystemExit are never swallowed, so a long-running
        cell can still be interrupted.
    """
    retries_used = 0
    # Loop instead of recursing so that a long outage cannot exhaust the call stack.
    while True:
        try:
            pytrends.build_payload([term], cat=0, timeframe=time, geo=country, gprop='')
            return pytrends.interest_over_time()
        except KeyError:  # No data available for this term
            return 0
        except Exception:  # Most likely the request frequency limit; wait and try again
            if max_retries is not None and retries_used >= max_retries:
                raise
            retries_used += 1
            print("Received response error or some other error. Waiting to try again.")
            timer.sleep(wait_seconds)
def get_activity_list(country, term, time="today 12-m"):
    """
    Returns the search trend for a country/term as a list

    The values are taken from the first column of the frame returned by
    ``retrieve_data``.

    Returns the integer 0 (the same "no data" marker ``retrieve_data`` uses)
    when no frame came back or the frame is empty, instead of failing on the
    column lookup.
    """
    df = retrieve_data(country, term, time)
    # retrieve_data hands back the int 0 rather than a frame when it has no data.
    if not hasattr(df, "iloc"):
        return 0
    # An empty frame has no first column to read; treat it as "no data" as well.
    if df.empty:
        return 0
    return list(df.iloc[:, 0])

def wait_for(seconds):
    """
    Announce a pause on stdout, then block for ``seconds`` seconds.
    """
    notice = f"Now waiting for {seconds} seconds"
    print(notice)
    timer.sleep(seconds)

# Test the code
# lst2 is identical to lst1; lst3 differs from lst1 by 1, 2, 2 and 0 at the four positions.
lst1 = [1, 2, 4, 5]
lst2 = [1, 2, 4, 5]
lst3 = [0, 0, 2, 5]
# Live request: prints the first data column of the 'Simp' query for 'GB' (needs network access).
print(get_activity_list('GB', 'Simp'))
# Identical lists -> expected 0.0
print(avg_total_diffs(lst1, lst2))
# (1 + 2 + 2 + 0) / 4 -> expected 1.25
print(avg_total_diffs(lst1, lst3))

[76, 74, 70, 69, 67, 73, 61, 68, 61, 64, 74, 59, 77, 65, 60, 100, 81, 72, 63, 61, 50, 53, 42, 43, 45, 57, 95, 66, 46, 51, 55, 41, 42, 44, 46, 44, 38, 36, 33, 44, 34, 40, 31, 36, 26, 30, 28, 31, 31, 27, 20]
0.0
1.25


In [127]:

# Build one row per search term: each entry is the average absolute difference
# between the 'US' series and another country's series for that term.
# NOTE(review): despite its name, diff_df is a plain list of lists, not a DataFrame.
diff_df = []
for term in terms:
    diffs = []
    # Reference series for this term; fetched once and reused for every comparison.
    US_DATA = get_activity_list('US', term)
    # countries[1:] skips the first entry, assumed to be 'US' itself (it is in the list printed earlier).
    for country in countries[1:]:
        print(f"Getting {term} for {country}")
        this_data = get_activity_list(country, term)
        # NOTE(review): requests are issued back to back; wait_for() is defined but not called here.
        diffs.append(avg_total_diffs(US_DATA, this_data))
    diff_df.append(diffs)

print(diff_df)

Getting Election Results for BR
Getting Election Results for MX
Getting Election Results for GB
Getting Election Results for FR
Getting Election Results for DE
Getting Election Results for RU
Getting Election Results for JP
Getting Election Results for AU
Getting Election Results for EG
Getting Election Results for VN
Getting Election Results for IN
Getting Election Results for TR
Getting Election Results for CN
Getting Coronavirus for BR
Getting Coronavirus for MX
Getting Coronavirus for GB
Getting Coronavirus for FR
Getting Coronavirus for DE
Getting Coronavirus for RU
Getting Coronavirus for JP
Getting Coronavirus for AU
Getting Coronavirus for EG
Getting Coronavirus for VN
Getting Coronavirus for IN
Getting Coronavirus for TR
Getting Coronavirus for CN
Getting Stimulus for BR
Getting Stimulus for MX
Getting Stimulus for GB
Getting Stimulus for FR
Getting Stimulus for DE
Getting Stimulus for RU
Getting Stimulus for JP
Getting Stimulus for AU
Getting Stimulus for EG
Getting Stimulus 

In [129]:
# One row per search term, one column per compared country (every code after the first).
processed_df = pd.DataFrame(data=diff_df, columns=countries[1:])
# Put the term labels in front so each row is identifiable.
processed_df.insert(loc=0, column="Search Term", value=terms)
print(processed_df)

         Search Term         BR         MX         GB         FR         DE  \
0   Election Results   0.235294   0.078431   0.490196   0.196078   0.078431   
1        Coronavirus  12.470588   9.078431  16.313725  13.294118  22.882353   
2           Stimulus   9.431373   7.549020  19.372549  18.156863   7.235294   
3       Unemployment  30.450980  27.137255  14.960784  21.490196  21.215686   
4               Iran  40.274510  12.137255  24.941176  13.098039  29.490196   
5          Elon Musk   7.686275  10.176471   9.901961   7.352941   6.901961   
6               Simp  19.156863  38.549020   7.058824  18.039216  11.705882   
7           Furlough  22.117647  20.431373  12.019608  16.490196  18.725490   
8           Among Us   4.960784   8.392157   8.039216  17.078431  10.823529   
9           Parasite  24.117647  36.803922  13.862745  10.607843  23.019608   
10     Black Panther   5.215686   2.392157   1.254902   2.784314   2.823529   
11        Tiger King  17.176471  23.803922   9.52941

In [130]:
# Save the per-term, per-country difference table as CSV.
# NOTE(review): the destination is an absolute path inside one user's home
# directory, so this cell fails on any machine without that folder; consider a
# path relative to the notebook or a configurable output directory.
processed_df.to_csv('/Users/alecnipp/Documents/NCSSM_senior/sem2/dig_hum/Python/search_results_terms_full.csv')

In [131]:
# Bare expression: the notebook renders the frame as this cell's output.
processed_df

Unnamed: 0,Search Term,BR,MX,GB,FR,DE,RU,JP,AU,EG,VN,IN,TR,CN
0,Election Results,0.235294,0.078431,0.490196,0.196078,0.078431,0.784314,0.27451,0.176471,0.078431,0.431373,1.333333,0.156863,0.156863
1,Coronavirus,12.470588,9.078431,16.313725,13.294118,22.882353,10.176471,12.784314,9.470588,10.843137,12.862745,14.196078,14.254902,8.901961
2,Stimulus,9.431373,7.54902,19.372549,18.156863,7.235294,14.960784,12.882353,17.764706,19.72549,23.333333,19.470588,13.705882,17.470588
3,Unemployment,30.45098,27.137255,14.960784,21.490196,21.215686,38.843137,30.45098,18.156863,48.313725,19.764706,32.686275,27.019608,43.960784
4,Iran,40.27451,12.137255,24.941176,13.098039,29.490196,16.647059,16.137255,17.294118,20.568627,16.176471,13.784314,18.470588,32.058824
5,Elon Musk,7.686275,10.176471,9.901961,7.352941,6.901961,16.411765,12.882353,6.960784,14.294118,10.137255,16.313725,16.235294,20.45098
6,Simp,19.156863,38.54902,7.058824,18.039216,11.705882,21.411765,25.921569,17.333333,29.039216,30.490196,24.176471,25.803922,47.901961
7,Furlough,22.117647,20.431373,12.019608,16.490196,18.72549,23.705882,17.823529,16.039216,20.490196,24.862745,7.509804,27.372549,24.490196
8,Among Us,4.960784,8.392157,8.039216,17.078431,10.823529,7.27451,32.294118,2.666667,6.745098,5.529412,4.45098,14.705882,16.078431
9,Parasite,24.117647,36.803922,13.862745,10.607843,23.019608,43.529412,31.960784,11.078431,26.529412,17.823529,7.019608,13.882353,50.333333


In [142]:
# Average each term's score across the country columns only.
# The "Search Term" label column is excluded explicitly (recent pandas no longer
# drops non-numeric columns silently), and an existing "Avg" column is excluded
# too so that re-running this cell does not average the previous average.
av_rowwise = (
    processed_df
    .drop(columns=["Search Term", "Avg"], errors="ignore")
    .mean(axis=1, numeric_only=True)
)
processed_df["Avg"] = av_rowwise
processed_df


Unnamed: 0,Search Term,BR,MX,GB,FR,DE,RU,JP,AU,EG,VN,IN,TR,CN,Avg
0,Election Results,0.235294,0.078431,0.490196,0.196078,0.078431,0.784314,0.27451,0.176471,0.078431,0.431373,1.333333,0.156863,0.156863,0.343891
1,Coronavirus,12.470588,9.078431,16.313725,13.294118,22.882353,10.176471,12.784314,9.470588,10.843137,12.862745,14.196078,14.254902,8.901961,12.886878
2,Stimulus,9.431373,7.54902,19.372549,18.156863,7.235294,14.960784,12.882353,17.764706,19.72549,23.333333,19.470588,13.705882,17.470588,15.466063
3,Unemployment,30.45098,27.137255,14.960784,21.490196,21.215686,38.843137,30.45098,18.156863,48.313725,19.764706,32.686275,27.019608,43.960784,28.803922
4,Iran,40.27451,12.137255,24.941176,13.098039,29.490196,16.647059,16.137255,17.294118,20.568627,16.176471,13.784314,18.470588,32.058824,20.852187
5,Elon Musk,7.686275,10.176471,9.901961,7.352941,6.901961,16.411765,12.882353,6.960784,14.294118,10.137255,16.313725,16.235294,20.45098,11.977376
6,Simp,19.156863,38.54902,7.058824,18.039216,11.705882,21.411765,25.921569,17.333333,29.039216,30.490196,24.176471,25.803922,47.901961,24.352941
7,Furlough,22.117647,20.431373,12.019608,16.490196,18.72549,23.705882,17.823529,16.039216,20.490196,24.862745,7.509804,27.372549,24.490196,19.390649
8,Among Us,4.960784,8.392157,8.039216,17.078431,10.823529,7.27451,32.294118,2.666667,6.745098,5.529412,4.45098,14.705882,16.078431,10.695324
9,Parasite,24.117647,36.803922,13.862745,10.607843,23.019608,43.529412,31.960784,11.078431,26.529412,17.823529,7.019608,13.882353,50.333333,23.889894


In [145]:
# Append a summary row holding the mean of every numeric column.
# Rows already labelled "Avg" are left out so re-running the cell neither
# averages an old summary row nor stacks up additional ones.
is_term_row = processed_df["Search Term"] != "Avg"
# numeric_only=True skips the "Search Term" text column, which cannot be averaged.
av_column = list(processed_df[is_term_row].mean(axis=0, numeric_only=True))
# The list is positional: the label goes first because "Search Term" is the first column.
av_column = ["Avg"] + av_column
# Label the row with the number of term rows (the next free position of the
# default 0..n-1 index) instead of a hard-coded 17.
processed_df.loc[int(is_term_row.sum())] = av_column
processed_df

Unnamed: 0,Search Term,BR,MX,GB,FR,DE,RU,JP,AU,EG,VN,IN,TR,CN,Avg
0,Election Results,0.235294,0.078431,0.490196,0.196078,0.078431,0.784314,0.27451,0.176471,0.078431,0.431373,1.333333,0.156863,0.156863,0.343891
1,Coronavirus,12.470588,9.078431,16.313725,13.294118,22.882353,10.176471,12.784314,9.470588,10.843137,12.862745,14.196078,14.254902,8.901961,12.886878
2,Stimulus,9.431373,7.54902,19.372549,18.156863,7.235294,14.960784,12.882353,17.764706,19.72549,23.333333,19.470588,13.705882,17.470588,15.466063
3,Unemployment,30.45098,27.137255,14.960784,21.490196,21.215686,38.843137,30.45098,18.156863,48.313725,19.764706,32.686275,27.019608,43.960784,28.803922
4,Iran,40.27451,12.137255,24.941176,13.098039,29.490196,16.647059,16.137255,17.294118,20.568627,16.176471,13.784314,18.470588,32.058824,20.852187
5,Elon Musk,7.686275,10.176471,9.901961,7.352941,6.901961,16.411765,12.882353,6.960784,14.294118,10.137255,16.313725,16.235294,20.45098,11.977376
6,Simp,19.156863,38.54902,7.058824,18.039216,11.705882,21.411765,25.921569,17.333333,29.039216,30.490196,24.176471,25.803922,47.901961,24.352941
7,Furlough,22.117647,20.431373,12.019608,16.490196,18.72549,23.705882,17.823529,16.039216,20.490196,24.862745,7.509804,27.372549,24.490196,19.390649
8,Among Us,4.960784,8.392157,8.039216,17.078431,10.823529,7.27451,32.294118,2.666667,6.745098,5.529412,4.45098,14.705882,16.078431,10.695324
9,Parasite,24.117647,36.803922,13.862745,10.607843,23.019608,43.529412,31.960784,11.078431,26.529412,17.823529,7.019608,13.882353,50.333333,23.889894


In [146]:
# Save the final table as CSV; by this point the preceding cells have added the
# "Avg" column and the "Avg" summary row to processed_df.
# NOTE(review): the destination is an absolute path inside one user's home
# directory, so this cell fails on any machine without that folder; consider a
# path relative to the notebook or a configurable output directory.
processed_df.to_csv('/Users/alecnipp/Documents/NCSSM_senior/sem2/dig_hum/Python/processed_search_trends_final.csv')