In [84]:
import pandas as pd                        
from pytrends.request import TrendReq

In [93]:
def get_data(year, terms):
    """Fetch US Google Trends interest for ``terms`` over one calendar year.

    Google Trends only accepts five keywords per request and scales each
    response relative to the largest value in that request. To get every term
    on a single scale, this function:

    1. finds the term with the highest peak by running knockout rounds of at
       most five terms each until a single winner remains, then
    2. requests the terms again in groups of four, adding that top term to
       every request so all responses share the same reference.

    Args:
        year: Calendar year to query; the timeframe sent is
            ``"<year>-01-01 <year>-12-31"``.
        terms: Iterable of search terms. Duplicates are only requested once.

    Returns:
        pandas.DataFrame with one column per entry of ``terms`` (same order),
        indexed as returned by ``TrendReq.interest_over_time``. An empty
        DataFrame is returned when ``terms`` is empty.

    Raises:
        KeyError: If a requested term is missing from the responses, e.g.
            because Google Trends returned no data at all.
    """
    terms = list(terms)
    if not terms:
        return pd.DataFrame()

    pytrend = TrendReq(retries=3, backoff_factor=0.4)
    timeframe = f"{year}-01-01 {year}-12-31"
    max_keywords = 5

    # Request each term once, in first-seen order, so payloads are deterministic.
    unique_terms = list(dict.fromkeys(terms))

    def fetch(keywords):
        """Return the interest frame for ``keywords`` without the isPartial flag."""
        pytrend.build_payload(kw_list=keywords, geo="US", timeframe=timeframe)
        frame = pytrend.interest_over_time()
        # isPartial is a boolean bookkeeping column, not a search term; keeping
        # it would let it take part in the peak ranking below.
        return frame.drop(columns="isPartial", errors="ignore")

    def leader(keywords):
        """Return the keyword with the highest peak interest among ``keywords``."""
        if len(keywords) == 1:
            return keywords[0]
        frame = fetch(keywords)
        if frame.empty:
            # No data for any keyword in this group: they all tie, so any of
            # them can represent the group in the next round.
            return keywords[0]
        return frame.max().idxmax()

    # Knockout rounds: every round keeps one winner per group of five, so the
    # number of keywords per request never exceeds the limit, however many
    # terms were passed in.
    contenders = unique_terms
    while len(contenders) > 1:
        contenders = [
            leader(contenders[start : start + max_keywords])
            for start in range(0, len(contenders), max_keywords)
        ]
    top = contenders[0]

    # Finally, request groups of four terms plus the top term so that every
    # response is scaled against the same reference.
    group_size = max_keywords - 1
    frames = []
    for start in range(0, len(unique_terms), group_size):
        group = unique_terms[start : start + group_size]
        keywords = group if top in group else group + [top]
        frames.append(fetch(keywords))
    results = pd.concat(frames, axis=1)

    # The top term appears in every response; keep only its first column.
    results = results.loc[:, ~results.columns.duplicated()].copy()

    # Return the columns in the order the caller asked for.
    return results[terms]

In [90]:
# Google Trends search terms, one per NWSL team. This list is what gets passed
# to get_data() as `terms`, so the spelling of each entry is exactly what is
# sent as a keyword and what names the resulting column.
nwsl_teams_2022 = [
    "Angel City FC",
    "Chicago Red Stars",
    "Houston Dash",
    "Kansas City Current",
    "North Carolina Courage",
    "NJ/NY Gotham FC",
    "OL Reign",
    "Orlando Pride",
    "Portland Thorns",
    "Racing Louisville FC",
    "San Diego Wave FC",
    "Washington Spirit",
]

In [91]:
# Pull the 2022 search interest for every NWSL team
data = get_data(2022, nwsl_teams_2022)
# Flip to one row per team (easier to plot) and save the result as CSV
df = data.T
df.to_csv("nwsl_2022_trends.csv")

data

Unnamed: 0_level_0,Angel City FC,Chicago Red Stars,Houston Dash,Kansas City Current,North Carolina Courage,NJ/NY Gotham FC,OL Reign,Orlando Pride,Portland Thorns,Racing Louisville FC,San Diego Wave FC,Washington Spirit
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2022-01-02,3,3,7,19,0,2,0,3,2,0,4,15
2022-01-09,3,2,2,11,2,0,2,5,5,3,0,14
2022-01-16,3,3,8,25,0,0,3,9,3,2,0,18
2022-01-23,2,4,5,47,0,0,4,4,6,3,2,15
2022-01-30,13,2,10,41,0,0,3,8,5,3,3,19
2022-02-06,11,5,3,13,4,0,8,7,4,1,2,30
2022-02-13,15,5,1,18,0,0,5,7,6,0,7,18
2022-02-20,9,5,9,11,0,0,4,10,6,2,5,17
2022-02-27,9,8,12,14,0,0,10,7,11,1,4,20
2022-03-06,12,8,4,16,0,0,11,8,20,3,4,20


In [94]:
# Pull the 2021 search interest for every NWSL team
# NOTE(review): this reuses the 2022 team list for the 2021 request; confirm
# the same names are the intended search terms for that year.
data = get_data(2021, nwsl_teams_2022)
# Flip to one row per team (easier to plot) and save the result as CSV
df = data.T
df.to_csv("nwsl_2021_trends.csv")

data

Unnamed: 0_level_0,Angel City FC,Chicago Red Stars,Houston Dash,Kansas City Current,North Carolina Courage,NJ/NY Gotham FC,OL Reign,Orlando Pride,Portland Thorns,Racing Louisville FC,San Diego Wave FC,Washington Spirit
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-01-03,0,2,2,9,0,0,4,7,0,0,0,15
2021-01-10,5,5,10,12,2,0,0,5,2,4,0,18
2021-01-17,2,4,8,17,2,0,4,5,5,4,1,14
2021-01-24,2,11,8,19,7,2,3,4,3,2,0,17
2021-01-31,4,6,6,10,0,0,2,9,0,2,0,11
2021-02-07,2,9,7,22,7,0,0,7,2,2,0,11
2021-02-14,5,15,7,22,3,0,2,7,0,0,0,18
2021-02-21,0,8,8,8,0,0,4,13,11,0,0,14
2021-02-28,2,11,11,7,2,0,0,15,4,3,3,11
2021-03-07,0,6,9,7,7,3,3,13,4,0,0,18
