# Trend Search from Collection Names

Requirements:
- Packages
 - pytrends, `pip install pytrends`
 - pandas
 - numpy
 - time (for sleep)
 - random (for randomizing the sleep duration)
- Files
 - output from opensea-get-collections.ipynb
   - currently named 'initial_opensea_get_collections.csv'

## Gather Collection Names

In [1]:
import pandas as pd
import numpy as np

In [3]:
# Read the get_collections export, keeping only the collection_name column,
# then reduce that column to a plain list of its distinct values.
names_frame = pd.read_csv(
    '../opensea/initial_opensea_get_collections.csv',
    usecols=['collection_name'],
)
collections = list(names_frame['collection_name'].unique())

In [3]:
# Validate that extracting the collections worked: we expect a non-empty
# list in which every entry is a string (each entry is later passed to
# pytrends as a search keyword).
assert isinstance(collections, list), "collections should be a list"
assert collections, "no collection names were loaded from the CSV"
assert all(isinstance(name, str) for name in collections), \
    "every collection name should be a string"

In [4]:
# Show the de-duplicated collection names that will be searched
collections

['Genesis Bloodshed Bears',
 '3Landers',
 'Shadow Quest',
 'Spotlight by Joshua Bagley',
 'Notorious Alien Space Agents',
 'T-O-S The Other Side',
 'Viral Mfers',
 'DegenWaifus',
 'Drippies']

# Google Trends

In [5]:
from time import sleep
from random import randint

## Keyword Expansion

- Summary: This is not a useful method and we should not use it.
- Explanation:
 - There are few to no relevant search terms from a small sample (n=9)
 - The terms suggested as "relevant" are not actually related to the collections (e.g. 'Common cold' is suggested for 'T-O-S The Other Side')

Resources:
- https://github.com/GeneralMills/pytrends
- https://pypi.org/project/pytrends/
- https://towardsdatascience.com/google-trends-api-for-python-a84bc25db88f

In [6]:
# pip install pytrends
import pytrends
from pytrends.request import TrendReq
# Google Trends client (created with default settings) used for the
# keyword-suggestion calls in the following cells
pytrend = TrendReq()

In [7]:
# Seed the expansion table with the suggestions for the generic "NFT" term;
# `search_term` tags every row with the query that produced it.
keywords = pytrend.suggestions(keyword='NFT')
keyexp_df = pd.DataFrame(keywords).assign(search_term='NFT')

In [8]:
# Look up suggestions for every collection name and add them to keyexp_df.
# NOTE: Many of these lookups return no suggestions at all --
# presumably because most people don't search for collection names directly.
for collection_name in collections:
    keywords = pytrend.suggestions(keyword=collection_name)
    if keywords:
        temp_search_df = pd.DataFrame(keywords)
        temp_search_df['search_term'] = collection_name
        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported replacement and likewise keeps each frame's own row index.
        keyexp_df = pd.concat([keyexp_df, temp_search_df])
    # Randomized pause between requests, even when a lookup came back empty
    sleep(randint(3, 10))

In [9]:
# Inspect the combined suggestions; `search_term` records which query produced each row
keyexp_df

Unnamed: 0,mid,title,type,search_term
0,/g/11g0g4sbp3,Non-fungible token,Topic,NFT
1,/m/06w57ds,nftables,Topic,NFT
2,/m/031d4k,Niagara Frontier Transportation Authority,Corporation,NFT
3,/m/02n0qb,NFTY,Youth organization,NFT
4,/g/1v9l8jyb,NFT,Topic,NFT
0,/g/11j67yd6rl,The Shadow of the Gods,Topic,Shadow Quest
1,/m/06fjvt4,The Law and The Promise,Topic,Shadow Quest
2,/m/0c1x527,The Yosemite,Book by John Muir,Shadow Quest
0,/m/03j5j,Heart of Darkness,Novella by Joseph Conrad,Notorious Alien Space Agents
0,/m/0n073,Common cold,Disease,T-O-S The Other Side


In [10]:
# Distinct suggestion titles across every search term
keywords_expanded = pd.unique(keyexp_df['title'])
keywords_expanded

array(['Non-fungible token', 'nftables',
       'Niagara Frontier Transportation Authority', 'NFTY', 'NFT',
       'The Shadow of the Gods', 'The Law and The Promise',
       'The Yosemite', 'Heart of Darkness', 'Common cold',
       'Strange Case of Dr Jekyll and Mr Hyde', 'Holi'], dtype=object)

### Output

In [11]:
# Persist the expanded-keyword table; the row index is not written out
expanded_keywords_path = 'data/trends_expanded_keywords.csv'
keyexp_df.to_csv(expanded_keywords_path, index=False)

## Top 25 Related Queries

Notes:
- Can only query 5 keywords at a time
- Be careful about rate limits. TBD on what those are.

### Initial Formatting

In [12]:
# The initial list of collection names
# (shown again here before it is split into groups of 5)
collections

['Genesis Bloodshed Bears',
 '3Landers',
 'Shadow Quest',
 'Spotlight by Joshua Bagley',
 'Notorious Alien Space Agents',
 'T-O-S The Other Side',
 'Viral Mfers',
 'DegenWaifus',
 'Drippies']

In [13]:
# Split the collection names into consecutive batches of at most 5;
# the final batch holds whatever is left over.
batch_size = 5
grouped_collections = []
for start in range(0, len(collections), batch_size):
    grouped_collections.append(collections[start:start + batch_size])

In [14]:
# Sanity check: show the first group and the total number of groups
print(grouped_collections[0])
print(len(grouped_collections))

['Genesis Bloodshed Bears', '3Landers', 'Shadow Quest', 'Spotlight by Joshua Bagley', 'Notorious Alien Space Agents']
2


### API Calls

#### Initial DF - Default Keyword = "NFT"

In [15]:
# Baseline request: related queries for the generic keyword "NFT"
base_keyword = 'NFT'
pytrend = TrendReq()
pytrend.build_payload(kw_list=[base_keyword])

# related_queries() hands back a dict keyed by keyword; the entry for each
# keyword holds a 'top' and a 'rising' DataFrame
related_queries = pytrend.related_queries()
nft_results = related_queries[base_keyword]

# Tag each result table with the keyword that produced it
top_queries = nft_results['top']
top_queries['init_query'] = base_keyword

rising_queries = nft_results['rising']
rising_queries['init_query'] = base_keyword

In [16]:
# (rows, columns) of each result table for the "NFT" baseline query
print(top_queries.shape)
print(rising_queries.shape)

(25, 3)
(25, 3)


In [17]:
# Preview the first rows of the top related queries for "NFT"
top_queries.head()

Unnamed: 0,query,value,init_query
0,nft meaning,100,NFT
1,what is nft,85,NFT
2,nft art,75,NFT
3,nft crypto,57,NFT
4,crypto,57,NFT


In [18]:
# Preview the first rows of the rising related queries for "NFT"
rising_queries.head()

Unnamed: 0,query,value,init_query
0,opensea,453350,NFT
1,opensea nft,453350,NFT
2,nft marketplace,344400,NFT
3,binance nft,170250,NFT
4,binance,170250,NFT


#### Search the rest of the collection names

In [19]:
# Query the related searches for each batch of collection names and add the
# results to the running top_queries / rising_queries tables.
for set_5 in grouped_collections:
    pytrend = TrendReq()
    # Call the API.
    pytrend.build_payload(kw_list=set_5)
    # .related_queries() Returns a dictionary of dataframes
    related_queries = pytrend.related_queries()

    batch_top = []
    batch_rising = []
    for key, results in related_queries.items():
        # A keyword with no search data is expected to come back without a
        # table (None instead of a DataFrame) -- NOTE(review): confirm against
        # the installed pytrends version. Skip those cases explicitly instead
        # of hiding every error behind a bare `except: pass`. 'top' and
        # 'rising' are checked independently so that one missing table does
        # not discard the other.
        for kind, batch in (('top', batch_top), ('rising', batch_rising)):
            temp_queries = (results or {}).get(kind)
            if temp_queries is None or temp_queries.empty:
                continue
            temp_queries['init_query'] = key
            batch.append(temp_queries)

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # replacement. Concatenating once per batch keeps the results gathered so
    # far if a later request fails.
    top_queries = pd.concat([top_queries, *batch_top])
    rising_queries = pd.concat([rising_queries, *batch_rising])
    # Randomized pause between batches
    sleep(randint(5, 15))

In [20]:
# Shapes after adding the collection-name results to the "NFT" baseline
print(top_queries.shape)
print(rising_queries.shape)

(51, 3)
(51, 3)


In [21]:
# The first rows are still the "NFT" baseline queries
top_queries.head()

Unnamed: 0,query,value,init_query
0,nft meaning,100,NFT
1,what is nft,85,NFT
2,nft art,75,NFT
3,nft crypto,57,NFT
4,crypto,57,NFT


In [22]:
# The last rows come from the collection-name searches (see `init_query`)
top_queries.tail()

Unnamed: 0,query,value,init_query
21,shadow vr,3,Shadow Quest
22,shadow of the tomb raider,3,Shadow Quest
23,side quest,3,Shadow Quest
24,oculus quest 2,3,Shadow Quest
0,drippies nft,100,Drippies


In [23]:
# The first rows are still the "NFT" baseline queries
rising_queries.head()

Unnamed: 0,query,value,init_query
0,opensea,453350,NFT
1,opensea nft,453350,NFT
2,nft marketplace,344400,NFT
3,binance nft,170250,NFT
4,binance,170250,NFT


In [24]:
# The last rows come from the collection-name searches (see `init_query`)
rising_queries.tail()

Unnamed: 0,query,value,init_query
21,the looming shadow,61950,Shadow Quest
22,war of light and shadow,61250,Shadow Quest
23,shadow of war bruz quest,59450,Shadow Quest
24,shadow of war the bright lord quest,59400,Shadow Quest
0,drippies nft,3333250,Drippies


### Output

In [25]:
# Write both related-query tables to CSV (rising first, then top),
# leaving the row index out of the files.
for frame, csv_path in (
    (rising_queries, 'data/trends_rising_queries.csv'),
    (top_queries, 'data/trends_top_queries.csv'),
):
    frame.to_csv(csv_path, index=False)

## Historical Hourly Interest
- TO DO

In [26]:
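# TODO: reference signature only -- call it on the TrendReq instance (named `pytrend` in this notebook):
# pytrends.get_historical_interest(kw_list, year_start=2018, month_start=1, day_start=1, hour_start=0, year_end=2018, month_end=2, day_end=1, hour_end=0, cat=0, geo='', gprop='', sleep=0)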