# Trend Search from Collection Names

Requirements:
- Packages
 - pytrends, `pip install pytrends`
 - pandas
 - numpy
 - time (for sleep)
 - random (for sleep)
- Files
 - output from opensea-get-collections.ipynb
   - currently named 'initial_opensea_get_collections.csv' (note: the loading cell below reads '../opensea/data/opensea_get_many_collections.csv')

## Gather Collection Names

In [133]:
import pandas as pd
import numpy as np

In [134]:
# Read the get_collections output, keeping only the collection-name column
# and discarding rows where the name is missing.
init_many_collections = pd.read_csv(
    '../opensea/data/opensea_get_many_collections.csv',
    usecols=['collection_name'],
).dropna()
# Reduce the column to its distinct names and hand them back as a plain list
init_many_collections = list(init_many_collections['collection_name'].unique())


In [135]:
# Every collection contributes two search terms, in this order:
# its bare name, then the name suffixed with " NFT".
collections = []
for name in init_many_collections:
    collections.extend((name, name + " NFT"))

In [136]:
# Hand-picked collection names to search for in addition to the loaded ones
manual_collections = [
    'Meebits',
    'dotdotdots',
    'CyberBrokersV',
    'VeeFriends',
    'CloneX',
    'BoredApeYachtClub',
    'MutantApeYachtClub',
    'Azuki',
    'InvisibleFriends',
    'CryptoPunks',
    'Landers',
    'NFTWorlds',
    'World of Women',
]

In [137]:
# Merge the hand-picked names into the search list: for each one, add the
# trimmed name and its " NFT"-suffixed variant, skipping any term that is
# already present.
for raw_name in manual_collections:
    trimmed = raw_name.strip()
    for candidate in (trimmed, trimmed + " NFT"):
        if candidate not in collections:
            collections.append(candidate)

In [138]:
# Sanity check: total number of search terms gathered so far
len(collections)

1408

In [139]:
# Persist the search terms, one per line. The context manager guarantees the
# file is closed even if a write fails, and UTF-8 is set explicitly so names
# containing non-ASCII characters are written the same way on every platform.
with open('data/collection_names_only.txt', "w", encoding="utf-8") as f:
    f.writelines(name + '\n' for name in collections)

In [140]:
# Validate that extracting the collections worked
assert isinstance(collections, list), "collections should be a list"
# Checked explicitly so an empty list fails with a clear message instead of
# an IndexError from indexing the first element.
assert collections, "collections should not be empty"
# isinstance is the idiomatic type check; every entry is verified, not just
# the first one.
assert all(isinstance(name, str) for name in collections), \
    "every collection name should be a str"

# Google Trends

In [85]:
import time
from time import sleep
from random import randint

## Keyword Expansion

- Summary: This is not a useful method and we should not use it.
- Explanation:
 - There are few to no relevant search terms from a small sample (n=9)
 - The suggestions that are returned are mostly unrelated to the NFT collections (e.g. 'Clonex Rooting Gel', 'Adzuki bean')

Resources:
- https://github.com/GeneralMills/pytrends
- https://pypi.org/project/pytrends/
- https://towardsdatascience.com/google-trends-api-for-python-a84bc25db88f

In [86]:
# pip install pytrends
import pytrends
from pytrends.request import TrendReq
# Client object whose .suggestions() method is queried in the cells below
pytrend = TrendReq()

In [87]:
# Seed the keyword-expansion table with the suggestions for the bare "NFT" query
keywords = pytrend.suggestions(keyword='NFT')
# Tag every suggestion row with the term that produced it
keyexp_df = pd.DataFrame(keywords).assign(search_term='NFT')

In [88]:
# Wall-clock timer for the whole crawl
# SOURCE: https://stackoverflow.com/questions/12444004/how-long-does-my-python-application-take-to-run
start_time = time.time()

# Look up the Google Trends suggestions for every collection name and add
# them to keyexp_df.
# NOTE: Many of these lookups come back empty, presumably because few people
# search for the collection names themselves.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it
# re-copies the accumulated table on every call, so the per-term frames are
# gathered in a list and concatenated once.
suggestion_frames = [keyexp_df]
try:
    for collection_name in collections:
        keywords = pytrend.suggestions(keyword=collection_name)
        temp_search_df = pd.DataFrame(keywords)
        temp_search_df['search_term'] = collection_name
        suggestion_frames.append(temp_search_df)
        # Random pause between requests to avoid hammering the API
        sleep(randint(2, 6))
finally:
    # Runs even if a request fails part-way through, so the suggestions
    # gathered up to that point still end up in keyexp_df.
    keyexp_df = pd.concat(suggestion_frames)

print('This took', time.time() - start_time, "to run")

This took 105.59816908836365 to run


In [90]:
# Distinct suggestion titles returned across all of the searched terms
keywords_expanded = pd.unique(keyexp_df['title'])
keywords_expanded

array(['nftables', 'Niagara Frontier Transportation Authority', 'NFT',
       'Network For Teaching Entrepreneurship',
       'National Film & Television School (NFTS)', 'Clonex Rooting Gel',
       'Clonex Mist', 'CLONEX Print Experience', 'Azuki-chan',
       'Azukiarai', 'Adzuki bean', 'Azuki Moeno', 'Yashiro Azuki',
       'CryptoPunks', 'Landers', 'David L. Lander', 'Audrey Landers',
       'Paul Landers', 'SSG Landers', 'The Great Lie',
       'White Chrysanthemum', 'Women of the World',
       'Inseparable: A Never-Before-Published Novel'], dtype=object)

### Output

In [91]:
# Save every suggestion row (with its originating search term) to CSV, without the index
keyexp_df.to_csv('data/trends_expanded_keywords.csv', index=False)

## Top 25 Related Queries

Notes:
- Can only query 5 words at a time
- Be careful about rate limits. TBD on what those are.

### Initial Formatting

In [92]:
# Slice the search terms into consecutive batches of at most 5, because only
# 5 keywords can be queried at a time.
grouped_collections = [
    collections[start:start + 5]
    for start in range(0, len(collections), 5)
]

### API Calls

#### Initial DF - Default Keyword = "NFT"

In [94]:
# Query Google Trends for the single baseline keyword "NFT"
pytrend = TrendReq()
pytrend.build_payload(kw_list=['NFT'])

# .related_queries() returns a dictionary keyed by keyword; each entry holds
# a 'top' and a 'rising' table
related_queries = pytrend.related_queries()

# Pull out both tables for "NFT"; they become the base frames that the
# per-collection results are added to later
top_queries, rising_queries = (
    related_queries['NFT'][kind] for kind in ('top', 'rising')
)

# Record which query produced each row
top_queries['init_query'] = 'NFT'
rising_queries['init_query'] = 'NFT'

#### Search the rest of the collection names

In [98]:
start_time = time.time()

# DataFrame.append was removed in pandas 2.0 (and re-copied the whole table
# on every call), so the per-keyword frames are gathered in lists and
# concatenated once at the end.
top_frames = [top_queries]
rising_frames = [rising_queries]

try:
    for set_5 in grouped_collections:
        pytrend = TrendReq()
        # Call the API with one batch of (up to 5) keywords.
        pytrend.build_payload(kw_list=set_5)
        # .related_queries() returns a dictionary keyed by keyword; each
        # entry holds a 'top' and a 'rising' table.
        related_queries = pytrend.related_queries()

        for key, results in related_queries.items():
            # 'top' and 'rising' are handled independently: a keyword with
            # no data for one of them has None there, and that must not
            # discard the table that does exist. This replaces a bare
            # `except: pass` that silently hid every kind of error.
            for kind, frames in (('top', top_frames), ('rising', rising_frames)):
                frame = results.get(kind)
                if frame is None:
                    continue
                frame['init_query'] = key
                frames.append(frame)
        # Random pause between batches to avoid hammering the API
        sleep(randint(5, 15))
finally:
    # Runs even if a request fails part-way through, so the results
    # gathered up to that point are kept.
    top_queries = pd.concat(top_frames)
    rising_queries = pd.concat(rising_frames)

print('This took', time.time() - start_time, "to run")

This took 72.75786876678467 to run


### Output

In [104]:
# Write the accumulated rising and top related-query tables to CSV, without the index
rising_queries.to_csv('data/trends_rising_queries.csv', index=False)
top_queries.to_csv('data/trends_top_queries.csv', index=False)

## Interest_Over_Time

In [105]:
# Build the initial dataframe from the first batch of search terms
pytrend = TrendReq()
pytrend.build_payload(kw_list=grouped_collections[0])
iot = pytrend.interest_over_time()
# pytrends returns an empty frame (with no 'isPartial' column) when none of
# the keywords has data; errors='ignore' keeps that case from raising KeyError.
iot = iot.drop(columns=['isPartial'], errors='ignore')

In [106]:
start_time = time.time()

for set_5 in grouped_collections[1:]:
    pytrend = TrendReq()
    # Call the API with one batch of (up to 5) keywords.
    pytrend.build_payload(kw_list=set_5)
    # .interest_over_time() returns a single DataFrame for the batch
    temp_iot = pytrend.interest_over_time()
    # A batch with no data comes back as an empty frame without the
    # 'isPartial' column; skip it instead of failing on the drop.
    if not temp_iot.empty:
        temp_iot = temp_iot.drop(columns=['isPartial'], errors='ignore')
        if iot.empty:
            # Nothing accumulated yet, so this batch becomes the base table
            iot = temp_iot
        else:
            # Outer join on the index keeps rows present in either table
            iot = pd.merge(iot, temp_iot, left_index=True, right_index=True, how='outer')

    # Random pause between batches to avoid hammering the API
    sleep(randint(5, 15))

print('This took', time.time() - start_time, "to run")

This took 55.31574010848999 to run


In [107]:
# (rows, columns) of the merged interest-over-time table
iot.shape

(261, 26)

In [108]:
# Preview the merged interest-over-time table
iot

Unnamed: 0_level_0,Meebits,Meebits NFT,dotdotdots,dotdotdots NFT,CyberBrokersV,CyberBrokersV NFT,VeeFriends,VeeFriends NFT,CloneX,CloneX NFT,...,InvisibleFriends,InvisibleFriends NFT,CryptoPunks,CryptoPunks NFT,Landers,Landers NFT,NFTWorlds,NFTWorlds NFT,World of Women,World of Women NFT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-03-12,0,0,0,0,0,0,0,0,22,0,...,0,0,0,0,46,0,0,0,30,0
2017-03-19,0,0,0,0,0,0,0,0,24,0,...,0,0,0,0,44,0,0,0,31,0
2017-03-26,0,0,0,0,0,0,0,0,18,0,...,0,0,0,0,44,0,0,0,30,0
2017-04-02,0,0,0,0,0,0,0,0,24,0,...,0,0,0,0,57,0,0,0,31,0
2017-04-09,0,0,0,0,0,0,0,0,25,0,...,0,0,0,0,47,0,0,0,29,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-02-06,13,2,1,1,0,0,20,8,44,13,...,2,1,44,8,41,0,1,0,34,34
2022-02-13,8,1,1,1,0,0,8,4,36,5,...,0,0,47,15,47,0,1,0,30,28
2022-02-20,6,4,0,0,0,0,14,6,33,5,...,3,0,27,6,43,0,1,0,32,12
2022-02-27,8,2,2,0,0,0,10,2,31,4,...,1,0,18,6,44,0,1,0,39,27


In [116]:
# The dates live in the DataFrame index, so the index has to be written out;
# index=False silently dropped the time axis from the CSV.
iot.to_csv('data/interest_over_time.csv', index=True, index_label='date')

# Hourly Interest - HOLD

NOTE
- Holding off on this until we have minting dates for each NFT
- This would be super cool to run the month before and the month after the minting date for each NFT

In [None]:
# hours = pytrend.get_historical_interest(['CryptoPunks', 'Bored Ape Yacht Club'], year_start=2022, month_start=1, day_start=1, hour_start=0, year_end=2022, month_end=2, day_end=1, hour_end=0, cat=0, geo='', gprop='', sleep=0)
# hours = hours.drop(columns=['isPartial'])