# Trend Search from Collection Names

Requirements:
- Packages
 - pytrends, `pip install pytrends`
 - pandas
 - numpy
 - time (for sleep)
 - random (for sleep)
- Files
 - `data/collection_names.txt`

## 1. Import Data

In [1]:
# Basics
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import json
from tqdm.notebook import tqdm_notebook

# !pip install pytrends
import pytrends
from pytrends.request import TrendReq
from datetime import date, datetime, timedelta

In [2]:
# Loading the dictionary
with open('data/mint_json.json', 'rb') as f:
    init_mint_dict = json.load(f)
init_mint_dict = json.loads(init_mint_dict)

In [3]:
# Getting rid of those pesky " NFT" items
# Also converting to datetime
mint_dict = {}
mint_dict_nft = {}
for key in init_mint_dict.keys():
    if key[-4:] !=' NFT':
        mint_dict[key] = pd.to_datetime(init_mint_dict[key])
    else:
        mint_dict_nft[key] = pd.to_datetime(init_mint_dict[key])

In [5]:
print(mint_dict)

{'Anonymice': Timestamp('2021-09-16 00:00:00'), 'Vox Collectibles': Timestamp('2021-08-10 00:00:00'), 'Galacticapes': Timestamp('2021-09-26 00:00:00'), 'Irenedao': Timestamp('2022-01-14 00:00:00'), 'Byoland': Timestamp('2021-12-16 00:00:00'), 'Lonely Alien Space Club': Timestamp('2021-07-23 00:00:00'), '888 Inner Circle': Timestamp('2021-09-09 00:00:00'), 'Mfer': Timestamp('2021-11-30 00:00:00'), 'Farmers World': Timestamp('2021-07-24 00:00:00'), 'Duskbreakers': Timestamp('2021-12-04 00:00:00'), 'Lobsterdao': Timestamp('2021-10-08 00:00:00'), 'Lil Heroes': Timestamp('2022-01-16 00:00:00'), 'Jenkins The Valet The Writers Room': Timestamp('2021-08-04 00:00:00'), 'Bored Ape Kennel Club': Timestamp('2021-06-18 00:00:00'), 'Mutant Ape Yacht Club': Timestamp('2021-08-28 00:00:00'), 'Pudgy Penguins': Timestamp('2021-07-22 00:00:00'), 'Gft Atari 50Th Anniversary': Timestamp('2022-02-01 00:00:00'), 'Stackedtoadz': Timestamp('2021-10-01 00:00:00'), 'Ethereum Name Service': Timestamp('2020-02-06 

## 2. Google Trends - Interest_Over_Time

Description:
- For each collection, grab the search frequency over time.
- Timeframe: 268 days after minting date, or today (April 10, 2022)
- Returns large dataframe with 3 columns and many rows.
 - date col has duplicate values

### Interest_Over_Time

In [6]:
collection_info = list(mint_dict.items())
# Build the initial dataframe
init_search = collection_info[0]
start_date = init_search[1].date()


end_date = start_date + timedelta(days=268)
if end_date > date.today():
    end_date = date.today().strftime('%Y-%m-%d')
else: 
    end_date = end_date.date().strftime('%Y-%m-%d')

# # Trends work
pytrend = TrendReq()

tf = str(start_date) + " " + str(end_date)

# The interest_over_time function gives daily data within a timeframe of 269 days.
# 134 on each side of minting date
pytrend.build_payload(kw_list=[init_search[0]], timeframe=tf)
df = pytrend.interest_over_time()

# Formatting
df = df.drop(columns=['isPartial'])
df = df.reset_index()
df = df.rename(columns={init_search[0]:"trends"})
# Adding collection_name as a column
df['collection_name'] = init_search[0]

In [7]:
df

Unnamed: 0,date,trends,collection_name
0,2021-09-16,34,Anonymice
1,2021-09-17,17,Anonymice
2,2021-09-18,38,Anonymice
3,2021-09-19,19,Anonymice
4,2021-09-20,49,Anonymice
...,...,...,...
203,2022-04-07,0,Anonymice
204,2022-04-08,0,Anonymice
205,2022-04-09,0,Anonymice
206,2022-04-10,0,Anonymice


In [8]:
# Do it for the rest of them
for search_index in tqdm_notebook(range(len(collection_info[1:])), desc='Progress gathering Trends'):
    try:
        name = collection_info[search_index][0]
        # Date formatting
        start_date = collection_info[search_index][1].date()
        end_date = start_date + timedelta(days=268)
        
        # If minting_date + 268 days is after today (April 10, 2022), then set end_date to today
        if end_date > date.today():
            end_date = date.today().strftime('%Y-%m-%d')
        else: 
            end_date = end_date.strftime('%Y-%m-%d')

        # Trends work
        pytrend = TrendReq()
        tf = str(start_date)+' '+str(end_date)
        pytrend.build_payload(kw_list=[name], timeframe=tf)
        temp_df = pytrend.interest_over_time()
        
        # Initial formatting
        temp_df = temp_df.drop(columns=['isPartial'])
        temp_df = temp_df.reset_index()
        
        # Adding collection_name as a column
        temp_df['collection_name'] = name
        
        # Convert trend data to "trends" col name
        temp_df = temp_df.rename(columns={name:"trends"})

        # Add to the main df        
        df = df.append(temp_df[['date', "trends", "collection_name"]])
        
    except KeyError as err:
        pass

Progress gathering Trends:   0%|          | 0/265 [00:00<?, ?it/s]

In [9]:
df

Unnamed: 0,date,trends,collection_name
0,2021-09-16,34,Anonymice
1,2021-09-17,17,Anonymice
2,2021-09-18,38,Anonymice
3,2021-09-19,19,Anonymice
4,2021-09-20,49,Anonymice
...,...,...,...
62,2022-04-07,12,Tronwars
63,2022-04-08,0,Tronwars
64,2022-04-09,0,Tronwars
65,2022-04-10,0,Tronwars


In [10]:
# Scaling
# All the trends data is already on a 0 to 100 scale;
# Just dividing the values by 100
df['trends'] = df['trends']/100

In [11]:
df = df.reset_index(drop=True)

In [12]:
df['trends_previous_day'] = df['trends'].shift(1)
df['trends_two_days_prior'] = df['trends'].shift(2)

In [13]:
df

Unnamed: 0,date,trends,collection_name,trends_previous_day,trends_two_days_prior
0,2021-09-16,0.34,Anonymice,,
1,2021-09-17,0.17,Anonymice,0.34,
2,2021-09-18,0.38,Anonymice,0.17,0.34
3,2021-09-19,0.19,Anonymice,0.38,0.17
4,2021-09-20,0.49,Anonymice,0.19,0.38
...,...,...,...,...,...
46838,2022-04-07,0.12,Tronwars,0.00,0.00
46839,2022-04-08,0.00,Tronwars,0.12,0.00
46840,2022-04-09,0.00,Tronwars,0.00,0.12
46841,2022-04-10,0.00,Tronwars,0.00,0.00


In [None]:
# df.to_csv('data/daily_interest.csv', index=False)