# Trend Search from Collection Names

Requirements:
- Packages
 - pytrends, `pip install pytrends`
 - pandas
 - numpy
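 - time (for sleep)
 - random (for sleep)
 - json (to load the minting-date file)
 - tqdm (notebook progress bar)
- Files
 - `data/collection_names.txt`
 - `data/mint_json.json` (collection name to minting date mapping, loaded in section 1)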

## 1. Import Data

In [1]:
# Basics
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import json

# PyTrends
import pytrends
from pytrends.request import TrendReq
from datetime import datetime, timedelta

# Sleeping
import time
from time import sleep
from random import randint

In [2]:
# Read the collection -> minting-date mapping from disk.
# The file holds a JSON string whose content is itself JSON, so it is decoded
# twice: once to recover the inner string, once to recover the dictionary.
with open('data/mint_json.json', 'rb') as json_file:
    encoded_mapping = json.load(json_file)
mint_dict = json.loads(encoded_mapping)

In [3]:
# JSON cannot store datetimes, so parse every stored minting date back into a
# pandas datetime value, keeping the same keys.
mint_dict = {
    collection_name: pd.to_datetime(raw_date)
    for collection_name, raw_date in mint_dict.items()
}

## 2. Google Trends - Interest_Over_Time

Description:
- For each collection, grab the search frequency over time.
- Timeframe: 134 days before minting date, 134 days after minting date
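- Returns a large dataframe holding, for every collection, a search-interest column and a matching `<name>_date` column, with rows labelled relative to the minting date (134 days before through 134 days after). Note: the cells below query only the bare collection name; the name+" NFT" variant is not searched in this section.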

### Interest_Over_Time

In [6]:
# !pip install pytrends
import pytrends
from pytrends.request import TrendReq
from datetime import datetime, timedelta

In [7]:
# Seed the combined dataframe `df` with the daily interest of the first
# collection; the remaining collections are appended to it afterwards.
collection_info = list(mint_dict.items())
init_search = collection_info[0]
first_name, minting_date = init_search

# interest_over_time gives daily data within a timeframe of 269 days, so the
# request covers 134 days on each side of the minting date.
half_window = timedelta(days=134)
start_date = (minting_date - half_window).date().strftime('%Y-%m-%d')
end_date = (minting_date + half_window).date().strftime('%Y-%m-%d')
tf = '{} {}'.format(start_date, end_date)

# Query Google Trends for the single keyword over that timeframe.
pytrend = TrendReq()
pytrend.build_payload(kw_list=[first_name], timeframe=tf)

# Drop the 'isPartial' flag, turn the date index into a regular column, and
# keep it as plain date objects under a per-collection "<name>_date" column.
df = pytrend.interest_over_time().drop(columns=['isPartial']).reset_index()
df['{}_date'.format(first_name)] = df['date'].dt.date
df = df.drop(columns=['date'])

# Relabel the rows relative to the minting date: row 134 is the minting date,
# earlier rows count down ("134_before" ... "1_before") and later rows count
# up ("1_after", "2_after", ...).
index_dates = []
for position in range(len(df)):
    offset = position - 134
    if offset < 0:
        index_dates.append('{}_before'.format(-offset))
    elif offset == 0:
        index_dates.append('minting_date')
    else:
        index_dates.append('{}_after'.format(offset))
df.index = index_dates

In [9]:
from tqdm.notebook import tqdm_notebook

In [10]:
# Fetch the daily interest of every remaining collection and append it to `df`.


def _relative_day_labels(n_rows, pivot=134):
    """Build row labels relative to the minting date.

    Parameters
    ----------
    n_rows : int
        Number of rows to label (positions 0 .. n_rows - 1).
    pivot : int
        Row position that corresponds to the minting date.

    Returns
    -------
    list of str
        '<k>_before' for positions ahead of the pivot (counting down to 1),
        'minting_date' at the pivot, and '<k>_after' for later positions.
    """
    labels = []
    for position in range(n_rows):
        offset = position - pivot
        if offset < 0:
            labels.append('{}_before'.format(-offset))
        elif offset == 0:
            labels.append('minting_date')
        else:
            labels.append('{}_after'.format(offset))
    return labels


# collection_info[0] was already used to build `df`, so only the entries after
# it are left. Iterate over the slice itself: using range(len(slice)) as an
# index into the full list would re-fetch entry 0 and never reach the last one.
skipped = []
for name, minting_date in tqdm_notebook(collection_info[1:], desc='Progress gathering Trends'):
    try:
        start_date = (minting_date - timedelta(days=134)).date().strftime('%Y-%m-%d')
        end_date = (minting_date + timedelta(days=134)).date().strftime('%Y-%m-%d')
        tf = start_date+' '+end_date

        # The interest_over_time function gives daily data within a timeframe of 269 days.
        # 134 on each side of minting date
        pytrend = TrendReq()
        pytrend.build_payload(kw_list=[name], timeframe=tf)
        temp_df = pytrend.interest_over_time()

        # Initial formatting: drop the partial flag and keep the dates as a
        # per-collection "<name>_date" column of plain date objects.
        temp_df = temp_df.drop(columns=['isPartial'])
        temp_df = temp_df.reset_index()
        temp_df['{}_date'.format(name)] = temp_df['date'].apply(lambda x: x.date())
        # Don't need date col anymore
        temp_df = temp_df.drop(columns=['date'])
        # Relative index, so rows line up with `df` by distance from minting.
        temp_df.index = _relative_day_labels(len(temp_df))
    except KeyError as err:
        # The result lacked an expected column ('isPartial' / 'date') --
        # presumably Trends returned no data for this keyword. Keep the
        # best-effort behaviour, but remember what was skipped instead of
        # dropping it silently.
        skipped.append((name, repr(err)))
        continue

    # Add to the daily-interest df (values align on the relative index labels).
    for col in temp_df.columns:
        df[col] = temp_df[col]
    # Checkpoint after every collection so an interrupted run keeps its progress.
    df.to_csv('data/daily_interest.csv')
    # Random pause between requests.
    sleep(randint(2, 6))

if skipped:
    print('Skipped {} collection(s) with no usable Trends data: {}'.format(
        len(skipped), [skipped_name for skipped_name, _ in skipped]))

Progress gathering Trends:   0%|          | 0/531 [00:00<?, ?it/s]

This took 2316.1361532211304 to run


In [12]:
# Final save of the combined daily-interest table.
daily_interest_path = 'data/daily_interest.csv'
df.to_csv(daily_interest_path)