# Trend Search from Collection Names

Requirements:
- Packages
 - pytrends, `pip install pytrends`
 - pandas
 - numpy
 - time (for sleep)
 - random (for sleep)
- Files
 - `data/collection_names.txt`
 - `data/mint_dict.pkl` (pickled mapping of collection name to minting date, loaded in section 1)

## 1. Import Data

In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import pickle

In [2]:
# Restore the pickled mapping of collection name -> minting date used by the cells below
with open('data/mint_dict.pkl', mode='rb') as pickle_file:
    mint_dict = pickle.load(pickle_file)

## 2. Google Trends - Interest_Over_Time

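Description:
- For each collection, grab the weekly search interest from 365 days before its minting date to 70 days after it.
- Returns one large dataframe, indexed by week relative to minting (`52_before` ... `minting_week` ... `9_after`), with a date column and an interest column for every collection.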

In [3]:
# Imports
import time
from time import sleep
from random import randint

### Interest_Over_Time

In [4]:
import pytrends
from pytrends.request import TrendReq
from datetime import datetime, timedelta

In [5]:
def create_relative_dates(date_series, minting_date):
    """Label each date by its position relative to the minting week.

    The minting week is the first entry of ``date_series`` that falls on or
    after ``minting_date``. Entries before it are labelled ``"<n>_before"``
    (``n`` counts down to 1 right before the minting week), the entry itself
    is ``"minting_week"``, and later entries are ``"<n>_after"`` counting up
    from 1.

    Args:
        date_series: Indexable sequence of dates, assumed to be in ascending
            order and comparable with ``minting_date``.
        minting_date: The date the collection was minted.

    Returns:
        A list of labels, one per entry of ``date_series`` (empty for an
        empty input).
    """
    total = len(date_series)

    # Position of the minting week; equals ``total`` when every date precedes
    # the mint, in which case all entries are labelled "<n>_before".
    mint_idx = next(
        (i for i, date in enumerate(date_series) if date >= minting_date),
        total,
    )

    col_names = []
    for i in range(total):
        # Count from the actual minting position instead of assuming exactly
        # 52 pre-mint entries, so labels stay correct for any series length.
        offset = i - mint_idx
        if offset < 0:
            col_names.append("{}_before".format(-offset))
        elif offset == 0:
            col_names.append("minting_week")
        else:
            col_names.append("{}_after".format(offset))
    return col_names

In [6]:
# Seed the combined frame with the first collection; the remaining
# collections are added to it column-by-column in a later cell.
collection_info = list(mint_dict.items())
init_search = collection_info[0]
minting_date = init_search[1]

# Query window: 365 days before the mint through 70 days after it
one_year_pre = (minting_date - timedelta(days=365)).date().strftime('%Y-%m-%d')
seventy_days_post = (minting_date + timedelta(days=70)).date().strftime('%Y-%m-%d')

# Ask Google Trends for the collection name's interest over that window
pytrend = TrendReq()
pytrend.build_payload(
    kw_list=[init_search[0]],
    timeframe=' '.join([one_year_pre, seventy_days_post]),
)
iot = pytrend.interest_over_time()

# Reshape in one chain:
#   1. discard the 'isPartial' flag column
#   2. move the date index into a regular 'date' column
#   3. label every row relative to the minting week and index by that label
#   4. keep the calendar dates under '<collection>_date' in case they are needed later
iot = (
    iot.drop(columns=['isPartial'])
    .reset_index()
    .assign(relative_dates=lambda frame: create_relative_dates(list(frame['date']), minting_date))
    .set_index('relative_dates')
    .rename(columns={'date': '{}_date'.format(init_search[0])})
)

In [7]:
# Preview the first rows to check the "<n>_before" labels against the calendar dates
iot.head()

Unnamed: 0_level_0,Anonymice_date,Anonymice
relative_dates,Unnamed: 1_level_1,Unnamed: 2_level_1
52_before,2020-09-20,0
51_before,2020-09-27,0
50_before,2020-10-04,0
49_before,2020-10-11,0
48_before,2020-10-18,0


In [8]:
# Preview the last seven rows to check the "<n>_after" labels against the calendar dates
iot.tail(7)

Unnamed: 0_level_0,Anonymice_date,Anonymice
relative_dates,Unnamed: 1_level_1,Unnamed: 2_level_1
3_after,2021-10-10,94
4_after,2021-10-17,69
5_after,2021-10-24,15
6_after,2021-10-31,39
7_after,2021-11-07,8
8_after,2021-11-14,30
9_after,2021-11-21,38


In [9]:
start_time = time.time()
# Names of the collections whose query produced no usable data
skipped = []

# Query every remaining collection and add its columns to the combined frame
for search in collection_info[1:]:
    minting_date = search[1]
    # Query window: 365 days before the mint through 70 days after it
    one_year_pre = (search[1] - timedelta(days=365)).date().strftime('%Y-%m-%d')
    seventy_days_post = (search[1] + timedelta(days=70)).date().strftime('%Y-%m-%d')

    # Trends work
    pytrend = TrendReq()
    pytrend.build_payload(kw_list=[search[0]], timeframe=(one_year_pre + ' ' + seventy_days_post))
    temp_iot = pytrend.interest_over_time()

    try:
        temp_iot = temp_iot.drop(columns=['isPartial'])
        # reset index so date is its own column
        temp_iot = temp_iot.reset_index()
        # label every row relative to the minting week
        temp_iot['relative_dates'] = create_relative_dates(list(temp_iot['date']), minting_date)
        # make relative dates the new index
        temp_iot = temp_iot.set_index('relative_dates')
        # rename the original date column to keep it safe, just in case we need it later
        temp_iot = temp_iot.rename(columns={'date': '{}_date'.format(search[0])})

        # Add to the iot df; assignment aligns on iot's relative-date index,
        # so labels that iot does not have are dropped
        for col in temp_iot.columns:
            iot[col] = temp_iot[col]
    except KeyError:
        # A query with no data has no 'isPartial' column, so drop() raises
        # KeyError; remember the name instead of discarding it silently
        skipped.append(search[0])

    # Pause after every request, including the ones that returned nothing,
    # so an empty result is not followed immediately by the next query
    sleep(randint(3, 9))

# Inserting columns one at a time fragments the frame; copy() consolidates it
iot = iot.copy()

print('This took', time.time() - start_time, "to run")
print('Skipped', len(skipped), 'collections with no data:', skipped)

This took 2916.200697183609 to run


In [12]:
# Number of (collection name, minting date) pairs loaded from mint_dict
len(collection_info)

532

In [13]:
# (rows, columns): each collection that returned data adds two columns,
# '<name>_date' and '<name>'
iot.shape

(62, 862)

In [14]:
# Persist the combined frame; the relative_dates index is written as the first column.
# NOTE(review): the previews above show one row per week, although the filename says "monthly".
iot.to_csv('data/monthly_interest_over_time.csv')

# Hourly Interest - HOLD

NOTE
- Holding off on this until we have minting dates for each NFT.
- It would be useful to run this for the month before and the month after each NFT's minting date; the cells below currently query one week on either side.

In [None]:
# Seed the hourly frame with the first collection in mint_dict
collection_info = list(mint_dict.items())
init_search = collection_info[0]
minting_date = init_search[1]

# Window start: seven days before the mint, as integer year / month / day
one_week_pre_date = (minting_date - timedelta(days=7)).date().strftime('%Y-%m-%d').split('-')
pre_year, pre_month, pre_day = (int(part) for part in one_week_pre_date)

# Window end: seven days after the mint, as integer year / month / day
one_week_post = (minting_date + timedelta(days=7)).date().strftime('%Y-%m-%d').split('-')
post_year, post_month, post_day = (int(part) for part in one_week_post)

# Trends work: hourly interest for the collection name over that window
pytrend = TrendReq()
pytrend.build_payload(kw_list=[init_search[0]])
hours = pytrend.get_historical_interest(
    keywords=[init_search[0]],
    year_start=pre_year, month_start=pre_month, day_start=pre_day, hour_start=0,
    year_end=post_year, month_end=post_month, day_end=post_day, hour_end=0,
    cat=0, geo='', gprop='', sleep=0,
)

# Drop the 'isPartial' flag, then replace the timestamp with separate
# '<collection>_time' and '<collection>_date' columns
hours = hours.drop(columns=['isPartial']).reset_index()
hours['{}_time'.format(init_search[0])] = hours['date'].map(lambda stamp: stamp.time())
hours['{}_date'.format(init_search[0])] = hours['date'].map(lambda stamp: stamp.date())
hours = hours.drop(columns=['date'])

In [None]:
# Preview the first rows of the hourly frame for the first collection
hours.head()

In [None]:
start_time = time.time()
# Names of the collections whose processing raised KeyError (e.g. no data came back)
skipped = []

# Do it for the rest of them
for search in collection_info[1:]:
    name, minting_date = search
    try:
        # Query window: seven days on either side of the mint
        one_week_pre_date = (minting_date - timedelta(days=7)).date()
        pre_year = one_week_pre_date.year
        pre_month = one_week_pre_date.month
        pre_day = one_week_pre_date.day

        one_week_post = (minting_date + timedelta(days=7)).date()
        post_year = one_week_post.year
        post_month = one_week_post.month
        post_day = one_week_post.day

        # Trends work
        pytrend = TrendReq()
        pytrend.build_payload(kw_list=[name])
        temp_hours = pytrend.get_historical_interest(
            keywords=[name],
            year_start=pre_year, month_start=pre_month, day_start=pre_day, hour_start=0,
            year_end=post_year, month_end=post_month, day_end=post_day, hour_end=0,
            cat=0, geo='', gprop='', sleep=0,
        )

        # Drop the 'isPartial' flag, then replace the timestamp with separate
        # '<name>_time' and '<name>_date' columns
        temp_hours = temp_hours.drop(columns=['isPartial'])
        temp_hours = temp_hours.reset_index()
        temp_hours['{}_time'.format(name)] = temp_hours['date'].apply(lambda x: x.time())
        temp_hours['{}_date'.format(name)] = temp_hours['date'].apply(lambda x: x.date())
        temp_hours = temp_hours.drop(columns=['date'])

        # Add to the hours df; assignment aligns on the integer row index,
        # so rows beyond the length of hours are dropped
        for col in temp_hours.columns:
            hours[col] = temp_hours[col]

    except KeyError:
        # Remember the collection instead of discarding it silently
        skipped.append(name)

    # Pause after every request, including failed ones, so a failure is not
    # followed immediately by the next query
    sleep(randint(5, 25))

# Inserting columns one at a time fragments the frame; copy() consolidates it
hours = hours.copy()

print('This took', time.time() - start_time, "to run")
print('Skipped', len(skipped), 'collections with no data:', skipped)

In [None]:
# Preview the first rows once every collection's columns have been added
hours.head()

In [None]:
# Preview the last rows of the combined hourly frame
hours.tail()

In [None]:
# (rows, columns) of the combined hourly frame
hours.shape

In [None]:
# Persist the hourly frame; index=False leaves the integer row index out of the file
hours.to_csv('data/hourly-data.csv', index=False)