# Trend Search from Collection Names

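Requirements:
- Packages
  - pytrends (`pip install pytrends`)
  - pandas
  - numpy
  - time (for sleep)
  - random (for sleep)
- Files
  - `data/mint_json.json` (collection names with their minting dates; this is the file loaded in step 1 — the previously listed `data/collection_names.txt` is not read by this notebook)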

## 1. Import Data

In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import json

In [2]:
# Load the collection-name -> minting-date mapping from disk.
# The value returned by json.load() is itself passed through json.loads(),
# i.e. the file is expected to contain a JSON document encoded as a JSON
# string (double-encoded), so it is decoded twice.
with open('data/mint_json.json', 'rb') as handle:
    mint_dict = json.loads(json.load(handle))

In [3]:
# Turn every stored minting date back into a pandas datetime, updating the
# dictionary in place (the comprehension is fully built before the update).
mint_dict.update(
    {name: pd.to_datetime(raw_date) for name, raw_date in mint_dict.items()}
)

In [4]:
# Rank the collections by minting date, earliest first.
# The earliest mint is 2017-06-23, so the Trends queries below start
# gathering data from 2017-01-01.
oldest = sorted(mint_dict.items(), key=lambda pair: pair[1])
oldest[:3]

[('CryptoPunks', Timestamp('2017-06-23 00:00:00')),
 ('CryptoPunks NFT', Timestamp('2017-06-23 00:00:00')),
 ('CryptoKitties', Timestamp('2017-11-23 00:00:00'))]

## 2. Google Trends - Interest_Over_Time

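Description:
- For each collection, fetch its Google search interest over time.
- All queries share one fixed absolute window (2017-01-01 to 2022-03-27), which starts before the earliest minting date, rather than a window relative to each collection's minting date.
- Produces one large dataframe with a `date` column plus one interest column per collection name.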

In [5]:
# Imports
import time
from time import sleep
from random import randint

### Interest_Over_Time

In [6]:
# !pip install pytrends
import pytrends
from pytrends.request import TrendReq
from datetime import datetime, timedelta

In [7]:
# Commenting this out because relative dates are causing too much headache
# We should just stick to absolute dates

# def create_relative_dates(date_series, minting_date):
#     col_names = []
#     num_weeks = 0
#     for i in range(len(date_series)):
#         if (date_series[i] < minting_date):
#             num_weeks += 2
#             col_names.append("{}_before".format(52-i))
#         elif (date_series[i-1] < minting_date) and (date_series[i+1] > minting_date):
#             num_weeks = 0
#             col_names.append("minting_week")
#         else:
#             num_weeks += 1
#             col_names.append("{}_after".format(num_weeks))
#     return(col_names)

In [8]:
# Seed the interest-over-time frame with the first collection; the other
# collections are merged onto it by the loop in the following cell.
collection_info = list(mint_dict.items())
init_search = collection_info[0]

# Trends work: one request over the fixed absolute window used by every query.
pytrend = TrendReq()
pytrend.build_payload(kw_list=[init_search[0]], timeframe='2017-01-01 2022-03-27')

# Expose the date index as a regular 'date' column and discard the
# 'isPartial' flag column.
iot = pytrend.interest_over_time().reset_index().drop(columns=['isPartial'])

ResponseError: The request failed: Google returned a response with code 429.

In [None]:
# Query Google Trends for every collection and merge each result onto `iot`
# as one extra column, keyed on the shared 'date' column.
start_time = time.time()

for search in collection_info:
    name = search[0]

    # `iot` already contains the seed collection (and anything merged by an
    # earlier, interrupted run of this cell). Merging the same keyword twice
    # would create duplicate `<name>_x` / `<name>_y` columns.
    if name in iot.columns:
        continue

    # Trends work
    try:
        pytrend = TrendReq()
        pytrend.build_payload(kw_list=[name], timeframe='2017-01-01 2022-03-27')
        temp_iot = pytrend.interest_over_time()
    except KeyError as err:
        # Best effort: one malformed response must not abort the whole run,
        # but it should be visible rather than silently dropped.
        print('Skipping', name, '- unexpected Trends response:', repr(err))
        temp_iot = None

    # Throttle after every request, including the ones that yield nothing,
    # so skipped keywords do not turn into back-to-back calls (HTTP 429).
    sleep(randint(2, 8))

    if temp_iot is None:
        continue
    if temp_iot.empty:
        print('Skipping', name, '- no Trends data returned')
        continue

    # Only the 'isPartial' flag is dropped. 'date' must survive because it is
    # the merge key; dropping it made every merge raise a KeyError that was
    # then swallowed, so no collection was ever added to `iot`.
    temp_iot = temp_iot.reset_index().drop(columns=['isPartial'])
    iot = iot.merge(temp_iot, on='date')

print('This took', time.time() - start_time, "to run")

len(collection_info)

In [None]:
# Sanity check: (rows, columns) of the merged interest-over-time frame.
iot.shape

In [None]:
# Display the merged interest-over-time frame.
iot

In [None]:
# Persist the merged frame. `index=False` is not passed, so the DataFrame
# index is written as the first (unnamed) CSV column.
iot.to_csv('data/monthly_interest_over_time.csv')

# Hourly Interest - HOLD

NOTE
- This section is on hold until we have minting dates for each NFT.
- The goal is to pull hourly interest for the month before and the month after each NFT's minting date; the code below currently uses one week on either side.

In [None]:
# Seed the hourly frame with the first collection, covering one week on
# either side of its minting date.
# NOTE(review): newer pytrends releases may no longer provide
# get_historical_interest - confirm against the installed version.
collection_info = list(mint_dict.items())
init_search = collection_info[0]
minting_date = init_search[1]

# The historical-interest call takes the window bounds as separate integer
# year / month / day parts.
window_start = (minting_date - timedelta(days=7)).date()
pre_year, pre_month, pre_day = window_start.year, window_start.month, window_start.day

window_end = (minting_date + timedelta(days=7)).date()
post_year, post_month, post_day = window_end.year, window_end.month, window_end.day

# Trends work
pytrend = TrendReq()
pytrend.build_payload(kw_list=[init_search[0]])
hours = pytrend.get_historical_interest(
    keywords=[init_search[0]],
    year_start=pre_year, month_start=pre_month, day_start=pre_day, hour_start=0,
    year_end=post_year, month_end=post_month, day_end=post_day, hour_end=0,
    cat=0, geo='', gprop='', sleep=0,
)

# Discard the 'isPartial' flag, expose the timestamp index as a column, then
# replace that column with per-collection '<name>_time' / '<name>_date' columns.
hours = hours.drop(columns=['isPartial']).reset_index()
stamps = hours.pop('date')
hours['{}_time'.format(init_search[0])] = stamps.apply(lambda stamp: stamp.time())
hours['{}_date'.format(init_search[0])] = stamps.apply(lambda stamp: stamp.date())

In [None]:
# Fetch hourly interest for every remaining collection (one week on either
# side of its minting date) and append its columns to `hours`.
# NOTE(review): newer pytrends releases may no longer provide
# get_historical_interest - confirm against the installed version.
start_time = time.time()


# Do it for the rest of them
for search in collection_info[1:]:
    name = search[0]
    minting_date = search[1]

    # Makes the cell safe to re-run after an interruption: collections whose
    # columns are already in `hours` are not fetched or appended again.
    if name in hours.columns:
        continue

    # Window bounds; the year / month / day parts are read straight off the
    # date objects instead of round-tripping through a formatted string.
    window_start = (minting_date - timedelta(days=7)).date()
    window_end = (minting_date + timedelta(days=7)).date()

    # Trends work
    try:
        pytrend = TrendReq()
        pytrend.build_payload(kw_list=[name])
        temp_hours = pytrend.get_historical_interest(
            keywords=[name],
            year_start=window_start.year, month_start=window_start.month,
            day_start=window_start.day, hour_start=0,
            year_end=window_end.year, month_end=window_end.month,
            day_end=window_end.day, hour_end=0,
            cat=0, geo='', gprop='', sleep=0,
        )
    except KeyError as err:
        # Best effort: report the failed keyword instead of silently
        # dropping it, and carry on with the next collection.
        print('Skipping', name, '- unexpected Trends response:', repr(err))
        temp_hours = None

    # Throttle after every request, including the ones that yield nothing;
    # previously a failed or empty query skipped the pause entirely.
    sleep(randint(8, 25))

    if temp_hours is None:
        continue
    if temp_hours.empty:
        print('Skipping', name, '- no Trends data returned')
        continue

    temp_hours = temp_hours.drop(columns=['isPartial']).reset_index()
    temp_hours['{}_time'.format(name)] = temp_hours['date'].apply(lambda x: x.time())
    temp_hours['{}_date'.format(name)] = temp_hours['date'].apply(lambda x: x.date())
    temp_hours = temp_hours.drop(columns=['date'])

    # Add to the hours df. concat performs an outer join on the row index, so
    # a collection with more rows than `hours` is no longer silently
    # truncated (column-by-column assignment aligned on the existing index
    # and discarded the extra rows); shorter ones are padded with NaN.
    hours = pd.concat([hours, temp_hours], axis=1)


print('This took', time.time() - start_time, "to run")

In [None]:
# Preview the first rows of the combined hourly frame.
hours.head()

In [None]:
# Preview the last rows of the combined hourly frame.
hours.tail()

In [None]:
# Sanity check: (rows, columns) of the combined hourly frame.
hours.shape

In [None]:
# Persist the hourly frame; `index=False` keeps the row index out of the CSV.
hours.to_csv('data/hourly-data.csv', index=False)