In [1]:
import glob
import pickle
import pandas as pd
import json

# Step 1 - Grabbing Collection Names

In [2]:
# Make sure the sales data is in the sales_records folder at the same location as this notebook
file_names = glob.glob('sales_records/*')

# Kept as a list too, so you don't need to open the file just for a quick check
collection_names = []
for name in file_names:
    # CHECK: the output file is comma-separated, so a comma inside a name would corrupt it.
    # If one ever shows up, switch the delimiter to \n instead of a comma.
    if ',' in name:
        raise ValueError("Comma detected in {}".format(name))
    # 'sales_records/<Collection> - Historical Sales...' -> '<Collection>'
    clean_name = name.split('sales_records/')[1].split(' - Historical Sales')[0].strip()
    collection_names.append(clean_name)

# Creating the collection_names file, overwriting if already exists.
# It is opened only after every name has passed the comma check, so a failed
# check no longer leaves a truncated, half-written file behind, and `with`
# guarantees the handle is closed even if a write fails.
with open('collection_names.txt', "w") as file:
    # Each name is followed by a comma (including the last one), as before
    file.writelines(clean_name + ',' for clean_name in collection_names)

In [3]:
# Sanity check: the last five collected names must match the expected tail of
# the current collection list.
# NOTE(review): this relies on the order in which glob.glob returned the files,
# which depends on the file system - confirm it is stable on your machine.
assert collection_names[-5:] == [
    'Sorare',
    'Winter Bears',
    'Tronwars',
    'Genesisapostle',
    'Nanopass',
], 'Make sure you are using the most updated list of collection names.'

In [4]:
# Every collection name is listed twice: once as-is (whitespace-trimmed) and
# once with " NFT" appended, so the result is twice as long as collection_names.
# The two variants of a collection sit next to each other, bare name first.
collections = [
    variant
    for raw_name in collection_names
    for variant in (raw_name.strip(), raw_name.strip() + " NFT")
]
len(collections)

532

In [5]:
# Saving this output to a new, trends-only file: one name per line, with no
# newline after the final name.
# The names are joined in one go instead of comparing each one against
# collections[-1]: that comparison is by value, so any earlier entry equal to
# the last entry would be written without its newline and run into the next name.
# `with` closes the file even if the write raises.
with open('trends/data/collection_names_plus_nft.txt', "w") as f:
    f.write('\n'.join(collections))

# Step 2 - Creating Minting Date Dictionary

#### Initial Formatting
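- Collecting the minting date for each collection (assumed to be the date of its earliest recorded sale)
- Storing in a dictionary, `mint_dict`
  - key is the collection name, value is the minting date as a pandas `Timestamp` (written as a 'YYYY-MM-DD' string in the JSON export)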

In [6]:
# Input: individual file path ("../sales_records/3Landers - Historical Sales.csv")
# Output: pandas Timestamp of the earliest transaction, which we will assume is the minting date
# Example: str(grab_mint_date(path).date()) -> '2021-09-26'
def grab_mint_date(file_name):
    """Return the earliest value of the 'DateTime' column of a sales-records CSV.

    Parameters
    ----------
    file_name : str
        Path to a CSV file that has a 'DateTime' column.

    Returns
    -------
    pandas.Timestamp
        The minimum of the parsed 'DateTime' column. This is a Timestamp, not a
        string; call ``.date()`` on it to get the calendar date.

    Raises
    ------
    ValueError
        If the file has no 'DateTime' column (raised by ``pd.read_csv``), or if
        a value cannot be parsed as a date.
    """
    # Only the DateTime column is needed, so skip loading every other column
    sales_dates = pd.read_csv(file_name, usecols=['DateTime'])['DateTime']
    # Converting to DateTime format to simplify and standardize finding the earliest date
    return pd.to_datetime(sales_dates).min()

In [7]:
# Apply grab_mint_date to each sales records file
file_names = glob.glob('sales_records/*')
# Maps both "<Collection>" and "<Collection> NFT" to the same minting date
mint_dict = {}
for name in file_names:
    # 'sales_records/<Collection> - Historical Sales...' -> '<Collection>'
    clean_name = name.split('sales_records/')[1].split(' - Historical Sales')[0].strip()
    # Read and parse each CSV once, then reuse the result for both keys
    # (calling grab_mint_date twice re-read the whole file for the same answer)
    minting_date = grab_mint_date(name)
    mint_dict[clean_name] = minting_date
    mint_dict[clean_name+" NFT"] = minting_date

In [15]:
# Prep for json: the standard json module cannot serialize the date objects held
# in mint_dict, so each one is stored as its calendar date string ('YYYY-MM-DD')
mint_dict_j = {
    collection: str(mint_date.date())
    for collection, mint_date in mint_dict.items()
}

# Serialize exactly once. Passing the already-encoded string to json.dump
# encoded it a second time, so the file held one big quoted, escaped string
# instead of a JSON object. The file now loads with a single json.load(f).
# NOTE(review): any existing reader that decodes twice must be updated.
mint_json = json.dumps(mint_dict_j)
with open('trends/data/mint_json.json', 'w') as f:
    f.write(mint_json)

In [8]:
# Persist mint_dict to disk as a binary pickle so it can be reloaded later
# without re-reading the sales records
with open('trends/data/mint_dict.pkl', 'wb') as pickle_out:
    pickle.dump(mint_dict, pickle_out)

In [9]:
# To load the dictionary (only unpickle files you created yourself):
# with open('trends/data/mint_dict.pkl', 'rb') as f:
#     mint_dict = pickle.load(f)