In [1]:
import os
import json

import numpy as np
import pandas as pd

# Import MongoDB libraries for database management 
import pymongo
from pymongo import MongoClient

## Load the Crypto Punks dataset

In [2]:
# Read the crypto_punks.csv
crypto_punks_df = pd.read_csv("Data/crypto_punks.csv")# Read the trading data json file
crypto_punks_df.head()

Unnamed: 0,punk_id,type,accessories
0,0,Female,"['Green Eye Shadow', 'Earring', 'Blonde Bob']"
1,1,Male,"['Smile', 'Mohawk']"
2,2,Female,['Wild Hair']
3,3,Male,"['Wild Hair', 'Nerd Glasses', 'Pipe']"
4,4,Male,"['Big Shades', 'Wild Hair', 'Earring', 'Goat']"


In [3]:
crypto_punks_df.dtypes

punk_id         int64
type           object
accessories    object
dtype: object

In [4]:
crypto_punks_df.at[0,"accessories"]

"['Green Eye Shadow', 'Earring', 'Blonde Bob']"

## See how many other Crypto Punks share the same type and accessories

### 1. Create a Dataframe for the Types

In [5]:
# See how many share the same type 
types_df = crypto_punks_df.groupby("type").count()
types_df.reset_index(drop=False, inplace=True)
types_df = types_df.drop(columns=['punk_id'])
types_df.rename(columns={"type": "Attribute","accessories":"counts"}, inplace=True)
types_df

Unnamed: 0,Attribute,counts
0,Alien,9
1,Ape,24
2,Female,3840
3,Male,6039
4,Zombie,88


### 2. Create a Dataframe for the Accessories

In [6]:
# Read the trading data json file
df = pd.read_json("Data/txn_history-2021-10-07.jsonl", lines=True)
df.head()

Unnamed: 0,txn_type,from,to,date,timestamp,source,eth,punk_id,from_wallet_address,to_wallet_address,type,accessories
0,Bid Withdrawn,0xe73a1d,,2021-09-04,2021-09-04,larvalabs,321.0,0,,,[Female],"[Green Eye Shadow, Earring, Blonde Bob]"
1,Bid,0xe73a1d,,2021-09-01,2021-09-01,larvalabs,321.0,0,,,[Female],"[Green Eye Shadow, Earring, Blonde Bob]"
2,Bid Withdrawn,0x2e5e22,,2021-09-01,2021-09-01,larvalabs,320.0,0,,,[Female],"[Green Eye Shadow, Earring, Blonde Bob]"
3,Bid,0x2e5e22,,2021-09-01,2021-09-01,larvalabs,320.0,0,,,[Female],"[Green Eye Shadow, Earring, Blonde Bob]"
4,Bid Withdrawn,0x2e5e22,,2021-09-01,2021-09-01,larvalabs,263.0,0,,,[Female],"[Green Eye Shadow, Earring, Blonde Bob]"


In [7]:
# Create a list of the individual accessories
# Capture all individual types and accessories and store them in a their respective lists
accessories_list =[]

# Loop through the dataframe
for row in range(len(df)):

    # Add a new field with the count of accessories
    accessory_count = len(df.at[row,"accessories"])
    df.at[row,"accessory_count"]=accessory_count
    
    # Add each new accessory description to a list of unique values
    for i in df.at[row,"accessories"]:
        
        if i not in accessories_list:
            accessories_list.append(i)

# Print all the unique accessories
print("There are "+str(len(accessories_list))+" distinct accessories:")
print(accessories_list)

There are 87 distinct accessories:
['Green Eye Shadow', 'Earring', 'Blonde Bob', 'Smile', 'Mohawk', 'Wild Hair', 'Nerd Glasses', 'Pipe', 'Big Shades', 'Goat', 'Purple Eye Shadow', 'Half Shaved', 'Do-rag', 'Wild White Hair', 'Spots', 'Clown Eyes Blue', 'Messy Hair', 'Luxurious Beard', 'Clown Nose', 'Police Cap', 'Big Beard', 'Blue Eye Shadow', 'Black Lipstick', 'Clown Eyes Green', 'Straight Hair Dark', 'Blonde Short', 'Purple Lipstick', 'Straight Hair Blonde', 'Pilot Helmet', 'Hot Lipstick', 'Regular Shades', 'Stringy Hair', 'Small Shades', 'Frown', 'Eye Mask', 'Muttonchops', 'Bandana', 'Horned Rim Glasses', 'Crazy Hair', 'Classic Shades', 'Handlebars', 'Mohawk Dark', 'Dark Hair', 'Peak Spike', 'Normal Beard Black', 'Cap', 'VR', 'Frumpy Hair', 'Cigarette', 'Normal Beard', 'Red Mohawk', 'Shaved Head', 'Chinstrap', 'Mole', 'Knitted Cap', 'Fedora', 'Shadow Beard', 'Straight Hair', 'Hoodie', 'Eye Patch', 'Headband', 'Cowboy Hat', 'Tassle Hat', '3D Glasses', 'Mustache', 'Vape', 'Choker', 'Pi

In [8]:
# Create a dataframe from the list
accessories_df = pd.DataFrame(accessories_list)
accessories_df

Unnamed: 0,0
0,Green Eye Shadow
1,Earring
2,Blonde Bob
3,Smile
4,Mohawk
...,...
82,Orange Side
83,Wild Blonde
84,Buck Teeth
85,Top Hat


In [9]:
# Rename the first column
accessories_df.rename(columns={0: "Attribute"}, inplace=True)
accessories_df

Unnamed: 0,Attribute
0,Green Eye Shadow
1,Earring
2,Blonde Bob
3,Smile
4,Mohawk
...,...
82,Orange Side
83,Wild Blonde
84,Buck Teeth
85,Top Hat


In [10]:
# Add a new column for count and initialize it to cero
for row in range(len(accessories_df)):
    accessories_df["counts"]=0
    
accessories_df

Unnamed: 0,Attribute,counts
0,Green Eye Shadow,0
1,Earring,0
2,Blonde Bob,0
3,Smile,0
4,Mohawk,0
...,...,...
82,Orange Side,0
83,Wild Blonde,0
84,Buck Teeth,0
85,Top Hat,0


In [11]:
# Loop through the crypto punk id's to establish a count of all the accessories    
for row in range(len(crypto_punks_df)):

    # read the string of individual accessories
    individual_accessory = crypto_punks_df.at[row,"accessories"]
    
    # clean the string by removing special characters
    characters_to_remove="[']"
    for character in characters_to_remove:
        individual_accessory = individual_accessory.replace(character, "")
    
    # convert the string back to a list
    individual_list = list(individual_accessory.split(", "))
    
    # loop through the individual accessories associated with the punk_id
    for accessory in individual_list:
        
        # look for the index of the accessory in the accessories_df and add to the count
        accessory_index = accessories_df.index[accessories_df['Attribute']==accessory]
        accessories_df.at[accessory_index,'counts']=accessories_df.loc[accessory_index,'counts']+1
        
accessories_df

Unnamed: 0,Attribute,counts
0,Green Eye Shadow,271
1,Earring,2459
2,Blonde Bob,147
3,Smile,238
4,Mohawk,441
...,...,...
82,Orange Side,68
83,Wild Blonde,144
84,Buck Teeth,78
85,Top Hat,115


### 3. Create a Dataframe for the Number of Accessories

In [12]:
# Create a dataframe grouped by number of accessory counts
df['Attribute'] = df.accessories.apply(lambda x: len(x))
new_df = df.drop_duplicates("punk_id")['Attribute'].value_counts().rename_axis('Attribute').reset_index(name='counts')
number_accessories_df = new_df.sort_values(by='Attribute')
number_accessories_df.reset_index(drop=True, inplace=True)
number_accessories_df

Unnamed: 0,Attribute,counts
0,0,8
1,1,333
2,2,3560
3,3,4501
4,4,1420
5,5,166
6,6,11
7,7,1


In [13]:
for row in range(len(number_accessories_df)):
    number_accessories_df.loc[row,"Attribute"] = str(row) + " accessories"
    
number_accessories_df

Unnamed: 0,Attribute,counts
0,0 accessories,8
1,1 accessories,333
2,2 accessories,3560
3,3 accessories,4501
4,4 accessories,1420
5,5 accessories,166
6,6 accessories,11
7,7 accessories,1


### 4. Merge the Three Dataframes into One

In [14]:
# Inspect the datatypes for the 3 Dataframes for compatibility
types_df.dtypes

Attribute    object
counts        int64
dtype: object

In [15]:
accessories_df.dtypes

Attribute    object
counts        int64
dtype: object

In [16]:
number_accessories_df.dtypes

Attribute    object
counts        int64
dtype: object

In [17]:
# Merge the Dataframes
attributes_df = pd.concat([types_df,number_accessories_df,accessories_df])
# Reset the index
attributes_df.reset_index(drop=True, inplace=True)
attributes_df.head(30)

Unnamed: 0,Attribute,counts
0,Alien,9
1,Ape,24
2,Female,3840
3,Male,6039
4,Zombie,88
5,0 accessories,8
6,1 accessories,333
7,2 accessories,3560
8,3 accessories,4501
9,4 accessories,1420


## Export the Dataframe

### 1. Export as a csv

In [18]:
######################################################################
# EXPORT THE DATAFRAMES AS A CSV FILE
######################################################################

attributes_df.to_csv(r'Data/attributes.csv', index = False)

### 2. Export as a json file

### 3. Export as a MongoDB collection to ATLAS

In [19]:
# Make a connection to Mongo with MongoClient (use this URI to establish 
# connection with MongoDB Atlas)

# 1. Get user and password for Atlas cluster connection
from config import user, password

# 2. Establish which database in the Atlas cluster to connect to
my_database = "crypto_punks_mdb"

atlas_path = "mongodb+srv://"+user+":"+password+"@cluster0.wddnt.mongodb.net/"+my_database+"?retryWrites=true&w=majority"


In [20]:
# 3. Establish the connection as a pyMongo client using the
# path to the cluster provided by your Atlas account
atlas_client = pymongo.MongoClient(atlas_path)

# 4. Verify connection was established
print(atlas_client.test)

Database(MongoClient(host=['cluster0-shard-00-00.wddnt.mongodb.net:27017', 'cluster0-shard-00-02.wddnt.mongodb.net:27017', 'cluster0-shard-00-01.wddnt.mongodb.net:27017'], document_class=dict, tz_aware=False, connect=True, retrywrites=True, w='majority', authsource='admin', replicaset='atlas-8x2to2-shard-0', tls=True), 'test')


In [21]:
# Create an object to point to the database
crypto_punks_mdb = atlas_client[my_database]

In [22]:
# Create an empty list of dictionaries
attributes_list=[]

In [23]:
# Loop through the dataframe to populate the list while also adding the bitmap image
for row in range(len(attributes_df)):

    # Add the attributes data to the attributes_list as
    # a dictionary
    attributes_list.append({"Attribute": attributes_df.at[row,"Attribute"],
                         "counts": str(attributes_df.at[row,"counts"])
                        })

In [24]:
# Inspect the list
attributes_list

[{'Attribute': 'Alien', 'counts': '9'},
 {'Attribute': 'Ape', 'counts': '24'},
 {'Attribute': 'Female', 'counts': '3840'},
 {'Attribute': 'Male', 'counts': '6039'},
 {'Attribute': 'Zombie', 'counts': '88'},
 {'Attribute': '0 accessories', 'counts': '8'},
 {'Attribute': '1 accessories', 'counts': '333'},
 {'Attribute': '2 accessories', 'counts': '3560'},
 {'Attribute': '3 accessories', 'counts': '4501'},
 {'Attribute': '4 accessories', 'counts': '1420'},
 {'Attribute': '5 accessories', 'counts': '166'},
 {'Attribute': '6 accessories', 'counts': '11'},
 {'Attribute': '7 accessories', 'counts': '1'},
 {'Attribute': 'Green Eye Shadow', 'counts': '271'},
 {'Attribute': 'Earring', 'counts': '2459'},
 {'Attribute': 'Blonde Bob', 'counts': '147'},
 {'Attribute': 'Smile', 'counts': '238'},
 {'Attribute': 'Mohawk', 'counts': '441'},
 {'Attribute': 'Wild Hair', 'counts': '447'},
 {'Attribute': 'Nerd Glasses', 'counts': '572'},
 {'Attribute': 'Pipe', 'counts': '317'},
 {'Attribute': 'Big Shades', 

In [25]:
# Try adding the attributes_list as a collection to the database in a single read
crypto_punks_mdb.attributes_col.insert_many(attributes_list)

<pymongo.results.InsertManyResult at 0x145690580>

## END