# Traders Database Consolidation and Export to MongoDB Atlas

This notebook creates a consolidated MongoDB collection and exports it to our main MongoDB database store in MongoDB Atlas, 'crypto_punks_mdb':

* traders_col

### To manage the MongoDB Atlas database through MongoDB Compass, establish a connection using the following as the connection URI:

mongodb+srv://[user]:[password]@cluster0.wddnt.mongodb.net/[my_database]?retryWrites=true&w=majority
    
Replace [user] and [password] with the cluster's user name and password, and replace [my_database] with crypto_punks_mdb.


In [1]:
# Import main dependencies
import pandas as pd
import numpy as np
import json

# Import image handling libraries
from PIL import Image

# Import MongoDB libraries for database management 
import pymongo
from pymongo import MongoClient

import time

# Create a Connection to MongoDB Atlas

In [2]:
# Build the connection URI used to reach the MongoDB Atlas cluster

# 1. Get user and password for Atlas cluster connection
from urllib.parse import quote_plus

from config import user, password

# 2. Establish which database in the Atlas cluster to connect to
my_database = "crypto_punks_mdb"

# Percent-encode the credentials so that reserved characters such as
# "@", ":", "/" or "%" in the user name or password cannot corrupt the URI
atlas_path = (
    "mongodb+srv://" + quote_plus(user) + ":" + quote_plus(password)
    + "@cluster0.wddnt.mongodb.net/" + my_database
    + "?retryWrites=true&w=majority"
)



In [3]:
# 3. Establish the connection as a pyMongo client using the
# path to the cluster provided by your Atlas account
atlas_client = pymongo.MongoClient(atlas_path)

# 4. Verify connection was established. MongoClient connects lazily, so
# printing a database handle alone proves nothing; send a cheap "ping"
# command first, which raises if the cluster cannot be reached or the
# credentials are rejected.
atlas_client.admin.command("ping")
print(atlas_client.test)

Database(MongoClient(host=['cluster0-shard-00-02.wddnt.mongodb.net:27017', 'cluster0-shard-00-00.wddnt.mongodb.net:27017', 'cluster0-shard-00-01.wddnt.mongodb.net:27017'], document_class=dict, tz_aware=False, connect=True, retrywrites=True, w='majority', authsource='admin', replicaset='atlas-8x2to2-shard-0', tls=True), 'test')


In [4]:
# Obtain a handle on the target database through the client
crypto_punks_mdb = atlas_client.get_database(my_database)

## Export the traders_col to MongoDB Atlas

In [5]:
# Load the trader transactions from the CSV file into a dataframe
# and display it
traders_csv_path = "Data/traders.csv"
traders_df = pd.read_csv(traders_csv_path)
traders_df

Unnamed: 0,index,trader_id,txn_type,punk_id,eth,date,timestamp,source
0,4950,007,Bid,521,0.10,2019-12-17 00:00:00,2019-12-17 00:00:00,larvalabs
1,4949,007,Bid Withdrawn,521,0.10,2020-01-02 00:00:00,2020-01-02 00:00:00,larvalabs
2,126547,007,Purchased,7750,0.70,2020-04-07 00:00:00,2020-04-07 00:00:00,larvalabs
3,126545,007,Sold,7750,0.85,2020-04-08 00:00:00,2020-04-08 00:00:00,larvalabs
4,160412,0age.eth,Bid,9626,12.34,2021-06-09 00:00:00,2021-06-09 00:00:00,larvalabs
...,...,...,...,...,...,...,...,...
132521,118768,🤹🏻‍♂.e…,Bid,7334,89.00,2021-09-07 00:00:00,2021-09-07 00:00:00,larvalabs
132522,118764,🤹🏻‍♂.e…,Bid Withdrawn,7334,111.00,2021-09-08 00:00:00,2021-09-08 00:00:00,larvalabs
132523,118765,🤹🏻‍♂.e…,Bid,7334,111.00,2021-09-08 00:00:00,2021-09-08 00:00:00,larvalabs
132524,51870,🦊🦄👩‍?…,Purchased,3520,32.00,2021-07-31 00:00:00,2021-07-31 00:00:00,larvalabs


In [6]:
# Inspect data types for compatibility with MongoDB.
# The last expression of the cell is displayed as its output: one dtype
# per dataframe column.
traders_df.dtypes

index          int64
trader_id     object
txn_type      object
punk_id        int64
eth          float64
date          object
timestamp     object
source        object
dtype: object

In [7]:
# Change incompatible data types: cast every column to str so that all
# values are plain Python strings, then display the resulting dtypes.
# NOTE(review): this also turns the numeric columns (index, punk_id, eth)
# into text, e.g. an eth value is rendered as '0.7000000000000001' in the
# recorded output — confirm that downstream consumers expect strings.
traders_df = traders_df.astype(str)
traders_df.dtypes

index        object
trader_id    object
txn_type     object
punk_id      object
eth          object
date         object
timestamp    object
source       object
dtype: object

In [8]:
# Reinspect the dataframe after the string conversion (displayed as the
# cell output)
traders_df

Unnamed: 0,index,trader_id,txn_type,punk_id,eth,date,timestamp,source
0,4950,007,Bid,521,0.1,2019-12-17 00:00:00,2019-12-17 00:00:00,larvalabs
1,4949,007,Bid Withdrawn,521,0.1,2020-01-02 00:00:00,2020-01-02 00:00:00,larvalabs
2,126547,007,Purchased,7750,0.7000000000000001,2020-04-07 00:00:00,2020-04-07 00:00:00,larvalabs
3,126545,007,Sold,7750,0.85,2020-04-08 00:00:00,2020-04-08 00:00:00,larvalabs
4,160412,0age.eth,Bid,9626,12.34,2021-06-09 00:00:00,2021-06-09 00:00:00,larvalabs
...,...,...,...,...,...,...,...,...
132521,118768,🤹🏻‍♂.e…,Bid,7334,89.0,2021-09-07 00:00:00,2021-09-07 00:00:00,larvalabs
132522,118764,🤹🏻‍♂.e…,Bid Withdrawn,7334,111.0,2021-09-08 00:00:00,2021-09-08 00:00:00,larvalabs
132523,118765,🤹🏻‍♂.e…,Bid,7334,111.0,2021-09-08 00:00:00,2021-09-08 00:00:00,larvalabs
132524,51870,🦊🦄👩‍?…,Purchased,3520,32.0,2021-07-31 00:00:00,2021-07-31 00:00:00,larvalabs


In [9]:
# Start with an empty list that will hold one dictionary per transaction
traders_list = list()

In [10]:
# Build the list of trader documents (one dictionary per dataframe row) in a
# single pass instead of looking up every cell individually. Only the
# transaction fields are exported; no image data is attached here. The
# dataframe's "index" column is stored under the key "txn_index".
# Assigning (rather than appending) keeps the cell idempotent on re-run.
traders_list = (
    traders_df[["index", "trader_id", "txn_type", "punk_id",
                "eth", "date", "timestamp", "source"]]
    .rename(columns={"index": "txn_index"})
    .to_dict("records")
)

In [11]:
# Inspect the first record of the list to confirm the document layout
# (keys and string values) before uploading
traders_list[0]

{'txn_index': '4950',
 'trader_id': '007',
 'txn_type': 'Bid',
 'punk_id': '521',
 'eth': '0.1',
 'date': '2019-12-17 00:00:00',
 'timestamp': '2019-12-17 00:00:00',
 'source': 'larvalabs'}

In [12]:
# Try adding the traders as a collection to the database in a single call.
# NOTE(review): the recorded run of this cell failed with AutoReconnect
# (broken pipe), which is why a batched upload is used further down. If this
# call does succeed, the batched cell would try to insert the same documents
# a second time — confirm which of the two cells should actually be run.
crypto_punks_mdb.traders_col.insert_many(traders_list)

AutoReconnect: cluster0-shard-00-02.wddnt.mongodb.net:27017: [Errno 32] Broken pipe

In [13]:
# Add the traders as a collection to the database 1,000 records at a time.
# Batch boundaries are derived from the actual list length, so the loop
# stays correct if the number of records changes and never sends an empty
# batch.

# Number of documents sent per insert_many call
batch_size = 1000
total_records = len(traders_list)

for start_record in range(0, total_records, batch_size):
    # Clamp the final batch to the end of the list
    end_record = min(start_record + batch_size, total_records)
    print("processing record: ",start_record,"  to: ",end_record)

    # Select the batch to be loaded
    list_to_load = traders_list[start_record:end_record]

    # upload the batch to the database collection
    crypto_punks_mdb.traders_col.insert_many(list_to_load)

# Report when done
print ("Export completed ----------------------------")

processing record:  0   to:  1000
processing record:  1000   to:  2000
processing record:  2000   to:  3000
processing record:  3000   to:  4000
processing record:  4000   to:  5000
processing record:  5000   to:  6000
processing record:  6000   to:  7000
processing record:  7000   to:  8000
processing record:  8000   to:  9000
processing record:  9000   to:  10000
processing record:  10000   to:  11000
processing record:  11000   to:  12000
processing record:  12000   to:  13000
processing record:  13000   to:  14000
processing record:  14000   to:  15000
processing record:  15000   to:  16000
processing record:  16000   to:  17000
processing record:  17000   to:  18000
processing record:  18000   to:  19000
processing record:  19000   to:  20000
processing record:  20000   to:  21000
processing record:  21000   to:  22000
processing record:  22000   to:  23000
processing record:  23000   to:  24000
processing record:  24000   to:  25000
processing record:  25000   to:  26000
processi

# END