## Warning: Do not run the code in this Jupyter notebook! The code here serves solely to demonstrate how I processed the crypto CSV data and uploaded it to MongoDB Atlas. If the code is executed again, it will upload duplicate data to the database, which could affect the outcomes of the project.

# 0. Import packages, paths, etc.

In [1]:
!pip3 install pip install pymongo




In [2]:
# Import packages
import os

import numpy as np
import pandas as pd
from pymongo import MongoClient, ReplaceOne


In [3]:
# Folder that holds the crypto data, one CSV file per coin
path_to_crypto_folder = "./crypto-dataset"

# Full path of every CSV file found directly inside that folder
# (non-CSV entries are skipped; order is whatever os.listdir returns)
path_to_crypto_csv = [
    os.path.join(path_to_crypto_folder, entry)
    for entry in os.listdir(path_to_crypto_folder)
    if entry.endswith(".csv")
]
print(path_to_crypto_csv)

['./crypto-dataset/coin_NEM.csv', './crypto-dataset/coin_EOS.csv', './crypto-dataset/coin_Monero.csv', './crypto-dataset/coin_Polkadot.csv', './crypto-dataset/coin_USDCoin.csv', './crypto-dataset/coin_Uniswap.csv', './crypto-dataset/coin_BinanceCoin.csv', './crypto-dataset/coin_Iota.csv', './crypto-dataset/coin_Aave.csv', './crypto-dataset/coin_Solana.csv', './crypto-dataset/coin_Bitcoin.csv', './crypto-dataset/coin_Cardano.csv', './crypto-dataset/coin_Tether.csv', './crypto-dataset/coin_Cosmos.csv', './crypto-dataset/coin_ChainLink.csv', './crypto-dataset/coin_Litecoin.csv', './crypto-dataset/coin_XRP.csv', './crypto-dataset/coin_Ethereum.csv', './crypto-dataset/coin_Tron.csv', './crypto-dataset/coin_Stellar.csv', './crypto-dataset/coin_CryptocomCoin.csv', './crypto-dataset/coin_Dogecoin.csv', './crypto-dataset/coin_WrappedBitcoin.csv']


# 1. Data Pre-processing

In [4]:
# Read each crypto CSV into a pandas DataFrame, keep only its final 31 rows
# (the last 31 days of data) and gather the resulting frames in one list.
crypto_pandas = [
    pd.read_csv(csv_path).tail(31)
    for csv_path in path_to_crypto_csv
]


In [5]:
# Sanity checks on every loaded frame:
# 1. The 31-row window starts and ends on the same dates for all
#    cryptocurrencies.
# 2. No value is missing and no value equals zero.
# Each assertion names the offending CSV file so a failure can be traced
# straight back to its source.
EXPECTED_FIRST_DATE = "2021-06-06 23:59:59"
EXPECTED_LAST_DATE = "2021-07-06 23:59:59"

for csv_path, frame in zip(path_to_crypto_csv, crypto_pandas):
    first_date = frame['Date'].iloc[0]
    last_date = frame['Date'].iloc[-1]
    assert first_date == EXPECTED_FIRST_DATE, f"{csv_path}: window starts at {first_date}"
    assert last_date == EXPECTED_LAST_DATE, f"{csv_path}: window ends at {last_date}"
    # True only if every cell is both non-null and different from zero
    assert ((frame != 0) & frame.notna()).all().all(), f"{csv_path}: contains missing or zero values"
print("All good in data!")

All good in data!


In [6]:
# MongoDB field names are case-sensitive, so every column is renamed to
# lower-case before upload.
COLUMN_RENAMES = {
    'Name': 'name',
    'Symbol': 'symbol',
    'Date': 'date',
    'High': 'high',
    'Low': 'low',
    'Open': 'open',
    'Close': 'close',
    'Volume': 'volume',
    'Marketcap': 'marketcap',
}


def _prepare_for_upload(frame):
    """Return a cleaned copy of one coin's frame.

    The copy has lower-case column names, no `SNo` column and a `date`
    column converted from string to timestamp. The input frame is left
    untouched, and applying the function to an already-cleaned frame is a
    no-op, so this cell can be re-run safely.
    """
    cleaned = (
        frame
        .rename(columns=COLUMN_RENAMES)
        # Drop the useless SNo column; tolerate its absence on a re-run
        .drop(columns=['SNo'], errors='ignore')
    )
    # Convert date from string to timestamp
    return cleaned.assign(date=pd.to_datetime(cleaned['date']))


crypto_pandas = [_prepare_for_upload(frame) for frame in crypto_pandas]

In [7]:
# Preview the first rows of the first processed frame to confirm the
# renamed columns and the converted date column.
crypto_pandas[0].head()

Unnamed: 0,name,symbol,date,high,low,open,close,volume,marketcap
2257,NEM,XEM,2021-06-06 23:59:59,0.190063,0.179704,0.180779,0.187718,84255611.92,1689458000.0
2258,NEM,XEM,2021-06-07 23:59:59,0.192372,0.170055,0.18772,0.170997,87266392.14,1538975000.0
2259,NEM,XEM,2021-06-08 23:59:59,0.173177,0.153965,0.171363,0.165499,94018710.02,1489493000.0
2260,NEM,XEM,2021-06-09 23:59:59,0.175315,0.158113,0.16476,0.173861,98169054.17,1564746000.0
2261,NEM,XEM,2021-06-10 23:59:59,0.177479,0.166559,0.174331,0.170198,79409647.01,1531782000.0


# 2. Upload data to MongoDB Atlas

In [8]:
# Connect to MongoDB Atlas.
# The connection string embeds the database user and password, so it is read
# from the MONGODB_URI environment variable instead of being written into the
# notebook, where it would leak through version control.
# NOTE(review): a password was previously committed in this cell; it should be
# rotated on the Atlas side.
mongodb_uri = os.environ.get("MONGODB_URI")
if not mongodb_uri:
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to the MongoDB Atlas connection string"
    )
client = MongoClient(mongodb_uri)


In [9]:
# Names of the target database and of the collection that stores the coin rows
DATABASE_NAME = "CryptoAnalysis"
COLLECTION_NAME = "cryptos"

# Handles used by the upload step below
db = client[DATABASE_NAME]
collection = db[COLLECTION_NAME]

In [10]:
# Upload every coin's rows to MongoDB Atlas.
# Each row is upserted on its (symbol, date) pair instead of being blindly
# inserted: the first run creates the documents, and any later run replaces
# them in place rather than adding duplicates.
for frame in crypto_pandas:
    records = frame.to_dict('records')
    if not records:
        # bulk_write rejects an empty list of operations
        continue
    collection.bulk_write([
        ReplaceOne(
            {'symbol': record['symbol'], 'date': record['date']},
            record,
            upsert=True,
        )
        for record in records
    ])

# Finish!