In [1]:
# Initial Imports
import pandas as pd
from pathlib import Path

# New Library for reading many csv files into dataframe
import glob

In [2]:
# Use the `read_csv` function to create Bitcoin data Pandas DataFrame
btc_df = pd.read_csv(
    Path("raw_csv/BTC.csv"),
    header=1,
    index_col="date", 
    parse_dates=True,
    infer_datetime_format=True
)

In [3]:
# Drop unix column from dataframe
btc_df = btc_df.drop(columns=['unix',])

In [4]:
btc_df

Unnamed: 0_level_0,symbol,open,high,low,close,Volume USD,Volume BTC
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2022-08-04,BTC/USD,22813.213072,23006.000000,22792.00,22969.000000,1.247936e+06,54.331300
2022-08-03,BTC/USD,22982.000000,23634.097482,22680.00,22813.873707,1.083055e+08,4747.351281
2022-08-02,BTC/USD,23256.000000,23466.000000,22676.00,22986.000000,9.698831e+07,4219.451219
2022-08-01,BTC/USD,23291.000000,23499.338920,22879.00,23256.431307,1.297409e+08,5578.709695
2022-07-31,BTC/USD,23640.267907,24211.000000,23233.00,23294.000000,7.329680e+07,3146.595788
...,...,...,...,...,...,...,...
2015-02-13,BTC/USD,222.390000,241.760000,221.46,236.000000,6.806973e+06,29079.110000
2015-02-12,BTC/USD,219.190000,223.200000,217.87,222.600000,2.468883e+06,11190.650000
2015-02-11,BTC/USD,220.960000,224.400000,218.10,219.190000,4.268987e+06,19289.280000
2015-02-10,BTC/USD,220.610000,223.880000,214.00,220.960000,6.402351e+06,29268.950000


In [5]:
# Identify Series count
btc_df.count()

symbol        2734
open          2734
high          2734
low           2734
close         2734
Volume USD    2734
Volume BTC    2734
dtype: int64

In [6]:
# Get info
# Compare non-null to series count 
# Check data types
btc_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2734 entries, 2022-08-04 to 2015-02-09
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   symbol      2734 non-null   object 
 1   open        2734 non-null   float64
 2   high        2734 non-null   float64
 3   low         2734 non-null   float64
 4   close       2734 non-null   float64
 5   Volume USD  2734 non-null   float64
 6   Volume BTC  2734 non-null   float64
dtypes: float64(6), object(1)
memory usage: 170.9+ KB


In [7]:
btc_df.describe()

Unnamed: 0,open,high,low,close,Volume USD,Volume BTC
count,2734.0,2734.0,2734.0,2734.0,2734.0,2734.0
mean,13166.264817,13543.787789,12737.95812,13174.623532,149432300.0,18007.853155
std,16704.993587,17165.139255,16168.328213,16704.571263,214931800.0,20351.687123
min,212.63,223.2,162.0,212.63,0.0,0.0
25%,816.105,831.4825,789.9625,817.76,15029750.0,5526.415
50%,7042.4,7292.35,6836.59,7049.55,66237580.0,10708.797379
75%,13263.0,13819.25,12726.5,13348.75,197447200.0,22626.6075
max,67526.0,68958.0,66326.0,67526.0,2491339000.0,266768.61


In [8]:
# Save btc_df as Bitcoin_Data.csv

#btc_df.to_csv('Bitcoin_Data.csv')

In [9]:
# glob.glob('raw_csv*.csv') - returns List[str] of all CSV files in raw_csv directory
# pd.read_csv(f) - returns pd.DataFrame() 
# for f in glob.glob() - returns a List[DataFrames]
# pd.concat() - returns one pd.DataFrame()
df = pd.concat([pd.read_csv(f, header=1,
               index_col="date", 
               parse_dates=True,
               infer_datetime_format=True ) for f in glob.glob('raw_csv/*.csv')])

In [10]:
# Create new dataframe called coins_master which only includes specified columns
coins_master = df[['symbol', 'open', 'high', 'low', 'close', 'Volume USD']]

In [11]:
coins_master

Unnamed: 0_level_0,symbol,open,high,low,close,Volume USD
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-04-22,UOP/USD,0.027273,0.030710,0.027273,0.029466,149.317976
2022-04-21,UOP/USD,0.027258,0.027288,0.026666,0.027288,10.502465
2022-04-20,UOP/USD,0.029777,0.030400,0.026355,0.026977,120.202376
2022-04-19,UOP/USD,0.029155,0.030400,0.029155,0.030400,18.338419
2022-04-18,UOP/USD,0.029155,0.029777,0.028844,0.029777,28.100589
...,...,...,...,...,...,...
2021-12-12,BNT/USD,3.368700,3.368700,3.368700,3.368700,77.830176
2021-12-11,BNT/USD,3.269400,3.331200,3.269400,3.331200,310.402763
2021-12-10,BNT/USD,3.302100,3.302100,3.302100,3.302100,77.826952
2021-12-09,BNT/USD,3.687700,3.688700,3.390400,3.390400,1074.468455


In [12]:
# Identify Series count
coins_master.count()

symbol        61779
open          61779
high          61779
low           61779
close         61779
Volume USD    61779
dtype: int64

In [13]:
# Get info
# Compare non-null to series count 
# Check data types
coins_master.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 61779 entries, 2022-04-22 to 2021-12-08
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   symbol      61779 non-null  object 
 1   open        61779 non-null  float64
 2   high        61779 non-null  float64
 3   low         61779 non-null  float64
 4   close       61779 non-null  float64
 5   Volume USD  61779 non-null  float64
dtypes: float64(5), object(1)
memory usage: 3.3+ MB


In [14]:
coins_master.describe()

Unnamed: 0,open,high,low,close,Volume USD
count,61779.0,61779.0,61779.0,61779.0,61779.0
mean,1359.626205,1405.356335,1308.101834,1357.366187,12269220.0
std,9606.520005,9935.232651,9227.865369,9577.831832,62791150.0
min,0.0,0.0,0.0,0.0,0.0
25%,0.19396,0.201875,0.1864,0.1932,7756.269
50%,1.38,1.4484,1.31,1.3757,97116.57
75%,20.396,21.5,19.3965,20.354,1529422.0
max,320000.0,320000.0,320000.0,320000.0,2491339000.0


In [15]:
# Save coins_master dataframe as Coins_Data_Master.csv

#coins_master.to_csv('Coins_Data_Master.csv')