In [1]:
#initial imports
import pandas as pd
import glob
import os
import numpy as np
import datetime as dt

In [2]:
#list all crypto csv files only in directory
#taken from https://medium.com/@stella96joshua/how-to-combine-multiple-csv-files-using-python-for-your-analysis-a88017c6ff9e
file_list = glob.glob("Data\*.{}".format("csv"))
file_list

['Data\\binancecoin.csv',
 'Data\\bitcoin.csv',
 'Data\\cardano.csv',
 'Data\\dogecoin.csv',
 'Data\\ethereum.csv']

In [3]:
#append all 5 crypto csvs into one master csv
full_crypto_df = pd.DataFrame()

for file in file_list:
    df_temp = pd.read_csv(file)
    full_crypto_df = full_crypto_df.append(df_temp, ignore_index = True)

full_crypto_df

Unnamed: 0,date,price,total_volume,market_cap,coin_name
0,2017-09-16 00:00:00.000,0.107251,1.051223e+00,1.072506e+07,binancecoin
1,2017-09-17 00:00:00.000,0.154041,1.467859e+01,1.540413e+07,binancecoin
2,2017-09-18 00:00:00.000,0.173491,6.001767e+00,1.734912e+07,binancecoin
3,2017-09-19 00:00:00.000,0.168334,3.878927e+00,1.683342e+07,binancecoin
4,2017-09-20 00:00:00.000,0.166628,4.068762e+01,1.666279e+07,binancecoin
...,...,...,...,...,...
12477,2023-01-17 00:00:00.000,1577.725890,9.434571e+09,1.901230e+11,ethereum
12478,2023-01-18 00:00:00.000,1569.530833,8.482482e+09,1.889827e+11,ethereum
12479,2023-01-19 00:00:00.000,1516.555475,1.180948e+10,1.835179e+11,ethereum
12480,2023-01-20 00:00:00.000,1550.508888,6.871703e+09,1.869673e+11,ethereum


In [4]:
#check data types of cols in dataframe
full_crypto_df.dtypes

date             object
price           float64
total_volume    float64
market_cap      float64
coin_name        object
dtype: object

In [5]:
#convert date column to datetime data type
full_crypto_df["date"] = pd.to_datetime(full_crypto_df["date"])
full_crypto_df

Unnamed: 0,date,price,total_volume,market_cap,coin_name
0,2017-09-16,0.107251,1.051223e+00,1.072506e+07,binancecoin
1,2017-09-17,0.154041,1.467859e+01,1.540413e+07,binancecoin
2,2017-09-18,0.173491,6.001767e+00,1.734912e+07,binancecoin
3,2017-09-19,0.168334,3.878927e+00,1.683342e+07,binancecoin
4,2017-09-20,0.166628,4.068762e+01,1.666279e+07,binancecoin
...,...,...,...,...,...
12477,2023-01-17,1577.725890,9.434571e+09,1.901230e+11,ethereum
12478,2023-01-18,1569.530833,8.482482e+09,1.889827e+11,ethereum
12479,2023-01-19,1516.555475,1.180948e+10,1.835179e+11,ethereum
12480,2023-01-20,1550.508888,6.871703e+09,1.869673e+11,ethereum


In [6]:
#check if date column has changed from object to datetime and in a format we want
full_crypto_df.dtypes

date            datetime64[ns]
price                  float64
total_volume           float64
market_cap             float64
coin_name               object
dtype: object

In [7]:
#filter the dataframe to only get our 5 years of data - Jan 1, 2018 to Dec 31, 2022 - and reseting the index
crypto_df = full_crypto_df[(full_crypto_df["date"] > "2018-01-01") & (full_crypto_df["date"] < "2022-12-31")]
crypto_df.reset_index(inplace=True)
crypto_df

Unnamed: 0,index,date,price,total_volume,market_cap,coin_name
0,106,2018-01-02,9.090393,6.127175e+07,9.000762e+08,binancecoin
1,107,2018-01-03,9.886323,5.327417e+07,9.788844e+08,binancecoin
2,108,2018-01-04,9.675758,9.342065e+07,9.580355e+08,binancecoin
3,109,2018-01-05,16.488523,3.374991e+08,1.632595e+09,binancecoin
4,110,2018-01-06,24.369905,6.357853e+08,2.412962e+09,binancecoin
...,...,...,...,...,...,...
9115,12455,2022-12-26,1219.286343,3.694243e+09,1.468407e+11,ethereum
9116,12456,2022-12-27,1226.253415,3.071222e+09,1.476973e+11,ethereum
9117,12457,2022-12-28,1211.819562,4.221451e+09,1.460305e+11,ethereum
9118,12458,2022-12-29,1188.728664,5.177421e+09,1.432418e+11,ethereum


In [8]:
#export the dataframe to a csv as one master file
crypto_df.to_csv("Data\crypto.csv")