In [73]:
import pandas as pd
import os

# Set display options to show all rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Get a list of all CSV files
csv_files = [os.path.join('data', file) for file in os.listdir('data')]

# Create an empty dictionary that will consist of crypto dataframes
dataframes = {}

# Loop through each CSV file and create a dataframe
for file in csv_files:
    # Extract dataframe name from file name (without the extension)
    df_name = os.path.splitext(os.path.basename(file))[0]
    # Read the CSV file into a dataframe and store it in the dictionary
    dataframes[df_name] = pd.read_csv(file, encoding='latin1')  # Specify encoding here

In [74]:
dataframes['BNB']

Unnamed: 0,Date,Open,High,Low,Close,Volume,Currency
0,2017-11-09,2.05314,2.17423,1.89394,1.99077,19192200.0,USD
1,2017-11-10,2.00773,2.06947,1.64478,1.79684,11155000.0,USD
2,2017-11-11,1.78628,1.91775,1.61429,1.67047,8178150.0,USD
3,2017-11-12,1.66889,1.6728,1.46256,1.51969,15298700.0,USD
4,2017-11-13,1.52601,1.73502,1.5176,1.68662,12238800.0,USD
5,2017-11-14,1.68928,1.73537,1.56827,1.59258,7829600.0,USD
6,2017-11-15,1.58777,1.62498,1.4955,1.53045,7615500.0,USD
7,2017-11-16,1.55619,1.68222,1.53689,1.57792,8928640.0,USD
8,2017-11-17,1.58005,1.58266,1.4953,1.51036,8508840.0,USD
9,2017-11-18,1.51136,1.67678,1.49823,1.67433,32167300.0,USD


In [89]:
# Create an empty dictionary to store filtered dataframes
filtered_dataframes = {}

# Iterate through each dataframe in the original dictionary
for df_name, df in dataframes.items():
    # Check if the last row of the first column is equal to '2022-08-23'
    if not df.empty and df.iloc[-1, 0] == '2022-08-23':
        # If the condition is met, add the dataframe to the filtered dictionary
        filtered_dataframes[df_name] = df

# Now, the filtered_dataframes dictionary contains only the dataframes that meet the condition

In [90]:
filtered_dataframes.keys()

dict_keys(['Celsius', 'IOTA', 'Axie Infinity', 'chainlink', 'PAX Gold', 'Hedera', 'bitcoin cash', 'Kava', 'Elrond', 'Helium', 'Internet Computer', 'STEPN', 'xrp', 'Theta Network', 'Bitcoin Gold', 'Gnosis', 'near protocol', 'Synthetix', 'monero', 'eCash', 'Flow', 'KuCoin Token', 'Celo', 'Zilliqa', 'Convex Finance', 'Holo', 'BitTorrent (New)', 'Waves', 'uniswap', 'Neutrino USD', 'Quant', 'polkadot', 'ethereum classic', 'Loopring', 'Aave', 'Zcash', 'The Sandbox', 'Basic Attention Token', 'dogecoin', 'Klaytn', 'Binance USD', 'Kusama', 'Shiba Inu', 'Neo', 'USDD', 'unus sed leo', 'NEM', 'Lido DAO', 'Decentraland', 'Cronos', 'stellar', 'Algorand', 'ApeCoin', 'Arweave', 'EOS', 'PancakeSwap', 'Mina', 'Curve DAO Token', 'Bitcoin SV', 'tether', 'Decred', 'Pax Dollar', 'THORChain', 'polygon', 'wrapped bitcoin', 'ethereum', 'Bitcoin BEP2', 'VeChain', '1inch', 'Fei USD', 'litecoin', 'Qtum', 'Trust Wallet Token', 'bitcoin', 'cardano', 'Stacks', 'Dash', 'Enjin Coin', 'usd coin', 'Tezos', 'Huobi Token'

In [91]:
len(filtered_dataframes.keys())

94

In [92]:
for df_name, df in dataframes.items():
    start_date = df.iloc[1, 0]
    end_date = df.iloc[-1, 0]
    print(df_name)
    print(f"Start date = {start_date}")
    print(f"End date = {end_date}")

Celsius
Start date = 2018-10-02
End date = 2022-08-23
IOTA
Start date = 2017-11-10
End date = 2022-08-23
Axie Infinity
Start date = 2020-11-05
End date = 2022-08-23
chainlink
Start date = 2018-02-03
End date = 2022-08-23
PAX Gold
Start date = 2019-09-27
End date = 2022-08-23
Hedera
Start date = 2019-09-18
End date = 2022-08-23
bitcoin cash
Start date = 2017-08-04
End date = 2022-08-23
Kava
Start date = 2019-10-26
End date = 2022-08-23
Elrond
Start date = 2020-09-05
End date = 2022-08-23
Helium
Start date = 2020-06-09
End date = 2022-08-23
Internet Computer
Start date = 2021-05-11
End date = 2022-08-23
STEPN
Start date = 2021-04-14
End date = 2022-08-23
.DS_Store
Start date = nan
End date = nan
xrp
Start date = 2015-01-23
End date = 2022-08-23
Theta Network
Start date = 2018-01-18
End date = 2022-08-23
Bitcoin Gold
Start date = 2017-11-10
End date = 2022-08-23
Gnosis
Start date = 2017-11-10
End date = 2022-08-23
near protocol
Start date = 2020-10-16
End date = 2022-08-23
Synthetix
Start

In [96]:
# Initialize a counter for the number of start dates
count_2019 = 0
count_2020 = 0
count_2021 = 0
count_2022 = 0

# Iterate through each dataframe
for df_name, df in dataframes.items():
    start_date = str(df.iloc[1, 0])
    if start_date.split('-')[0] == '2019':
        count_2019 += 1
    elif start_date.split('-')[0] == '2020':
        count_2020 += 1
    elif start_date.split('-')[0] == '2021':
        count_2021 += 1
    elif start_date.split('-')[0] == '2022':
        count_2022 += 1

# Print the count of start dates in 2020
print("Number of start dates in 2019:", count_2019)
print("Number of start dates in 2020:", count_2020)
print("Number of start dates in 2021:", count_2021)
print("Number of start dates in 2022:", count_2022)


Number of start dates in 2019: 15
Number of start dates in 2020: 21
Number of start dates in 2021: 8
Number of start dates in 2022: 1


In [98]:
# Create an empty dictionary to store filtered dataframes
filtered1_dataframes = {}

# Iterate through each dataframe in the filtered dictionary
for df_name, df in dataframes.items():
    # Get the start date from the second row (index 1) of the first column (index 0)
    start_date = df.iloc[1, 0]
    # Check if the start date is a valid numeric value and not missing (NaN)
    if pd.notna(start_date) and start_date.split('-')[0].isdigit():
        # Convert the start date to an integer and check if it's older than 2020
        if int(start_date.split('-')[0]) < 2020:
            # If the condition is met, add the dataframe to the filtered1 dictionary
            filtered1_dataframes[df_name] = df


In [99]:
len(filtered1_dataframes.items())

64

In [100]:
filtered1_dataframes['BNB']

Unnamed: 0,Date,Open,High,Low,Close,Volume,Currency
0,2017-11-09,2.05314,2.17423,1.89394,1.99077,19192200.0,USD
1,2017-11-10,2.00773,2.06947,1.64478,1.79684,11155000.0,USD
2,2017-11-11,1.78628,1.91775,1.61429,1.67047,8178150.0,USD
3,2017-11-12,1.66889,1.6728,1.46256,1.51969,15298700.0,USD
4,2017-11-13,1.52601,1.73502,1.5176,1.68662,12238800.0,USD
5,2017-11-14,1.68928,1.73537,1.56827,1.59258,7829600.0,USD
6,2017-11-15,1.58777,1.62498,1.4955,1.53045,7615500.0,USD
7,2017-11-16,1.55619,1.68222,1.53689,1.57792,8928640.0,USD
8,2017-11-17,1.58005,1.58266,1.4953,1.51036,8508840.0,USD
9,2017-11-18,1.51136,1.67678,1.49823,1.67433,32167300.0,USD


In [107]:
# Iterate through each dataframe in the filtered1_dataframes dictionary
for df_name, df in filtered1_dataframes.items():
    # Find the index of the row where the first column is equal to '2020-01-01'
    index_2020 = df[df.iloc[:, 0] == '2020-01-01'].index
    if not index_2020.empty:
        # Slice the dataframe to remove all rows before the row with index index_2020
        filtered1_dataframes[df_name] = df.loc[index_2020[0]:]


In [108]:
for df_name, df in filtered1_dataframes.items():
    start_date = df.iloc[1, 0]
    end_date = df.iloc[-1, 0]
    print(df_name)
    print(f"Start date = {start_date}")
    print(f"End date = {end_date}")

Celsius
Start date = 2020-01-02
End date = 2022-08-23
IOTA
Start date = 2020-01-02
End date = 2022-08-23
chainlink
Start date = 2020-01-02
End date = 2022-08-23
PAX Gold
Start date = 2020-01-02
End date = 2022-08-23
Hedera
Start date = 2020-01-02
End date = 2022-08-23
bitcoin cash
Start date = 2020-01-02
End date = 2022-08-23
Kava
Start date = 2020-01-02
End date = 2022-08-23
xrp
Start date = 2020-01-02
End date = 2022-08-23
Theta Network
Start date = 2020-01-02
End date = 2022-08-23
Bitcoin Gold
Start date = 2020-01-02
End date = 2022-08-23
Gnosis
Start date = 2020-01-02
End date = 2022-08-23
Synthetix
Start date = 2020-01-02
End date = 2022-08-23
monero
Start date = 2020-01-02
End date = 2022-08-23
KuCoin Token
Start date = 2020-01-02
End date = 2022-08-23
Zilliqa
Start date = 2020-01-02
End date = 2022-08-23
Holo
Start date = 2020-01-02
End date = 2022-08-23
BitTorrent (New)
Start date = 2020-01-02
End date = 2022-08-23
Waves
Start date = 2020-01-02
End date = 2022-08-23
Quant
Start

In [109]:
ready_dataframes = filtered1_dataframes

In [110]:
ready_dataframes['BNB']

Unnamed: 0,Date,Open,High,Low,Close,Volume,Currency
783,2020-01-01,13.730962,13.873946,13.654942,13.689083,172980700.0,USD
784,2020-01-02,13.698126,13.715548,12.989974,13.027011,156376400.0,USD
785,2020-01-03,13.035329,13.763709,13.012638,13.660452,173683900.0,USD
786,2020-01-04,13.667442,13.921914,13.560008,13.891512,182230400.0,USD
787,2020-01-05,13.88834,14.410801,13.886547,14.111019,202552700.0,USD
788,2020-01-06,14.11124,15.003565,14.11124,14.957808,224800400.0,USD
789,2020-01-07,14.966209,15.135275,14.568403,15.00925,191948600.0,USD
790,2020-01-08,15.022264,15.242219,14.337952,14.602934,183632200.0,USD
791,2020-01-09,14.589202,14.611089,14.305363,14.498314,169699800.0,USD
792,2020-01-10,14.478409,15.027689,14.182085,15.025826,200173100.0,USD


In [111]:
ready_dataframes['IOTA']

Unnamed: 0,Date,Open,High,Low,Close,Volume,Currency
783,2020-01-01,0.160043,0.164204,0.159054,0.161058,3981429.0,USD
784,2020-01-02,0.161114,0.1615,0.157615,0.158436,3753023.0,USD
785,2020-01-03,0.158446,0.167603,0.156091,0.166859,4989336.0,USD
786,2020-01-04,0.167035,0.174714,0.16491,0.174213,5312939.0,USD
787,2020-01-05,0.174444,0.175379,0.169002,0.173044,5642017.0,USD
788,2020-01-06,0.173108,0.185326,0.172767,0.182896,9109465.0,USD
789,2020-01-07,0.183048,0.197619,0.183048,0.185702,10842185.0,USD
790,2020-01-08,0.18613,0.190445,0.178838,0.1811,8308095.0,USD
791,2020-01-09,0.181139,0.182257,0.176155,0.178258,5403819.0,USD
792,2020-01-10,0.178181,0.189612,0.171467,0.189612,7247988.0,USD
