In [73]:
import pandas as pd
import os

# Set display options to show all rows and columns
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Get a list of all CSV files
csv_files = [os.path.join('data', file) for file in os.listdir('data')]

# Create an empty dictionary that will consist of crypto dataframes
dataframes = {}

# Loop through each CSV file and create a dataframe
for file in csv_files:
    # Extract dataframe name from file name (without the extension)
    df_name = os.path.splitext(os.path.basename(file))[0]
    # Read the CSV file into a dataframe and store it in the dictionary
    dataframes[df_name] = pd.read_csv(file, encoding='latin1')  # Specify encoding here

In [None]:
dataframes['BNB']

In [89]:
# Create an empty dictionary to store filtered dataframes
filtered_dataframes = {}

# Iterate through each dataframe in the original dictionary
for df_name, df in dataframes.items():
    # Check if the last row of the first column is equal to '2022-08-23'
    if not df.empty and df.iloc[-1, 0] == '2022-08-23':
        # If the condition is met, add the dataframe to the filtered dictionary
        filtered_dataframes[df_name] = df

# Now, the filtered_dataframes dictionary contains only the dataframes that meet the condition

In [None]:
filtered_dataframes.keys()

In [91]:
len(filtered_dataframes.keys())

94

In [92]:
for df_name, df in dataframes.items():
    start_date = df.iloc[1, 0]
    end_date = df.iloc[-1, 0]
    print(df_name)
    print(f"Start date = {start_date}")
    print(f"End date = {end_date}")

Celsius
Start date = 2018-10-02
End date = 2022-08-23
IOTA
Start date = 2017-11-10
End date = 2022-08-23
Axie Infinity
Start date = 2020-11-05
End date = 2022-08-23
chainlink
Start date = 2018-02-03
End date = 2022-08-23
PAX Gold
Start date = 2019-09-27
End date = 2022-08-23
Hedera
Start date = 2019-09-18
End date = 2022-08-23
bitcoin cash
Start date = 2017-08-04
End date = 2022-08-23
Kava
Start date = 2019-10-26
End date = 2022-08-23
Elrond
Start date = 2020-09-05
End date = 2022-08-23
Helium
Start date = 2020-06-09
End date = 2022-08-23
Internet Computer
Start date = 2021-05-11
End date = 2022-08-23
STEPN
Start date = 2021-04-14
End date = 2022-08-23
.DS_Store
Start date = nan
End date = nan
xrp
Start date = 2015-01-23
End date = 2022-08-23
Theta Network
Start date = 2018-01-18
End date = 2022-08-23
Bitcoin Gold
Start date = 2017-11-10
End date = 2022-08-23
Gnosis
Start date = 2017-11-10
End date = 2022-08-23
near protocol
Start date = 2020-10-16
End date = 2022-08-23
Synthetix
Start

In [96]:
# Initialize a counter for the number of start dates
count_2019 = 0
count_2020 = 0
count_2021 = 0
count_2022 = 0

# Iterate through each dataframe
for df_name, df in dataframes.items():
    start_date = str(df.iloc[1, 0])
    if start_date.split('-')[0] == '2019':
        count_2019 += 1
    elif start_date.split('-')[0] == '2020':
        count_2020 += 1
    elif start_date.split('-')[0] == '2021':
        count_2021 += 1
    elif start_date.split('-')[0] == '2022':
        count_2022 += 1

# Print the count of start dates in 2020
print("Number of start dates in 2019:", count_2019)
print("Number of start dates in 2020:", count_2020)
print("Number of start dates in 2021:", count_2021)
print("Number of start dates in 2022:", count_2022)


Number of start dates in 2019: 15
Number of start dates in 2020: 21
Number of start dates in 2021: 8
Number of start dates in 2022: 1


In [98]:
# Create an empty dictionary to store filtered dataframes
filtered1_dataframes = {}

# Iterate through each dataframe in the filtered dictionary
for df_name, df in dataframes.items():
    # Get the start date from the second row (index 1) of the first column (index 0)
    start_date = df.iloc[1, 0]
    # Check if the start date is a valid numeric value and not missing (NaN)
    if pd.notna(start_date) and start_date.split('-')[0].isdigit():
        # Convert the start date to an integer and check if it's older than 2020
        if int(start_date.split('-')[0]) < 2020:
            # If the condition is met, add the dataframe to the filtered1 dictionary
            filtered1_dataframes[df_name] = df


In [99]:
len(filtered1_dataframes.items())

64

In [None]:
filtered1_dataframes['BNB']

In [107]:
# Iterate through each dataframe in the filtered1_dataframes dictionary
for df_name, df in filtered1_dataframes.items():
    # Find the index of the row where the first column is equal to '2020-01-01'
    index_2020 = df[df.iloc[:, 0] == '2020-01-01'].index
    if not index_2020.empty:
        # Slice the dataframe to remove all rows before the row with index index_2020
        filtered1_dataframes[df_name] = df.loc[index_2020[0]:]


In [108]:
for df_name, df in filtered1_dataframes.items():
    start_date = df.iloc[1, 0]
    end_date = df.iloc[-1, 0]
    print(df_name)
    print(f"Start date = {start_date}")
    print(f"End date = {end_date}")

Celsius
Start date = 2020-01-02
End date = 2022-08-23
IOTA
Start date = 2020-01-02
End date = 2022-08-23
chainlink
Start date = 2020-01-02
End date = 2022-08-23
PAX Gold
Start date = 2020-01-02
End date = 2022-08-23
Hedera
Start date = 2020-01-02
End date = 2022-08-23
bitcoin cash
Start date = 2020-01-02
End date = 2022-08-23
Kava
Start date = 2020-01-02
End date = 2022-08-23
xrp
Start date = 2020-01-02
End date = 2022-08-23
Theta Network
Start date = 2020-01-02
End date = 2022-08-23
Bitcoin Gold
Start date = 2020-01-02
End date = 2022-08-23
Gnosis
Start date = 2020-01-02
End date = 2022-08-23
Synthetix
Start date = 2020-01-02
End date = 2022-08-23
monero
Start date = 2020-01-02
End date = 2022-08-23
KuCoin Token
Start date = 2020-01-02
End date = 2022-08-23
Zilliqa
Start date = 2020-01-02
End date = 2022-08-23
Holo
Start date = 2020-01-02
End date = 2022-08-23
BitTorrent (New)
Start date = 2020-01-02
End date = 2022-08-23
Waves
Start date = 2020-01-02
End date = 2022-08-23
Quant
Start

In [109]:
ready_dataframes = filtered1_dataframes

In [None]:
ready_dataframes['BNB']

In [None]:
ready_dataframes['IOTA']