In [None]:
import pandas from pd

## load the files in 'files' folder
## get the text before the first underscore in the file name
## create a new column in the dataframe with the text
## return a single dataframe with all the data

In [7]:
df = pd.read_csv('files/ADA_All_graph_coinmarketcap.csv')
df

Unnamed: 0,name;open;high;low;close;volume;marketCap;timestamp
0,2781;0.0291265007;0.1469060034;0.0182099994;0....
1,2781;0.1166220009;0.7773849964;0.0991362035;0....
2,2781;0.7188469768;1.3272099495;0.4406200051;0....
3,2781;0.5159119964;0.5353220105;0.2665230036;0....
4,2781;0.3119730055;0.317723006;0.1270460039;0.1...
...,...
81,2781;0.3887738385;0.4011769437;0.279886356;0.3...
82,2781;0.3453510983;0.414669057;0.3051042193;0.3...
83,2781;0.373213596;0.3856751238;0.3204746488;0.3...
84,2781;0.3418455239;1.1470845373;0.3213986749;1....


In [1]:
import os
import pandas as pd

def load_and_combine_files(folder_path):
    # Define the expected column structure
    expected_columns = ['Start', 'End', 'Open', 'High', 'Low', 'Close', 'Volume', 'Market Cap']
    all_data = []

    # Iterate through all files in the folder
    for file_name in os.listdir(folder_path):
        print(f"Processing {file_name}...")
        if file_name.endswith('.csv'):  # Process only CSV files
            file_path = os.path.join(folder_path, file_name)

            # Extract the text before the first underscore
            text_before_underscore = file_name.split('_')[0]
            print(f"Text before underscore: {text_before_underscore}")

            # Load the semi-colon-separated CSV into a DataFrame
            try:
                df = pd.read_csv(file_path, delimiter=',')
                print(df.columns)

                # Verify and align the columns to the expected structure
                df = df.reindex(columns=expected_columns, fill_value=None)

                # Add a new column with the extracted text
                df['Source'] = text_before_underscore

                # Append the DataFrame to the list
                all_data.append(df)
            except Exception as e:
                print(f"Error processing {file_name}: {e}")
    
    # Combine all DataFrames into a single DataFrame
    if all_data:
        combined_df = pd.concat(all_data, ignore_index=True)
    else:
        combined_df = pd.DataFrame(columns=expected_columns + ["Source"])
    
    return combined_df

# Specify the folder containing the files
folder_path = 'files'

# Load, process, and combine the data
final_df = load_and_combine_files(folder_path)

# Display the resulting DataFrame
print(final_df)

# Optionally, save the combined DataFrame to a new CSV file
final_df.to_csv('combined_data.csv', index=False)
print("Combined data saved to 'combined_data.csv'")


Processing tron_2017-09-18_2025-01-17.csv...
Text before underscore: tron
Index(['Start', 'End', 'Open', 'High', 'Low', 'Close', 'Volume', 'Market Cap'], dtype='object')
Processing .DS_Store...
Processing binance-coin_2017-08-18_2025-01-17.csv...
Text before underscore: binance-coin
Index(['Start', 'End', 'Open', 'High', 'Low', 'Close', 'Volume', 'Market Cap'], dtype='object')
Processing cardano_2017-09-18_2025-01-17.csv...
Text before underscore: cardano
Index(['Start', 'End', 'Open', 'High', 'Low', 'Close', 'Volume', 'Market Cap'], dtype='object')
Processing bitcoin_2010-07-18_2025-01-17.csv...
Text before underscore: bitcoin
Index(['Start', 'End', 'Open', 'High', 'Low', 'Close', 'Volume', 'Market Cap'], dtype='object')
Processing algorand_2019-06-18_2025-01-17.csv...
Text before underscore: algorand
Index(['Start', 'End', 'Open', 'High', 'Low', 'Close', 'Volume', 'Market Cap'], dtype='object')
Processing ripple_2013-08-18_2025-01-17.csv...
Text before underscore: ripple
Index(['Star

In [2]:
final_df.Source.unique()

array(['tron', 'binance-coin', 'cardano', 'bitcoin', 'algorand', 'ripple',
       'solana', 'chainlink', 'ethereum', 'avalanche', 'dogecoin',
       'cronos', 'litecoin'], dtype=object)