In [1]:
import os
import pandas as pd

# Caminho para a pasta com os CSVs
data_folder = 'data/'

# Verifica se a pasta existe
if not os.path.exists(data_folder):
    raise FileNotFoundError(f"The folder '{data_folder}' does not exist.")

# Lista de todos os arquivos CSV na pasta
csv_files = [f for f in os.listdir(data_folder) if f.endswith('.csv')]

# Verifica se há arquivos CSV na pasta
if not csv_files:
    raise FileNotFoundError(f"No CSV files found in the folder '{data_folder}'.")

# Lista para armazenar os DataFrames
dfs = []

# Itera sobre os arquivos CSV
for file in csv_files:
    # Lê o arquivo CSV
    df = pd.read_csv(os.path.join(data_folder, file))
    
    # Verifica se a coluna 'timestamp' existe
    if 'timestamp' not in df.columns:
        raise ValueError(f"The file '{file}' does not contain a 'timestamp' column.")
    
    # Adiciona a coluna 'Bank_Name' (extraído do nome do arquivo)
    bank_name = file.split('BANK')[0]  # Obtém o nome do banco antes de 'BANK'
    df['Bank_Name'] = bank_name
    
    # Adiciona o DataFrame à lista
    dfs.append(df)

# Concatena todos os DataFrames em um único DataFrame
final_df = pd.concat(dfs, ignore_index=True)

# Converte a coluna 'timestamp' para datetime, se necessário
final_df['timestamp'] = pd.to_datetime(final_df['timestamp'])

# Ordena o DataFrame pela coluna 'timestamp'
final_df = final_df.sort_values(by='timestamp')

# Exibe o DataFrame final
print(final_df)

# Opcionalmente, salve o DataFrame final em um novo CSV
final_df.to_csv('Stockmarket.csv', index=False)


                        timestamp     open     high      low    close  \
0       2017-01-02 09:15:00+05:30   338.70   338.70   336.60   338.00   
741092  2017-01-02 09:15:00+05:30   605.00   605.00   601.00   601.45   
1111638 2017-01-02 09:15:00+05:30   719.80   720.90   716.35   720.15   
370546  2017-01-02 09:15:00+05:30   451.40   452.00   450.60   451.20   
1       2017-01-02 09:16:00+05:30   337.40   337.70   337.15   337.30   
...                           ...      ...      ...      ...      ...   
370544  2021-01-01 15:28:00+05:30   231.15   231.35   231.10   231.20   
741091  2021-01-01 15:29:00+05:30   622.20   624.50   622.00   623.40   
370545  2021-01-01 15:29:00+05:30   231.20   231.35   231.00   231.10   
1111637 2021-01-01 15:29:00+05:30  1421.95  1423.45  1421.20  1423.45   
1482183 2021-01-01 15:29:00+05:30  1994.50  1996.00  1993.00  1993.20   

          volume Bank_Name  
0         4667.0       RBL  
741092    7033.0      HDFC  
1111638   8916.0     KOTAK  
370546 

In [2]:
import pandas as pd

# Carrega o arquivo CSV
df = pd.read_csv("Stockmarket.csv")

# Verifica o DataFrame original
print("DataFrame Original:")
print(df.head())

# Reorganiza as colunas para colocar o nome do banco como a segunda coluna
columns_order = ["timestamp", "Bank_Name", "open", "high", "low", "close", "volume"]
df = df[columns_order]

# Salva o CSV corrigido
df.to_csv("Stockmarket.csv", index=False)

print("\nDataFrame Corrigido:")
print(df.head())


DataFrame Original:
                   timestamp   open   high     low   close   volume Bank_Name
0  2017-01-02 09:15:00+05:30  338.7  338.7  336.60  338.00   4667.0       RBL
1  2017-01-02 09:15:00+05:30  605.0  605.0  601.00  601.45   7033.0      HDFC
2  2017-01-02 09:15:00+05:30  719.8  720.9  716.35  720.15   8916.0     KOTAK
3  2017-01-02 09:15:00+05:30  451.4  452.0  450.60  451.20  29198.0      AXIS
4  2017-01-02 09:16:00+05:30  337.4  337.7  337.15  337.30   1629.0       RBL

DataFrame Corrigido:
                   timestamp Bank_Name   open   high     low   close   volume
0  2017-01-02 09:15:00+05:30       RBL  338.7  338.7  336.60  338.00   4667.0
1  2017-01-02 09:15:00+05:30      HDFC  605.0  605.0  601.00  601.45   7033.0
2  2017-01-02 09:15:00+05:30     KOTAK  719.8  720.9  716.35  720.15   8916.0
3  2017-01-02 09:15:00+05:30      AXIS  451.4  452.0  450.60  451.20  29198.0
4  2017-01-02 09:16:00+05:30       RBL  337.4  337.7  337.15  337.30   1629.0
