In [5]:
import pandas as pd
import os


input_folder  = './get_data'
output_folder = './data_code'

columns_to_keep = ['timestamp', 'low', 'high', 'close', 'volume']
rename_mapping = {
    'Tanggal Perdagangan Terakhir': 'timestamp',
    'Open Price': 'open',
    'Terendah': 'low',
    'Tertinggi': 'high',
    'Penutupan': 'close',
    'Volume': 'volume',
}

bulan_mapping = {
    'jan': 'Jan',
    'feb': 'Feb',
    'mar': 'Mar',
    'apr': 'Apr',
    'mei': 'May',
    'jun': 'Jun',
    'jul': 'Jul',
    'agt': 'Aug',
    'sep': 'Sep',
    'okt': 'Oct',
    'nov': 'Nov',
    'des': 'Dec'
}

stock_data = {}
for filename in os.listdir(input_folder):
    if filename.endswith('.xlsx'):
        file_path = os.path.join(input_folder, filename)
        data = pd.read_excel(file_path)
        data.rename(columns=rename_mapping, inplace=True)
        
        data['timestamp'] = data['timestamp'].str.replace(
            r'(\b[a-zA-Z]{3}\b)', 
            lambda x: bulan_mapping.get(x.group().lower(), x.group()), 
            regex=True
        )
        data['timestamp'] = pd.to_datetime(data['timestamp'], format='%d %b %Y', errors='coerce')

        for _, row in data.iterrows():
            stock_code = row['Kode Saham']
            if stock_code not in stock_data:
                stock_data[stock_code] = []
            stock_data[stock_code].append(row[columns_to_keep])

os.makedirs(output_folder, exist_ok=True)
for stock_code, rows in stock_data.items():
    df = pd.DataFrame(rows, columns=columns_to_keep)
    output_path = os.path.join(output_folder, f'{stock_code}.csv')
    df.to_csv(output_path, index=False)

print(f"CSV files have been saved in the {output_folder} folder")

CSV files have been saved in the ./data_code folder


In [6]:
data = pd.read_csv('./data_code/BREN.csv')
data

Unnamed: 0,timestamp,low,high,close,volume
0,2023-10-09,975,975,975,22298500
1,2023-10-10,1060,1215,1215,13625200
2,2023-10-11,1515,1515,1515,38431400
3,2023-10-12,1820,1890,1890,219262300
4,2023-10-13,2240,2360,2360,39504100
...,...,...,...,...,...
151,2024-06-04,8000,8250,8250,14694300
152,2024-06-05,7425,7425,7425,3842600
153,2024-06-06,6700,6700,6700,3041600
154,2024-06-07,6050,6050,6050,2561500


In [1]:
import pandas as pd
import os


input_folder  = './get_data'
output_folder = './combine_data_code'

columns_to_keep = ['Code', 'Company']  
rename_mapping = {
    'Kode Saham': 'Code',               
    'Nama Perusahaan': 'Company',      
}

bulan_mapping = {
    'jan': 'Jan',
    'feb': 'Feb',
    'mar': 'Mar',
    'apr': 'Apr',
    'mei': 'May',
    'jun': 'Jun',
    'jul': 'Jul',
    'agt': 'Aug',
    'sep': 'Sep',
    'okt': 'Oct',
    'nov': 'Nov',
    'des': 'Dec'
}

combined_data = []
start_dates = []
end_dates = []

for filename in os.listdir(input_folder):
    if filename.endswith('.xlsx'):
        file_path = os.path.join(input_folder, filename)
        data = pd.read_excel(file_path)
        data.rename(columns=rename_mapping, inplace=True)
        
        data['Tanggal Perdagangan Terakhir'] = data['Tanggal Perdagangan Terakhir'].str.replace(
            r'(\b[a-zA-Z]{3}\b)', 
            lambda x: bulan_mapping.get(x.group().lower(), x.group()), 
            regex=True
        )
        data['Tanggal Perdagangan Terakhir'] = pd.to_datetime(data['Tanggal Perdagangan Terakhir'], format='%d %b %Y', errors='coerce')

        combined_data.append(data[columns_to_keep])
        
        start_dates.append(data['Tanggal Perdagangan Terakhir'].min())
        end_dates.append(data['Tanggal Perdagangan Terakhir'].max())

combined_df = pd.concat(combined_data, ignore_index=True)

combined_df['Start Date'] = pd.to_datetime(min(start_dates)).strftime('%Y-%m-%d')
combined_df['End Date'] = pd.to_datetime(max(end_dates)).strftime('%Y-%m-%d')

os.makedirs(output_folder, exist_ok=True)
output_path = os.path.join(output_folder, 'combined_data.csv')
combined_df.to_csv(output_path, index=False)

print(f"Combined CSV file has been saved in the {output_folder} folder")

Combined CSV file has been saved in the ./combine_data_code folder


In [3]:
import pandas as pd

combined_data = pd.read_csv("./combine_data_code/combined_data.csv")

bren_data = combined_data[combined_data['Code'] == 'BREN']
bren_data = bren_data[['Code', 'Company', 'Start Date', 'End Date']]
bren_data

Unnamed: 0,Code,Company,Start Date,End Date
688871,BREN,Barito Renewables Energy Tbk.,2020-02-03,2024-06-10
689768,BREN,Barito Renewables Energy Tbk.,2020-02-03,2024-06-10
690666,BREN,Barito Renewables Energy Tbk.,2020-02-03,2024-06-10
691566,BREN,Barito Renewables Energy Tbk.,2020-02-03,2024-06-10
692466,BREN,Barito Renewables Energy Tbk.,2020-02-03,2024-06-10
...,...,...,...,...
826977,BREN,Barito Renewables Energy Tbk.,2020-02-03,2024-06-10
827906,BREN,Barito Renewables Energy Tbk.,2020-02-03,2024-06-10
828835,BREN,Barito Renewables Energy Tbk.,2020-02-03,2024-06-10
829764,BREN,Barito Renewables Energy Tbk.,2020-02-03,2024-06-10


In [7]:
import pandas as pd
import os


input_folder = './get_data'
output_folder = './combine_data_code'

columns_to_keep = ['Code', 'Company']
rename_mapping = {
    'Kode Saham': 'Code',
    'Nama Perusahaan': 'Company',
}

bulan_mapping = {
    'jan': 'Jan',
    'feb': 'Feb',
    'mar': 'Mar',
    'apr': 'Apr',
    'mei': 'May',
    'jun': 'Jun',
    'jul': 'Jul',
    'agt': 'Aug',
    'sep': 'Sep',
    'okt': 'Oct',
    'nov': 'Nov',
    'des': 'Dec'
}

combined_data = []

for filename in os.listdir(input_folder):
    if filename.endswith('.xlsx'):
        file_path = os.path.join(input_folder, filename)
        data = pd.read_excel(file_path)
        data.rename(columns=rename_mapping, inplace=True)

        data['Tanggal Perdagangan Terakhir'] = data['Tanggal Perdagangan Terakhir'].str.replace(
            r'(\b[a-zA-Z]{3}\b)',
            lambda x: bulan_mapping.get(x.group().lower(), x.group()),
            regex=True
        )
        data['Tanggal Perdagangan Terakhir'] = pd.to_datetime(data['Tanggal Perdagangan Terakhir'],
                                                              format='%d %b %Y', errors='coerce')

        combined_data.append(data[columns_to_keep])

combined_df = pd.concat(combined_data, ignore_index=True)
start_date = combined_df['Tanggal Perdagangan Terakhir'].min().strftime('%Y-%m-%d')
end_date = combined_df['Tanggal Perdagangan Terakhir'].max().strftime('%Y-%m-%d')

start_end_df = pd.DataFrame({'Start Date': [start_date], 'End Date': [end_date]})
os.makedirs(output_folder, exist_ok=True)
start_end_df.to_csv(os.path.join(output_folder, 'start_end_dates.csv'), index=False)

print(f"Start and end dates have been saved in the {output_folder} folder")

KeyError: 'Tanggal Perdagangan Terakhir'

In [None]:
data_2 = pd.read_csv("./combine_data_code/combined_data.csv")
data_2