In [None]:
import os
import pandas as pd

# Stock tickers; each one is expected to have a sub-folder of Excel files under data/.
folders = ['AMRT', 'ASII', 'BBCA', 'BBNI', 'BBRI', 'BMRI', 'ICBP', 'TLKM', 'UNVR']

# Indonesian three-letter month abbreviations -> English abbreviations, so the
# dates can be parsed with the '%d %b %Y' format used below.
bulan_mapping = {
    'jan': 'Jan',
    'feb': 'Feb',
    'mar': 'Mar',
    'apr': 'Apr',
    'mei': 'May',
    'jun': 'Jun',
    'jul': 'Jul',
    'agt': 'Aug',
    'sep': 'Sep',
    'okt': 'Oct',
    'nov': 'Nov',
    'des': 'Dec'
}

# Columns kept from every Excel file; the first one becomes 'timestamp'.
selected_columns = ['Tanggal Perdagangan Terakhir', 'Kode Saham', 'Open Price', 'Tertinggi', 'Terendah', 'Penutupan', 'Volume']


def _translate_months(dates):
    """Replace every three-letter word found in `bulan_mapping` with its English form."""
    return dates.str.replace(
        r'(\b[a-zA-Z]{3}\b)',
        lambda x: bulan_mapping.get(x.group().lower(), x.group()),
        regex=True,
    )


def combine_folder(folder, data_dir="data"):
    """Merge all .xlsx files of one ticker folder into `<data_dir>/<folder>.csv`.

    Parameters
    ----------
    folder : str
        Name of the sub-folder of `data_dir` that holds the Excel files.
    data_dir : str, default "data"
        Directory containing the ticker folders; the CSV is written here too.

    Returns
    -------
    pandas.DataFrame or None
        The combined frame, or None when the folder is missing or no file in it
        could be used (in that case no CSV is written).
    """
    folder_path = f"{data_dir}/{folder}"
    try:
        # Sorted so the row order of the output CSV is reproducible;
        # os.listdir returns entries in arbitrary order.
        files = sorted(os.listdir(folder_path))
    except FileNotFoundError:
        print(f"Folder '{folder}' tidak ditemukan.")
        return None

    all_data = []
    for file in files:
        if not file.endswith('.xlsx'):
            continue

        file_path = os.path.join(folder_path, file)
        try:
            df = pd.read_excel(file_path)
        except Exception as e:
            print(f"Gagal membaca file '{file_path}': {e}")
            continue

        try:
            # .copy() makes the column assignment below operate on an independent
            # frame instead of a slice of the sheet (avoids SettingWithCopyWarning).
            df = df[selected_columns].copy()
        except KeyError as e:
            # A sheet with a different layout must not abort the whole run.
            print(f"Kolom yang dibutuhkan tidak ada di file '{file_path}': {e}")
            continue

        try:
            df['Tanggal Perdagangan Terakhir'] = pd.to_datetime(
                _translate_months(df['Tanggal Perdagangan Terakhir']),
                format='%d %b %Y',
                errors='coerce',
            )
        except Exception as e:
            print(f"Gagal mengonversi tanggal di file '{file_path}': {e}")
            continue

        # errors='coerce' turns unparseable values into NaT; report them rather
        # than letting them slip silently into the CSV.
        num_invalid = int(df['Tanggal Perdagangan Terakhir'].isna().sum())
        if num_invalid:
            print(f"Peringatan: {num_invalid} baris di file '{file_path}' tidak memiliki tanggal yang valid.")

        all_data.append(df.rename(columns={'Tanggal Perdagangan Terakhir': 'timestamp'}))

    if not all_data:
        # pd.concat raises ValueError on an empty list, which used to stop the
        # processing of every remaining folder.
        print(f"Tidak ada data yang dapat digabungkan untuk '{folder}'; file CSV tidak dibuat.")
        return None

    combined_data = pd.concat(all_data, ignore_index=True)
    csv_file_path = f"{data_dir}/{folder}.csv"
    try:
        combined_data.to_csv(csv_file_path, index=False)
        print(f"Data gabungan untuk '{folder}' berhasil disimpan di '{csv_file_path}'.")
    except Exception as e:
        print(f"Gagal menyimpan data gabungan ke '{csv_file_path}': {e}")
    return combined_data


for folder in folders:
    combine_folder(folder)

In [5]:
import pandas as pd

files = ['Gasoline.csv']

# For every listed CSV: show the rows that occur more than once and report how
# many rows would remain after removing the redundant copies.
for file in files:
    print("File:", file)
    df = pd.read_csv(file)
    num_total_data = len(df)
    # keep=False flags every member of a duplicate group, so the listing shows
    # the first occurrence side by side with its repeats.
    duplicate_rows = df[df.duplicated(keep=False)]
    print("Data duplikat beserta yang di-duplikat:")
    print(duplicate_rows)
    # Count only the redundant repeats (default keep='first'). Using
    # len(duplicate_rows) here would also count the first occurrences, and the
    # subtraction below would then under-report the rows left after
    # de-duplication.
    num_duplicates = int(df.duplicated().sum())
    print("Jumlah duplikat:", num_duplicates)
    print("Jumlah data sebelum mengurangi duplikat:", num_total_data)
    num_unique_data = num_total_data - num_duplicates
    print("Jumlah data dikurangi jumlah duplikat:", num_unique_data)
    print()

File: Gasoline.csv
Data duplikat beserta yang di-duplikat:
Empty DataFrame
Columns: [Tanggal, qty, Jumlah]
Index: []
Jumlah duplikat: 0
Jumlah data sebelum mengurangi duplikat: 1998
Jumlah data dikurangi jumlah duplikat: 1998



In [2]:
import pandas as pd

files = ['./combine_data_code/combined_data.csv']

# Rewrite each listed CSV in place with its fully identical rows removed.
for file in files:
    df = pd.read_csv(file).drop_duplicates()
    df.to_csv(file, index=False)
    print("Duplikat telah dihapus dan file telah diperbarui:", file)

Duplikat telah dihapus dan file telah diperbarui: ./combine_data_code/combined_data.csv


In [None]:
# Per-ticker CSV file names, resolved relative to the data/ directory by read_data().
files = [
    "AMRT.csv", "ASII.csv", "BBCA.csv",
    "BBNI.csv", "BBRI.csv", "BMRI.csv",
    "ICBP.csv", "TLKM.csv", "UNVR.csv",
]

def read_data(file):
    """Load the CSV named `file` from the data/ directory into a DataFrame."""
    csv_path = f"data/{file}"
    return pd.read_csv(csv_path)
    
# Parse every CSV exactly once and remember its set of timestamps, instead of
# reading each file a second time in the reporting loop.
timestamps_by_file = {file: set(read_data(file)['timestamp']) for file in files}

# Union of the timestamps that appear in at least one file.
all_timestamps = set().union(*timestamps_by_file.values())

# For each file, list the timestamps other files have but this one lacks.
for file, timestamps in timestamps_by_file.items():
    missing_timestamps = all_timestamps - timestamps
    print(f"\nTimestamp yang tidak ada di {file}:")
    # Sets print in arbitrary order; sort (by text, so mixed value types cannot
    # raise) to get a listing that is stable between runs.
    print(sorted(missing_timestamps, key=str))

In [None]:
import pandas as pd

# Per-ticker CSV file names, resolved relative to the data/ directory by read_data().
files = [
    "AMRT.csv", "ASII.csv", "BBCA.csv",
    "BBNI.csv", "BBRI.csv", "BMRI.csv",
    "ICBP.csv", "TLKM.csv", "UNVR.csv",
]

def read_data(file):
    """Read `data/<file>` with pandas and return the resulting DataFrame."""
    # NOTE(review): a function with this same name and body is also defined in
    # an earlier cell of this notebook; this definition shadows it.
    csv_path = f"data/{file}"
    return pd.read_csv(csv_path)

# Parse every CSV exactly once and remember its set of timestamps, instead of
# reading each file again when counting.
timestamps_by_file = {file: set(read_data(file)['timestamp']) for file in files}

# Union of the timestamps that appear in at least one file.
all_timestamps = set().union(*timestamps_by_file.values())

# Number of timestamps present in some other file but absent from each file.
missing_timestamp_counts = {
    file: len(all_timestamps - timestamps)
    for file, timestamps in timestamps_by_file.items()
}

for file, count in missing_timestamp_counts.items():
    print(f"Jumlah timestamp yang tidak ada di {file}: {count}")

In [None]:
files = [
    'data/AMRT.csv',
    'data/ASII.csv',
    'data/BBCA.csv',
    'data/BBNI.csv',
    'data/BBRI.csv',
    'data/BMRI.csv',
    'data/ICBP.csv',
    'data/TLKM.csv',
    'data/UNVR.csv',
]

# Report how many data rows each listed CSV contains.
for file in files:
    num_rows = pd.read_csv(file).shape[0]
    print(f"Jumlah baris data di {file}: {num_rows}")

In [37]:
# Load Gasoline.csv, expose its 'Tanggal' column as 'timestamp', and preview
# the first ten rows.
# NOTE(review): rename() leaves the frame unchanged when no 'Tanggal' column
# exists — confirm the column is present in the file being loaded.
file = 'Gasoline.csv'
df = pd.read_csv(file).rename(columns={'Tanggal': 'timestamp'})
df.head(10)

Unnamed: 0.1,Unnamed: 0,qty
0,2016-01-01,8694
1,2016-01-02,10044
2,2016-01-03,13928
3,2016-01-04,12518
4,2016-01-05,11112
5,2016-01-06,11606
6,2016-01-07,9673
7,2016-01-08,9382
8,2016-01-09,10501
9,2016-01-10,10659


In [38]:
# Keep only the first two columns (selected by position) and display the result.
df = df.iloc[:, :2]
df

Unnamed: 0.1,Unnamed: 0,qty
0,2016-01-01,8694
1,2016-01-02,10044
2,2016-01-03,13928
3,2016-01-04,12518
4,2016-01-05,11112
...,...,...
2124,2021-10-25,12239
2125,2021-10-26,11319
2126,2021-10-27,12502
2127,2021-10-28,12456


In [34]:
# Write the current frame to Diesel.csv without the index column.
# NOTE(review): read top to bottom, `df` was loaded from 'Gasoline.csv' in the
# cells above, yet it is saved as 'Diesel.csv' here. This cell's execution
# count (34) is lower than those cells' (37, 38), so it probably ran against a
# different `df` — confirm before re-running the notebook in order.
df.to_csv(path_or_buf='Diesel.csv', index=False)