In [4]:
import pandas as pd

# Load the dataset
file_path = 'data.csv'
data = pd.read_csv(file_path, delimiter=';')

# Data cleaning
data['Terakhir'] = data['Terakhir'].str.replace(',', '.').astype(float)
data['Pembukaan'] = data['Pembukaan'].str.replace(',', '.').astype(float)
data['Tertinggi'] = data['Tertinggi'].str.replace(',', '.').astype(float)
data['Terendah'] = data['Terendah'].str.replace(',', '.').astype(float)
data['Perubahan%'] = data['Perubahan%'].str.replace('%', '').str.replace(',', '.').astype(float)
data['Vol.'] = data['Vol.'].str.replace('K', '').str.replace(',', '.').astype(float) * 1000

# Convert 'Tanggal' to datetime
data['Tanggal'] = pd.to_datetime(data['Tanggal'], format='%d/%m/%Y')
data.set_index('Tanggal', inplace=True)

# Fill missing values with the mean of each column
data.fillna(data.mean(), inplace=True)

# Create dummy variables for missing values
for column in ['Pembukaan', 'Tertinggi', 'Terendah', 'Vol.', 'Perubahan%']:
    data[f'{column}_dummy'] = np.where(data[column].isnull(), 1, 0)

data


Unnamed: 0_level_0,Terakhir,Pembukaan,Tertinggi,Terendah,Vol.,Perubahan%,Pembukaan_dummy,Tertinggi_dummy,Terendah_dummy,Vol._dummy,Perubahan%_dummy
Tanggal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2024-05-29,17.823,18.04,18.043,17.808,224.705882,-1.48,0,0,0,0,0
2024-05-28,18.09,18.46,18.595,18.05,680.0,-1.36,0,0,0,0,0
2024-05-24,18.34,18.46,18.485,18.3,590.0,-0.78,0,0,0,0,0
2024-05-23,18.485,18.815,19.005,18.38,520.0,-1.41,0,0,0,0,0
2024-05-22,18.75,18.58,18.8,18.565,340.0,0.86,0,0,0,0,0
2024-05-21,18.59,18.83,18.835,18.565,330.0,-1.38,0,0,0,0,0
2024-05-20,18.85,18.77,18.865,18.7,330.0,0.51,0,0,0,0,0
2024-05-17,18.755,19.09,19.145,18.725,290.0,-1.81,0,0,0,0,0
2024-05-16,19.1,19.035,19.18,18.83,270.0,-0.05,0,0,0,0,0
2024-05-15,19.11,18.9,19.16,18.73,270.0,2.11,0,0,0,0,0


In [5]:
# Configuration dictionary
config = {
    'target': 'Terakhir',
    'exogenous': ['Pembukaan', 'Tertinggi', 'Terendah', 'Vol.', 'Perubahan%', 
                  'Pembukaan_dummy', 'Tertinggi_dummy', 'Terendah_dummy', 'Vol._dummy', 'Perubahan%_dummy'],
    'order': (1, 1, 1),
    'seasonal_order': (1, 1, 1, 12)
}

# Prepare data
y = data[config['target']]
X = data[config['exogenous']]

# Split the data into training and testing sets
train_size = int(len(data) * 0.8)
y_train, y_test = y[:train_size], y[train_size:]
X_train, X_test = X[:train_size], X[train_size:]

print(f"Training set size: {len(y_train)}")
print(f"Testing set size: {len(y_test)}")


Training set size: 16
Testing set size: 5
