In [6]:
import pandas as pd

# === 1. Load Dataset ===
file_path = "TSLA_2015_2025_Histogram.csv"
df = pd.read_csv(file_path)

# === 2. Membersihkan nilai numerik ===
# Hilangkan koma dan ubah menjadi float
cols_to_clean = ['Open', 'High', 'Low', 'Close', 'Volume']
for col in cols_to_clean:
    df[col] = df[col].str.replace(',', '').astype(float)

# === 3. Konversi kolom tanggal dan urutkan ===
df['date'] = pd.to_datetime(df['date'], format='%m/%d/%Y')
df = df.sort_values('date')

# === 4. Buat kolom target klasifikasi: 'Price_Up' ===
# Price_Up = 1 jika harga penutupan naik dari hari sebelumnya
df['Price_Up'] = df['Close'].diff().apply(lambda x: 1 if x > 0 else 0)

# === 5. Hapus baris pertama (karena NaN akibat diff()) ===
df = df.dropna().reset_index(drop=True)

# === 6. Simpan dataset hasil preprocessing jika diperlukan ===
df.to_csv("TSLA_Preprocessed.csv", index=False)

# Tampilkan 5 baris hasil akhir
print(df[['date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Price_Up']].head())

        date          Open          High           Low         Close  \
0 2015-01-02  1.485800e+08  1.488333e+09  1.421733e+09  1.462067e+09   
1 2015-01-05  1.430333e+09  1.443333e+08  1.381067e+09  1.400600e+09   
2 2015-01-06  1.400400e+09  1.428000e+09  1.361400e+09  1.408533e+09   
3 2015-01-07  1.422333e+09  1.431867e+09  1.398533e+09  1.406333e+09   
4 2015-01-08  1.418733e+09  1.425333e+09  1.400067e+09  1.404133e+08   

       Volume  Price_Up  
0  71466000.0         0  
1  80527500.0         0  
2  93928500.0         1  
3  44526000.0         0  
4  51637500.0         0  


In [2]:
import pandas as pd
import numpy as np
import joblib
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, Input
from keras.optimizers import Adam

# Membaca dataset
df = pd.read_csv('TSLA_Preprocessed.csv', index_col='date', parse_dates=True)

# Ambil hanya kolom 'Open'
data = df[['Open']].copy()
data.dropna(inplace=True)

dataset = data.values

# Normalisasi dan Split Data
scaler = MinMaxScaler(feature_range=(0, 1))
dataset_scaled = scaler.fit_transform(dataset)

train_size = int(len(dataset_scaled) * 0.8)
val_size = int(len(dataset_scaled) * 0.9)

train_data = dataset_scaled[:train_size]
val_data = dataset_scaled[train_size - 60:val_size]
test_data = dataset_scaled[val_size - 60:]

# Membuat Sequences
def create_sequences(data, sequence_length=60):
    X, y = [], []
    for i in range(sequence_length, len(data)):
        X.append(data[i - sequence_length:i, 0])
        y.append(data[i, 0])
    return np.array(X), np.array(y)

X_train, y_train = create_sequences(train_data)
X_val, y_val = create_sequences(val_data)
X_test, y_test = create_sequences(test_data)

# Reshape untuk LSTM: [samples, timesteps, features]
X_train = X_train.reshape(-1, 60, 1)
X_val = X_val.reshape(-1, 60, 1)
X_test = X_test.reshape(-1, 60, 1)

# Bangun Model LSTM
model = Sequential([
    Input(shape=(60, 1)),
    LSTM(50, return_sequences=True),
    Dropout(0.2),
    LSTM(50),
    Dropout(0.2),
    Dense(25, activation='relu'),
    Dense(1)
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error')

# Training Model
history = model.fit(
    X_train, y_train,
    epochs=20,
    batch_size=16,
    validation_data=(X_val, y_val),
    verbose=1
)

# Prediksi dan Evaluasi
predictions = model.predict(X_test)
predictions = scaler.inverse_transform(predictions)

# Ambil nilai aktual (tanpa normalisasi)
y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))

# Hitung RMSE
rmse = np.sqrt(np.mean((predictions - y_test_actual) ** 2))
print(f'RMSE: {rmse}')

# Sinkronisasi index tanggal dengan prediksi
prediction_dates = data.index[val_size:val_size + len(predictions)]

# DataFrame hasil prediksi
prediction_df = pd.DataFrame({
    'Actual': y_test_actual.flatten(),
    'Predicted': predictions.flatten()
}, index=prediction_dates)

# Visualisasi dengan Plotly
fig = go.Figure()
fig.add_trace(go.Scatter(x=data.index[:train_size], y=data['Open'][:train_size], name='Data Training'))
fig.add_trace(go.Scatter(x=data.index[train_size:val_size], y=data['Open'][train_size:val_size], name='Data Validasi'))
fig.add_trace(go.Scatter(x=prediction_df.index, y=prediction_df['Actual'], name='Data Aktual (Testing)'))
fig.add_trace(go.Scatter(x=prediction_df.index, y=prediction_df['Predicted'], name='Prediksi', line=dict(dash='dot')))

fig.update_layout(
    title='Prediksi Harga Saham TSLA (LSTM)',
    xaxis_title='Tanggal',
    yaxis_title='Harga Pembukaan USD/TSLA',
    template='plotly_white',
    autosize=False,
    width=1000,
    height=600,
    legend=dict(x=0, y=1)
)
fig.show()

Epoch 1/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 28ms/step - loss: 0.0163 - val_loss: 0.0059
Epoch 2/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 23ms/step - loss: 0.0121 - val_loss: 0.0064
Epoch 3/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - loss: 0.0104 - val_loss: 0.0052
Epoch 4/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - loss: 0.0124 - val_loss: 0.0054
Epoch 5/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - loss: 0.0116 - val_loss: 0.0054
Epoch 6/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 21ms/step - loss: 0.0083 - val_loss: 0.0053
Epoch 7/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - loss: 0.0096 - val_loss: 0.0056
Epoch 8/20
[1m126/126[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 19ms/step - loss: 0.0095 - val_loss: 0.0051
Epoch 9/20
[1m126/126[0m [32m

In [3]:
# Simpan Model dan Scaler
model.save('model_lstm.h5')
joblib.dump(scaler, 'scaler.save')



['scaler.save']