# Regresi Sederhana

## Persiapan

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

## Membaca Data

In [2]:
# Load the raw error log and convert its 'Timestamp' column to pandas
# datetimes in one chained expression.
df = pd.read_csv('data/error_log_updated.csv').assign(
    Timestamp=lambda frame: pd.to_datetime(frame['Timestamp'])
)

## Persiapan Data

In [3]:
# Order rows by user and then chronologically, so each user's errors are
# contiguous and in time order.
df = df.sort_values(['User ID', 'Timestamp'])

# Hours from each error to the same user's next error. The shift is done
# inside each group so a user's last error can never pick up a timestamp
# from the following user; it gets a missing value instead.
next_timestamp = df.groupby('User ID')['Timestamp'].shift(-1)
df['Next Error (hours)'] = (next_timestamp - df['Timestamp']).dt.total_seconds() / 3600

# Drop each user's final row: it has no following error, hence no target.
# The explicit copy makes the result an independent frame, so the column
# assignments below cannot raise SettingWithCopyWarning (seen on some pandas
# versions when assigning to a filtered frame).
df = df.dropna(subset=['Next Error (hours)']).copy()

# Hour of the day at which the error occurred.
df['Hour of Day'] = df['Timestamp'].dt.hour

# Day of the week (0 = Monday, 6 = Sunday).
df['Day of Week'] = df['Timestamp'].dt.dayofweek

# Integer codes for the categorical columns.
# NOTE(review): 'Platform' is nominal, but these codes impose an order on it
# when fed to a linear model; consider one-hot encoding instead.
platform_mapping = {'Web': 1, 'Android': 2, 'iOS': 3}
severity_level_mapping = {'Debug': 1, 'Info': 2, 'Warning': 3, 'Error': 4, 'Critical': 5, 'Fatal': 6}

# Encode each categorical column and fail fast on labels the mapping does not
# cover. Series.map() turns those into NaN without complaint, which would
# otherwise only surface later as a NaN error when the model is fitted.
for source_col, numeric_col, mapping in (
    ('Platform', 'Platform Numeric', platform_mapping),
    ('Severity Level', 'Severity Level Numeric', severity_level_mapping),
):
    encoded = df[source_col].map(mapping)
    unmapped = df.loc[encoded.isna(), source_col].unique().tolist()
    if unmapped:
        raise ValueError(
            f"Column {source_col!r} contains values with no numeric mapping: {unmapped}"
        )
    df[numeric_col] = encoded

# Feature columns and the target column used for modelling.
features = ['Hour of Day', 'Day of Week', 'Platform Numeric', 'Severity Level Numeric']
target = 'Next Error (hours)'

In [4]:
# Split the prepared frame into the feature matrix (X) and the target
# vector (y).
X, y = df[features], df[target]

## Membangun Model

In [5]:
# --- Model building ---

# Hold out 20% of the rows as a test set; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Create the linear regression model and fit it on the training split
# (fit() returns the estimator itself, so the two steps can be chained).
model = LinearRegression().fit(X_train, y_train)