# Import Libraries

In [20]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score, mean_absolute_percentage_error, mean_squared_error

# Data Preprocessing

In [21]:
# Column-oriented toy dataset: ten rows with the "KM" column running from 100
# to 550 in steps of 50, an alternating "Ya"/"Tidak" flag, and the ticket
# price (the "Rp" column) for each row.
data = {
    "Jarak (KM)": list(range(100, 600, 50)),
    "Musim Puncak": ["Ya", "Tidak"] * 5,
    "Harga Tiket (Rp)": [
        2_300_000, 2_150_000,
        3_835_000, 3_530_000,
        5_370_000, 5_056_000,
        6_900_000, 6_600_000,
        8_440_000, 7_980_000,
    ],
}

In [22]:
# Promote the column-oriented dict to a DataFrame (the same name is rebound),
# then show it as the cell output.
data = pd.DataFrame(data=data)
data

Unnamed: 0,Jarak (KM),Musim Puncak,Harga Tiket (Rp)
0,100,Ya,2300000
1,150,Tidak,2150000
2,200,Ya,3835000
3,250,Tidak,3530000
4,300,Ya,5370000
5,350,Tidak,5056000
6,400,Ya,6900000
7,450,Tidak,6600000
8,500,Ya,8440000
9,550,Tidak,7980000


# Label Encoding

In [23]:
# Swap every categorical column for its integer category codes. Categories
# are sorted before coding, so "Tidak" becomes 0 and "Ya" becomes 1.
cat_columns = ['Musim Puncak']
encoded = {name: data[name].astype('category').cat.codes for name in cat_columns}
for name, codes in encoded.items():
    data[name] = codes

data.head(10)

Unnamed: 0,Jarak (KM),Musim Puncak,Harga Tiket (Rp)
0,100,1,2300000
1,150,0,2150000
2,200,1,3835000
3,250,0,3530000
4,300,1,5370000
5,350,0,5056000
6,400,1,6900000
7,450,0,6600000
8,500,1,8440000
9,550,0,7980000


# Split Data

In [24]:
# Separate the regression target (ticket price column) from the remaining
# feature columns.
target_col = 'Harga Tiket (Rp)'
y = data[target_col]
x = data.drop(columns=[target_col])

In [25]:
# Seeded 70/30 hold-out split; random_state pins the shuffle so the same rows
# land in train and test on every run.
splits = train_test_split(x, y, train_size=0.7, random_state=42)
x_train, x_test, y_train, y_test = splits
# Echo the shape of each partition as the cell output.
tuple(part.shape for part in splits)

((7, 2), (3, 2), (7,), (3,))

# Build Model

In [26]:
# Fit a linear regression on the training partition. The bare fit(...) call
# is kept as the last expression so the fitted estimator is echoed as output.
model = LinearRegression()
model.fit(X=x_train, y=y_train)

In [27]:
# Predicted ticket prices for the held-out rows.
y_pred = model.predict(X=x_test)

# Actual vs Prediction

In [28]:
# Show floats in pandas output without decimals. This is a global display
# option, so it stays in effect for every later cell in the session.
pd.options.display.float_format = '{:.0f}'.format

In [29]:
# Side-by-side table of actual vs. predicted prices for the test rows; the
# index is inherited from y_test, so each row keeps its original row label.
# The table gets its own name instead of rebinding `data`: overwriting the
# feature DataFrame would make the earlier split cell fail with a KeyError
# when re-run.
comparison = pd.DataFrame({'Actual': y_test, 'Prediction': y_pred})
comparison.head(10)

Unnamed: 0,Actual,Prediction
8,8440000,8383362
1,2150000,2002414
5,5056000,5028103


# Model Evaluation

In [30]:
# Hold-out error metrics: MSE, its square root (RMSE, in the same unit as the
# target), and the coefficient of determination.
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
r_squared = r2_score(y_test, y_pred)

# Print each metric on its own line as "<label> <value>".
for label, value in (
    ("Mean Squared Error (MSE):", mse),
    ("Root Mean Squared Error (RMSE):", rmse),
    ("R-squared:", r_squared),
):
    print(label, value)

Mean Squared Error (MSE): 8589253765.358711
Root Mean Squared Error (RMSE): 92678.22702964657
R-squared: 0.9986999197064118


In [31]:
# Fitted parameters; the coefficients follow the feature column order of x
# (distance first, then the encoded peak-season flag).
print(f'Koefisien: {model.coef_}')
print(f'Intercept: {model.intercept_}')
# For a regressor, score() returns the coefficient of determination (R^2),
# not a classification accuracy, so the label says what the number is.
print(f'R-squared (model.score): {model.score(x_test, y_test)}')

Koefisien: [  15128.44827586 1085991.37931034]
Intercept: -266853.44827586133
Accuracy score: 0.9986999197064118
