<a href="https://colab.research.google.com/github/ahlandapu/DataMiningSMK/blob/main/UAS_DataMining_AhlandaPutra.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

**Data Preparation**

In [10]:
# Load dataset
df = pd.read_csv("datasmk2.csv")

In [11]:
# Menangani Missing Values
df.fillna(df.mode().iloc[0], inplace=True)  # Mengisi dengan modus (nilai terbanyak)

In [12]:
# Encoding Data Kategorikal
label_encoders = {}
categorical_columns = ['gender', 'n_sikap', 'organisasi', 'ekstra', 'asal_smp', 'tmpt_tinggal',
                       'status_nikah', 'pendidikan_ayah', 'pekerjaan_ayah', 'gaji_ayah',
                       'pendidikan_ibu', 'pekerjaan_ibu', 'gaji_ibu', 'beasiswa', 'status_rumah', 'hasil']
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le  # Menyimpan encoder untuk decoding nanti

In [13]:
# Memisahkan Data Fitur dan Target
X = df.drop(columns=['hasil'])  # Fitur
y = df['hasil']  # Target klasifikasi

In [14]:
# Membagi Data Menjadi Training & Testing Set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [15]:
# Menampilkan hasil
print("Data setelah preprocessing:")
print(df.head())

Data setelah preprocessing:
   gender  absen  n_sikap  organisasi  ekstra  asal_smp  tmpt_tinggal  \
0       1      1        0           1       0         0             1   
1       1      1        1           0       0         0             1   
2       1      0        0           0       0         0             1   
3       1      3        0           0       1         0             1   
4       0      5        1           1       0         0             1   

   status_nikah  pendidikan_ayah  pekerjaan_ayah  gaji_ayah  pendidikan_ibu  \
0             2                6               0          1              11   
1             2                8               1          0              11   
2             0                7               9          4              12   
3             2                8               0          1              11   
4             2                7               0          0              11   

   pekerjaan_ibu  gaji_ibu  beasiswa  jml_keluarga  status

**Modelling**

In [16]:
# Modeling dengan Decision Tree
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)
y_pred_dt = dt_model.predict(X_test)

In [17]:
# Evaluasi Decision Tree
print("Evaluasi Decision Tree:")
print(classification_report(y_test, y_pred_dt))
print("Akurasi Decision Tree:", accuracy_score(y_test, y_pred_dt))

Evaluasi Decision Tree:
              precision    recall  f1-score   support

           0       0.71      0.74      0.72       113
           1       0.45      0.41      0.43        59

    accuracy                           0.63       172
   macro avg       0.58      0.58      0.58       172
weighted avg       0.62      0.63      0.62       172

Akurasi Decision Tree: 0.627906976744186


In [18]:
# Modeling dengan Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

In [19]:
# Evaluasi Random Forest
print("Evaluasi Random Forest:")
print(classification_report(y_test, y_pred_rf))
print("Akurasi Random Forest:", accuracy_score(y_test, y_pred_rf))

Evaluasi Random Forest:
              precision    recall  f1-score   support

           0       0.70      0.75      0.72       113
           1       0.44      0.37      0.40        59

    accuracy                           0.62       172
   macro avg       0.57      0.56      0.56       172
weighted avg       0.61      0.62      0.61       172

Akurasi Random Forest: 0.622093023255814


**Menyimpan Model**

---


Model yang telah dilatih disimpan dalam format .pkl agar dapat digunakan kembali tanpa perlu pelatihan ulang.

In [20]:
import joblib
joblib.dump(dt_model, "decision_tree_model.pkl")
joblib.dump(rf_model, "random_forest_model.pkl")

['random_forest_model.pkl']

***Deployment menggunakan Streamlit.***

---




In [21]:
pip install streamlit

Collecting streamlit
  Downloading streamlit-1.42.0-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.42.0-py2.py3-none-any.whl (9.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.6/9.6 MB[0m [31m38.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m55.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[

In [22]:
import streamlit as st
import pandas as pd
import joblib

In [23]:
# Load model
dt_model = joblib.load("decision_tree_model.pkl")
rf_model = joblib.load("random_forest_model.pkl")

In [24]:
# Judul aplikasi
st.title("Prediksi Hasil Belajar Siswa")

2025-02-12 04:13:39.618 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]


DeltaGenerator()

In [25]:
# Input data pengguna
gender = st.selectbox("Gender", ["Laki-laki", "Perempuan"])
absen = st.number_input("Jumlah Absen", min_value=0, max_value=50, value=5)
n_sikap = st.selectbox("Nilai Sikap", ["A", "B", "C"])
ekstra = st.selectbox("Ekstrakurikuler", ["Ya", "Tidak"])

2025-02-12 04:13:49.876 Session state does not function when running a script without `streamlit run`


In [26]:
# Konversi input menjadi dataframe
data = pd.DataFrame([[gender, absen, n_sikap, ekstra]],
                    columns=["gender", "absen", "n_sikap", "ekstra"])

In [27]:
# Tombol Prediksi
if st.button("Prediksi"):
    pred_dt = dt_model.predict(data)[0]
    pred_rf = rf_model.predict(data)[0]
    st.write(f"**Prediksi Decision Tree:** {pred_dt}")
    st.write(f"**Prediksi Random Forest:** {pred_rf}")



**Menjalankan Aplikasi**

---
Setelah menyimpan kode di app.py, jalankan aplikasi dengan perintah berikut di terminal



In [32]:
#streamlit run app.py
!pip install streamlit ngrok

Collecting ngrok
  Downloading ngrok-1.4.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Downloading ngrok-1.4.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m28.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ngrok
Successfully installed ngrok-1.4.0


In [35]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Downloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.3


In [33]:
%%writefile app.py
import streamlit as st
import pandas as pd
import joblib

# Load model
dt_model = joblib.load("decision_tree_model.pkl")
rf_model = joblib.load("random_forest_model.pkl")

# Judul aplikasi
st.title("Prediksi Hasil Belajar Siswa")

# Input data pengguna
gender = st.selectbox("Gender", ["Laki-laki", "Perempuan"])
absen = st.number_input("Jumlah Absen", min_value=0, max_value=50, value=5)
n_sikap = st.selectbox("Nilai Sikap", ["A", "B", "C"])
ekstra = st.selectbox("Ekstrakurikuler", ["Ya", "Tidak"])

# Konversi input menjadi dataframe
data = pd.DataFrame([[gender, absen, n_sikap, ekstra]],
                    columns=["gender", "absen", "n_sikap", "ekstra"])

# Tombol Prediksi
if st.button("Prediksi"):
    pred_dt = dt_model.predict(data)[0]
    pred_rf = rf_model.predict(data)[0]
    st.write(f"**Prediksi Decision Tree:** {pred_dt}")
    st.write(f"**Prediksi Random Forest:** {pred_rf}")

Writing app.py


In [None]:
from pyngrok import ngrok

# Jalankan Streamlit
!streamlit run app.py &

# Buka akses publik dengan ngrok
url = ngrok.connect(port="8501")
print(f"Aplikasi berjalan di: {url}")


Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.148.113.202:8501[0m
[0m
