# SKS

In [101]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier


In [102]:
url = "https://drive.google.com/uc?export=download&id=1bAu31pKhF-kAYq2ncCveSloH-NLg83aU"
df = pd.read_csv(url)
df

Unnamed: 0,NIM,Angkatan,Kode MK,SKS,Nama Mata Kuliah,Periode Semester,Nilai Huruf
0,2.054297e+08,20051,KP8700,2.0,Kerja Praktek,20122,
1,2.054297e+08,20051,KM8800,1.0,Komprehensif,20122,
2,2.054297e+08,20051,TA8800,4.0,Tugas AkhirSkripsi,20122,
3,2.067001e+08,20061,705101,2.0,AlQuran Dan Ilmu Tafsir,20061,A
4,2.067001e+08,20061,705104,2.0,Bahasa Arab I,20061,A
...,...,...,...,...,...,...,...
192031,1.237050e+09,20231,201004-705-02-013,2.0,Fisika Dasar,20231,
192032,1.237050e+09,20231,201004-705-02-014,1.0,Praktikum Fisika Dasar,20231,
192033,1.237050e+09,20231,201004-705-03-001,2.0,Olah Raga,20231,
192034,1.237050e+09,20231,201004-705-03-011,,Praktek Tilawah,20231,


In [103]:
df = df.dropna()

label_encoder = LabelEncoder()

In [104]:
unique_sks_classification = df.drop_duplicates(subset=["Kode MK", "Nama Mata Kuliah"])[
    ["Kode MK", "Nama Mata Kuliah", "SKS"]
].reset_index(drop=True)

print(unique_sks_classification)

               Kode MK             Nama Mata Kuliah  SKS
0               705101      AlQuran Dan Ilmu Tafsir  2.0
1               705104                Bahasa Arab I  2.0
2               705102             Bahasa Indonesia  2.0
3               705103               Bahasa Inggris  2.0
4               705106                 Fisika Dasar  3.0
..                 ...                          ...  ...
335  201004-705-02-010                  Bahasa Arab  2.0
336  201004-705-02-011             Bahasa Inggris I  2.0
337  201004-705-02-014       Praktikum Fisika Dasar  1.0
338  201004-705-01-003  Praktikum Dasar Pemrograman  1.0
339  201004-705-02-012            Bahasa Inggris II  2.0

[340 rows x 3 columns]


In [105]:
sks_classification = (
    unique_sks_classification.groupby("SKS")
    .size()
    .reset_index(name="Jumlah Mata Kuliah")
)
print("Jumlah dataset SKS")
print(sks_classification)

Jumlah dataset SKS
   SKS  Jumlah Mata Kuliah
0  0.0                  13
1  1.0                  52
2  2.0                 146
3  3.0                 124
4  4.0                   4
5  6.0                   1


In [106]:
import os

output_folder_sks = "output/sks"
os.makedirs(output_folder_sks, exist_ok=True)

sks_classification_output_path = os.path.join(
    output_folder_sks, "output_sks_classification.csv"
)
unique_sks_classification.to_csv(sks_classification_output_path, index=False)

## Decision Tree

In [107]:

df["Nama Mata Kuliah"] = label_encoder.fit_transform(df["Nama Mata Kuliah"])

features_dt = df[["Periode Semester", "Nama Mata Kuliah", "SKS"]]
target_dt = df["SKS"]

X_train_dt, X_test_dt, y_train_dt, y_test_dt = train_test_split(
    features_dt, target_dt, test_size=0.2, random_state=42
)

model_dt = DecisionTreeClassifier()
model_dt.fit(X_train_dt, y_train_dt)

predictions_dt = model_dt.predict(X_test_dt)

accuracy_dt = accuracy_score(y_test_dt, predictions_dt)
classification_report_dt = classification_report(y_test_dt, predictions_dt)

print("\nResults for Decision Tree")
print(f"Accuracy: {accuracy_dt}")
print("Classification Report:\n", classification_report_dt)


Results for Decision Tree
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       594
         1.0       1.00      1.00      1.00      6115
         2.0       1.00      1.00      1.00     16196
         3.0       1.00      1.00      1.00      9807
         4.0       1.00      1.00      1.00       348
         6.0       1.00      1.00      1.00        32

    accuracy                           1.00     33092
   macro avg       1.00      1.00      1.00     33092
weighted avg       1.00      1.00      1.00     33092



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Nama Mata Kuliah"] = label_encoder.fit_transform(df["Nama Mata Kuliah"])


## K-Nearest Neighbors (KNN)

In [108]:

df["Nama Mata Kuliah"] = label_encoder.fit_transform(df["Nama Mata Kuliah"])

features_knn = df[["Periode Semester", "Nama Mata Kuliah", "SKS"]]
target_knn = df["SKS"]

X_train_knn, X_test_knn, y_train_knn, y_test_knn = train_test_split(
    features_knn, target_knn, test_size=0.2, random_state=42
)

model_knn = KNeighborsClassifier()
model_knn.fit(X_train_knn, y_train_knn)

predictions_knn = model_knn.predict(X_test_knn)

accuracy_knn = accuracy_score(y_test_knn, predictions_knn)
classification_report_knn = classification_report(y_test_knn, predictions_knn)

print("\nResults for K-Nearest Neighbors (KNN):")
print(f"Accuracy: {accuracy_knn}")
print("Classification Report:\n", classification_report_knn)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Nama Mata Kuliah"] = label_encoder.fit_transform(df["Nama Mata Kuliah"])



Results for K-Nearest Neighbors (KNN):
Accuracy: 0.9998791248640154
Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       594
         1.0       1.00      1.00      1.00      6115
         2.0       1.00      1.00      1.00     16196
         3.0       1.00      1.00      1.00      9807
         4.0       1.00      1.00      1.00       348
         6.0       1.00      1.00      1.00        32

    accuracy                           1.00     33092
   macro avg       1.00      1.00      1.00     33092
weighted avg       1.00      1.00      1.00     33092

