<a href="https://colab.research.google.com/github/adrianadhari/UTS-Machine-Learning/blob/main/MachineLearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

In [None]:
# Load the mobile-price CSV and preview its first five rows.
# NOTE(review): the path sits under /content/drive/MyDrive, presumably a mounted
# Google Drive in Colab; no cell in view mounts it — confirm before running.
csv_path = "/content/drive/MyDrive/Machine Learning/mobileprice_modified.csv"
data = pd.read_csv(csv_path)
print(data.head(n=5))

   battery_power  blue  clock_speed  dual_sim  fc  four_g  int_memory  m_dep  \
0          842.0     0          2.2         0   1       0         7.0    0.6   
1         1021.0     1          0.5         1   0       1        53.0    0.7   
2          563.0     1          0.5         1   2       1        41.0    0.9   
3          615.0     1          2.5         0   0       0        10.0    0.8   
4         1821.0     1          1.2         0  13       1        44.0    0.6   

   mobile_wt  n_cores  ...  px_height  px_width     ram  sc_h  sc_w  \
0      188.0        2  ...         20       756     NaN     9     7   
1      136.0        3  ...        905      1988  2631.0    17     3   
2      145.0        5  ...       1263      1716  2603.0    11     2   
3      131.0        6  ...       1216      1786  2769.0    16     8   
4      141.0        2  ...       1208      1212  1411.0     8     2   

   talk_time  three_g  touch_screen  wifi  price_range  
0         19        0             0

In [None]:
# 1. Identify attribute names and types
def identify_attributes(df, max_categories=10):
    """Label every column of ``df`` as categorical or numerical by cardinality.

    A column is treated as categorical when it has at most ``max_categories``
    distinct non-missing values, and as numerical otherwise. The decision looks
    only at the number of distinct values, never at the dtype, so a float
    column with few distinct values is still reported as categorical.

    Args:
        df: pandas DataFrame whose columns are inspected.
        max_categories: largest distinct-value count that still counts as
            categorical. Defaults to 10.

    Returns:
        dict mapping each column name to a dict with a ``"type"`` key plus
        either ``"unique_values"`` (list of the distinct non-missing values,
        for categorical columns) or ``"unique_values_count"`` (int, for
        numerical columns).
    """
    attribute_info = {}
    for column in df.columns:
        series = df[column]
        # nunique() skips missing values by default.
        unique_count = series.nunique()
        if unique_count <= max_categories:
            attribute_info[column] = {
                "type": "categorical",
                # Drop missing values so the list agrees with the count used
                # for the decision above (unique() alone would include NaN).
                "unique_values": series.dropna().unique().tolist(),
            }
        else:
            attribute_info[column] = {
                "type": "numerical",
                "unique_values_count": unique_count,
            }
    return attribute_info

# Classify every column of the loaded frame, then print one "name: details"
# line per column under a header.
attribute_info = identify_attributes(data)
print("Attribute Information:")
for attr, info in attribute_info.items():
    print(attr, info, sep=": ")

Attribute Information:
battery_power: {'type': 'numerical', 'unique_values_count': 1093}
blue: {'type': 'categorical', 'unique_values': [0, 1]}
clock_speed: {'type': 'numerical', 'unique_values_count': 26}
dual_sim: {'type': 'categorical', 'unique_values': [0, 1]}
fc: {'type': 'numerical', 'unique_values_count': 20}
four_g: {'type': 'categorical', 'unique_values': [0, 1]}
int_memory: {'type': 'numerical', 'unique_values_count': 63}
m_dep: {'type': 'categorical', 'unique_values': [0.6, 0.7, 0.9, 0.8, 0.1, 0.5, 1.0, 0.3, 0.4, 0.2]}
mobile_wt: {'type': 'numerical', 'unique_values_count': 121}
n_cores: {'type': 'categorical', 'unique_values': [2, 3, 5, 6, 1, 8, 4, 7]}
pc: {'type': 'numerical', 'unique_values_count': 21}
px_height: {'type': 'numerical', 'unique_values_count': 1137}
px_width: {'type': 'numerical', 'unique_values_count': 1109}
ram: {'type': 'numerical', 'unique_values_count': 1558}
sc_h: {'type': 'numerical', 'unique_values_count': 15}
sc_w: {'type': 'numerical', 'unique_valu

In [None]:
# 2. Data preprocessing
# a. Split the frame into the label column and the predictor columns
y = data['price_range']
X = data.drop(columns=['price_range'])

# b. Fill missing values with each column's mean
imputer = SimpleImputer(strategy='mean')
X_imputed = imputer.fit(X).transform(X)

# c. Standardize the imputed features
scaler = StandardScaler()
X_scaled = scaler.fit(X_imputed).transform(X_imputed)

# d. Descriptive statistics before and after preprocessing
print("\nStatistik Deskriptif Sebelum Praproses:", X.describe(), sep="\n")
print(
    "\nStatistik Deskriptif Setelah Pengisian Missing Values dan Standarisasi:",
    pd.DataFrame(X_scaled, columns=X.columns).describe(),
    sep="\n",
)


Statistik Deskriptif Sebelum Praproses:
       battery_power       blue  clock_speed     dual_sim           fc  \
count    1990.000000  2000.0000  2000.000000  2000.000000  2000.000000   
mean     1237.867839     0.4950     1.522250     0.509500     4.309500   
std       439.676025     0.5001     0.816004     0.500035     4.341444   
min       501.000000     0.0000     0.500000     0.000000     0.000000   
25%       850.250000     0.0000     0.700000     0.000000     1.000000   
50%      1225.000000     0.0000     1.500000     1.000000     3.000000   
75%      1615.000000     1.0000     2.200000     1.000000     7.000000   
max      1998.000000     1.0000     3.000000     1.000000    19.000000   

            four_g   int_memory        m_dep    mobile_wt      n_cores  \
count  2000.000000  1990.000000  2000.000000  1990.000000  2000.000000   
mean      0.521500    31.987940     0.501750   140.344221     4.520500   
std       0.499662    18.136427     0.288416    35.407114     2.287837

In [None]:
# 3. Build the classification model
# a. Hold out 15% of the rows (same test_size and random_state as before), but
#    split the RAW predictors so the preprocessing can be fitted on the
#    training rows only. Fitting the imputer/scaler on all rows first would
#    let test-row statistics leak into training and bias the evaluation.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.15, random_state=42
)

# Imputation means and scaling statistics are learned from the training rows
# and then only applied to the held-out rows.
split_imputer = SimpleImputer(strategy='mean')
split_scaler = StandardScaler()
X_train = split_scaler.fit_transform(split_imputer.fit_transform(X_train_raw))
X_test = split_scaler.transform(split_imputer.transform(X_test_raw))

# Decision tree classifier with a fixed seed for repeatable results
classifier = DecisionTreeClassifier(random_state=42)
classifier.fit(X_train, y_train)

# b. Evaluate the model on the held-out rows
y_pred = classifier.predict(X_test)
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print("\nConfusion Matrix:")
print(conf_matrix)
print(f"Akurasi Model Klasifikasi: {accuracy:.2f}")



Confusion Matrix:
[[69 11  0  0]
 [ 7 61  4  0]
 [ 0  7 49 12]
 [ 0  0  7 73]]
Akurasi Model Klasifikasi: 0.84


In [None]:
# 4. Build the clustering model
# a. Cluster the standardized features with K-Means into 4 clusters.
#    n_init is set explicitly because scikit-learn's default changed from 10
#    to 'auto' in version 1.4; pinning it keeps the result (and the score
#    below) reproducible across library versions and avoids the FutureWarning
#    emitted by the transitional releases.
kmeans = KMeans(n_clusters=4, n_init=10, random_state=42)
kmeans.fit(X_scaled)

# b. Compute the mean silhouette score over all samples
silhouette_avg = silhouette_score(X_scaled, kmeans.labels_)
print(f"\nSilhouette Score dari Model Clustering: {silhouette_avg:.2f}")


Silhouette Score dari Model Clustering: 0.06
