In [None]:
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB

In [None]:
# Fully categorical training set, one row per location.
# Columns, in order: land price (index 0), distance (index 1),
# public transport (index 2) and the class label 'Ya'/'Tidak' in the last position.
dataC = np.array([
    ['Murah', 'Dekat', 'Tidak', 'Ya'],
    ['Sedang', 'Dekat', 'Tidak', 'Ya'],
    ['Mahal', 'Dekat', 'Tidak', 'Ya'],
    ['Mahal', 'Jauh', 'Tidak', 'Tidak'],
    ['Mahal', 'Sedang', 'Tidak', 'Tidak'],
    ['Sedang', 'Jauh', 'Ada', 'Tidak'],
    ['Murah', 'Jauh', 'Ada', 'Tidak'],
    ['Murah', 'Sedang', 'Tidak', 'Ya'],
    ['Mahal', 'Jauh', 'Ada', 'Tidak'],
    ['Sedang', 'Sedang', 'Ada', 'Ya']
])

In [None]:
def calculate_prior(dataC):
  """Return the class priors ``(P('Ya'), P('Tidak'))`` of a labelled dataset.

  The label is read from the last element of each row. Every row whose label
  is not 'Ya' is counted towards 'Tidak'.
  """
  n_rows = len(dataC)
  n_ya = 0
  for row in dataC:
    if row[-1] == 'Ya':
      n_ya += 1
  p_ya = n_ya / n_rows
  p_tidak = (n_rows - n_ya) / n_rows
  return p_ya, p_tidak

In [None]:
def calculate_categorical_prob(dataC, feature_index, value, target_value):
  """Estimate P(feature == value | class == target_value) by relative frequency.

  Rows are selected by comparing their last element with ``target_value``.
  Among those rows, the share whose ``feature_index`` column equals ``value``
  is returned. The result is 0 when no row carries ``target_value``.
  """
  class_rows = 0
  matches = 0
  for row in dataC:
    if row[-1] != target_value:
      continue
    class_rows += 1
    if row[feature_index] == value:
      matches += 1
  if class_rows == 0:
    return 0
  return matches / class_rows

In [None]:
def naive_bayes_predict(dataC, harga, jarak, angkutan):
  """Classify one observation with a categorical Naive Bayes model.

  ``harga``, ``jarak`` and ``angkutan`` are matched against columns 0, 1 and 2
  of ``dataC``. For each class the prior is multiplied by the three
  per-feature conditional probabilities.

  Returns a tuple ``(label, p_ya, p_tidak)``. ``label`` is 'Ya' only when the
  'Ya' score is strictly greater than the 'Tidak' score. The two scores are
  unnormalised products, not probabilities that sum to 1.

  NOTE: a later cell of this notebook defines another function with this
  same name; once that cell has run, this definition is replaced.
  """
  observed = (harga, jarak, angkutan)
  priors = dict(zip(('Ya', 'Tidak'), calculate_prior(dataC)))

  scores = {}
  for label, score in priors.items():
    # Multiply in column order (price, distance, transport) starting from the prior.
    for column, value in enumerate(observed):
      score = score * calculate_categorical_prob(dataC, column, value, label)
    scores[label] = score

  p_ya, p_tidak = scores['Ya'], scores['Tidak']
  if p_ya > p_tidak:
    winner = 'Ya'
  else:
    winner = 'Tidak'
  return winner, p_ya, p_tidak

In [None]:
#Predict for price "Mahal" (expensive), distance "Sedang" (medium) and public transport "Ada" (available)
prediction = naive_bayes_predict(dataC, 'Mahal', 'Sedang', 'Ada')
prediction

#By maximum a posteriori: since 0.036 > 0.008, a location with expensive land,
#medium distance and available public transport is not developed for housing

('Tidak', 0.008000000000000002, 0.036)

In [None]:
#Predict for price "Murah" (cheap), distance "Sedang" (medium) and public transport "Tidak" (none)
prediction = naive_bayes_predict(dataC, 'Murah', 'Sedang', 'Tidak')
prediction

#By maximum a posteriori: since 0.064 > 0.008, a location with cheap land,
#medium distance and no public transport is developed for housing

('Ya', 0.06400000000000002, 0.008000000000000002)

## D.2 Dataset Categorical dan Numeric

In [None]:
#Import library
import numpy as np
from math import sqrt, exp, pi

In [None]:
#Mixed dataset: categorical land price (index 0), numeric distance (index 1),
#categorical public transport (index 2) and the class label 'Ya'/'Tidak' last
data = [
    ['Murah', 4, 'Tidak', 'Ya'],
    ['Sedang', 9, 'Tidak', 'Ya'],
    ['Mahal',  3, 'Tidak', 'Ya'],
    ['Mahal', 20, 'Tidak', 'Tidak'],
    ['Mahal', 12, 'Tidak', 'Tidak'],
    ['Sedang', 10, 'Ada', 'Tidak'],
    ['Murah', 19, 'Ada', 'Tidak'],
    ['Murah', 7, 'Tidak', 'Ya'],
    ['Mahal', 8, 'Ada', 'Tidak'],
    ['Sedang', 2, 'Ada', 'Ya']
]

In [None]:
#Gaussian probability density for a numeric feature
def calculate_gaussian_prob(x, mean, std, min_std=1e-9):
  """Evaluate the normal density N(mean, std**2) at ``x``.

  Args:
    x: Observed feature value.
    mean: Mean of the feature within the class.
    std: Standard deviation of the feature within the class (must be >= 0).
    min_std: Lower bound applied to ``std`` before the density is evaluated.
      Values of ``std`` at or above this bound are used unchanged.

  Returns:
    The density value at ``x``.

  Raises:
    ValueError: If ``std`` is negative.
  """
  if std < 0:
    raise ValueError(f"std must be non-negative, got {std!r}")
  # A class with a single sample (or a constant feature) has std == 0, which
  # would divide by zero: ZeroDivisionError for plain floats, a silent nan for
  # NumPy floats. Flooring the spread keeps the density finite.
  std = max(std, min_std)
  exponent = exp(-((x - mean) ** 2 / (2 * std ** 2)))
  return (1 / (sqrt(2 * pi) * std)) * exponent

#Per-class mean and standard deviation of a numeric feature
def calculated_numerical_stats(data, feature_index, target_value):
  """Return ``(mean, std)`` of one column over the rows of a given class.

  A row belongs to the class when its last element equals ``target_value``.
  Both statistics come from NumPy (``np.mean`` / ``np.std`` with their
  default arguments).
  """
  values = []
  for row in data:
    if row[-1] == target_value:
      values.append(row[feature_index])
  return np.mean(values), np.std(values)

In [None]:
#Naive Bayes classification with a numeric or categorical distance feature
def naive_bayes_predict(data, harga, jarak, angkutan):
  """Classify one observation with Naive Bayes.

  ``harga`` (column 0) and ``angkutan`` (column 2) are treated as categorical.
  ``jarak`` (column 1) is scored with a per-class Gaussian density when it is
  a number, and with a categorical relative frequency when it is a string.

  This definition replaces the categorical-only function of the same name
  from an earlier cell. Routing string distances to the categorical estimate
  keeps those earlier calls working when they are re-run after this cell.

  Returns a tuple ``(label, p_ya, p_tidak)``. ``label`` is 'Ya' only when the
  'Ya' score is strictly greater than the 'Tidak' score. The scores are
  unnormalised products of the prior and the per-feature likelihoods.
  """
  prior_ya, prior_tidak = calculate_prior(data)

  def jarak_likelihood(label):
    # A string distance is a category label; a number is modelled as Gaussian.
    if isinstance(jarak, str):
      return calculate_categorical_prob(data, 1, jarak, label)
    mean, std = calculated_numerical_stats(data, 1, label)
    return calculate_gaussian_prob(jarak, mean, std)

  #Conditional probabilities for 'Ya'
  p_harga_ya = calculate_categorical_prob(data, 0, harga, 'Ya')
  p_jarak_ya = jarak_likelihood('Ya')
  p_angkutan_ya = calculate_categorical_prob(data, 2, angkutan, 'Ya')

  #Conditional probabilities for 'Tidak'
  p_harga_tidak = calculate_categorical_prob(data, 0, harga, 'Tidak')
  p_jarak_tidak = jarak_likelihood('Tidak')
  p_angkutan_tidak = calculate_categorical_prob(data, 2, angkutan, 'Tidak')

  #Unnormalised posterior scores for 'Ya' and 'Tidak'
  p_ya = prior_ya * p_harga_ya * p_jarak_ya * p_angkutan_ya
  p_tidak = prior_tidak * p_harga_tidak * p_jarak_tidak * p_angkutan_tidak

  return 'Ya' if p_ya > p_tidak else 'Tidak', p_ya, p_tidak

In [None]:
#Predict for price "Murah" (cheap), distance 5 and public transport "Tidak" (none)
prediction = naive_bayes_predict(data, 'Murah', 5, 'Tidak')
prediction

('Ya', 0.02447798093110635, 0.0006293259804336087)

## D.3 Dataset Numerik dengan scikit-learn

In [None]:
#import library
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

In [None]:
#Read the dataset and preview its first rows
# NOTE(review): the path is absolute ('/content/...' looks like a Colab
# working directory — confirm); the cell only runs where the CSV exists there.
df_net = pd.read_csv('/content/new_social_network_ads.csv')
df_net.head()

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0


In [None]:
#Split the frame into independent / dependent variables
X = df_net.iloc[:, :-1].values #features: every column except the last
y = df_net.iloc[:, -1].values #target: the last column

In [None]:
#Train / test sets: hold out 25% of the rows for testing
# random_state takes an integer seed, not an on/off flag. True is the integer 1
# in Python, so the explicit seed 1 is intended to reproduce the same split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1)

In [None]:
#Standardise the features
# NOTE: X_train / X_test are overwritten with their scaled versions, so
# re-running this cell without re-running the split cell first would fit the
# scaler on data that has already been scaled.
sc = StandardScaler()
X_train = sc.fit_transform(X_train) #fit the scaler on the training split and scale it
X_test = sc.transform(X_test) #scale the test split with the already-fitted scaler

In [None]:
#Fit a Gaussian Naive Bayes classifier on the training split
classifier = GaussianNB()
classifier.fit(X_train, y_train)

In [None]:
#Predict the test split and print the predictions next to the true labels
y_pred = classifier.predict(X_test)
# One row per test sample: [predicted, actual]
print(np.column_stack((y_pred, y_test)))

[[0 0]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [0 1]
 [0 0]
 [1 1]
 [1 0]
 [1 1]
 [1 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 0]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 1]
 [0 1]
 [0 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [0 1]
 [1 1]
 [0 0]
 [0 0]
 [1 0]
 [0 0]
 [1 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 0]
 [0 1]
 [1 1]
 [0 0]
 [0 0]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [0 0]
 [1 1]
 [1 1]
 [1 1]
 [0 0]
 [0 0]
 [1 1]
 [1 1]
 [0 1]
 [0 0]
 [0 0]]


In [None]:
#Accuracy of the predictions against the true test labels
accuracy_score(y_test, y_pred)

0.86

In [None]:
#Classification report for the test split (label typo "Clssification" fixed)
print(f'Classification report : \n{classification_report(y_test, y_pred)}')

Clssification report : 
              precision    recall  f1-score   support

           0       0.88      0.88      0.88        58
           1       0.83      0.83      0.83        42

    accuracy                           0.86       100
   macro avg       0.86      0.86      0.86       100
weighted avg       0.86      0.86      0.86       100



In [None]:
#Classify one new sample
# The sample is passed through the fitted scaler before prediction.
# NOTE(review): presumably [Age, EstimatedSalary], matching the column order
# shown by df_net.head() — confirm.
print(classifier.predict(sc.transform([[50, 87000]])))

[1]
