**Implementasi Data Prediksi Cuaca dengan Python**

In [1]:
from sklearn import preprocessing
import numpy as np

# generating the GaussianNB model
from sklearn.naive_bayes import GaussianNB

# Toy weather dataset: 14 observations stored column-wise.
# Index i across the five lists below describes the same observation.

# Outlook
weather = [
    'Rainy', 'Rainy', 'Overcast', 'Sunny', 'Sunny', 'Sunny', 'Overcast',
    'Rainy', 'Rainy', 'Sunny', 'Rainy', 'Overcast', 'Overcast', 'Sunny',
]

# Temperature
temp = [
    'Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool',
    'Mild', 'Cool', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild',
]

# Humidity
humidity = [
    'High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal',
    'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'High',
]

# Windy flag ('t' / 'f')
windy = [
    'f', 't', 'f', 'f', 'f', 't', 't',
    'f', 'f', 'f', 't', 't', 'f', 't',
]

# Target class (Yes / No) for each observation
labelClass = [
    'No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes',
    'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No',
]

In [2]:
# One LabelEncoder per column. A single shared encoder is refit by every
# fit_transform() call, so it would only remember the classes of the last
# column it saw and the feature codes could not be decoded afterwards.
weather_le = preprocessing.LabelEncoder()
temp_le = preprocessing.LabelEncoder()
hum_le = preprocessing.LabelEncoder()
wind_le = preprocessing.LabelEncoder()
# Encoder for the target; the name `le` is kept so it still ends up fitted on
# labelClass, exactly as before.
le = preprocessing.LabelEncoder()

# Convert the string categories of each column into integer codes.
weather_encoded = weather_le.fit_transform(weather)
hum_encoded = hum_le.fit_transform(humidity)
temp_encoded = temp_le.fit_transform(temp)
wind_encoded = wind_le.fit_transform(windy)
label = le.fit_transform(labelClass)
print(weather_encoded, temp_encoded, hum_encoded, wind_encoded, label)

[1 1 0 2 2 2 0 1 1 2 1 0 0 2] [1 1 1 2 0 0 0 2 0 2 2 2 1 2] [0 0 0 0 1 1 1 0 1 1 1 0 1 0] [0 1 0 0 0 1 1 0 0 0 1 1 0 1] [0 0 1 1 1 0 1 0 1 1 1 1 1 0]


In [3]:
# combine the four encoded columns (weather, temp, humidity, windy) row-wise,
# giving one 4-tuple of feature codes per observation
features = list(zip(weather_encoded, temp_encoded, hum_encoded, wind_encoded))
print(features)

[(1, 1, 0, 0), (1, 1, 0, 1), (0, 1, 0, 0), (2, 2, 0, 0), (2, 0, 1, 0), (2, 0, 1, 1), (0, 0, 1, 1), (1, 2, 0, 0), (1, 0, 1, 0), (2, 2, 1, 0), (1, 2, 1, 1), (0, 2, 0, 1), (0, 1, 1, 0), (2, 2, 0, 1)]


In [4]:
# create a Gaussian Naive Bayes classifier
# NOTE(review): GaussianNB models every feature as a continuous Gaussian, while
# these features are integer codes of categories; sklearn's CategoricalNB may be
# a better fit -- confirm before switching, since predictions could differ.
model = GaussianNB()
model.fit(features, label) # train on all encoded observations

# test sample : Sunny, Hot, Normal, f (not windy)
X_tes = [[2,1,1,0]]

# predict output
# Encodings used above:
#   weather  : 0 = Overcast, 1 = Rainy, 2 = Sunny
#   temp     : 0 = Cool, 1 = Hot, 2 = Mild
#   humidity : 0 = High, 1 = Normal
#   windy    : 0 = f, 1 = t
predicted = model.predict(X_tes)
print(" ")
print("today : ")
print(predicted) # [1] is the encoded label 'Yes', [0] is the encoded label 'No'

 
today : 
[1]


# **Implementasi data Riil dengan Klasifikasi Naive Bayes**

In [8]:
! gdown --id 1WGj259K6yNj8Ww39B0ZzUx9A4tj43ziT

Downloading...
From: https://drive.google.com/uc?id=1WGj259K6yNj8Ww39B0ZzUx9A4tj43ziT
To: /content/Social_Network_Ads.csv
100% 4.90k/4.90k [00:00<00:00, 6.66MB/s]


In [9]:
import numpy as np
import pandas as pd

# Read the downloaded CSV into a DataFrame.
df = pd.read_csv('/content/Social_Network_Ads.csv')

# Preview the first rows.
display(df.head())

# Features are every column except the last; the last column is the label.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

Unnamed: 0,Age,EstimatedSalary,Purchased
0,19,19000,0
1,35,20000,0
2,26,43000,0
3,27,57000,0
4,19,76000,0


In [10]:
# split into training and test sets: 30% of the rows are held out for testing,
# and random_state pins the shuffle so the split is reproducible

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=30)

In [11]:
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Create a Gaussian Naive Bayes classifier and train it on the raw (unscaled)
# training features. fit() returns the estimator itself, so both steps chain.
gnb = GaussianNB().fit(X_train, y_train)

# Predict on both splits ...
y_train_pred = gnb.predict(X_train)
y_test_pred = gnb.predict(X_test)

# ... then score each set of predictions against its true labels.
acc_train = accuracy_score(y_train, y_train_pred)
acc_test = accuracy_score(y_test, y_test_pred)

print(f'Hasil akurasi data training : {acc_train}')
print(f'Hasil akurasi data testing : {acc_test}')

Hasil akurasi data training : 0.8964285714285715
Hasil akurasi data testing : 0.8833333333333333


In [12]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, so nothing from
# the test split influences the standardization.
scaler = StandardScaler().fit(X_train)

# Standardize both splits with the statistics learned from the training split.
X_train_std = scaler.transform(X_train)
X_test_std = scaler.transform(X_test)

In [13]:
# Same Gaussian Naive Bayes setup as before, this time trained on the
# standardized training features.
gnb_std = GaussianNB().fit(X_train_std, y_train)

# Predictions for the standardized training and test splits
y_train_std_pred = gnb_std.predict(X_train_std)
y_test_std_pred = gnb_std.predict(X_test_std)

# Accuracy of each set of predictions against its true labels
acc_train_std = accuracy_score(y_train, y_train_std_pred)
acc_test_std = accuracy_score(y_test, y_test_std_pred)

print(f'Hasil akurasi data training : {acc_train_std}')
print(f'Hasil akurasi data testing : {acc_test_std}')

Hasil akurasi data training : 0.9
Hasil akurasi data testing : 0.8833333333333333


# **Naive Bayes dengan Data Multinomial**

In [14]:
! gdown --id 1vYd2wZ8ZHTN8ZK45D7Yj3xuh5eCMu_xx

Downloading...
From: https://drive.google.com/uc?id=1vYd2wZ8ZHTN8ZK45D7Yj3xuh5eCMu_xx
To: /content/spam.csv
100% 504k/504k [00:00<00:00, 123MB/s]


In [15]:
import numpy as np
import pandas as pd

# load the spam CSV into a DataFrame; the file is decoded as latin-1
df = pd.read_csv("/content/spam.csv", encoding='latin-1')

# preview the first rows
df.head()

Unnamed: 0,v1,v2,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,ham,"Go until jurong point, crazy.. Available only ...",,,
1,ham,Ok lar... Joking wif u oni...,,,
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...,,,
3,ham,U dun say so early hor... U c already then say...,,,
4,ham,"Nah I don't think he goes to usf, he lives aro...",,,


In [16]:
# Keep only the first two columns (v1 and v2) by dropping every column from
# position 2 onward; in the preview those trailing 'Unnamed' columns are empty.
# The labels to drop are passed explicitly via `columns=`. Handing drop() a
# DataFrame slice only works because iterating a DataFrame yields its column names.
df = df.drop(columns=df.columns[2:])

# Preview the result
df.head()

Unnamed: 0,v1,v2
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [17]:
# Mapping from the raw column names to descriptive ones.
new_cols = {'v1': 'Labels', 'v2': 'SMS'}

# rename() returns a new frame with the columns relabelled.
df = df.rename(columns=new_cols)

# Preview the result.
df.head()

Unnamed: 0,Labels,SMS
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."


In [18]:
# Class balance: number of rows per label
print(df['Labels'].value_counts())
print('\n')

# Completeness check. DataFrame.info() writes its report to stdout itself and
# returns None, so it is called directly; wrapping it in print() appends a
# stray "None" line to the output.
df.info()
print('\n')

# Descriptive statistics
print(df.describe())

ham     4825
spam     747
Name: Labels, dtype: int64


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5572 entries, 0 to 5571
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Labels  5572 non-null   object
 1   SMS     5572 non-null   object
dtypes: object(2)
memory usage: 87.2+ KB
None


       Labels                     SMS
count    5572                    5572
unique      2                    5169
top       ham  Sorry, I'll call later
freq     4825                      30


In [19]:
# Label encoding: spam -> 1, ham -> 0
new_labels = {
    'spam': 1,
    'ham': 0
}

# Encode the labels. Values that are not keys of new_labels pass through
# unchanged, so re-running this cell on already-encoded labels is a no-op;
# a plain Series.map(new_labels) would turn the existing 0/1 values into NaN.
df['Labels'] = df['Labels'].map(lambda value: new_labels.get(value, value))

# Preview the result
df.head()

Unnamed: 0,Labels,SMS
0,0,"Go until jurong point, crazy.. Available only ..."
1,0,Ok lar... Joking wif u oni...
2,1,Free entry in 2 a wkly comp to win FA Cup fina...
3,0,U dun say so early hor... U c already then say...
4,0,"Nah I don't think he goes to usf, he lives aro..."


In [21]:
# Message texts are the model input; the encoded labels are the target.
X, y = df['SMS'].values, df['Labels'].values

In [22]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer

# split into training and test sets (20% held out, fixed random_state)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=50)

# bag-of-words vectorizer with default settings
# NOTE: no stop_words argument is passed, so no stop-word filtering is configured
bow = CountVectorizer()

# fit the vectorizer on the training texts and transform them
X_train = bow.fit_transform(X_train)

# transform the test texts with the vectorizer fitted on the training texts
X_test = bow.transform(X_test)


In [23]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score

# Create a multinomial Naive Bayes classifier and train it on the vectorized
# training messages. fit() returns the estimator itself, so both steps chain.
mnb = MultinomialNB().fit(X_train, y_train)

# Predictions for both splits
y_pred_train = mnb.predict(X_train)
y_pred_test = mnb.predict(X_test)

# Accuracy of each set of predictions against its true labels
acc_train = accuracy_score(y_train, y_pred_train)
acc_test = accuracy_score(y_test, y_pred_test)

# Report both accuracies
print(f'Hasil akurasi data train: {acc_train}')
print(f'Hasil akurasi data test: {acc_test}')


Hasil akurasi data train: 0.9946152120260264
Hasil akurasi data test: 0.9775784753363229
