In [1]:
# Import required modules
# pandas is a module to work with dataframes
import pandas as pd

In [2]:
# csv_file_path is the path of the CSV file that holds the labelled reviews.
# This notebook only reads the file (see the pd.read_csv call below), so it must
# already exist — presumably produced by an earlier scraping/labelling step (confirm)
#
# Replace the value of the variable with the path where your CSV file is stored
csv_file_path: str = "iphone_15_pro_reviews.csv"

In [3]:
# Read the CSV file and store the data in a dataframe.
# The first column of the CSV has no header (it only holds the row numbers), so
# it is used as the dataframe index instead of being loaded as a junk
# "Unnamed: 0" data column
df = pd.read_csv(csv_file_path, index_col=0)

# Display the dataframe (the last expression of a cell is rendered as its output)
df

Unnamed: 0,Review,Label
0,barang ori cepat proses,neutral
1,packing aman unit kondisi,neutral
2,bagus,neutral
3,jual responsif produk terima sesuai pesan reco...,positive
4,barang original bagus cepat rekomendasi,positive
5,packingnya aman barang segel dapet diskon juta...,neutral
6,barang mulus darat,neutral
7,ok,positive
8,barang terima normal ios update moga lancar te...,neutral
9,barang sdh tp test diupdate seller responsif k...,neutral


In [4]:
# sklearn is a module to work with machine learning models
# train_test_split is a function to split the data into training and testing sets
from sklearn.model_selection import train_test_split

# CountVectorizer is a class that converts text documents into a matrix of token counts
from sklearn.feature_extraction.text import CountVectorizer

# MultinomialNB is a class that implements the Multinomial Naive Bayes classifier
from sklearn.naive_bayes import MultinomialNB

# accuracy_score is a function to calculate the accuracy of the model
# classification_report is a function to generate a classification report
from sklearn.metrics import accuracy_score, classification_report

In [5]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible between runs
#   x_train / x_test -> review texts used for training / testing
#   y_train / y_test -> labels matching those reviews
reviews = df["Review"]
labels = df["Label"]
x_train, x_test, y_train, y_test = train_test_split(
    reviews, labels, test_size=0.2, random_state=42
)

In [6]:
# Report how many reviews ended up in each split (one line per split)
print(
    f"Training data: {len(x_train)}",
    f"Testing data: {len(x_test)}",
    sep="\n",
)

Training data: 36
Testing data: 10


In [7]:
# Create a CountVectorizer object with its default settings.
# It is only instantiated here; fitting happens later on the training reviews
vectorizer = CountVectorizer()

In [10]:
# Fit the vectorizer on the training reviews only and turn them into count vectors
x_train_vectorized = vectorizer.fit_transform(raw_documents=x_train)
# Encode the test reviews with the already-fitted vectorizer (no re-fitting on test data)
x_test_vectorized = vectorizer.transform(raw_documents=x_test)

In [11]:
# Instantiate the Multinomial Naive Bayes classifier with its default settings
model = MultinomialNB()

# Fit the classifier on the vectorized training reviews and their labels
model.fit(X=x_train_vectorized, y=y_train)

In [12]:
# Run the trained classifier on the vectorized test reviews
predictions = model.predict(X=x_test_vectorized)

# Score the predictions against the true test labels
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)

In [13]:
# Print the model accuracy
print(f"Accuracy: {accuracy}")

# Print the classification report for the test predictions.
# The report is a multi-line text table, so it starts on its own line; printing it
# right after the label would shift the header row out of line with its columns
print(f"Classification report:\n{classification_report(y_test, predictions)}")

Accuracy: 0.8
Clasification report:               precision    recall  f1-score   support

     neutral       0.67      1.00      0.80         4
    positive       1.00      0.67      0.80         6

    accuracy                           0.80        10
   macro avg       0.83      0.83      0.80        10
weighted avg       0.87      0.80      0.80        10

