<a href="https://colab.research.google.com/github/dhani43/KNN-Model-TFRF-Dinamic-Crawling-Youtube/blob/main/Implementasi_Sistem.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**1. INSTALL REQUIREMENTS**

In [None]:
!pip install pandas numpy scikit-learn openpyxl nltk google-api-python-client Sastrawi

**2. IMPORT REQUIREMENTS**

In [None]:
import pandas as pd
import numpy as np
import re
import nltk
import pickle
import os
import time
from datetime import datetime
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report
from google.colab import files, drive
from googleapiclient.discovery import build

**3. FUNGSI LOAD MODEL KNN, PREPROCESSING TEXT, CRAWLING DATA YOUTUBE, DAN PREDIKSI SENTIMEN**

In [None]:
# Load the pickled models, vectorizer, TF transformer and label encoders.
def load_models(model_dir=None):
    """Unpickle the six artifacts needed for prediction.

    Args:
        model_dir: Directory that contains the ``.pkl`` files. When omitted,
            the module-level ``drive_path`` is used, which is what this
            function always did before the parameter existed.

    Returns:
        A 6-tuple, in this order: the objects unpickled from
        ``knn_sentiment.pkl``, ``knn_function.pkl``, ``vectorizer.pkl``,
        ``tf_transformer.pkl``, ``sentiment_encoder.pkl`` and
        ``function_encoder.pkl``.

    Raises:
        OSError: If one of the files cannot be opened.
        pickle.UnpicklingError: If a file is not a valid pickle.
    """
    if model_dir is None:
        # NOTE(review): `drive_path` is expected to be defined elsewhere in
        # the notebook before this function is called.
        model_dir = drive_path

    artifact_files = (
        "knn_sentiment.pkl",
        "knn_function.pkl",
        "vectorizer.pkl",
        "tf_transformer.pkl",
        "sentiment_encoder.pkl",
        "function_encoder.pkl",
    )

    loaded = []
    for file_name in artifact_files:
        # os.path.join works whether or not model_dir ends with a separator;
        # plain string concatenation silently built a wrong path otherwise.
        artifact_path = os.path.join(model_dir, file_name)
        # SECURITY: pickle.load can execute arbitrary code. Only point this
        # at files you produced yourself or otherwise trust.
        with open(artifact_path, "rb") as f:
            loaded.append(pickle.load(f))

    print("‚úÖ Model dan vectorizer berhasil dimuat!")
    return tuple(loaded)

# Clean one text and expose every intermediate preprocessing stage.
def preprocesses_text(text):
    """Run the preprocessing pipeline on a single string.

    Stages, in order: lower-casing; replacing mentions, URLs, HTML-like tags
    and any non-word/non-space character with a space; stripping outer
    whitespace; tokenising with ``word_tokenize``; dropping tokens found in
    the module-level ``stop_words``; stemming the rest with the module-level
    ``stemmer``; and joining the stems with single spaces.

    Args:
        text: The raw string to process (must support ``.lower()``).

    Returns:
        A dict with the keys ``casefolded_text``, ``cleaned_text``,
        ``tokens``, ``filtered``, ``stemmed`` and ``final_text``, one per
        stage described above.
    """
    lowered = text.lower()

    # Each alternative is replaced by a space rather than removed, so words
    # on either side of the removed piece never get glued together.
    noise_pattern = r'@\w+|http\S+|www\.\S+|<.*?>|[^\w\s]'
    tidy = re.sub(noise_pattern, ' ', lowered).strip()

    words = word_tokenize(tidy)

    kept_words = []
    root_words = []
    for word in words:
        if word in stop_words:
            continue
        kept_words.append(word)
        root_words.append(stemmer.stem(word))

    return dict(
        casefolded_text=lowered,
        cleaned_text=tidy,
        tokens=words,
        filtered=kept_words,
        stemmed=root_words,
        final_text=' '.join(root_words),
    )

# Preprocess every row of the text column, then store each pipeline stage
# in a column named after that stage.
# NOTE(review): `df` and `text_column` are not defined in the visible part of
# this file - confirm they exist before this cell runs.
results = df[text_column].astype(str).apply(preprocesses_text)
for _stage in ('casefolded_text', 'cleaned_text', 'tokens', 'filtered', 'stemmed', 'final_text'):
    df[_stage] = results.apply(lambda x, _stage=_stage: x[_stage])

# Fetch and preprocess every top-level comment of a YouTube video.
def get_video_comments(api_key, video_id):
    """Download all top-level comments of a video and preprocess them.

    Pages through ``commentThreads().list`` until no ``nextPageToken`` is
    returned. Each comment's ``textDisplay`` is passed through
    ``preprocesses_text`` and only its ``final_text`` is kept.

    Args:
        api_key: Developer key passed to ``build('youtube', 'v3', ...)``.
        video_id: ID of the video whose comment threads are listed.

    Returns:
        A list with one preprocessed string per top-level comment. If any
        exception occurs (client creation, a request, or preprocessing), the
        error is printed and an empty list is returned; comments fetched
        before the failure are discarded.
    """
    try:
        youtube = build('youtube', 'v3', developerKey=api_key)
        all_comments = []
        next_page_token = None

        while True:
            response = youtube.commentThreads().list(
                part="snippet",
                videoId=video_id,
                textFormat="plainText",
                # Ask for the largest page the API allows (the default is 20)
                # so a full crawl needs far fewer requests and less quota.
                maxResults=100,
                pageToken=next_page_token
            ).execute()

            all_comments.extend(
                preprocesses_text(
                    item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
                )['final_text']
                for item in response.get("items", [])
            )

            next_page_token = response.get("nextPageToken")
            if not next_page_token:
                break

        print(f"\n‚úÖ Berhasil mengambil {len(all_comments)} komentar dari video YouTube.")

        if not all_comments:
            print("‚ö† Tidak ada komentar yang ditemukan.")

        return all_comments
    except Exception as e:
        # Best-effort by design: the caller polls in a loop and treats an
        # empty list as "nothing to do this round".
        print(f"‚ùå Terjadi kesalahan saat mengambil komentar: {e}")
        return []

# Predict a sentiment label and a function label for each comment.
def predict_comments(comments, sentiment_model, function_model, vectorizer, tf_transformer, sentiment_encoder, function_encoder):
    """Classify comments with both models and print/return the results.

    Args:
        comments: Sequence of comment strings to classify.
        sentiment_model: Object whose ``predict`` yields encoded sentiments.
        function_model: Object whose ``predict`` yields encoded functions.
        vectorizer: Object whose ``transform`` turns comments into counts.
        tf_transformer: Object whose ``transform`` is applied to the counts.
        sentiment_encoder: Object whose ``inverse_transform`` decodes the
            sentiment predictions into labels.
        function_encoder: Object whose ``inverse_transform`` decodes the
            function predictions into labels.

    Returns:
        A list of dicts with the keys ``comment``, ``predicted_sentiment``
        and ``predicted_function``, one per input comment; an empty list
        when ``comments`` is empty.
    """
    # Guard clause: nothing to vectorise or predict.
    if not comments:
        print("‚ö† Tidak ada komentar yang dapat diprediksi.")
        return []

    # Both models consume the same feature matrix, so build it once.
    features = tf_transformer.transform(vectorizer.transform(comments))

    sentiment_labels = sentiment_encoder.inverse_transform(
        sentiment_model.predict(features)
    )
    function_labels = function_encoder.inverse_transform(
        function_model.predict(features)
    )

    print("\nüéØ HASIL PREDIKSI SENTIMEN DAN FUNGSI:")
    rows = []
    for comment, sentiment, function in zip(comments, sentiment_labels, function_labels):
        print(f"üó® Komentar: {comment}\nüîπ Sentimen: {sentiment}\nüîπ Fungsi: {function}\n")
        rows.append(
            dict(
                comment=comment,
                predicted_sentiment=sentiment,
                predicted_function=function,
            )
        )
    return rows

**4. PREDIKSI SENTIMEN GADGET**

In [None]:
def main(api_key=None, video_id="Mari1pJzhWM", interval=150):
    """Poll a YouTube video's comments forever and report positive counts.

    Loads the models once, then repeats: fetch all comments, predict the
    sentiment and function of each, print how many comments labelled
    ``'Positif'`` fall under each predicted function, and sleep.

    Args:
        api_key: YouTube Data API key. When omitted, the ``YOUTUBE_API_KEY``
            environment variable is used, falling back to the key that was
            previously hard-coded here.
        video_id: ID of the video to monitor.
        interval: Waiting time between two polling rounds, in seconds.

    Returns:
        Never returns normally; the loop runs until the process is
        interrupted.
    """
    if api_key is None:
        # SECURITY: the fallback literal is a credential committed to source.
        # It is kept only so existing runs keep working; rotate it and supply
        # the key through YOUTUBE_API_KEY (or the api_key argument) instead.
        api_key = os.environ.get(
            "YOUTUBE_API_KEY", "AIzaSyC216MP_3O1-VblW-zDAxweSUuAoRJ1U2I"
        )

    print("üöÄ Memuat model dan vectorizer...")
    sentiment_model, function_model, vectorizer, tf_transformer, sentiment_encoder, function_encoder = load_models()

    while True:
        print(f"\n‚è≥ Mengambil komentar pada {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}...")
        comments = get_video_comments(api_key, video_id)

        if not comments:
            print("‚ö† Tidak ada komentar yang diambil. Menunggu periode berikutnya...")
        else:
            print("\nüìä Melakukan prediksi sentimen dan fungsi...")
            predictions = predict_comments(comments, sentiment_model, function_model, vectorizer, tf_transformer, sentiment_encoder, function_encoder)
            predictions_df = pd.DataFrame(predictions)
            print("\n‚úÖ Prediksi selesai!")

            # An empty frame has no 'predicted_sentiment' column, so the
            # filter below must be skipped in that case.
            if not predictions_df.empty:
                positif_df = predictions_df[predictions_df['predicted_sentiment'] == 'Positif']
                counts = positif_df['predicted_function'].value_counts()

                print("\nüìà Jumlah komentar dengan sentimen positif per fungsi gadget:")
                for fungsi, jumlah in counts.items():
                    print(f"   - {fungsi}: {jumlah} komentar positif")

        print(f"üïí Menunggu {interval / 60} menit sebelum mengambil komentar lagi...\n")
        time.sleep(interval)

if __name__ == "__main__":
    main()