## Cek data

In [None]:
!pip install Sastrawi

import tensorflow as tf
import numpy as np
import joblib
import nltk
nltk.download('stopwords')
nltk.download('punkt')
from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from tensorflow.keras.preprocessing.sequence import pad_sequences

def preprocess_new_data(new_data):
    """Turn one raw activity record into a single-row, model-ready DataFrame.

    The ``Note`` text is normalised (digits and punctuation replaced by
    spaces, lower-cased, tokenised, Indonesian stop words dropped, every
    remaining token stemmed) and an ``Activity Score`` column is derived from
    ``Type Activity`` through a fixed weight table.

    Args:
        new_data: Dict-like record. It must provide the keys ``'Note'`` and
            ``'Type Activity'``; any other keys become additional columns.

    Returns:
        pandas.DataFrame: One row holding the cleaned ``Note`` and the added
        ``Activity Score`` (0 for activity types absent from the weight table).

    Raises:
        KeyError: If ``'Note'`` or ``'Type Activity'`` is missing.
    """
    import pandas as pd
    import re

    # Set up the stemmer and the stop-word list used by clean_text.
    factory = StemmerFactory()
    stemmer = factory.create_stemmer()
    stop_words = set(stopwords.words('indonesian'))

    def clean_text(text):
        """Normalise one note into a space-joined string of stemmed tokens."""
        text = str(text)
        text = re.sub(r'\d+', ' ', text)
        text = text.lower()
        text = re.sub(r'\n', ' ', text)
        text = re.sub(r'[^\w\s]', ' ', text)
        tokens = word_tokenize(text)
        # Stop words are filtered before stemming, so the comparison is made
        # against the surface form of each token.
        return ' '.join(
            stemmer.stem(word) for word in tokens if word not in stop_words
        )

    new_df = pd.DataFrame([new_data])

    # Assign the filled column back. A chained ``fillna(..., inplace=True)`` on
    # ``new_df['Note']`` operates on a temporary under pandas Copy-on-Write and
    # would leave a missing note in place, which str() then turns into the
    # literal word "none"/"nan".
    new_df['Note'] = new_df['Note'].fillna('')

    # Text preprocessing
    new_df['Note'] = new_df['Note'].apply(clean_text)

    activity_weights = {
        'TASK': 1,
        'CALL': 2,
        'DEADLINE': 1,
        'EMAIL': 1,
        'MEETING': 3
    }
    # Only one column is needed, so map over it directly instead of applying a
    # row-wise lambda; unknown activity types score 0.
    new_df['Activity Score'] = new_df['Type Activity'].map(
        lambda activity: activity_weights.get(activity, 0)
    )

    return new_df

Collecting Sastrawi
  Downloading Sastrawi-1.0.1-py2.py3-none-any.whl.metadata (909 bytes)
Downloading Sastrawi-1.0.1-py2.py3-none-any.whl (209 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/209.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m209.7/209.7 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: Sastrawi
Successfully installed Sastrawi-1.0.1


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [None]:
def extract_new_features(new_df, tokenizer, max_sequence_length):
    """Build the model input matrix from a preprocessed DataFrame.

    Args:
        new_df: DataFrame with a ``Note`` column (text) and an
            ``Activity Score`` column.
        tokenizer: Object exposing ``texts_to_sequences``.
        max_sequence_length: Value forwarded to ``pad_sequences`` as ``maxlen``.

    Returns:
        Array whose first column is ``Activity Score`` followed by the padded
        note sequence, one row per row of ``new_df``.
    """
    # Convert each note to token ids, then pad them to a common length.
    note_token_ids = tokenizer.texts_to_sequences(new_df['Note'])
    note_matrix = pad_sequences(note_token_ids, maxlen=max_sequence_length)

    # Double brackets keep the score as a 2-D column so it can be stacked
    # horizontally in front of the padded sequences.
    score_column = new_df[['Activity Score']].values
    return np.hstack((score_column, note_matrix))

In [None]:
def predict_new_data(new_data, model, tokenizer, max_sequence_length):
    """Predict the deal-stage label for a single raw record.

    Args:
        new_data: Raw record handed to ``preprocess_new_data``.
        model: Object exposing ``predict``; its output is arg-maxed along
            axis 1 to pick a class index.
        tokenizer: Tokenizer forwarded to ``extract_new_features``.
        max_sequence_length: Padding length forwarded to
            ``extract_new_features``.

    Returns:
        str: Label of the first predicted row, one of ``'COLD'``, ``'WARM'``,
        ``'HOT'``, ``'DEAL'`` or ``'LOST'``.
    """
    features = extract_new_features(
        preprocess_new_data(new_data), tokenizer, max_sequence_length
    )

    class_scores = model.predict(features)
    winning_class = np.argmax(class_scores, axis=1)[0]

    # Map the numeric class index back to its original label.
    # NOTE(review): presumably this must mirror the label encoding used at
    # training time (an alternative ordering starting with LOST=0 was
    # previously left commented out here) -- confirm against the training code.
    class_names = {0: 'COLD', 1: 'WARM', 2: 'HOT', 3: 'DEAL', 4: 'LOST'}
    return class_names[winning_class]

In [None]:
if __name__ == "__main__":
    # Example record to classify.
    new_data = dict([
        ('Deal Name', 'New Deal 1'),
        ('Type Activity', 'TASK'),
        ('Note', 'menunggu informasi selanjutnya'),
    ])

    # Load the previously saved model and tokenizer.
    # NOTE(review): joblib.load unpickles the file, which can execute arbitrary
    # code -- only load tokenizer files from a trusted source.
    model = tf.keras.models.load_model('modelLSTMUpdateTry.h5')
    tokenizer = joblib.load('tokenizerUpdate (2).pkl')

    # Padding length for the note sequences.
    # NOTE(review): presumably has to equal the length used in training -- confirm.
    max_sequence_length = 100

    predicted_label = predict_new_data(
        new_data,
        model,
        tokenizer,
        max_sequence_length,
    )

    print("Predicted Label for the new deal:", predicted_label)



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 374ms/step
Predicted Label for the new deal: WARM
