In [1]:
import pandas as pd
import re
import pickle
import sqlite3
import nltk

from flask import Flask, jsonify
app = Flask(__name__)

from flask import request
from flasgger import Swagger, LazyJSONEncoder, swag_from

from Sastrawi.Stemmer.StemmerFactory import StemmerFactory
from sklearn.feature_extraction.text import CountVectorizer
import numpy as np
from keras.models import load_model
from keras.models import load_model
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

app.json_encoder = LazyJSONEncoder

swagger_template = {
    "info": {
        "title": "API Documentation Platinum Challenge",
        "version": "1.0.0",
        "description": "Dokumentasi API untuk Prediksi Sentimen",
    },
    "host": "127.0.0.1:5000",
}
swagger_config = {
    "headers": [],
    "specs": [
        {"endpoint": "docs", "route": "/docs.json",}
    ],
    "static_url_path": "/flasgger_static",
    "swagger_ui": True,
    "specs_route": "/docs/",
}
swagger = Swagger(app, template=swagger_template, config=swagger_config)

# Inisialisasi Database
def initialize_database():
    conn = sqlite3.connect('goldchallenge_database.db')
    cursor = conn.cursor()

    # Buat tabel jika belum ada
    cursor.execute('''
        CREATE TABLE IF NOT EXISTS cleaned_data (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            original_text TEXT,
            cleaned_text TEXT
        )
    ''')

    conn.commit()
    conn.close()

# Panggil fungsi inisialisasi pada saat aplikasi dijalankan
initialize_database()

# Load kamus kata alay dari kamusalay.csv
df_stopword = pd.read_csv('/Users/gerrylorinanto/Gelo/Gerry/Binar/Platinum Challenge/stopwords_indonesian.csv', encoding = 'latin1')
df_alay = pd.read_csv('/Users/gerrylorinanto/Gelo/Gerry/Binar/Platinum Challenge/new_kamusalay.csv',  encoding = 'latin1')

#cleansing
def preprocess_text(text):
    #Menghapus USER,RT,URL
    text = re.sub(r'USER|\bRT\b|URL',' ',text)
    
    # lower text
    text = text.lower()
    
    #Menghapus /n
    text =  re.sub(r'\\n',' ',text)
    
    # Hapus emotikon dan karakter khusus
    text = re.sub(r'[^\w\d\s]', '', text)

    # mengganti spasi yang berlebihan
    text = re.sub(r'\s+', ' ', text)

    # Menghapus kata dan huruf yang bergabung
    text = re.sub(r'\w*\d\w*', '', text)
    
    # Mengganti kata yang berulang
    text = re.sub(r'\b(\w+)\1\b', r'\1', text)
    return text

#removing stopwords
stopwords = df_stopword['Stopword'].tolist()
def remove_stopwords(text):
    list_stopwords = text.split()
    return ' '.join([text for text in list_stopwords if text not in stopwords])

#normalization
kamus_alay = dict(zip(df_alay['anakjakartaasikasik'], df_alay['anak jakarta asyik asyik']))
def normalize(text):
    for word in kamus_alay:
        return ' '.join([kamus_alay[word] if word in kamus_alay else word for word in text.split(' ')])

def cleansing_text(text):
    text = preprocess_text(text)
    text = remove_stopwords(text)
    text = normalize(text)
    return text

def preprocessing_text(text):
    factory = StemmerFactory()
    stemmer = factory.create_stemmer()
    
    def stem_text(tokens):
        return stemmer.stem(tokens)

    text = text.apply(stem_text)
    return text

max_features = 100000
tokenizer = Tokenizer(num_words=max_features, split=' ', lower=True)
sentiment = ['negative', 'neutral', 'positive']

count_vectorizer = CountVectorizer()

file = open('/Users/gerrylorinanto/Gelo/Gerry/Binar/Platinum Challenge/Resource LSTM/x_pad_sequences.pickle','rb')
feature_file_from_lstm = pickle.load(file)

model_file_from_lstm = load_model('/Users/gerrylorinanto/Gelo/Gerry/Binar/Platinum Challenge/Model LSTM/model_lstm.h5')

file = open('//Users/gerrylorinanto/Gelo/Gerry/Binar/Platinum Challenge/Resource MLP/feature.p','rb')
feature_file_from_MLP = pickle.load(file)

#model_file_from_MLP = load_model('/Users/gerrylorinanto/Gelo/Gerry/Binar/Platinum Challenge/Model MLP/model_MLP.pickle')
with open('/Users/gerrylorinanto/Gelo/Gerry/Binar/Platinum Challenge/Model MLP/model_MLP.pickle', 'rb') as file:
    model_file_from_MLP = pickle.load(file)

@swag_from(
    "/Users/gerrylorinanto/Gelo/Gerry/Binar/Platinum Challenge/Docs YML/team.yml", methods=['GET'],
)
@app.route('/', methods=['GET'])
def hello_world():
    json_response = {
        "Kelompok 2 DSC Wave 15",
        "PLATINUM - CHALLENGE"
    }
    response_data = jsonify({"kelompok": list(json_response)})  # Mengonversi set menjadi list
    return response_data

@swag_from(
    '/Users/gerrylorinanto/Gelo/Gerry/Binar/Platinum Challenge/Docs YML/text_LSTM.yml', methods=['POST'],
)
@app.route('/text_sentiment_LSTM', methods=['POST'])
def text_sentiment_LSTM():
    original_text = request.form.get('text')
    text = [cleansing_text(original_text)]  # Fix the function name
    feature = tokenizer.texts_to_sequences(text)
    feature = pad_sequences(feature, maxlen=feature_file_from_lstm.shape[1])
    prediction = model_file_from_lstm.predict(feature)
    get_sentiment = sentiment[np.argmax(prediction[0])]

    json_response = {
        "status_code": 200,
        "description": "Teks yang akan diproses",
        "data": {
            'text': original_text,
            'sentiment': get_sentiment
        }
    }

    response_data = jsonify(json_response)
    return response_data

@swag_from(
    '/Users/gerrylorinanto/Gelo/Gerry/Binar/Platinum Challenge/Docs YML/text_MLP.yml', methods=['POST'],
)
@app.route('/text_sentiment_countvectorizer', methods=['POST'])
def text_sentiment_countvectorizer():
    original_text = request.form.get('text')
    text = [cleansing_text(original_text)]

    # Menggunakan CountVectorizer untuk ekstraksi fitur
    feature = count_vectorizer.transform(text)

    # Memprediksi sentimen menggunakan model neural network
    predictions = model_file_from_MLP.predict(feature)
    get_sentiment = sentiment[np.argmax(predictions[0])]

    json_response = {
        "status_code": 200,
        "description": "Teks yang akan diproses menggunakan CountVectorizer",
        "data": {
            'text': original_text,
            'sentiment': get_sentiment
        }
    }

    response_data = jsonify(json_response)
    return response_data


@swag_from(
    '/Users/gerrylorinanto/Gelo/Gerry/Binar/Platinum Challenge/Docs YML/file_LSTM.yml', methods=['POST'],
)
@app.route('/File_Sentiment_LSTM', methods=['POST'])
def File_Sentiment_LSTM(): 
    file = request.files.getlist("file")[0]
    df = pd.read_csv(file, encoding="ISO-8859-1")
    
    texts = df['text'].apply(cleansing_text)
    features = tokenizer.texts_to_sequences(texts)
    features = pad_sequences(features, maxlen=feature_file_from_lstm.shape[1])
    predictions = model_file_from_lstm.predict(features)
    get_sentiments = [sentiment[np.argmax(pred)] for pred in predictions]

    json_response = {
        "status_code": 200,
        "description": "Data yang akan diproses dari file CSV",
        "data": {
            'texts': list(texts),
            'sentiments': get_sentiments
        }
    }
    
    response_data = jsonify(json_response)
    return response_data

if __name__ == "__main__":
    app.run()


  from pandas.core import (


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [04/Mar/2024 10:55:46] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2024 10:55:48] "GET /docs/ HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2024 10:55:48] "[36mGET /flasgger_static/swagger-ui-bundle.js HTTP/1.1[0m" 304 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2024 10:55:48] "[36mGET /flasgger_static/swagger-ui.css HTTP/1.1[0m" 304 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2024 10:55:48] "[36mGET /flasgger_static/swagger-ui-standalone-preset.js HTTP/1.1[0m" 304 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2024 10:55:48] "[36mGET /flasgger_static/lib/jquery.min.js HTTP/1.1[0m" 304 -
INFO:werkzeug:127.0.0.1 - - [04/Mar/2024 10:55:48] "GET /docs.json HTTP/1.1" 200 -
ERROR:__main__:Exception on /text_sentiment_countvectorizer [POST]
Traceback (most recent call last):
  File "/Users/gerrylorinanto/anaconda3/lib/python3.11/site-packages/flask/app.py", line 1463, in wsgi_a