In [2]:
import re
import logging
import pandas as pd
from flask import Flask, jsonify, request
from flasgger import Swagger, LazyJSONEncoder, swag_from

# Flask application instance; every route in this file is registered on it.
app = Flask(__name__)

# Read the header-less CSV that pairs non-standard words with their standard
# spelling, name its two columns, and build a lookup dict from it.
df_kamusalay = pd.read_csv(
    'C:/Users/ROG/Documents/Data-20231031T003049Z-001/project-hate-speech/Asset Challenge/new_kamusalay.csv',
    encoding='latin-1',
    header=None,
)
df_kamusalay.columns = ["tidak_baku", "baku"]
kamus_alay = {
    tidak_baku: baku
    for tidak_baku, baku in zip(df_kamusalay["tidak_baku"], df_kamusalay["baku"])
}

# Read the CSV of abusive words and keep its 'ABUSIVE' column as a plain list.
df_abusive = pd.read_csv(
    'C:/Users/ROG/Documents/Data-20231031T003049Z-001/project-hate-speech/Asset Challenge/abusive.csv'
)
kata_abusive = df_abusive['ABUSIVE'].tolist()

# Flasgger settings: one spec served at /docs.json, UI enabled under /docs/.
swagger_config = dict(
    headers=[],
    specs=[
        dict(endpoint='docs', route='/docs.json'),
    ],
    static_url_path="/flasgger_static",
    swagger_ui=True,
    specs_route="/docs/",
)

# Top-level metadata merged into the generated specification.
swagger_template = dict(
    info=dict(
        title="API Documentation for Data Processing and Modeling",
        version="1.0.0",
        description="Dokumentasi API untuk Data Processing dan Modeling",
    ),
    host="127.0.0.1:5000",
)

# NOTE(review): recent Flask releases dropped `app.json_encoder`; confirm the
# installed Flask version still honours this assignment.
app.json_encoder = LazyJSONEncoder
swagger = Swagger(app, template=swagger_template, config=swagger_config)

# GET / -- responds with a fixed JSON greeting (status_code, description, data).
@swag_from('C:/Users/ROG/Documents/Data-20231031T003049Z-001/project-hate-speech/docs/hello_world.yml', methods=['GET'])
@app.route('/', methods=['GET'])
def hello_world():
    # The payload is constant, so it is handed to jsonify as keyword arguments.
    return jsonify(
        status_code=200,
        description="Menyapa Hello World",
        data="Hello World",
    )

# GET /text -- responds with a fixed JSON payload whose data is "Text".
@swag_from('C:/Users/ROG/Documents/Data-20231031T003049Z-001/project-hate-speech/docs/text.yml', methods=['GET'])
@app.route('/text', methods=['GET'])
def text():
    # The payload is constant, so it is handed to jsonify as keyword arguments.
    return jsonify(
        status_code=200,
        description="Original Teks",
        data="Text",
    )

# GET /text_clean -- responds with a fixed JSON payload whose data is "Text Clean".
@swag_from('C:/Users/ROG/Documents/Data-20231031T003049Z-001/project-hate-speech/docs/text_clean.yml', methods=['GET'])
@app.route('/text_clean', methods=['GET'])
def text_clean():
    # The payload is constant, so it is handed to jsonify as keyword arguments.
    return jsonify(
        status_code=200,
        description="Teks yang sudah dibersihkan",
        data="Text Clean",
    )

# POST /text_processing -- reads the 'text' form field, replaces every
# character that is not an ASCII letter or digit with a space, and returns the
# result as JSON. Responds with HTTP 400 when the field is missing.
@swag_from('C:/Users/ROG/Documents/Data-20231031T003049Z-001/project-hate-speech/docs/text_processing.yml', methods=['POST'])
@app.route('/text_processing', methods=['POST'])
def text_processing():
    raw_text = request.form.get('text')

    if raw_text is None:
        # form.get() yields None for an absent field; passing that to re.sub()
        # raises TypeError and would surface as an opaque HTTP 500.
        error_response = {
            'status_code': 400,
            'description': "Field 'text' wajib diisi",
            'data': None,
        }
        return jsonify(error_response), 400

    json_response = {
        'status_code': 200,
        'description': "Teks yang sudah diproses",
        'data': re.sub(r'[^a-zA-Z0-9]', ' ', raw_text)
    }
    return jsonify(json_response)


# Literal "\xNN" sequences (backslash, 'x', two lowercase hex digits) that
# appear as text inside a tweet.
_HEX_ESCAPE_RE = re.compile(r'\\x[a-f0-9]{2}')
# Anything that is neither an ASCII letter/digit nor a space.
_NON_ALNUM_SPACE_RE = re.compile(r'[^a-zA-Z0-9 ]')


def _clean_tweet(tweet, slang_map, abusive_words):
    """Return *tweet* cleaned, normalised and stripped of abusive words.

    Steps, in order: collapse whitespace runs to single spaces, drop literal
    ``\\xNN`` sequences, drop every character that is not an ASCII letter,
    digit or space, lowercase, replace each word found in *slang_map* with its
    mapped value, and discard words contained in *abusive_words*.

    Args:
        tweet: Raw tweet text.
        slang_map: Mapping from a non-standard word to its replacement.
        abusive_words: Container of words to remove (a set keeps lookups O(1)).

    Returns:
        The remaining words joined by single spaces.
    """
    collapsed = ' '.join(tweet.split())
    without_escapes = _HEX_ESCAPE_RE.sub('', collapsed)
    alnum_only = _NON_ALNUM_SPACE_RE.sub('', without_escapes).lower()
    normalised = (slang_map.get(word, word) for word in alnum_only.split())
    return ' '.join(word for word in normalised if word not in abusive_words)


def _bad_upload_response(message):
    """Build the JSON body and HTTP 400 status for a rejected upload."""
    return jsonify({'status_code': 400, 'message': message, 'data': None}), 400


# POST /text-processing-file -- accepts an uploaded CSV (form part 'file') with
# a 'Tweet' column and returns, for every distinct non-empty tweet, its
# character count, its word count and its cleaned text.
# Responds with HTTP 400 when the CSV is empty, unparsable or lacks 'Tweet'.
@swag_from("C:/Users/ROG/Documents/Data-20231031T003049Z-001/project-hate-speech/docs/text_processing_file.yml", methods=['POST'])
@app.route('/text-processing-file', methods=['POST'])
def text_processing_file():
    # The processed frame is still published as a module-level global, as the
    # original code did. NOTE(review): confirm whether anything outside this
    # function reads `post_df`; if not, the global can be dropped.
    global post_df

    # A missing 'file' part raises werkzeug's BadRequestKeyError, which Flask
    # already answers with HTTP 400.
    uploaded_file = request.files['file']

    try:
        uploaded = pd.read_csv(uploaded_file, encoding='latin-1')
    except (pd.errors.EmptyDataError, pd.errors.ParserError):
        return _bad_upload_response('Uploaded file is not a readable CSV')

    if 'Tweet' not in uploaded.columns:
        return _bad_upload_response("Uploaded CSV has no 'Tweet' column")

    # Keep a one-column DataFrame (not a Series). Missing tweets are dropped
    # because len()/split() cannot handle NaN, then duplicates are removed.
    # The explicit copy makes the column assignments below act on an
    # independent frame instead of a slice of `uploaded`.
    tweets = uploaded[['Tweet']].dropna(subset=['Tweet']).drop_duplicates().copy()
    # Non-text cells (e.g. numbers) are turned into text so the string
    # operations below cannot fail on them.
    tweets['Tweet'] = tweets['Tweet'].astype(str)

    # Length of each raw tweet in characters and in whitespace-separated words.
    tweets['no_char'] = tweets['Tweet'].apply(len)
    tweets['no_words'] = tweets['Tweet'].apply(lambda tweet: len(tweet.split()))

    # Build the lookup set once per request instead of scanning the list for
    # every word of every tweet.
    abusive_words = set(kata_abusive)
    cleaned_tweets = [
        _clean_tweet(tweet, kamus_alay, abusive_words)
        for tweet in tweets['Tweet']
    ]

    post_df = tweets

    result = {
        'status_code': 200,
        'message': 'Successful response',
        'data': {
            'no_char': tweets['no_char'].tolist(),
            'no_words': tweets['no_words'].tolist(),
            'cleaned_tweets': cleaned_tweets
        }
    }

    return jsonify(result)

# Start the Flask server with its default settings, but only when this module
# is executed as the main program (not when it is imported).
if __name__ == '__main__':
    app.run()


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [30/Nov/2023 21:14:27] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [30/Nov/2023 21:14:27] "GET /favicon.ico HTTP/1.1" 404 -
127.0.0.1 - - [30/Nov/2023 21:14:29] "GET /docs/ HTTP/1.1" 200 -
127.0.0.1 - - [30/Nov/2023 21:14:29] "GET /flasgger_static/swagger-ui.css HTTP/1.1" 304 -
127.0.0.1 - - [30/Nov/2023 21:14:29] "GET /flasgger_static/swagger-ui-bundle.js HTTP/1.1" 304 -
127.0.0.1 - - [30/Nov/2023 21:14:29] "GET /flasgger_static/lib/jquery.min.js HTTP/1.1" 304 -
127.0.0.1 - - [30/Nov/2023 21:14:29] "GET /flasgger_static/swagger-ui-standalone-preset.js HTTP/1.1" 304 -
127.0.0.1 - - [30/Nov/2023 21:14:30] "GET /docs.json HTTP/1.1" 200 -
127.0.0.1 - - [30/Nov/2023 21:15:00] "POST /text-processing-file HTTP/1.1" 200 -
