In [1]:
import re
import pandas as pd
from flask import request
from flasgger import Swagger, LazyString, LazyJSONEncoder
from flasgger import swag_from
from flask import Flask, jsonify
import sqlite3


In [2]:
df_abussive = pd.read_csv('asset-challenges/abusive.csv')
df_abussive

Unnamed: 0,ABUSIVE
0,alay
1,ampas
2,buta
3,keparat
4,anjing
...,...
120,rezim
121,sange
122,serbet
123,sipit


In [3]:
df_kamusalay = pd.read_csv('asset-challenges/new_kamusalay.csv', header=None, names=['kata_alay', 'kata_baku'],encoding='latin-1')
df_kamusalay

Unnamed: 0,kata_alay,kata_baku
0,anakjakartaasikasik,anak jakarta asyik asyik
1,pakcikdahtua,pak cik sudah tua
2,pakcikmudalagi,pak cik muda lagi
3,t3tapjokowi,tetap jokowi
4,3x,tiga kali
...,...,...
15162,mendikbud,menteri pendidikan dan kebudayaan
15163,mendag,menteri perdagangan
15164,menaker,menteri tenaga kerja
15165,memetwit,mentwit


In [4]:
df_data = pd.read_csv('asset-challenges/data.csv',encoding='latin-1')
df_data

Unnamed: 0,Tweet,HS,Abusive,HS_Individual,HS_Group,HS_Religion,HS_Race,HS_Physical,HS_Gender,HS_Other,HS_Weak,HS_Moderate,HS_Strong
0,- disaat semua cowok berusaha melacak perhatia...,1,1,1,0,0,0,0,0,1,1,0,0
1,RT USER: USER siapa yang telat ngasih tau elu?...,0,1,0,0,0,0,0,0,0,0,0,0
2,"41. Kadang aku berfikir, kenapa aku tetap perc...",0,0,0,0,0,0,0,0,0,0,0,0
3,USER USER AKU ITU AKU\n\nKU TAU MATAMU SIPIT T...,0,0,0,0,0,0,0,0,0,0,0,0
4,USER USER Kaum cebong kapir udah keliatan dong...,1,1,0,1,1,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
13164,USER jangan asal ngomong ndasmu. congor lu yg ...,1,1,1,0,0,0,1,0,0,1,0,0
13165,USER Kasur mana enak kunyuk',0,1,0,0,0,0,0,0,0,0,0,0
13166,USER Hati hati bisu :( .g\n\nlagi bosan huft \...,0,0,0,0,0,0,0,0,0,0,0,0
13167,USER USER USER USER Bom yang real mudah terdet...,0,0,0,0,0,0,0,0,0,0,0,0


In [5]:
df_data['Tweet']

0        - disaat semua cowok berusaha melacak perhatia...
1        RT USER: USER siapa yang telat ngasih tau elu?...
2        41. Kadang aku berfikir, kenapa aku tetap perc...
3        USER USER AKU ITU AKU\n\nKU TAU MATAMU SIPIT T...
4        USER USER Kaum cebong kapir udah keliatan dong...
                               ...                        
13164    USER jangan asal ngomong ndasmu. congor lu yg ...
13165                         USER Kasur mana enak kunyuk'
13166    USER Hati hati bisu :( .g\n\nlagi bosan huft \...
13167    USER USER USER USER Bom yang real mudah terdet...
13168    USER Mana situ ngasih(": itu cuma foto ya kuti...
Name: Tweet, Length: 13169, dtype: object

In [6]:
def replace_kata_alay(text, df_kamusalay):
    words = text.split()
    for i in range(len(words)):
        if words[i] in df_kamusalay['kata_alay'].values:
            new_word = df_kamusalay.loc[df_kamusalay['kata_alay'] == words[i], 'kata_baku'].values[0]
            words[i] = new_word
    return ' '.join(words)

In [7]:
def insert_data(text, cleaned_text):
    try: 
        conn_f = sqlite3.connect('asset-challenges/input_kalimat.db')       
        cursor_f = conn_f.cursor()    
        cursor_f.execute("INSERT INTO data_kalimat (text, cleaned_text) VALUES (?, ?)", (text, cleaned_text))
        conn_f.commit()
    except Exception as e:
        print(f"Error insert: {str(e)}")
    finally:
        conn_f.close()

In [8]:
def insert_data_file(text, cleaned_text_new):
    try: 
        conn_f = sqlite3.connect('asset-challenges/input_file.db')       
        cursor_f = conn_f.cursor()    
        cursor_f.execute("INSERT INTO data_kalimat_file (text_file, cleaned_text_file) VALUES (?, ?)", (text, cleaned_text_new))
        conn_f.commit()
    except Exception as e:
        print(f"Error insert: {str(e)}")
    finally:
        conn_f.close()

In [39]:
app = Flask(__name__)

app.json_encoder = LazyJSONEncoder
swagger_template = dict(
info = {
    'title': LazyString(lambda: 'API Documentation for Data Processing and Modeling'),
    'version': LazyString(lambda: '1.0.0'),
    'description': LazyString(lambda: 'Dokumentasi API untuk Data Processing dan Modeling'),
    },
    host = LazyString(lambda: request.host)
)

swagger_config = {
    "headers": [],
    "specs": [
        {
            "endpoint": 'api',
            "route": '/api.json',
        }
    ],
    "static_url_path": "/flasgger_static",
    "swagger_ui": True,
    "specs_route": "/api/"
}


swagger = Swagger(app, template=swagger_template,             
                  config=swagger_config)


# input kalimat
@swag_from("C://Users/akung/api/text_processing.yml", methods=['POST'])
@app.route('/text-processing', methods=['POST'])
def text_processing():

    text = request.form.get('text')
    cleaned_text = re.sub(r'[^a-zA-Z0-9\s]', ' ', text)
    cleaned_text = re.sub(' +', ' ', cleaned_text).strip()
    cleaned_text = replace_kata_alay(cleaned_text, df_kamusalay)
    
#     conn.execute('''INSERT INTO data_kalimat (text, cleaned_text) VALUES (?, ?) ''', (text, cleaned_text))
#     conn.commit()
    insert_data(text, cleaned_text)

    
    json_response = {
        'status_code': 200,
        'description': "Teks yang sudah diproses",
        'data': cleaned_text,
    }

    response_data = jsonify(json_response)
    return response_data


#input file
@swag_from("C://Users/akung/api/text_processing_file.yml", methods=['POST'])
@app.route('/text-processing-file', methods=['POST'])
def text_processing_file():

    file = request.files.getlist('file')[0]

    df = pd.read_csv(file,encoding='latin1')

    texts = df['Tweet'].to_list()
    
    cleaned_text = []
    for text in texts:
        cleaned_text.append(re.sub(r'[^a-zA-Z0-9\s]', ' ', text))
    
    
#     cleaned_text_new = []
#     for text in cleaned_text:
#         cleaned_text_new.append(replace_kata_alay(text, df_kamusalay))
    
    cleaned_text_alay = [replace_kata_alay(text, df_kamusalay) for text in cleaned_text]
    tanpa_kata_user = [text.replace('USER', '') for text in cleaned_text_alay]
    cleaned_text_new = [text.replace('RT', '') for text in tanpa_kata_user]
    
    for i in range(len(texts)):
        insert_data_file(texts[i], cleaned_text_new[i])
#     print(cleaned_text_new)
    json_response = {
        'status_code': 200,
        'description': "Teks yang sudah diproses",
        'data': cleaned_text_new,
    }
    response_data = jsonify(json_response)
    return response_data

if __name__ == '__main__':
   app.run()

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:5000/ (Press CTRL+C to quit)
127.0.0.1 - - [30/Nov/2023 01:01:25] "GET /api/ HTTP/1.1" 200 -
127.0.0.1 - - [30/Nov/2023 01:01:25] "GET /api.json HTTP/1.1" 200 -
127.0.0.1 - - [30/Nov/2023 01:04:23] "POST /text-processing-file HTTP/1.1" 200 -


membuat table untuk input kalimat

In [26]:

cursor.execute("DROP TABLE data_kalimat")


<sqlite3.Cursor at 0x21cacc87f80>

In [27]:
cursor.execute('''CREATE TABLE data_kalimat(id INTEGER PRIMARY KEY AUTOINCREMENT, text TEXT, cleaned_text TEXT)''')
print('Sukses buat table')

Sukses


In [29]:
#test insert
cursor.execute("INSERT INTO data_kalimat (text, cleaned_text) VALUES (?, ?)", ('test kalimat', 'test kalimat bersih'))
conn.commit()
print('sukses')

sukses


In [9]:
conn = sqlite3.connect('asset-challenges/input_kalimat.db')
cursor = conn.cursor()
#cek isi data
cursor.execute("SELECT * FROM data_kalimat")
hasil = cursor.fetchall()

print(hasil)
cursor.close()
conn.close()

[(1, 'test kalimat', 'test kalimat bersih'), (2, 'coba', 'bersih'), (3, 'cccc', 'cccc'), (4, 'ini pakcikdahtua 😊😊😒❤️', 'ini pak cik sudah tua')]


Membuat table untuk input file

In [59]:
conn = sqlite3.connect('asset-challenges/input_file.db')
cursor = conn.cursor()
cursor.execute('''CREATE TABLE data_kalimat_file(id INTEGER PRIMARY KEY AUTOINCREMENT, text_file TEXT, cleaned_text_file TEXT)''')
print('Sukses buat table')
cursor.close()
conn.close()



OperationalError: table data_kalimat_file already exists

In [38]:
conn = sqlite3.connect('asset-challenges/input_file.db')
cursor = conn.cursor()
#cek isi data
cursor.execute("SELECT * FROM data_kalimat_file LIMIT 10")
hasil = cursor.fetchall()
print(hasil)
cursor.close()
conn.close()

[]


In [32]:
df_hasil = pd.DataFrame(hasil)
df_hasil

Unnamed: 0,0,1,2
0,1,coba,bersih
1,2,coba 2 😒😒😍😍 P)(&%$#@),bersih 2
2,3,- disaat semua cowok berusaha melacak perhatia...,di saat semua cowok berusaha melacak perhatian...
3,4,RT USER: USER siapa yang telat ngasih tau elu?...,siapa yang telat memberi tau kamu edan sara...
4,5,"41. Kadang aku berfikir, kenapa aku tetap perc...",41 Kadang aku berpikir kenapa aku tetap percay...
...,...,...,...
13166,13167,USER jangan asal ngomong ndasmu. congor lu yg ...,jangan asal berbicara ndasmu congor kamu yang...
13167,13168,USER Kasur mana enak kunyuk',Kasur mana enak kunyuk
13168,13169,USER Hati hati bisu :( .g\n\nlagi bosan huft \...,Hati hati bisu tidak dan lagi bosan duh xf0 x...
13169,13170,USER USER USER USER Bom yang real mudah terdet...,Bom yang real mudah terdeteksi bom yang te...


In [37]:
conn = sqlite3.connect('asset-challenges/input_file.db')
cursor = conn.cursor()
#cek isi data
cursor.execute("DELETE FROM data_kalimat_file")
conn.commit()
cursor.close()
conn.close()

In [13]:
text = "coba 2   😒😒😍😍 P)(&%$#@)"
bersih = "bersih 2"
insert_data_file(text, bersih)