# Install requirements

In [1]:
# Install requirements
!pip install fastapi==0.68.1
!pip install timm
!pip install python-multipart==0.0.5
!pip install uvicorn==0.15.0
!pip install aiofiles
!pip install nest-asyncio
!pip install pyngrok

Collecting fastapi==0.68.1
  Downloading fastapi-0.68.1-py3-none-any.whl (52 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.3/52.3 kB[0m [31m659.1 kB/s[0m eta [36m0:00:00[0m
[?25hCollecting starlette==0.14.2 (from fastapi==0.68.1)
  Downloading starlette-0.14.2-py3-none-any.whl (60 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.6/60.6 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: starlette, fastapi
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
lida 0.0.10 requires kaleido, which is not installed.
lida 0.0.10 requires python-multipart, which is not installed.
lida 0.0.10 requires uvicorn, which is not installed.[0m[31m
[0mSuccessfully installed fastapi-0.68.1 starlette-0.14.2
Collecting timm
  Downloading timm-0.9.12-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━

In [2]:
from typing import List

from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import HTMLResponse, StreamingResponse

import io
import numpy as np
import pandas as pd

import torch
import cv2

import matplotlib.pyplot as plt


# Define requirements

## Tokenizer

In [3]:
from transformers import TFAutoModel, AutoTokenizer

In [4]:
model_name = 'vinai/phobert-base'

# Tokenizing
tokenizer = AutoTokenizer.from_pretrained(model_name)

config.json:   0%|          | 0.00/557 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/895k [00:00<?, ?B/s]

bpe.codes:   0%|          | 0.00/1.14M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.13M [00:00<?, ?B/s]

In [5]:
def to_input(X_pred, max_len=200):
    """Tokenize sentences into a fixed-width matrix of token ids.

    Every sentence is tokenized on its own with the module-level
    ``tokenizer``, padded/truncated to ``max_len`` tokens, and the resulting
    ``input_ids`` rows are stacked.

    :param X_pred: Sentences to encode. A pandas Series or mapping (its values
        are used), a single string, or any iterable of strings.
    :param max_len: Number of token ids per sentence, defaults to 200
    :type max_len: int, optional
    :return: Array of shape ``(n_sentences, max_len)``; ``(0, max_len)`` when
        there is nothing to encode
    :rtype: numpy.ndarray
    """
    if isinstance(X_pred, str):
        # A bare string is one sentence, not an iterable of characters.
        texts = [X_pred]
    elif hasattr(X_pred, 'items'):
        # pandas Series / dict: keep the values, drop the index / keys.
        texts = [text for _, text in X_pred.items()]
    else:
        texts = list(X_pred)

    if not texts:
        # Keep the 2-D shape so downstream code sees a consistent rank even
        # for empty input (np.asarray([]) would be a 1-D float array).
        return np.empty((0, max_len), dtype=np.int64)

    encoded_rows = [
        tokenizer(text,
                  max_length=max_len,
                  padding='max_length',
                  truncation=True,
                  return_tensors='np')['input_ids'].reshape(max_len)
        for text in texts
    ]
    return np.asarray(encoded_rows)

In [6]:
def pred_to_lableText(predictions):
    """Convert per-class prediction scores into sentiment label names.

    The class with the highest score along the last axis is selected for each
    row and mapped to its text label.

    :param predictions: Per-class scores, one row per sample
    :return: Label name ('negative', 'neutral' or 'positive') for every row
    :rtype: list
    """
    # Mapping from class index to label name
    id_to_name = {0: 'negative', 1: 'neutral', 2: 'positive'}

    # Index of the highest-scoring class for each sample
    class_ids = np.argmax(predictions, axis=-1)

    # Translate every class index into its label name
    return list(map(id_to_name.__getitem__, class_ids))

## Load model

In [7]:
import gdown
import os
import shutil

# Google Drive ids of the files to fetch (per the recorded download log:
# model_weights.h5 and model.json).
file_ids = ['1-6tCaOfVAWIJ06PJ5tQzR5I42rqHDBSC', '1-3cssVqIvPW5zl-JgQj3hR_Gi_9Y4FK7']
output_dir = 'Model'

# Create the destination directory if it does not exist yet.
# exist_ok=True avoids the check-then-create race and keeps the cell re-runnable.
os.makedirs(output_dir, exist_ok=True)

for file_id in file_ids:
    url = f'https://drive.google.com/uc?id={file_id}'
    output = gdown.download(url, quiet=False)

    # Depending on the gdown version a failed download may return None instead
    # of raising; fail loudly here rather than with a TypeError in basename().
    if output is None:
        raise RuntimeError(f"Failed to download Google Drive file {file_id}.")

    filename = os.path.basename(output)
    # Only move the file when it is not already present in the destination
    if not os.path.exists(os.path.join(output_dir, filename)):
        shutil.move(output, output_dir)
    else:
        print(f"File {filename} already exists in the destination directory.")


Downloading...
From: https://drive.google.com/uc?id=1-6tCaOfVAWIJ06PJ5tQzR5I42rqHDBSC
To: /content/model_weights.h5
100%|██████████| 541M/541M [00:07<00:00, 68.0MB/s]
Downloading...
From: https://drive.google.com/uc?id=1-3cssVqIvPW5zl-JgQj3hR_Gi_9Y4FK7
To: /content/model.json
100%|██████████| 3.70k/3.70k [00:00<00:00, 14.8MB/s]


In [8]:
from transformers import TFRobertaModel
from tensorflow.keras.models import model_from_json

# Load from the same directory the download cell populated (`output_dir`)
# instead of a hard-coded absolute Colab path, so the two cells cannot drift
# apart and the notebook also works outside /content.
dir_model = os.path.abspath(output_dir)

# Open the JSON file and read the model architecture
with open(os.path.join(dir_model, 'model.json'), 'r') as json_file:
    model_json = json_file.read()

# Rebuild the model from JSON; TFRobertaModel must be registered as a custom
# object so Keras can deserialize the layer of that name.
model = model_from_json(model_json, custom_objects={'TFRobertaModel': TFRobertaModel})

# Load the trained weights into the model
model.load_weights(os.path.join(dir_model, 'model_weights.h5'))


## Prediction endpoint

In [9]:
import base64
from fastapi.responses import FileResponse


app = FastAPI()

@app.post("/uploadcsv/")
async def create_upload_files(files: List[UploadFile] = File(...)):
    """ API endpoint that labels the sentences of uploaded CSV files.

    Every uploaded file must be a UTF-8 CSV with a ``sentence`` column. Each
    sentence is run through the sentiment model and the predicted label is
    added as a ``label_text`` column. When several files are uploaded their
    labelled rows are concatenated in upload order.

    The result is built in memory rather than written to a shared file on
    disk, so concurrent requests cannot overwrite each other's output.

    :param files: Uploaded CSV files, defaults to File(...)
    :type files: List[UploadFile], optional
    :raises HTTPException: 400 if no file is given, a file is not a valid
        UTF-8 CSV, or a file has no ``sentence`` column
    :return: The labelled data as a CSV download named ``new_predictions.csv``
    :rtype: StreamingResponse
    """
    # Local import keeps this cell self-contained (HTTPException is not part
    # of the notebook's top-level fastapi import).
    from fastapi import HTTPException

    labelled_frames = []
    for file in files:
        contents = await file.read()  # Read the raw bytes of the CSV file
        try:
            # 'utf-8-sig' also accepts files that start with a UTF-8 BOM.
            df = pd.read_csv(io.StringIO(contents.decode('utf-8-sig')))
        except (UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
            raise HTTPException(
                status_code=400,
                detail=f"'{file.filename}' is not a valid UTF-8 CSV file: {exc}",
            ) from exc

        if 'sentence' not in df.columns:
            raise HTTPException(
                status_code=400,
                detail=f"'{file.filename}' has no 'sentence' column.",
            )

        if df.empty:
            # Nothing to predict; skip the model call for a header-only CSV.
            label_text = []
        else:
            # Missing cells are read as NaN (a float), which the tokenizer
            # cannot encode; treat them as empty sentences.
            sentences = df['sentence'].fillna('').astype(str)
            # NOTE: model.predict is a Keras call, so no torch.no_grad() is
            # needed around it.
            predictions = model.predict(to_input(sentences))
            label_text = pred_to_lableText(predictions)

        # Work on a copy so the parsed frame itself is left untouched
        df_new = df.copy()
        df_new['label_text'] = label_text
        labelled_frames.append(df_new)

    if not labelled_frames:
        raise HTTPException(status_code=400, detail="No CSV file was uploaded.")

    result = pd.concat(labelled_frames, ignore_index=True)
    payload = result.to_csv(index=False).encode('utf-8')

    # Return the CSV as a file download
    return StreamingResponse(
        io.BytesIO(payload),
        media_type='application/octet-stream',
        headers={'Content-Disposition': 'attachment; filename="new_predictions.csv"'},
    )


In [10]:
@app.get("/")
async def main() -> HTMLResponse:
    """Serve the home page with the CSV upload form.

    The page is a single static HTML document (inline CSS, Font Awesome icons
    loaded from a CDN). Its form posts the selected file as multipart form
    data, under the field name ``files``, to ``/uploadcsv/``.

    :return: HTML for homepage
    :rtype: HTMLResponse
    """

    # Static page markup; sent to the browser verbatim.
    content = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>SENTIMENT ANALYSIS</title>
        <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.3/css/all.min.css">
        <style>
            * {
                margin: 0;
                padding: 0;
                font-family: sans-serif;
            }

            .banner {
                animation: changeBackground 16s infinite;
            }
            .banner {
                width: 100%;
                height: 100vh;
                background-image: url(https://i.pinimg.com/564x/88/1f/0d/881f0db828e42e7e622072cd1a40eef1.jpg);
                background-size: cover;
                background-position: center;
                display: flex;
                align-items: center;
                justify-content: center;
                text-align: center;
                color: white;
                flex-direction: column;
            }
            nav {
                position: fixed;
                top: 0;
                left: 0;
                width: 100%;
                background-color: #333;
                padding: 5px;
            }

            .logo {
                float: left;
                margin-right: 10px;
            }

            nav ul {
                list-style-type: none;
                margin: 0;
                padding: 0;
                float: right;
                margin-top: 3px;
                margin-right: 10px;
            }

            nav ul li {
                display: inline;
                margin-left: 10px;
            }

            nav ul li a {
                color: #fff;
                text-decoration: none;
                padding: 5px;
            }

            .banner {
                margin-top: 40px;
            }
            .title {
                border: 5px solid #ff4655;
            }
            .banner h3 {
                color: white;
                background-color: #ff4655;
                padding: 10px 15px;
                font-size: 42px;
                background-clip: padding-box;
                border: 5px solid transparent;
            }


            .banner form {
                display: flex;
                flex-direction: column;
                align-items: center;
            }
            input[type="file"], input[type="number"] {
                font-family: "Arial", sans-serif;
                padding: 5px;
                font-size: 16px;
                border: 2px solid #ccc;
                background-color: #0c0b0b;
                margin-bottom: 5px;
                color: white;
            }
            input[type="number"] {
                border-radius: 12px;
            }
            input[type="submit"] {
                font-family: "Verdana", sans-serif;
                padding: 10px 20px;
                border: none;
                border-radius: 25px;
                background-color: #0f120f;
                border: 2px solid #e0e9e8;
                color: white;
                cursor: pointer;
                font-weight: bold;
                font-family: "Lato", sans-serif;
            }
            audio {
                display: none;
            }

            /* Style the custom audio player */
            .audio-player {
                background-color: #000;
                padding: 10px;
                color: #fff;
                display: none;
                border-radius: 5px;
                cursor: pointer;
            }
        </style>
    </head>
    <body>
        <div class="banner">
            <nav>
                <ul>
                    <li><i class="fab fa-facebook-f fa-2x" style="color: #3b5998;"></i></li>
                    <li><i class="fab fa-twitter fa-2x" style="color: #55acee;"></i></li>
                    <li><i class="fab fa-google fa-2x" style="color: #dd4b39;"></i></li>
                    <li><i class="fab fa-instagram fa-2x" style="color: #ac2bac;"></i></li>
                </ul>
            </nav>
            <div class="title">
                <h3>SENTIMENT ANALYSIS</h3>
            </div>

            <form action="/uploadcsv/" enctype="multipart/form-data" method="post">
                <input name="files" type="file" accept=".csv">
                <input type="submit" value="Upload CSV">
            </form>

        </div>
    </body>


    </html>

      """
    return HTMLResponse(content=content)

# Setup the server

In [11]:
# Since we can't access Colab notebooks IP directly we'll use
# ngrok to create a public URL for the server via a tunnel

# Authenticate ngrok
# https://dashboard.ngrok.com/signup
# Then go to the "Your Authtoken" tab in the sidebar and copy the API key
import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: never hard-code the token in the notebook. Read it from the
# NGROK_AUTH_TOKEN environment variable, or prompt for it without echoing so
# it is not stored in the saved notebook. Any token that was previously
# committed in this cell should be revoked in the ngrok dashboard.
auth_token = (os.environ.get("NGROK_AUTH_TOKEN") or getpass("ngrok authtoken: ")).strip()
if not auth_token:
    raise ValueError("An ngrok authtoken is required to open the tunnel.")

# Register the token through pyngrok instead of interpolating it into a
# shell command line.
ngrok.set_auth_token(auth_token)

0

In [12]:

from pyngrok import ngrok

# Open an HTTPS tunnel to local port 8000 (the port the server cell's log
# reports uvicorn listening on).
public_url = ngrok.connect(addr=8000, bind_tls=True)

In [13]:
# Check if it exists
!ps aux | grep ngrok

root        1277  0.0  0.0   7372  3548 ?        S    07:47   0:00 /bin/bash -c ps aux | grep ngrok
root        1279  0.0  0.0   6480  2268 ?        S    07:47   0:00 grep ngrok


# Make web public

In [None]:
import nest_asyncio
import uvicorn

# Allow for asyncio to work within the Jupyter notebook cell
nest_asyncio.apply()

# Show the tunnel details, then serve the FastAPI app with uvicorn.
# Host and port are spelled out (they are uvicorn's defaults) so it is
# visible that the server listens where the ngrok tunnel points.
print(public_url)
uvicorn.run(app, host="127.0.0.1", port=8000)

INFO:     Started server process [691]
INFO:uvicorn.error:Started server process [691]
INFO:     Waiting for application startup.
INFO:uvicorn.error:Waiting for application startup.
INFO:     Application startup complete.
INFO:uvicorn.error:Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)
INFO:uvicorn.error:Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


NgrokTunnel: "https://9690-35-188-32-195.ngrok-free.app" -> "http://localhost:8000"


In [None]:
# Kill tunnel
# `ngrok.connect` returned an NgrokTunnel object (see the printed
# `NgrokTunnel: "https://..." -> ...`), while `ngrok.disconnect` identifies
# tunnels by their URL string. Passing the object itself matches no tunnel,
# so the tunnel would be left open. Pass the URL; the getattr fallback keeps
# this working if `public_url` is already a plain string.
ngrok.disconnect(public_url=getattr(public_url, "public_url", public_url))