# In [None]:
import os
from collections import Counter
from io import StringIO

import nest_asyncio
import pandas as pd
import uvicorn
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.requests import Request
from fastapi.responses import FileResponse, HTMLResponse
from fastapi.templating import Jinja2Templates
from konlpy.tag import Okt
from sklearn.feature_extraction.text import TfidfVectorizer

# Configure the event loop so it can be run multiple times inside a Jupyter Notebook
nest_asyncio.apply()

# Initialize the FastAPI application
app = FastAPI()

# Configure Jinja2 templates (looked up in the "templates" directory)
templates = Jinja2Templates(directory="templates")

# Initialize the morphological analyzer (shared by every call to extract_nouns)
okt = Okt()

# Morphological analysis and noun extraction
def extract_nouns(text):
    """Return the nouns found in *text* as a single space-separated string.

    Args:
        text: The text to analyze. Anything that is not a non-blank string
            (``None``, NaN from an empty CSV cell, ``""``) is treated as
            containing no nouns.

    Returns:
        The words that the module-level ``okt`` tagger labels ``'Noun'``,
        joined with single spaces, in their original order. An empty string
        when there is nothing to analyze.
    """
    # pandas represents empty CSV cells as NaN (a float); passing that to the
    # tagger would raise, so short-circuit before touching the analyzer.
    if not isinstance(text, str) or not text.strip():
        return ''

    # okt.pos yields (word, tag) pairs; keep only the words tagged as nouns.
    return ' '.join(word for word, pos in okt.pos(text) if pos == 'Noun')

# Stopword list generation
def generate_stopwords(nouns, top_ratio=0.20, bottom_ratio=0.20):
    """Build a stopword list from the most and least frequent nouns.

    Args:
        nouns: Whitespace-separated nouns (the whole corpus joined together).
        top_ratio: Fraction of the distinct nouns, taken from the most
            frequent end, to treat as stopwords. Defaults to 0.20.
        bottom_ratio: Fraction of the distinct nouns, taken from the least
            frequent end, to treat as stopwords. Defaults to 0.20.

    Returns:
        A list of nouns: first the most frequent ones (most frequent first),
        then the least frequent ones (rarest first). Counts are rounded down,
        so a small vocabulary may yield an empty list.
    """
    # Rank once; most_common() sorts by count, descending, keeping
    # first-seen order for ties.
    ranked = Counter(nouns.split()).most_common()
    vocab_size = len(ranked)
    top_count = max(0, int(vocab_size * top_ratio))
    bottom_count = max(0, int(vocab_size * bottom_ratio))

    stopwords = [noun for noun, _ in ranked[:top_count]]
    if bottom_count:
        # Walk backwards from the rarest noun.
        stopwords += [noun for noun, _ in ranked[:-bottom_count - 1:-1]]
    return stopwords

# Noun extraction with stopwords removed
def filter_nouns(nouns, stopwords):
    """Drop every stopword from a space-separated noun string.

    Args:
        nouns: Whitespace-separated nouns.
        stopwords: Iterable of nouns to remove.

    Returns:
        The remaining nouns, in their original order, joined with single
        spaces.
    """
    # Build the lookup set once so each membership test is O(1) instead of a
    # linear scan over the stopword list for every token.
    blocked = set(stopwords)
    return ' '.join(noun for noun in nouns.split() if noun not in blocked)

@app.get("/", response_class=HTMLResponse)
async def read_root(request: Request):
    """Render the landing page from the ``index.html`` template."""
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)

@app.post("/extract-nouns/", response_class=HTMLResponse)
async def extract_nouns_endpoint(request: Request, file: UploadFile = File(...)):
    """Extract nouns from an uploaded CSV and render a preview page.

    The upload must be a UTF-8 CSV with ``Store_Name`` and ``Content``
    columns. Nouns are extracted from every ``Content`` cell, the
    ``Store_Name``/``nouns`` pair is written to a CSV under ``./csv``, and
    ``result.html`` is rendered with a truncated preview table, a static code
    snippet and the path of the saved file.

    Raises:
        HTTPException: 400 if the upload cannot be decoded or parsed as CSV,
            or if a required column is missing.
    """
    content = await file.read()
    try:
        # 'utf-8-sig' reads plain UTF-8 too, but also strips the BOM that
        # spreadsheet exports prepend; otherwise the first column name would
        # be '\ufeffStore_Name' and the lookups below would fail.
        data = pd.read_csv(StringIO(content.decode('utf-8-sig')))
    except (UnicodeDecodeError, pd.errors.ParserError, pd.errors.EmptyDataError) as exc:
        raise HTTPException(status_code=400, detail="Upload must be a UTF-8 encoded CSV file.") from exc

    missing = [col for col in ('Store_Name', 'Content') if col not in data.columns]
    if missing:
        raise HTTPException(status_code=400, detail="Missing required column(s): " + ', '.join(missing))

    # Empty cells are read as NaN; normalize to strings before tagging.
    data['nouns'] = data['Content'].fillna('').astype(str).apply(extract_nouns)
    # Copy so that adding the preview column below modifies an independent
    # frame rather than a slice of `data` (avoids SettingWithCopyWarning).
    result_data = data[['Store_Name', 'nouns']].copy()

    # Save the result file (before the preview column is added, so the CSV
    # contains only the full-length nouns).
    output_file_path = './csv/스타벅스명사추출결과테스트.csv'
    os.makedirs(os.path.dirname(output_file_path), exist_ok=True)
    result_data.to_csv(output_file_path, index=False)

    # Shorten long noun strings to 100 characters for the on-page preview.
    result_data['nouns_short'] = result_data['nouns'].apply(
        lambda x: x[:100] + '...' if len(x) > 100 else x
    )

    # Cell values come from the uploaded file, so they must be HTML-escaped.
    result_html = result_data[['Store_Name', 'nouns_short']].to_html(escape=True, index=False)
    # Static snippet shown on the result page; it is display text only and is
    # never executed by this endpoint.
    code_html = '''
    <pre>
    {code}
    </pre>
    '''.format(code='''
import pandas as pd
from konlpy.tag import Okt

# CSV 파일 로드
file_path = './csv/스타벅스블로그본문.csv'
data = pd.read_csv(file_path)
data = data.head(2)

# 형태소 분석기 초기화
okt = Okt()

# 형태소 분석 및 명사 추출 함수
def extract_nouns(text):
    tokens = okt.pos(text)
    nouns = [word for word, pos in tokens if pos in ['Noun']]
    return ' '.join(nouns)

# `Content` 컬럼에서 명사 추출
data['nouns'] = data['Content'].apply(extract_nouns)

# 결과 저장
output_file_path = './csv/스타벅스명사추출결과테스트.csv'
data.to_csv(output_file_path, index=False)
    '''.strip())

    return templates.TemplateResponse("result.html", {"request": request, "result": result_html, "code": code_html, "file_path": output_file_path})

@app.post("/generate-stopwords/", response_class=HTMLResponse)
async def generate_stopwords_endpoint(request: Request, file: UploadFile = File(...)):
    """Remove corpus-derived stopwords from an uploaded noun CSV and render a preview.

    The upload must be a UTF-8 CSV with ``Store_Name`` and ``nouns`` columns
    (``nouns`` being space-separated words). Stopwords are derived from all
    rows combined via ``generate_stopwords``, removed from each row, and the
    ``Store_Name``/``filtered_nouns`` pair is written to a CSV under
    ``./csv``. ``result.html`` is then rendered with a truncated preview
    table, a static code snippet and the path of the saved file.

    Raises:
        HTTPException: 400 if the upload cannot be decoded or parsed as CSV,
            or if a required column is missing.
    """
    content = await file.read()
    try:
        # 'utf-8-sig' reads plain UTF-8 too, but also strips a leading BOM so
        # the first column name is matched correctly.
        data = pd.read_csv(StringIO(content.decode('utf-8-sig')))
    except (UnicodeDecodeError, pd.errors.ParserError, pd.errors.EmptyDataError) as exc:
        raise HTTPException(status_code=400, detail="Upload must be a UTF-8 encoded CSV file.") from exc

    missing = [col for col in ('Store_Name', 'nouns') if col not in data.columns]
    if missing:
        raise HTTPException(status_code=400, detail="Missing required column(s): " + ', '.join(missing))

    # A row whose noun string was empty is read back from CSV as NaN (a
    # float); str.join and str.split would both fail on it, so normalize.
    data['nouns'] = data['nouns'].fillna('').astype(str)

    all_nouns = ' '.join(data['nouns'])
    stopwords = generate_stopwords(all_nouns)
    data['filtered_nouns'] = data['nouns'].apply(lambda x: filter_nouns(x, stopwords))
    # Copy so that adding the preview column below modifies an independent
    # frame rather than a slice of `data` (avoids SettingWithCopyWarning).
    result_data = data[['Store_Name', 'filtered_nouns']].copy()

    # Save the result file (before the preview column is added).
    output_file_path = './csv/스타벅스키워드추천_결과테스트.csv'
    os.makedirs(os.path.dirname(output_file_path), exist_ok=True)
    result_data.to_csv(output_file_path, index=False)

    # Shorten long filtered strings to 100 characters for the on-page preview.
    result_data['filtered_nouns_short'] = result_data['filtered_nouns'].apply(
        lambda x: x[:100] + '...' if len(x) > 100 else x
    )

    # Cell values come from the uploaded file, so they must be HTML-escaped.
    result_html = result_data[['Store_Name', 'filtered_nouns_short']].to_html(escape=True, index=False)
    # Static snippet shown on the result page; it is display text only and is
    # never executed by this endpoint.
    # NOTE(review): the snippet uses a 0.01 ratio and never assigns
    # `stopwords`, so it does not match what this endpoint actually runs --
    # confirm which version is intended before editing the text.
    code_html = '''
    <pre>
    {code}
    </pre>
    '''.format(code='''
import pandas as pd
from collections import Counter
from konlpy.tag import Okt

# CSV 파일 로드
file_path = './csv/스타벅스명사추출결과테스트.csv'
data = pd.read_csv(file_path)

# 형태소 분석기 초기화
okt = Okt()

# 형태소 분석 및 명사 추출 함수
def extract_nouns(text):
    tokens = okt.pos(text)
    nouns = [word for word, pos in tokens if pos in ['Noun']]
    return nouns

# 불용어 목록 생성 함수
def generate_stopwords(nouns):
    noun_counts = Counter(nouns)
    total_nouns = len(noun_counts)
    top_1_percent = int(total_nouns * 0.01)
    bottom_1_percent = int(total_nouns * 0.01)
    
    stopwords = [noun for noun, count in noun_counts.most_common(top_1_percent)]
    stopwords += [noun for noun, count in noun_counts.most_common()[:-bottom_1_percent-1:-1]]
    return stopwords

# 불용어 제거된 명사 추출 함수
def filter_nouns(nouns, stopwords):
    return ' '.join([noun for noun in nouns if noun not in stopwords])

data['filtered_nouns'] = data['nouns'].apply(lambda x: filter_nouns(x, stopwords))

# 결과 저장
output_file_path = './csv/스타벅스키워드추천_결과테스트.csv'
data.to_csv(output_file_path, index=False)
    '''.strip())

    return templates.TemplateResponse("result.html", {"request": request, "result": result_html, "code": code_html, "file_path": output_file_path})

@app.get("/download/")
async def download_file(file_path: str):
    """Send a previously generated result file as a download.

    Args:
        file_path: Path of the file to download, as supplied in the query
            string. Only files located inside the ``./csv`` output directory
            are served.

    Raises:
        HTTPException: 404 if the path points outside ``./csv`` or does not
            refer to an existing file.
    """
    # file_path is client-controlled: resolve it (following '..' and
    # symlinks) and refuse anything outside the output directory, otherwise
    # any file readable by the server process could be downloaded.
    allowed_root = os.path.realpath('./csv')
    requested = os.path.realpath(file_path)
    inside_root = requested.startswith(allowed_root + os.sep)
    if not inside_root or not os.path.isfile(requested):
        raise HTTPException(status_code=404, detail="File not found.")

    return FileResponse(requested, media_type='application/octet-stream', filename=os.path.basename(requested))

# Run the FastAPI server (only when executed as a script, not on import);
# host "0.0.0.0" accepts connections on every network interface, port 8000.
if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)
