In [6]:
import os
from functools import reduce
from collections import OrderedDict, defaultdict

import requests
from requests.exceptions import ConnectionError, ReadTimeout
from requests.exceptions import ContentDecodingError, TooManyRedirects
import pandas as pd

from db.mongo import MyMongo

# Show full cell contents (long quotes / URLs) instead of truncating them.
# `None` means "no limit"; the older `-1` sentinel was deprecated in pandas 1.0
# and is rejected by current pandas releases.
pd.set_option('display.max_colwidth', None)

In [7]:
# Load the source rows into `df_top5` through the project's Mongo helper, used
# as a context manager so the connection is released when the block exits.
# NOTE(review): the two string arguments are presumably the database and
# collection names — confirm against db.mongo.MyMongo.get_df_from_table.
# Later cells read the columns '날짜', '국회의원', '인용문', '언론사',
# 'url_hankyung' and '인용 횟수' from this frame.
with MyMongo() as db:
    df_top5 = db.get_df_from_table('audit_quote', 'top_5_quote_per_day')

<--Mongo Connected.
Mongo Connection Closed.-->


In [10]:
def div_center(s):
    """Wrap `s` in a `<div>` whose inline style centres its text."""
    template = '<div style="text-align:center">{}</div>'
    return template.format(s)

def br_n(n):
    """Return `n` consecutive `<br>` tags (empty string when `n` <= 0)."""
    line_break = '<br>'
    return line_break * n

def a_link(s, href):
    """Return an HTML anchor that shows `s` and points at `href`.

    The attribute value is wrapped in double quotes and HTML-escaped so that
    URLs containing whitespace, quotes, `<`/`>` or `&` still produce
    well-formed markup. `s` is inserted verbatim because callers may pass
    already-formatted HTML as the link text.
    """
    import html  # local import keeps this helper self-contained

    safe_href = html.escape(str(href), quote=True)
    return f'<a href="{safe_href}">{s}</a>'

def span_underline(s):
    """Wrap `s` in an HTML `<u>` (underline) element."""
    return '<u>{}</u>'.format(s)

def parse_date(date):
    """Split a `YYYYMMDD` string into tab-separated year, month and day.

    Leading zeros are stripped from the day only; the month keeps its two
    digits. The result ends with two tab characters.
    """
    year = date[:4]
    month = date[4:6]
    day = date[6:8].lstrip('0')
    return '\t'.join((year, month, day)) + '\t\t'

def display_date(date):
    """Render a `YYYYMMDD` string as a centred Korean date label.

    The label has the form `YYYY년 MM월 D일` (leading zeros stripped from the
    day only), is wrapped by `div_center`, and is followed by two tabs.
    """
    year, month, day = date[:4], date[4:6], date[6:8].lstrip('0')
    label = f'{year}년 {month}월 {day}일'
    return div_center(label) + '\t\t'

def quote_count(n):
    """Format a citation count as ` (N회)`, including the leading space."""
    return ' ({}회)'.format(n)

def quote_source(c):
    """Format a source name as ` (name)`, including the leading space."""
    return ' ({})'.format(c)

def add_period(s):
    """Return `s` terminated by a single trailing period.

    A string that already ends with `.` is returned unchanged. An empty
    string is also returned unchanged: indexing `s[-1]` would raise
    `IndexError`, and a lone `.` would be meaningless output.
    """
    if not s or s.endswith('.'):
        return s
    return s + '.'

In [12]:
# Group the rows by their '날짜' (date) value, preserving first-seen order.
d = defaultdict(list)
for _idx, row in df_top5.iterrows():
    d[row['날짜']].append(row)

# Build one output line per date: date columns, display label, then every
# quote for that day concatenated into a single HTML body.
lines = []
for date, row_list in d.items():
    # The date columns are emitted twice.
    # NOTE(review): presumably a start-date / end-date column pair expected by
    # the consumer of the TSV — confirm before changing.
    header = parse_date(date) * 2 + br_n(1) + display_date(date)

    entries = []
    for row in row_list:
        name = row['국회의원']
        said = add_period(row['인용문'])
        company = row['언론사']
        href = row['url_hankyung']
        cnt = quote_count(row['인용 횟수'])
        source = quote_source(company)
        # Only quotes attributed to '한국경제' become underlined links.
        if company == '한국경제':
            said = span_underline(a_link(said, href))
        entries.append(name + cnt + br_n(1) + said + source + br_n(2))

    lines.append(header + ''.join(entries) + '\n')

result = ''.join(lines)

result

# Destination of the generated TSV.
# NOTE(review): this is a machine-specific absolute path; the cell fails on
# any other machine unless `data_dir` is edited.
data_dir = '/Users/jake/OneDrive - leverage innovative users/Documents/News_Item/Audit_quote/'
file = 'result.tsv'

# Write explicitly as UTF-8: the payload contains Korean text, and without an
# explicit encoding `open` falls back to a platform-dependent default.
with open(os.path.join(data_dir, file), 'w', encoding='utf-8') as f:
    f.write(result)