- 2 Stage
- 데이터를 만들고 Zip형태로 생성
- Zip을 Lambda에서 다루는 형태로
    - 이렇게 하는 이유는 의존성 Library를 최대한 줄이기 위함

# First Stage

In [1]:
import os
import datetime
from pathlib import Path

import nltk
import numpy as np
import pandas as pd
import seaborn as sns
from wordcloud import WordCloud

import matplotlib as mpl
from konlpy.tag import Twitter
import matplotlib.pyplot as plt
import matplotlib.font_manager as fm

In [2]:
# 그래프를 노트북 안에 그리기 위해 설정
%matplotlib inline

# 그래프에서 마이너스 폰트 깨지는 문제에 대한 대처
mpl.rcParams['axes.unicode_minus'] = False

font_dirs = ['../static/NanumBarunGothic.ttf']
font_files = fm.findSystemFonts(fontpaths=font_dirs)

for font_file in font_files:
    fm.fontManager.addfont(font_file)

sns.set(font_scale=1.2)
# set font
plt.rcParams['font.family'] = 'NanumBarunGothic'

In [80]:
import io
import zipfile

# Hard-coded follower counts per platform; the profile each figure refers to
# is noted beside it.
FOLLOWERS = {
    'instagram': 4703,  # https://www.instagram.com/kimdongyeon_dy/
    'youtube': 6590,  # https://www.youtube.com/channel/UCDBTesvGqBsob4BsOc41hqA
    'facebook': 13000,  # https://www.facebook.com/DY.AfterYou/
}


def read_zipfile(zipfile_path: str):
    """Load every CSV member of a zip archive into in-memory buffers.

    Members whose name starts with ``__MACOSX`` are skipped. Each remaining
    member ending in ``.csv`` is stored under a key derived from its base file
    name: extension removed, lower-cased, spaces replaced by underscores
    (e.g. ``"Archive/Table Data.csv"`` -> ``"table_data"``). If two members
    map to the same key, the later one wins.

    Args:
        zipfile_path: Path of the zip archive to read.

    Returns:
        dict mapping the normalised member name to an ``io.BytesIO`` holding
        that member's raw bytes.
    """
    suffix = ".csv"
    datas = dict()

    # The context manager closes the archive handle even if reading a member
    # raises; the original left the ZipFile open.
    with zipfile.ZipFile(zipfile_path) as archive:
        for name in archive.namelist():
            if name.startswith("__MACOSX") or not name.endswith(suffix):
                continue
            base_name = name.split("/")[-1]
            # Strip exactly the matched extension before normalising the key.
            key = base_name[:-len(suffix)].lower().replace(" ", "_")
            datas[key] = io.BytesIO(archive.read(name))

    return datas

def _prep_datalab(datas: dict):
    datas['datalab'] = pd.read_csv(datas["datalab"], skiprows=7, names=['Date', "버즈량"])
    

def _prep_fb_insights(datas: dict):
    """Build cumulative Facebook and interpolated Instagram follower tables.

    Reads the ``fb_insights`` CSV source (columns ``Date`` and ``Counts``) and
    stores two DataFrames back into ``datas``:

    * ``facebook_follower`` -- ``Date``, ``facebook_Counts`` (running total of
      ``Counts`` shifted so the last row equals ``FOLLOWERS['facebook']``) and
      ``facebook_Texts`` (the counts as strings).
    * ``instagram_follower`` -- the same ``Date`` values with
      ``instagram_Counts`` spaced evenly from 1518 up to
      ``FOLLOWERS['instagram']`` (truncated to int) and ``instagram_Texts``.

    Args:
        datas: Mapping holding the ``fb_insights`` source; modified in place.
    """
    fb = pd.read_csv(datas['fb_insights'],
                     usecols=['Date', 'Counts'],
                     parse_dates=['Date'])

    # ``usecols`` ignores the order of its list, so the columns arrive in file
    # order. Select and rename by label instead of assigning names
    # positionally, which would mislabel them if ``Counts`` precedes ``Date``.
    fb = fb[['Date', 'Counts']].rename(columns={'Counts': 'facebook_Counts'})

    # Discard the row labelled 0 -- presumably a non-data description row in
    # the export; TODO confirm against the source file.
    fb = fb.drop(0)

    fb = fb.fillna(0)
    fb['facebook_Counts'] = fb['facebook_Counts'].map(int)

    # Anchor the running total so that the final value matches the known
    # follower count.
    facebook_follower_start = FOLLOWERS['facebook'] - fb['facebook_Counts'].sum()
    fb['facebook_Counts'] = fb['facebook_Counts'].cumsum() + facebook_follower_start
    fb['facebook_Texts'] = fb['facebook_Counts'].astype(str)
    datas['facebook_follower'] = fb

    # Instagram followers: no per-day data is read here, so the counts are
    # spread linearly over the same dates. 1518 is presumably the follower
    # count at the start of the period -- TODO confirm.
    ig = fb[['Date']].copy()
    ig['instagram_Counts'] = np.linspace(1518, FOLLOWERS['instagram'], len(ig)).astype(int)
    ig['instagram_Texts'] = ig['instagram_Counts'].astype(str)
    datas['instagram_follower'] = ig
    
def _prep_table_data(datas: dict):
    datas['youtube_table'] = pd.read_csv(datas["table_data"]) # 언제 쓰임?

def _prep_totals(datas: dict):
    """Build the cumulative YouTube follower table from the ``totals`` CSV.

    Stores a DataFrame under ``datas['youtube_follower']`` with columns
    ``Date``, ``youtube_Counts`` (running total of ``Subscribers`` shifted so
    the last row equals ``FOLLOWERS['youtube']``) and ``youtube_Texts`` (the
    counts as strings).

    Args:
        datas: Mapping holding the ``totals`` source; modified in place.
    """
    yt = pd.read_csv(datas['totals'], parse_dates=['Date'])

    # Select and rename by label rather than overwriting ``columns``
    # positionally, which only worked when the file held exactly these two
    # columns in this order.
    yt = yt[['Date', 'Subscribers']].rename(columns={'Subscribers': 'youtube_Counts'})

    # Anchor the running total so that the final value matches the known
    # follower count.
    s_follower = FOLLOWERS['youtube'] - yt['youtube_Counts'].sum()
    yt['youtube_Counts'] = yt['youtube_Counts'].cumsum() + s_follower
    yt['youtube_Texts'] = yt['youtube_Counts'].astype(str)
    datas['youtube_follower'] = yt

def preprocessed(datas: dict):
    """Run every preparation step on ``datas`` and return the same mapping.

    The steps run in order: datalab, Facebook insights, table data, totals.
    Each one mutates ``datas`` in place.
    """
    steps = (_prep_datalab, _prep_fb_insights, _prep_table_data, _prep_totals)
    for step in steps:
        step(datas)
    return datas

def make_bardatas(datas):
    """Combine the three follower tables into one frame keyed on ``Date``.

    The YouTube, Facebook and Instagram follower frames are outer-merged on
    ``Date`` in that order, then every row containing a missing value is
    dropped, so only dates present in all three tables remain.

    Returns:
        The merged DataFrame.
    """
    follower_keys = ('youtube_follower', 'facebook_follower', 'instagram_follower')

    # Start from an empty frame that only carries the join key.
    combined = pd.DataFrame([], columns=['Date'])
    for key in follower_keys:
        combined = combined.merge(datas[key], on="Date", how='outer')

    return combined.dropna()

In [81]:
# Load the CSV members of the archive into in-memory buffers.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
datas = read_zipfile("/Users/gugeonmo/Data/Archive.zip")

In [82]:
# Run all preparation steps; preprocessed() mutates and returns the same dict.
datas = preprocessed(datas)

In [83]:
# Merge the YouTube/Facebook/Instagram follower tables on Date.
bar_df = make_bardatas(datas)

**여기까지 하고 우선 마무리**

# Second Stage