## 데이터 분석 및 전처리

In [None]:
import sqlite3
import json

import pandas as pd

In [None]:
# Open the SQLite database file at data/dash.db; the read_sql_query calls
# below all go through this connection.
# NOTE(review): the connection is never closed in the cells visible here.
conn = sqlite3.connect('data/dash.db')
# Cursor on the same connection; no cell visible in this section uses it.
cur = conn.cursor()

In [None]:
# Load each source table in full into its own DataFrame.
# The generator is consumed in order, so the names on the left line up with
# the queries listed below one-to-one.
df_stats, df_discord, df_twitter, df_youtube = (
    pd.read_sql_query(query, conn)
    for query in (
        "SELECT * FROM tb_nfttown",
        "SELECT * FROM tb_discord_statistics",
        "SELECT * FROM tb_twitter_statistics",
        "SELECT * FROM tb_youtube_statistics",
    )
)

In [None]:
# Display the tb_nfttown rows exactly as loaded, before any cleanup.
df_stats

In [None]:
# Order the rows by the raw check date, oldest first (ascending is the default).
df_stats = df_stats.sort_values(by="checkdate")

In [None]:
# Remove the social / brezel counter columns from the stats frame; they are
# not part of any output produced from df_stats further down.
df_stats.drop(
    columns=[
        "twitter_followers_count",
        "discord_member_count",
        "brezel_count",
        "totalbrezel_count",
        "insta_member_count",
    ],
    inplace=True,
)

In [None]:
# Normalise the remaining column names to snake_case.
df_stats = df_stats.rename(
    columns=dict(
        checkdate="check_date",
        newUsers="new_users",
        active1DayUsers="active_1day_users",
        active7DayUsers="active_7day_users",
        active28DayUsers="active_28day_users",
        totalUsers="total_users",
        totalTownUsers="total_town_users",
        newTownUsers="new_town_users",
    )
)

In [None]:
# Re-express check_date: parse the compact YYYYMMDD values, then store them
# back as YYYY-MM-DD strings.
_parsed_check_dates = pd.to_datetime(df_stats["check_date"], format="%Y%m%d")
df_stats["check_date"] = _parsed_check_dates.dt.strftime("%Y-%m-%d")


In [None]:
# Display the stats frame after sorting, column cleanup and date formatting.
df_stats

In [None]:
# Discord statistics: sort by date, rename the date column, store it as
# YYYY-MM-DD strings, and make both member counts integers.
df_discord = df_discord.sort_values(by="checkdate").rename(
    columns={"checkdate": "check_date"}
)
df_discord["check_date"] = pd.to_datetime(
    df_discord["check_date"], format="%Y%m%d"
).dt.strftime("%Y-%m-%d")
df_discord = df_discord.astype({"members": int, "online_members": int})

df_discord

In [None]:
# Twitter statistics: same normalisation as the Discord and YouTube frames.
# Previously this frame kept a datetime64 `checkdate` column while every other
# frame exposes a `check_date` column of YYYY-MM-DD strings; aligning it here
# keeps the exported CSVs on one schema.
df_twitter.sort_values(by=["checkdate"], ascending=True, inplace=True)
df_twitter["checkdate"] = pd.to_datetime(df_twitter["checkdate"], format="%Y%m%d").dt.strftime("%Y-%m-%d")
df_twitter.rename(
    columns={
        "checkdate": "check_date",
    },
    inplace=True,
)
df_twitter

In [None]:
# YouTube statistics: sort by date, store the date as YYYY-MM-DD strings,
# drop `outdegree`, and expose `indegree` under the name `subscriber`.
df_youtube = df_youtube.sort_values(by="checkdate")
df_youtube["checkdate"] = pd.to_datetime(
    df_youtube["checkdate"], format="%Y%m%d"
).dt.strftime("%Y-%m-%d")
df_youtube = df_youtube.drop(columns="outdegree").rename(
    columns={"checkdate": "check_date", "indegree": "subscriber"}
)

df_youtube

In [None]:
# Write each cleaned frame to its v1 CSV file, without the index column.
for _frame, _csv_path in (
    (df_stats, "output/csv/nfttown_v1.csv"),
    (df_discord, "output/csv/discord_v1.csv"),
    (df_twitter, "output/csv/twitter_v1.csv"),
    (df_youtube, "output/csv/youtube_v1.csv"),
):
    _frame.to_csv(_csv_path, index=False)

## 적재 데이터 변환

In [None]:
# User-activity columns for the google_analytics export.
# .copy() makes df_ga an independent frame: the next cell assigns to its
# columns, which on a plain column slice of df_stats raises
# SettingWithCopyWarning.
df_ga = df_stats[
    [
        "check_date",
        "new_users",
        "active_1day_users",
        "active_7day_users",
        "active_28day_users",
        "total_users",
    ]
].copy()
df_ga

In [None]:
# Cast every user-count column to int in one pass.
# astype returns a new frame, so rebinding df_ga avoids assigning into what may
# still be a slice of df_stats (the source of SettingWithCopyWarning).
df_ga = df_ga.astype(
    {
        "active_1day_users": int,
        "active_7day_users": int,
        "active_28day_users": int,
        "total_users": int,
        "new_users": int,
    }
)

In [None]:
# Write one JSON file per row, named after that row's check_date.
# The inner quotes are single quotes: reusing the f-string's own double quotes
# inside the braces is a SyntaxError on Python versions before 3.12.
for item in df_ga.to_dict(orient="records"):
    with open(f"output/google_analytics/{item['check_date']}.json", "w", encoding="utf-8") as f:
        json.dump(item, f, indent=4)

In [None]:
# Town-user columns for the town_users export, with both counts cast to int.
# Selecting and casting in a single expression yields a fresh frame, so no
# column is ever assigned into a slice of df_stats (which would raise
# SettingWithCopyWarning).
df_town = df_stats[["check_date", "total_town_users", "new_town_users"]].astype(
    {"total_town_users": int, "new_town_users": int}
)

In [None]:
# Display the town-user frame after the integer casts.
df_town

In [None]:
# seaborn is imported here, in the only cell that plots.
import seaborn as sns

# Line chart of total_town_users against check_date on a white-grid theme.
# NOTE(review): check_date holds strings at this point, not datetimes — confirm
# the x-axis is rendered the way you expect.
sns.set_theme(style="whitegrid")
sns.lineplot(data=df_town, x="check_date", y="total_town_users")

In [None]:
# Write one JSON file per row, named after that row's check_date.
# The inner quotes are single quotes: reusing the f-string's own double quotes
# inside the braces is a SyntaxError on Python versions before 3.12.
for item in df_town.to_dict(orient="records"):
    with open(f"output/town_users/{item['check_date']}.json", "w", encoding="utf-8") as f:
        json.dump(item, f, indent=4)

In [None]:
# Write one JSON file per row, named after that row's check_date.
# The inner quotes are single quotes: reusing the f-string's own double quotes
# inside the braces is a SyntaxError on Python versions before 3.12.
for item in df_youtube.to_dict(orient="records"):
    with open(f"output/youtube/{item['check_date']}.json", "w", encoding="utf-8") as f:
        json.dump(item, f, indent=4)

In [None]:
# Write one JSON file per row, named after that row's check_date.
# The inner quotes are single quotes: reusing the f-string's own double quotes
# inside the braces is a SyntaxError on Python versions before 3.12.
for item in df_discord.to_dict(orient="records"):
    with open(f"output/discord/{item['check_date']}.json", "w", encoding="utf-8") as f:
        json.dump(item, f, indent=4)