In [None]:
import sqlite3
import pandas as pd
import koreanize_matplotlib

# Load every row of the `books` table from the local SQLite file into a DataFrame.
# try/finally guarantees the connection is closed even when the query raises
# (using a sqlite3 connection as a context manager only manages the
# transaction; it does not close the connection).
conn = sqlite3.connect('yes24_books_it.db')
try:
    df = pd.read_sql_query("SELECT * FROM books", conn)
finally:
    conn.close()

In [None]:
# Show the DataFrame loaded from the `books` table via the notebook's rich display.
df

### 전처리 및 파생변수 만들기

In [None]:
# Convert the rating column to a numeric dtype; entries that cannot be parsed
# as numbers become NaN instead of raising.
rating_numeric = pd.to_numeric(df["rating"], errors="coerce")
df["rating"] = rating_numeric

In [None]:
# Split the publication-date text into numeric year and month columns.
# One vectorized regex extract replaces the per-row split: rows whose pub_date
# is missing or does not contain "<digits>년 <digits>월" produce NaN rather than
# raising AttributeError/IndexError. Both columns stay float so NaN fits.
pub_date_parts = df["pub_date"].str.extract(r"(?P<year>\d+)\s*년\s*(?P<month>\d+)\s*월")
df["pub_year"] = pub_date_parts["year"].astype(float)
df["pub_month"] = pub_date_parts["month"].astype(float)

In [None]:
# Build a sortable year-month key from pub_date: first strip the "년" and "월"
# markers, then turn each remaining space into a hyphen.
pub_date_without_markers = df["pub_date"].str.replace("(년|월)", "", regex=True)
df["pub_year_month"] = pub_date_without_markers.str.replace(" ", "-")

### 최근 도서의 출판 연월별 도서 수

In [None]:
# Keep only books whose publication year is after 2010, then draw a line chart
# of how many of them fall under each year-month key (sorted by key).
is_after_2010 = df["pub_year"] > 2010
df_recent = df.loc[is_after_2010]
monthly_counts = df_recent["pub_year_month"].value_counts().sort_index()
monthly_counts.plot(figsize=(12, 4))

In [None]:
# Count books per (publication year, publication month), newest year first.
pub_ym = pd.crosstab(index=df["pub_year"], columns=df["pub_month"])
pub_ym = pub_ym.sort_index(ascending=False)
# Cast both axes' labels to strings before styling
# (presumably so the float labels render as plain text — TODO confirm).
pub_ym.index, pub_ym.columns = pub_ym.index.astype(str), pub_ym.columns.astype(str)
# axis=None shades cells against the whole table, not per row or column.
pub_ym.style.background_gradient(axis=None)

In [None]:
# Parse review_count into numbers: drop the thousands separators, then coerce
# anything that still is not numeric to NaN.
# Casting to str first keeps this cell re-runnable: once the column is numeric,
# the .str accessor would otherwise raise AttributeError on a second execution.
review_count_text = df["review_count"].astype(str).str.replace(",", "")
df["review_count"] = pd.to_numeric(review_count_text, errors="coerce")

In [None]:
# Flag books whose description mentions "GPT" or "LLM" (case-sensitive regex).
# na=False makes rows with a missing description count as non-matching, so the
# column is a pure boolean mask; without it NaN entries would make the
# boolean-indexing cells that use df["GPT_LLM"] raise.
df["GPT_LLM"] = df["description"].str.contains("GPT|LLM", na=False)

In [None]:
# Ten most-reviewed books whose description mentions "GPT".
# na=False treats missing descriptions as non-matching; a mask containing NaN
# cannot be used for boolean indexing and would raise.
mentions_gpt = df["description"].str.contains("GPT", na=False)
df[mentions_gpt].nlargest(10, "review_count")

### GPT와 LLM 도서에 대한 빈도수

In [None]:
# Number of GPT/LLM-flagged books per publisher, most frequent first.
gpt_llm_publishers = df.loc[df["GPT_LLM"], "publisher"]
gpt_llm_publishers.value_counts()

In [None]:
# For GPT/LLM-flagged books, total the review counts and count the titles per
# publisher, then show the ten publishers with the largest review totals.
gpt_llm_books = df[df["GPT_LLM"]]
publisher_stats = gpt_llm_books.groupby("publisher").agg(
    {"review_count": "sum", "title": "count"}
)
publisher_stats.nlargest(10, "review_count")

In [None]:
# Rows published by "리코멘드" that are also flagged as GPT/LLM books.
is_target_publisher = df["publisher"].eq("리코멘드")
df[is_target_publisher & df["GPT_LLM"]]

### 기술통계

In [None]:
# Draw a 50-bin histogram for each numeric column of df; the trailing
# semicolon suppresses the text repr of the returned axes.
df.hist(bins=50);

In [None]:
# Summary statistics for df using pandas' default column selection.
df.describe()

In [None]:
# Summary statistics restricted to the object-dtype (text) columns.
df.describe(include="object")

In [None]:
# The 20 publishers with the most books in the full dataset.
all_publisher_counts = df["publisher"].value_counts()
all_publisher_counts.head(20)

In [None]:
# Line chart of the number of books per publication year.
yearly_counts = df["pub_year"].value_counts().sort_index()
# Skip the first (earliest) year after sorting
# (presumably an outlier entry — TODO confirm).
yearly_counts_trimmed = yearly_counts.iloc[1:]
yearly_counts_trimmed.plot(
    title="판매 도서 중 연도별 컴퓨터 공학 도서 수",
    figsize=(10, 4),
)

In [None]:
# Book counts with one row per publisher and one column per publication year.
pd.crosstab(index=df["publisher"], columns=df["pub_year"])

In [None]:
# NOTE: this rebinds df_recent, which an earlier cell defined with a 2010
# cutoff; from here on it holds only books published after 2015.
is_after_2015 = df["pub_year"] > 2015
df_recent = df.loc[is_after_2015]
# The 30 publishers with the most books in that recent subset.
recent_publisher_counts = df_recent["publisher"].value_counts()
top_pub = recent_publisher_counts.head(30)
top_pub

In [None]:
# Publisher x year book counts for the recent subset, keeping only the rows
# for the publishers in top_pub and in top_pub's order.
recent_pub_by_year = pd.crosstab(
    index=df_recent["publisher"],
    columns=df_recent["pub_year"],
)
recent_pub_by_year.loc[top_pub.index]