# [정답 가이드] 데이터 분석 복습 챌린지

Day 1부터 Final Challenge까지의 예시 답안입니다. Day 2~7의 코드는 각 문제에서 사용하는 `df`(Day 6은 `tips`)가 이미 준비되어 있다고 가정하며, 이 문서에서는 따로 정의하지 않습니다.

--- 
## Day 1. Pandas 기초

In [None]:
import pandas as pd

# Q2: describe the store inventory column by column, then build the table.
data = {
    '상품명': ['사과', '배', '우유', '감자칩'],
    '가격': [1500, 2000, 2500, 1200],
    '재고': [50, 20, 100, 80],
    '카테고리': ['과일', '과일', '유제품', '과자'],
}
df_store = pd.DataFrame(data)

# Q3: show only the first two rows.
first_two_rows = df_store.head(2)
print(first_two_rows)

# Q4: column names, dtypes and non-null counts (info() prints by itself).
df_store.info()

# Q5: average of the price column.
average_price = df_store['가격'].mean()
print(average_price)

--- 
## Day 2. 데이터 선택 및 필터링

In [None]:
# Q1: keep only the Name and Score columns.
name_and_score = df[['Name', 'Score']]
print(name_and_score)

# Q2: value of the City column in the row labelled 2.
city_in_row_2 = df.loc[2, 'City']
print(city_in_row_2)

# Q3: last row, selected by position.
last_row = df.iloc[-1]
print(last_row)

# Q4: rows where Age is at least 30.
is_30_or_older = df['Age'] >= 30
print(df[is_30_or_older])

# Q5: rows with a Score of at least 80 whose City is Tokyo.
has_high_score = df['Score'] >= 80
is_in_tokyo = df['City'] == 'Tokyo'
print(df[has_high_score & is_in_tokyo])

--- 
## Day 3. 결측치 및 시각화

In [None]:
# Q1: number of missing values in each column.
missing_per_column = df.isna().sum()
print(missing_per_column)

# Q2: new frame without any row that contains a missing value.
df_clean = df.dropna()

# Q3: fill the gaps in column A with that column's mean.
mean_of_a = df['A'].mean()
df['A'] = df['A'].fillna(mean_of_a)

# Q4: plot column D with the default plot kind.
df['D'].plot()

--- 
## Day 4. 이상치 및 Groupby

In [None]:
# Q1: box plot of the Assists column.
df['Assists'].plot.box()

# Q2: total Points per Team.
points_per_team = df.groupby('Team')['Points'].sum()
print(points_per_team)

# Q3: average Points and Assists per Team.
team_averages = df.groupby('Team')[['Points', 'Assists']].mean()
print(team_averages)

# Q4: keep only the rows whose Assists value is 50 or less.
within_limit = df['Assists'] <= 50
df_filtered = df[within_limit]

--- 
## Day 5. 피벗 테이블 및 시각화 기초

In [None]:
# Q1: total Sales with one row per Date and one column per Region.
sales_by_date_and_region = df.pivot_table(
    values='Sales',
    index='Date',
    columns='Region',
    aggfunc='sum',
)
print(sales_by_date_and_region)

# Q2: average Sales per Region.
average_sales_by_region = df.pivot_table(
    values='Sales',
    index='Region',
    aggfunc='mean',
)
print(average_sales_by_region)

# Q3: minimal matplotlib line chart with a title.
import matplotlib.pyplot as plt
x_values = [1, 2, 3, 4]
y_values = [10, 20, 25, 30]
plt.plot(x_values, y_values)
plt.title('Sample Chart')
plt.show()

--- 
## Day 6. Seaborn 실전 시각화

In [None]:
import seaborn as sns
# Q1: scatter plot of tip against total bill.
sns.scatterplot(x='total_bill', y='tip', data=tips)
plt.show()

# Q2: bar plot of total bill for each day.
sns.barplot(x='day', y='total_bill', data=tips)
plt.show()

# Q3: box plot of total bill for each value of time.
sns.boxplot(x='time', y='total_bill', data=tips)
plt.show()

# Q4: same scatter plot as Q1, with points coloured by the sex column.
sns.scatterplot(x='total_bill', y='tip', hue='sex', data=tips)
plt.show()

--- 
## Day 7. 상관분석 및 시계열 데이터

In [None]:
import numpy as np
# Q1: pairwise correlation between the numeric columns only.
corr_matrix = df.corr(numeric_only=True)
print(corr_matrix)

# Q2: the same correlation matrix drawn as an annotated heatmap.
sns.heatmap(corr_matrix, cmap='coolwarm', annot=True)
plt.show()

# Q3: make 'date' the index, then total the sales in weekly bins.
# NOTE(review): resample needs a datetime-like index - presumably 'date' is
# already parsed as datetime in the exercise data; confirm.
df.set_index('date', inplace=True)
sales = df['sales']
weekly_sales = sales.resample('W').sum()

# Q4: mean of sales over a rolling window of 7 rows.
df['sales_7d_avg'] = sales.rolling(window=7).mean()

# Q5: raw sales and the rolling average on the same axes.
sales_with_average = df[['sales', 'sales_7d_avg']]
sales_with_average.plot()
plt.show()

--- 
## [Final Challenge] TMDB 영화 데이터 분석 및 전략 보고서 정답

In [None]:
# Step 0. Load (uses a relative path)
df = pd.read_json('tmdb_movies.json')

# Step 1. Cleaning
def _first_field(items, key, default='Unknown'):
    """Return ``items[0][key]``, or *default* when the cell is missing/empty."""
    # A missing cell can arrive as float NaN, which is truthy: a bare
    # ``if items`` check would fall through to items[0] and raise TypeError.
    if isinstance(items, float) or not items:
        return default
    return items[0][key]


df['main_genre'] = df['genres'].apply(lambda x: _first_field(x, 'name'))
df['country_code'] = df['production_countries'].apply(
    lambda x: _first_field(x, 'iso_3166_1')
)

# .copy() makes each filtered frame independent of the one it was sliced
# from, so the column assignments below do not raise SettingWithCopyWarning.
df = df[(df['budget'] > 0) & (df['revenue'] > 0)].copy()
# NOTE: the replacement mean is taken over the column as it is here, i.e. it
# still includes the 0 ratings that are being replaced.
df['vote_average'] = df['vote_average'].replace(0, df['vote_average'].mean())
df = df[df['runtime'] <= 300].copy()

# Step 2. Correlation analysis
numeric_df = df.select_dtypes(include=[np.number])
corr_matrix = numeric_df.corr()  # computed once, used for both outputs
print(corr_matrix)
sns.heatmap(corr_matrix, annot=True)
plt.show()

# Step 3. Time-series analysis
df['release_date'] = pd.to_datetime(df['release_date'])
df.set_index('release_date', inplace=True)

# pandas 2.2 deprecated the 'Y'/'M' resample aliases in favour of 'YE'/'ME',
# which older releases do not recognise. The start-anchored 'YS'/'MS' aliases
# work on both sides: the bins are the same, only the label moves to the
# first day of each period.
df.resample('YS')['revenue'].mean().plot(title='Yearly Avg Revenue')
plt.show()

# Aggregate to one value per calendar month first, so that a window of 3
# really spans three months rather than three consecutive rows (movies).
# Months without any release are NaN and show up as gaps in the line.
monthly_popularity = df['popularity'].resample('MS').mean()
monthly_popularity.rolling(window=3).mean().plot(title='3-Month Moving Avg of Popularity')
plt.show()

# Step 4. Advanced visualization
# budget > 0 is guaranteed by the Step 1 filter, so the division is safe.
df['ROI'] = df['revenue'] / df['budget']
roi_by_genre = df.groupby('main_genre')['ROI'].mean()
# Bar chart of the three genres with the highest average ROI.
roi_by_genre.sort_values(ascending=False).head(3).plot(kind='bar')
plt.show()

# Distribution of ratings, with the mean marked by a red vertical line.
plt.hist(df['vote_average'], bins=20)
plt.axvline(df['vote_average'].mean(), color='red')
plt.show()