### 코로나로 인한 사람들의 흥미 변화 및 유망 품목을 파악하기 위해 앱 데이터를 크롤링해서 분석해보았습니다.

* 앱 순위를 크롤링한 사이트는 [모바일 인덱스](https://www.mobileindex.com/) 입니다.

먼저, 기간별로 어플 순위를 가져왔습니다.


이때, **구글 플레이 스토어(안드로이드)**와 **앱 스토어(IOS)**를 구분지어 데이터를 크롤링했습니다.  

**기간**은 **2019년 1월 ~ 12월**과 **2020년 1월 ~ 6월**의 데이터를 가져왔습니다.

### 기간별 앱 순위와 장르 가져오기

* 설정 초기화 및 필요한 모듈 임포트

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

!apt install chromium-chromedriver
!pip install selenium

from selenium import webdriver
options = webdriver.ChromeOptions()
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive
Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following package was automatically installed and is no longer required:
  libnvidia-common-440
Use 'apt autoremove' to remove it.
The following additional packages will be installed:
  chromium-browser chromium-browser-l10n chromium-codecs-ffmpeg-extra
Suggested packages:
  webaccounts-chromium-extension unity-chromium-extension adobe-flashplugin
The following

In [2]:
from bs4 import BeautifulSoup
import pandas as pd
import plotly.express as px
import numpy as np

먼저, 각 월말에 해당하는 데이터를 가져오기 위해 해당 사이트의 쿼리스트링을 활용하였습니다. 쿼리스트링을 변경하며 해당 날짜에 알맞은 데이터를 가져온 뒤, 해당 페이지의 소스를 받아옵니다.

이후 받아온 페이지 소스를 활용해 순위에 알맞은 앱 명과 장르를 가져오도록 함수로 구현하였습니다.

In [None]:
genre_dic = {}

def makeUrl(dateList,i, kinds):
  if kinds == 'playstore': 
    url = 'https://www.mobileindex.com/app/get_rank_all?rt=r&mk=2&c=kr&t=app&rs=100&d=' + dateList[i]
  else:
    url = 'https://www.mobileindex.com/app/get_rank_all?rt=r&mk=1&c=kr&t=app&rs=100&d=' + dateList[i]
  return url

def getGenreUrl(url):
  driver = webdriver.Chrome('chromedriver',options=options)
  driver.get(url)

  html = driver.page_source
  soup = BeautifulSoup(html, 'html.parser')
  genreURL = soup.select('div.item-info > a')
  return genreURL

def get_genre(url,appname):
  if appname in genre_dic:
    return genre_dic[appname]
  dr = webdriver.Chrome('chromedriver',options=options)
  dr.get(url)
  ht = dr.page_source
  so = BeautifulSoup(ht, 'html.parser')
  genre = so.select('tr.text-center')[0].text
  index = genre.find('2')
  if genre[1] == 'G':
    genre_dic[appname] = genre[13:(index-1)]
    return genre[13:(index-1)]
  elif genre[1] == 'A':
    genre_dic[appname] = genre[11:(index-1)]
    return genre[11:(index-1)]

def separateAppRank(rank):
  free_rank = []
  pay_rank = []
  sales_rank = [] 
  for j in range(len(rank)):
    if len(rank[j]) == 100:
      free_rank.append(rank[j][0::2])
      pay_rank.append(rank[j][1::2])
    elif len(rank[j]) == 150 or len(rank[j]) == 300:
      free_rank.append(rank[j][0::3])
      pay_rank.append(rank[j][1::3])
      sales_rank.append(rank[j][2::3])
  return free_rank, pay_rank, sales_rank

def getAppRank(url, year):
  driver = webdriver.Chrome('chromedriver',options=options)
  driver.get(url)

  html = driver.page_source
  soup = BeautifulSoup(html, 'html.parser')
  notices = soup.select('span.appname')
  
  tmp = []
  for n in notices:
    tmp.append(n.text.strip())
  return tmp

위의 함수들을 활용하여, 연도와 기간, 그리고 스토어 종류별 알맞은 데이터 값들을 크롤링하였고, 이를 각각 list에 저장하였습니다.


In [None]:
#2019
appRank_p = []
appRank_a = []
date_2019 = ['2019-01-31','2019-02-28','2019-03-31','2019-04-30','2019-05-31','2019-06-30','2019-07-31','2019-08-31','2019-09-30','2019-10-31','2019-11-30','2019-12-31']
date_size = len(date_2019)
kinds = ['playstore','appstore']

for kind in kinds:
  appRank = []
  appGenre = []
  for i in range(date_size):
    url = makeUrl(date_2019,i,kind)
    tmp = getAppRank(url,2019)
    appRank.append(tmp)
    genreURL = getGenreUrl(url)
    genreList = list()
    for j in range(len(genreURL)):
      genreList.append(get_genre('https://www.mobileindex.com/'+genreURL[j]['href'],tmp[j]))
    appGenre.append(genreList)
    print(kind,' data of ',date_2019[i],' is done')
  if kind == 'playstore': 
    appRank_p = appRank[:]
  else:
    appRank_a = appRank[:] 
  
  if kind == 'playstore': 
    p_appRank_free_2019,p_appRank_pay_2019,p_appRank_sales_2019 = separateAppRank(appRank)
    print('sep is done')
    p_appGenre_free_2019,p_appGenre_pay_2019,p_appGenre_sales_2019  = separateAppRank(appGenre)
    print('sep2 is done') 
  else:
    a_appRank_free_2019,a_appRank_pay_2019,a_appRank_sales_2019 = separateAppRank(appRank)
    print('sep is done')
    a_appGenre_free_2019,a_appGenre_pay_2019,a_appGenre_sales_2019  = separateAppRank(appGenre)
    print('sep2 is done') 

In [None]:
#2020
appRank = []
date_2020 = ['2020-01-31','2020-02-29','2020-03-31','2020-04-30','2020-05-31','2020-06-30']
date_size = len(date_2020)
kinds = ['playstore','appstore']

for kind in kinds:
  appRank = []
  appGenre = []
  for i in range(date_size):
    url = makeUrl(date_2020,i,kind)
    tmp = getAppRank(url,2020)
    appRank.append(tmp)
    genreURL = getGenreUrl(url)
    genreList = list()
    for j in range(len(genreURL)):
      genreList.append(get_genre('https://www.mobileindex.com/'+genreURL[j]['href'],tmp[j]))
    appGenre.append(genreList)
    print(kind,' date of ',date_2020[i],' is done')
  
  if kind == 'playstore': 
    p_appRank_free_2020,p_appRank_pay_2020,p_appRank_sales_2020 = separateAppRank(appRank)
    print('sep is done')
    p_appGenre_free_2020,p_appGenre_pay_2020,p_appGenre_sales_2020  = separateAppRank(appGenre)
    print('sep2 is done') 
  else:
    a_appRank_free_2020,a_appRank_pay_2020,a_appRank_sales_2020 = separateAppRank(appRank)
    print('sep is done')
    a_appGenre_free_2020,a_appGenre_pay_2020,a_appGenre_sales_2020  = separateAppRank(appGenre)
    print('sep2 is done') 

앞선 크롤링을 통해 받아온 리스트들을 활용하여 무료 앱은 무료 앱끼리, 유료는 유료끼리, 그리고 매출 순위까지 각각 합쳐 이후 데이터 분석을 위해 DataFrame으로 변환하는 과정을 거쳤습니다.

이 때 열 이름은 각 연도와 월 명을 언더바를 이용하여 19_1 과 같이 나타내었고, 장르는 19_1_genre 와 같이 나타내었습니다.

In [None]:
p_appRank_free = list()
for i in range(len(p_appRank_free_2019)):
  p_appRank_free.append(p_appRank_free_2019[i])
  p_appRank_free.append(p_appGenre_free_2019[i])
for i in range(len(p_appRank_free_2020)):
  p_appRank_free.append(p_appRank_free_2020[i])
  p_appRank_free.append(p_appGenre_free_2020[i])

a_appRank_free = list()
for i in range(len(a_appRank_free_2019)):
  a_appRank_free.append(a_appRank_free_2019[i])
  a_appRank_free.append(a_appGenre_free_2019[i])
for i in range(len(a_appRank_free_2020)):
  a_appRank_free.append(a_appRank_free_2020[i])
  a_appRank_free.append(a_appGenre_free_2020[i])

col = []
for i in range(12):
  col.append('19_'+str(i+1))
  col.append('19_'+str(i+1)+'_gen')
for i in range(6):
  col.append('20_'+str(i+1))
  col.append('20_'+str(i+1)+'_gen')

appRank_free_playstore_df = pd.DataFrame(p_appRank_free)
appRank_free_playstore_df = appRank_free_playstore_df.T
appRank_free_playstore_df.columns = col
appRank_free_playstore_df.to_csv('/content/gdrive/My Drive/dacon/appRank_free_playstore_2019.csv', encoding='utf-8-sig')

appRank_free_appstore_df = pd.DataFrame(a_appRank_free)
appRank_free_appstore_df = appRank_free_appstore_df.T
appRank_free_appstore_df.columns = col
appRank_free_appstore_df.to_csv('/content/gdrive/My Drive/dacon/appRank_free_appstore_2020.csv', encoding='utf-8-sig')


In [None]:
p_appRank_pay = list()
for i in range(len(p_appRank_pay_2019)):
  p_appRank_pay.append(p_appRank_pay_2019[i])
  p_appRank_pay.append(p_appGenre_pay_2019[i])
for i in range(len(p_appRank_pay_2020)):
  p_appRank_pay.append(p_appRank_pay_2020[i])
  p_appRank_pay.append(p_appGenre_pay_2020[i])

a_appRank_pay = list()
for i in range(len(a_appRank_pay_2019)):
  a_appRank_pay.append(a_appRank_pay_2019[i])
  a_appRank_pay.append(a_appGenre_pay_2019[i])
for i in range(len(a_appRank_pay_2020)):
  a_appRank_pay.append(a_appRank_pay_2020[i])
  a_appRank_pay.append(a_appGenre_pay_2020[i])

appRank_pay_playstore_df = pd.DataFrame(p_appRank_pay)
appRank_pay_playstore_df = appRank_pay_playstore_df.T
appRank_pay_playstore_df.columns = col
appRank_pay_playstore_df.to_csv('/content/gdrive/My Drive/dacon/appRank_pay_playstore_2019.csv', encoding='utf-8-sig')

appRank_pay_appstore_df = pd.DataFrame(a_appRank_pay)
appRank_pay_appstore_df = appRank_pay_appstore_df.T
appRank_pay_appstore_df.columns = col
appRank_pay_appstore_df.to_csv('/content/gdrive/My Drive/dacon/appRank_pay_appstore_2020.csv', encoding='utf-8-sig')

In [None]:
p_appRank_sales = list()
for i in range(len(p_appRank_sales_2019)):
  p_appRank_sales.append(p_appRank_sales_2019[i])
  p_appRank_sales.append(p_appGenre_sales_2019[i])
for i in range(len(p_appRank_sales_2020)):
  p_appRank_sales.append(p_appRank_sales_2020[i])
  p_appRank_sales.append(p_appGenre_sales_2020[i])

a_appRank_sales = list()
for i in range(len(a_appRank_sales_2019)):
  a_appRank_sales.append(a_appRank_sales_2019[i])
  a_appRank_sales.append(a_appGenre_sales_2019[i])
for i in range(len(a_appRank_sales_2020)):
  a_appRank_sales.append(a_appRank_sales_2020[i])
  a_appRank_sales.append(a_appGenre_sales_2020[i])

p_sales_rank_df = pd.DataFrame(p_appRank_sales)
p_sales_rank_df = p_sales_rank_df.T
p_sales_rank_df.columns = col[12:]
p_sales_rank_df.to_csv('/content/gdrive/My Drive/app_sales_rank_playstore_2019.csv', encoding='utf-8-sig')

a_sales_rank_df = pd.DataFrame(a_appRank_sales)
a_sales_rank_df = a_sales_rank_df.T
a_sales_rank_df.columns = col
a_sales_rank_df.to_csv('/content/gdrive/My Drive/dacon/app_sales_rank_appstore_2020.csv', encoding='utf-8-sig')

### 순위별 장르 점수 매기기 

데이터를 가져오는 작업이 끝났다면, 이제 데이터를 가공해야할 차례입니다.

사람들의 **흥미 변화 및 유망 품목을 파악하는 것**이 목표이기에, 각 앱 명이 아닌 앱의 장르를 기준으로 분석하였습니다.

순위별 점수를 장르에 매겨, 해당 장르의 합산 점수를 장르명과 점수의 딕셔너리 형태로 리스트에 저장합니다. 

이후, **2019년 1월 ~ 9월** 의 장르 점수 값들을 합산한 뒤 평균내어 **기준값**으로 설정하고, **2020년 1월 ~ 6월**의 장르 점수 값과, 기준값을 각각 비교합니다. 이 때 비교한 결과는 증감율로서 표현합니다.


* 먼저, 필요한 데이터를 불러옵니다. 이 csv 파일들은 모두 위의 장르 및 앱 순위 구하기를 통해 만든 데이터프레임을 csv로 저장한 파일입니다.

In [34]:
appRank_free_playstore_19_df = pd.read_csv('/content/gdrive/My Drive/dacon/app/appRank_free_playstore_2019.csv')
appRank_free_appstore_19_df = pd.read_csv('/content/gdrive/My Drive/dacon/app/appRank_free_appstore_2019.csv')
appRank_pay_playstore_19_df = pd.read_csv('/content/gdrive/My Drive/dacon/app/appRank_pay_playstore_2019.csv')
appRank_pay_appstore_19_df = pd.read_csv('/content/gdrive/My Drive/dacon/app/appRank_pay_appstore_2019.csv')
appRank_free_playstore_20_df = pd.read_csv('/content/gdrive/My Drive/dacon/app/appRank_free_playstore_2020.csv')
appRank_free_appstore_20_df = pd.read_csv('/content/gdrive/My Drive/dacon/app/appRank_free_appstore_2020.csv')
appRank_pay_playstore_20_df = pd.read_csv('/content/gdrive/My Drive/dacon/app/appRank_pay_playstore_2020.csv')
appRank_pay_appstore_20_df = pd.read_csv('/content/gdrive/My Drive/dacon/app/appRank_pay_appstore_2020.csv')
appRank_sales_playstore_20_df = pd.read_csv('/content/gdrive/My Drive/dacon/app/app_sales_rank_playstore_2020.csv')
appRank_sales_appstore_20_df = pd.read_csv('/content/gdrive/My Drive/dacon/app/app_sales_rank_appstore_2020.csv')
appRank_sales_playstore_19_df = pd.read_csv('/content/gdrive/My Drive/dacon/app/app_sales_rank_playstore_2019.csv')
appRank_sales_appstore_19_df = pd.read_csv('/content/gdrive/My Drive/dacon/app/app_sales_rank_appstore_2019.csv')

장르별 점수를 매긴 기준은, 총 1~50위까지 있는 경우에 1위인 장르는 50점, 50위인 장르는 1점으로 매겼습니다. 

이후 각 월별 장르 점수를 합산하였습니다.

#### 플레이스토어 무료

In [4]:
appRank_free_playstore_19_df.drop(['Unnamed: 0'],axis=1,inplace=True)

In [5]:
tmp = [i for i in list(appRank_free_playstore_19_df) if 'gen' in i]
free_p_gen = list(set(np.array(appRank_free_playstore_19_df[tmp]).reshape(len(tmp)*50,)))
free_p_gen_dic_19 = []
for i in tmp:
  free_p_gen_dic_tmp = {key: 0 for key in dict.fromkeys(free_p_gen).keys()}
  score = 50
  for j in appRank_free_playstore_19_df.dropna()[i]:
    free_p_gen_dic_tmp[j] = free_p_gen_dic_tmp[j]+score
    score -= 1
  free_p_gen_dic_19.append(free_p_gen_dic_tmp)

col = []
for i in range(12):
  col.append('19_'+str(i+1))

free_playstore_19 = pd.DataFrame(free_p_gen_dic_19)
free_playstore_19 = free_playstore_19.T
free_playstore_19.columns = col
free_playstore_19 = free_playstore_19.drop([free_playstore_19.index[0]])
free_playstore_19.head()

Unnamed: 0,19_1,19_2,19_3,19_4,19_5,19_6,19_7,19_8,19_9,19_10,19_11,19_12
예술/디자인,0,0,0,8,0,0,0,0,0,0,0,0
여행 및 지역정보,60,88,70,121,91,106,119,93,108,92,149,160
쇼핑,189,104,127,86,105,113,145,308,111,146,179,168
비즈니스,0,0,0,0,0,5,0,21,0,15,33,3
라이프스타일,0,0,22,3,51,49,36,60,18,0,20,0


아래는 기준값을 구하는 과정입니다. 19년 1~9월의 장르별 합산 값 평균을 계산하여 average 열에 저장합니다. 

또한 20년 데이터와 비교하기 위해 장르명을 genre라는 리스트에 저장합니다.

In [6]:
free_p_standard_19 = free_playstore_19[['19_1','19_2','19_3','19_4','19_5','19_6','19_7','19_8','19_9']]
free_p_standard_19 = free_p_standard_19.reset_index()
free_p_standard_19 = free_p_standard_19.set_index('index')

free_p_standard_19['average'] = 0.000
print(len(free_p_standard_19))
for i in range(24):
  total = 0
  for j in range(0,9):
    total += free_p_standard_19.iloc[i][j]
  free_p_standard_19['average'][i] = total/9

free_p_standard_19 = free_p_standard_19[['average']]
free_p_standard_19 = free_p_standard_19.reset_index()
genre = list(free_p_standard_19['index'])
free_p_standard_19 = free_p_standard_19.set_index('index')
free_p_standard_19.head()

24




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,average
index,Unnamed: 1_level_1
예술/디자인,0.888889
여행 및 지역정보,95.111111
쇼핑,143.111111
비즈니스,2.888889
라이프스타일,26.555556


20년의 무료, 플레이스토어 앱 데이터를 불러와 19년도와 같은 가공 과정을 거칩니다. 

In [7]:
appRank_free_playstore_20_df.drop(['Unnamed: 0'], axis = 1, inplace=True)

tmp = [i for i in list(appRank_free_playstore_20_df) if 'gen' in i]
free_p_gen = list(set(np.array(appRank_free_playstore_20_df[tmp]).reshape(len(tmp)*50,)))
free_p_gen_dic_20 = []
for i in tmp:
  free_p_gen_dic_tmp = {key: 0 for key in dict.fromkeys(free_p_gen).keys()}
  score = 50
  for j in appRank_free_playstore_20_df.dropna()[i]:
    free_p_gen_dic_tmp[j] = free_p_gen_dic_tmp[j]+score
    score -= 1
  free_p_gen_dic_20.append(free_p_gen_dic_tmp)

col = []
for i in range(6):
  col.append('20_'+str(i+1))

free_playstore_20 = pd.DataFrame(free_p_gen_dic_20)
free_playstore_20 = free_playstore_20.T
free_playstore_20.columns = col
free_playstore_20 = free_playstore_20.drop([free_playstore_20.index[0]])

col = []
for i in range(1,7):
  col.append(str(i)+'월')

free_playstore_20 = free_playstore_20.reset_index()

이후 20년의 경우에는 average를 구하는 것이 아닌, 증감율을 계산하여 각 월에 해당하는 열에 대입합니다.




증감율 계산은

**(현재 값 - 기준 값) / 기준값**

으로 계산하였습니다.

In [8]:
for i in range(len(col)):
  free_playstore_20[col[i]] = 0.000

for i in range(len(free_playstore_20)):
  base = free_playstore_20['index'][i]
  if base in genre:
    idx = genre.index(base)
    base = free_p_standard_19.iloc[idx]['average']
  else:
    base = 1.0
  for j in range(2, 8):
    if base == 0:
      value = 0.0
    else:
      value = (float(free_playstore_20.iloc[i][j]) - base) / base
    free_playstore_20[col[j-2]][i] = value

free_playstore_20.head()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,index,20_1,20_2,20_3,20_4,20_5,20_6,1월,2월,3월,4월,5월,6월
0,쇼핑,213,295,228,149,127,139,1.061335,0.593168,0.041149,-0.112578,-0.028727,-0.992584
1,비즈니스,0,70,49,72,72,70,23.230769,15.961538,23.923077,23.923077,23.230769,7.04142
2,라이프스타일,31,97,0,0,38,0,2.65272,-1.0,-1.0,0.430962,-1.0,-0.900107
3,날씨,0,0,0,0,48,0,-1.0,-1.0,-1.0,1.037736,-1.0,-1.042453
4,소셜,95,74,104,81,76,60,-0.130548,0.221932,-0.048303,-0.10705,-0.295039,-1.001534


이후 plotly를 활용하여 animation 효과를 주기 위해 적절한 데이터프레임의 형태로 변환하는 과정을 거칩니다. 

과정은 간략히 아래 코드에서 확인하실 수 있습니다.  

genre, month, value 3개의 열을 가지는 데이터프레임으로 value에 증감율의 값이 들어가게 됩니다.

In [9]:
free_playstore_20 = free_playstore_20.set_index('index')
free_playstore_20 = free_playstore_20[['1월','2월','3월','4월','5월','6월']]
free_playstore_20 = free_playstore_20.reset_index()
free_playstore_20.columns = ['genre', '1월','2월','3월','4월','5월','6월']
month = list()
for i in range(1,7):
  month.append(list(free_playstore_20[str(i)+'월']))
genre_list = list(free_playstore_20['genre'])

In [10]:
free_playstore_20_graph = pd.DataFrame()
for i in range(6):
  free_playstore_20_fig = pd.DataFrame(genre_list,columns = ['genre'])
  free_playstore_20_fig['value'] = month[i]
  free_playstore_20_fig['month'] = str(i+1)+'월'
  free_playstore_20_fig = free_playstore_20_fig[['genre','month','value']]
  free_playstore_20_graph = pd.concat([free_playstore_20_graph,free_playstore_20_fig], axis = 0)

free_playstore_20_graph

Unnamed: 0,genre,month,value
0,쇼핑,1월,1.061335
1,비즈니스,1월,23.230769
2,라이프스타일,1월,2.652720
3,날씨,1월,-1.000000
4,소셜,1월,-0.130548
...,...,...,...
18,엔터테인먼트,6월,-1.004122
19,식음료,6월,-1.003759
20,자동차,6월,20.750000
21,음악/오디오,6월,-0.968590


데이터 전처리과정을 모두 끝낸 뒤, plotly를 활용하여 plot합니다.

In [11]:
df = free_playstore_20_graph

fig = px.bar(df, x="genre", y="value", color="genre",
  animation_frame="month", animation_group="genre", range_y=[-5, 65])
fig.show()

아래는 위의 방법과 같은 방법으로 무료, 유료, 매출을 플레이스토어와 앱스토어로 나누어 plot한 결과입니다.

#### 플레이스토어 유료

In [12]:
appRank_pay_playstore_19_df.drop(['Unnamed: 0'], axis = 1, inplace= True)

tmp = [i for i in list(appRank_pay_playstore_19_df) if 'gen' in i]
pay_p_gen = list(set(np.array(appRank_pay_playstore_19_df[tmp]).reshape(len(tmp)*50,)))
pay_p_gen_dic_19 = []
for i in tmp:
  pay_p_gen_dic_tmp = {key: 0 for key in dict.fromkeys(pay_p_gen).keys()}
  score = 50
  for j in appRank_pay_playstore_19_df.dropna()[i]:
    pay_p_gen_dic_tmp[j] = pay_p_gen_dic_tmp[j]+score
    score -= 1
  pay_p_gen_dic_19.append(pay_p_gen_dic_tmp)

col = []
for i in range(12):
  col.append('19_'+str(i+1))

pay_playstore_19 = pd.DataFrame(pay_p_gen_dic_19)
pay_playstore_19 = pay_playstore_19.T
pay_playstore_19.columns = col
pay_playstore_19 = pay_playstore_19.drop([pay_playstore_19.index[0]])

In [13]:
pay_p_standard_19 = pay_playstore_19[['19_1','19_2','19_3','19_4','19_5','19_6','19_7','19_8','19_9']]
pay_p_standard_19 = pay_p_standard_19.reset_index()
pay_p_standard_19 = pay_p_standard_19.set_index('index')

pay_p_standard_19['average'] = 0.000
for i in range(len(pay_p_standard_19)):
  total = 0
  for j in range(0,9):
    total += pay_p_standard_19.iloc[i][j]
  pay_p_standard_19['average'][i] = total/9

pay_p_standard_19 = pay_p_standard_19[['average']]
pay_p_standard_19 = pay_p_standard_19.reset_index()
genre = list(pay_p_standard_19['index'])
pay_p_standard_19 = pay_p_standard_19.set_index('index')



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [14]:
appRank_pay_playstore_20_df.drop(['Unnamed: 0'], axis = 1, inplace=True)

tmp = [i for i in list(appRank_pay_playstore_20_df) if 'gen' in i]
pay_p_gen = list(set(np.array(appRank_pay_playstore_20_df[tmp]).reshape(len(tmp)*50,)))
pay_p_gen_dic_20 = []
for i in tmp:
  pay_p_gen_dic_tmp = {key: 0 for key in dict.fromkeys(pay_p_gen).keys()}
  score = 50
  for j in appRank_pay_playstore_20_df.dropna()[i]:
    pay_p_gen_dic_tmp[j] = pay_p_gen_dic_tmp[j]+score
    score -= 1
  pay_p_gen_dic_20.append(pay_p_gen_dic_tmp)

In [15]:
col = []
for i in range(6):
  col.append('20_'+str(i+1))

pay_playstore_20 = pd.DataFrame(pay_p_gen_dic_20)
pay_playstore_20 = pay_playstore_20.T
pay_playstore_20.columns = col
pay_playstore_20 = pay_playstore_20.drop([pay_playstore_20.index[0]])

col = []
for i in range(1,7):
  col.append(str(i)+'월')

pay_playstore_20 = pay_playstore_20.reset_index()

for i in range(len(col)):
  pay_playstore_20[col[i]] = 0.000

for i in range(len(pay_playstore_20)):
  base = pay_playstore_20['index'][i]
  if base in genre:
    idx = genre.index(base)
    base = pay_p_standard_19.iloc[idx]['average']
  else:
    base = 1.0
  for j in range(2, 8):
    if base == 0:
      value = 0.0
    else:
      value = (float(pay_playstore_20.iloc[i][j]) - base) / base
    pay_playstore_20[col[j-2]][i] = value

pay_playstore_20 = pay_playstore_20.set_index('index')
pay_playstore_20 = pay_playstore_20[['1월','2월','3월','4월','5월','6월']]
pay_playstore_20 = pay_playstore_20.reset_index()
pay_playstore_20.head()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,index,1월,2월,3월,4월,5월,6월
0,동영상 플레이어/편집기,-0.303362,-0.366693,-0.373729,-0.303362,-0.289289,-1.002135
1,사진,0.260487,0.437077,0.382273,0.114344,0.041272,-0.998414
2,커뮤니케이션,-0.020864,-0.154993,0.032787,-0.020864,0.099851,-1.00028
3,엔터테인먼트,-1.0,-1.0,-1.0,-1.0,-1.0,-1.071429
4,맞춤 설정,-0.050695,-0.381848,-0.50695,-0.197874,-0.308258,-1.000373


In [16]:
month = list()
for i in range(1,7):
  month.append(list(pay_playstore_20[str(i)+'월']))

genre_list = list(pay_playstore_20['index'])

pay_playstore_20_graph = pd.DataFrame()
for i in range(6):
  pay_playstore_20_fig = pd.DataFrame(genre_list,columns = ['genre'])
  pay_playstore_20_fig['value'] = month[i]
  pay_playstore_20_fig['month'] = str(i+1)+'월'
  pay_playstore_20_fig = pay_playstore_20_fig[['genre','month','value']]
  pay_playstore_20_graph = pd.concat([pay_playstore_20_graph,pay_playstore_20_fig], axis = 0)

pay_playstore_20_graph.head()

Unnamed: 0,genre,month,value
0,동영상 플레이어/편집기,1월,-0.303362
1,사진,1월,0.260487
2,커뮤니케이션,1월,-0.020864
3,엔터테인먼트,1월,-1.0
4,맞춤 설정,1월,-0.050695


In [17]:
df = pay_playstore_20_graph

fig = px.bar(df, x="genre", y="value", color="genre",
  animation_frame="month", animation_group="genre", range_y=[-5, 22])
fig.show()

#### 플레이스토어 매출

In [18]:
appRank_sales_playstore_19_df.drop(['Unnamed: 0'],axis=1,inplace= True)

tmp = [i for i in list(appRank_sales_playstore_19_df) if 'gen' in i]
sales_p_gen = list(set(np.array(appRank_sales_playstore_19_df[tmp]).reshape(len(tmp)*50,)))
sales_p_gen_dic_19 = []
for i in tmp:
  sales_p_gen_dic_tmp = {key: 0 for key in dict.fromkeys(sales_p_gen).keys()}
  score = 50
  for j in appRank_sales_playstore_19_df.dropna()[i]:
    sales_p_gen_dic_tmp[j] = sales_p_gen_dic_tmp[j]+score
    score -= 1
  sales_p_gen_dic_19.append(sales_p_gen_dic_tmp)

In [19]:
col = []
for i in range(6,12):
  col.append('19_'+str(i+1))

sales_playstore_19 = pd.DataFrame(sales_p_gen_dic_19)
sales_playstore_19 = sales_playstore_19.T
sales_playstore_19.columns = col
sales_playstore_19 = sales_playstore_19.drop([sales_playstore_19.index[0]])
sales_playstore_19 = sales_playstore_19.reset_index()
sales_playstore_19 = sales_playstore_19.set_index('index')

In [20]:
sales_playstore_19['average'] = 0.000

for i in range(len(sales_playstore_19)):
  total = 0
  for j in range(0,6):
    total += sales_playstore_19.iloc[i][j]
  sales_playstore_19['average'][i] = total/9

sales_playstore_19 = sales_playstore_19[['average']]
sales_playstore_19 = sales_playstore_19.reset_index()
genre = list(sales_playstore_19['index'])
sales_playstore_19 = sales_playstore_19.set_index('index')
sales_playstore_19.head()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,average
index,Unnamed: 1_level_1
커뮤니케이션,86.0
사진,0.333333
엔터테인먼트,160.888889
스포츠,33.444444
도서/참고자료,25.666667


In [21]:
appRank_sales_playstore_20_df.drop(['Unnamed: 0'], axis = 1, inplace=True)
tmp = [i for i in list(appRank_sales_playstore_20_df) if 'gen' in i]
sales_p_gen = list(set(np.array(appRank_sales_playstore_20_df[tmp]).reshape(len(tmp)*50,)))
sales_p_gen_dic_20 = []
for i in tmp:
  sales_p_gen_dic_tmp = {key: 0 for key in dict.fromkeys(sales_p_gen).keys()}
  score = 50
  for j in appRank_sales_playstore_20_df.dropna()[i]:
    sales_p_gen_dic_tmp[j] = sales_p_gen_dic_tmp[j]+score
    score -= 1
  sales_p_gen_dic_20.append(sales_p_gen_dic_tmp)

In [22]:
col = []
for i in range(6):
  col.append('20_'+str(i+1))

sales_playstore_20 = pd.DataFrame(sales_p_gen_dic_20)
sales_playstore_20 = sales_playstore_20.T
sales_playstore_20.columns = col
sales_playstore_20 = sales_playstore_20.drop([sales_playstore_20.index[0]])
sales_playstore_20 = sales_playstore_20.reset_index()

In [23]:
col = []
for i in range(1,7):
  col.append(str(i)+'월')

for i in range(len(col)):
  sales_playstore_20[col[i]] = 0.000

for i in range(len(sales_playstore_20)):
  base = sales_playstore_20['index'][i]
  if base in genre:
    idx = genre.index(base)
    base = sales_playstore_19.iloc[idx]['average']
  else:
    base = 1.0
  for j in range(2, 8):
    if base == 0:
      value = 0.0
    else:
      value = (float(sales_playstore_20.iloc[i][j]) - base) / base
    sales_playstore_20[col[j-2]][i] = value

sales_playstore_20 = sales_playstore_20.set_index('index')
sales_playstore_20 = sales_playstore_20[['1월','2월','3월','4월','5월','6월']]
sales_playstore_20 = sales_playstore_20.reset_index()
sales_playstore_20.columns = ['genre', '1월','2월','3월','4월','5월','6월']



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [24]:
month = list()
for i in range(1,7):
  month.append(list(sales_playstore_20[str(i)+'월']))
genre_list = list(sales_playstore_20['genre'])

sales_playstore_20_graph = pd.DataFrame()
for i in range(6):
  sales_playstore_20_fig = pd.DataFrame(genre_list,columns = ['genre'])
  sales_playstore_20_fig['value'] = month[i]
  sales_playstore_20_fig['month'] = str(i+1)+'월'
  sales_playstore_20_fig = sales_playstore_20_fig[['genre','month','value']]
  sales_playstore_20_graph = pd.concat([sales_playstore_20_graph,sales_playstore_20_fig], axis = 0)

sales_playstore_20_graph.head()

Unnamed: 0,genre,month,value
0,동영상 플레이어/편집기,1월,45.0
1,커뮤니케이션,1월,0.639535
2,엔터테인먼트,1월,0.230663
3,스포츠,1월,0.82392
4,금융,1월,-1.0


In [25]:
df = sales_playstore_20_graph

fig = px.bar(df, x="genre", y="value", color="genre",
  animation_frame="month", animation_group="genre", range_y=[-5, 20])
fig.show()

#### 앱스토어 무료

In [35]:
appRank_free_appstore_19_df.drop(['Unnamed: 0'],axis=1,inplace=True)

tmp = [i for i in list(appRank_free_appstore_19_df) if 'gen' in i]
free_a_gen = list(set(np.array(appRank_free_appstore_19_df[tmp]).reshape(len(tmp)*100,)))
free_a_gen_dic_19 = []
for i in tmp:
  free_a_gen_dic_tmp = {key: 0 for key in dict.fromkeys(free_a_gen).keys()}
  score = 100
  for j in appRank_free_appstore_19_df.dropna()[i]:
    free_a_gen_dic_tmp[j] = free_a_gen_dic_tmp[j]+score
    score -= 1
  free_a_gen_dic_19.append(free_a_gen_dic_tmp)

In [36]:
col = []
for i in range(12):
  col.append('19_'+str(i+1))

free_appstore_19 = pd.DataFrame(free_a_gen_dic_19)
free_appstore_19 = free_appstore_19.T
free_appstore_19.columns = col
free_appstore_19 = free_appstore_19.drop([free_appstore_19.index[0]])

free_a_standard_19 = free_appstore_19[['19_1','19_2','19_3','19_4','19_5','19_6','19_7','19_8','19_9']]
free_a_standard_19 = free_a_standard_19.reset_index()
free_a_standard_19 = free_a_standard_19.set_index('index')
free_a_standard_19.head()

Unnamed: 0_level_0,19_1,19_2,19_3,19_4,19_5,19_6,19_7,19_8,19_9
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
롤플레잉\n (일 경과),0,0,34,0,0,0,0,0,0
Simulation,0,0,72,163,0,0,0,0,0
보드,0,34,0,100,86,93,0,123,76
아케이드,178,77,119,81,82,309,0,0,46
퍼즐,0,58,65,78,97,78,199,174,0


In [37]:
free_a_standard_19['average'] = 0.000
print(len(free_a_standard_19))
for i in range(24):
  total = 0
  for j in range(0,9):
    total += free_a_standard_19.iloc[i][j]
  free_a_standard_19['average'][i] = total/9

free_a_standard_19 = free_a_standard_19[['average']]
free_a_standard_19 = free_a_standard_19.reset_index()
genre = list(free_a_standard_19['index'])
free_a_standard_19 = free_a_standard_19.set_index('index')
free_a_standard_19.head()

62




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,average
index,Unnamed: 1_level_1
롤플레잉\n (일 경과),3.777778
Simulation,26.111111
보드,56.888889
아케이드,99.111111
퍼즐,83.222222


In [38]:
appRank_free_appstore_20_df.drop(['Unnamed: 0'], axis = 1, inplace=True)

tmp = [i for i in list(appRank_free_appstore_20_df) if 'gen' in i]
free_a_gen = list(set(np.array(appRank_free_appstore_20_df[tmp]).reshape(len(tmp)*100,)))
free_a_gen_dic_20 = []
for i in tmp:
  free_a_gen_dic_tmp = {key: 0 for key in dict.fromkeys(free_a_gen).keys()}
  score = 100
  for j in appRank_free_appstore_20_df.dropna()[i]:
    free_a_gen_dic_tmp[j] = free_a_gen_dic_tmp[j]+score
    score -= 1
  free_a_gen_dic_20.append(free_a_gen_dic_tmp)

In [39]:
col = []
for i in range(6):
  col.append('20_'+str(i+1))

free_appstore_20 = pd.DataFrame(free_a_gen_dic_20)
free_appstore_20 = free_appstore_20.T
free_appstore_20.columns = col
free_appstore_20 = free_appstore_20.drop([free_appstore_20.index[0]])
free_appstore_20 = free_appstore_20.reset_index()

In [40]:
col = []
for i in range(1,7):
  col.append(str(i)+'월')

for i in range(len(col)):
  free_appstore_20[col[i]] = 0.000

for i in range(len(free_appstore_20)):
  base = free_appstore_20['index'][i]
  if base in genre:
    idx = genre.index(base)
    base = free_a_standard_19.iloc[idx]['average']
  else:
    base = 1.0
  for j in range(2, 8):
    if base == 0:
      value = 0.0
    else:
      value = (float(free_appstore_20.iloc[i][j]) - base) / base
    free_appstore_20[col[j-2]][i] = value

free_appstore_20 = free_appstore_20.set_index('index')
free_appstore_20 = free_appstore_20[['1월','2월','3월','4월','5월','6월']]
free_appstore_20 = free_appstore_20.reset_index()
free_appstore_20.columns = ['genre', '1월','2월','3월','4월','5월','6월']
free_appstore_20



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,genre,1월,2월,3월,4월,5월,6월
0,보드,1.373047,0.001953,-1.0,-1.0,-1.0,-0.975864
1,아케이드,0.210762,-1.0,-1.0,0.29148,-1.0,-0.997873
2,퍼즐,0.910547,1.271028,-1.0,-1.0,2.016021,-0.989059
3,여행 및 지역정보,-1.0,-0.771955,-0.512813,-0.406565,-0.248488,-1.002591
4,전략,1.103093,-1.0,0.484536,0.530928,-0.520619,-0.982942
5,쇼핑,0.21501,-0.032454,-0.322515,0.198783,0.273834,-0.999564
6,캐주얼 게임\n (일 경과),-1.0,-1.0,61.0,-1.0,-1.0,-2.0
7,비즈니스,4.245856,4.867403,3.425414,2.903315,3.375691,-0.89444
8,Navigation,0.0,0.0,0.0,0.0,0.0,0.0
9,라이프스타일,-1.0,-0.511482,-0.727557,0.822547,0.371608,-1.009395


In [41]:
month = list()
for i in range(1,7):
  month.append(list(free_appstore_20[str(i)+'월']))

genre_list = list(free_appstore_20['genre'])

free_appstore_20_graph = pd.DataFrame()
for i in range(6):
  free_appstore_20_fig = pd.DataFrame(genre_list,columns = ['genre'])
  free_appstore_20_fig['value'] = month[i]
  free_appstore_20_fig['month'] = str(i+1)+'월'
  free_appstore_20_fig = free_appstore_20_fig[['genre','month','value']]
  free_appstore_20_graph = pd.concat([free_appstore_20_graph,free_appstore_20_fig], axis = 0)

free_appstore_20_graph.head()

Unnamed: 0,genre,month,value
0,보드,1월,1.373047
1,아케이드,1월,0.210762
2,퍼즐,1월,0.910547
3,여행 및 지역정보,1월,-1.0
4,전략,1월,1.103093


In [42]:
df = free_appstore_20_graph

fig = px.bar(df, x="genre", y="value", color="genre",
  animation_frame="month", animation_group="genre", range_y=[-5, 65])
fig.show()

#### 앱스토어 유료

In [43]:
appRank_pay_appstore_19_df.drop(['Unnamed: 0'], axis = 1, inplace= True)

tmp = [i for i in list(appRank_pay_appstore_19_df) if 'gen' in i]
pay_a_gen = list(set(np.array(appRank_pay_appstore_19_df[tmp]).reshape(len(tmp)*100,)))
pay_a_gen_dic_19 = []
for i in tmp:
  pay_a_gen_dic_tmp = {key: 0 for key in dict.fromkeys(pay_a_gen).keys()}
  score = 100
  for j in appRank_pay_appstore_19_df.dropna()[i]:
    pay_a_gen_dic_tmp[j] = pay_a_gen_dic_tmp[j]+score
    score -= 1
  pay_a_gen_dic_19.append(pay_a_gen_dic_tmp)

In [44]:
col = []
for i in range(12):
  col.append('19_'+str(i+1))

pay_appstore_19 = pd.DataFrame(pay_a_gen_dic_19)
pay_appstore_19 = pay_appstore_19.T
pay_appstore_19.columns = col
pay_appstore_19 = pay_appstore_19.drop([pay_appstore_19.index[0]])

pay_a_standard_19 = pay_appstore_19[['19_1','19_2','19_3','19_4','19_5','19_6','19_7','19_8','19_9']]
pay_a_standard_19 = pay_a_standard_19.reset_index()
pay_a_standard_19 = pay_a_standard_19.set_index('index')

In [45]:
pay_a_standard_19['average'] = 0.000
for i in range(len(pay_a_standard_19)):
  total = 0
  for j in range(0,9):
    total += pay_a_standard_19.iloc[i][j]
  pay_a_standard_19['average'][i] = total/9

pay_a_standard_19 = pay_a_standard_19[['average']]
pay_a_standard_19 = pay_a_standard_19.reset_index()
genre = list(pay_a_standard_19['index'])
pay_a_standard_19 = pay_a_standard_19.set_index('index')
pay_a_standard_19.head()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,average
index,Unnamed: 1_level_1
Simulation,19.777778
Health & Fitness,47.0
보드,103.666667
아케이드,165.333333
퍼즐,98.777778


In [46]:
appRank_pay_appstore_20_df.drop(['Unnamed: 0'], axis = 1, inplace=True)

tmp = [i for i in list(appRank_pay_appstore_20_df) if 'gen' in i]
pay_a_gen = list(set(np.array(appRank_pay_appstore_20_df[tmp]).reshape(len(tmp)*100,)))
pay_a_gen_dic_20 = []
for i in tmp:
  pay_a_gen_dic_tmp = {key: 0 for key in dict.fromkeys(pay_a_gen).keys()}
  score = 100
  for j in appRank_pay_appstore_20_df.dropna()[i]:
    pay_a_gen_dic_tmp[j] = pay_a_gen_dic_tmp[j]+score
    score -= 1
  pay_a_gen_dic_20.append(pay_a_gen_dic_tmp)

In [47]:
col = []
for i in range(6):
  col.append('20_'+str(i+1))

pay_appstore_20 = pd.DataFrame(pay_a_gen_dic_20)
pay_appstore_20 = pay_appstore_20.T
pay_appstore_20.columns = col
pay_appstore_20 = pay_appstore_20.drop([pay_appstore_20.index[0]])
pay_appstore_20.head()

col = []
for i in range(1,7):
  col.append(str(i)+'월')

pay_appstore_20 = pay_appstore_20.reset_index()

In [48]:
for i in range(len(col)):
  pay_appstore_20[col[i]] = 0.000

for i in range(len(pay_appstore_20)):
  base = pay_appstore_20['index'][i]
  if base in genre:
    idx = genre.index(base)
    base = pay_a_standard_19.iloc[idx]['average']
  else:
    base = 1.0
  for j in range(2, 8):
    if base == 0:
      value = 0.0
    else:
      value = (float(pay_appstore_20.iloc[i][j]) - base) / base
    pay_appstore_20[col[j-2]][i] = value

pay_appstore_20 = pay_appstore_20.set_index('index')
pay_appstore_20 = pay_appstore_20[['1월','2월','3월','4월','5월','6월']]
pay_appstore_20 = pay_appstore_20.reset_index()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [49]:
month = list()
for i in range(1,7):
  month.append(list(pay_appstore_20[str(i)+'월']))

genre_list = list(pay_appstore_20['index'])

pay_appstore_20_graph = pd.DataFrame()
for i in range(6):
  pay_appstore_20_fig = pd.DataFrame(genre_list,columns = ['genre'])
  pay_appstore_20_fig['value'] = month[i]
  pay_appstore_20_fig['month'] = str(i+1)+'월'
  pay_appstore_20_fig = pay_appstore_20_fig[['genre','month','value']]
  pay_appstore_20_graph = pd.concat([pay_appstore_20_graph,pay_appstore_20_fig], axis = 0)

pay_appstore_20_graph.head()

Unnamed: 0,genre,month,value
0,Health & Fitness,1월,2.234043
1,보드,1월,0.302251
2,아케이드,1월,-0.189516
3,퍼즐,1월,-0.412823
4,Strategy,1월,-1.0


In [50]:
df = pay_appstore_20_graph

fig = px.bar(df, x="genre", y="value", color="genre",
  animation_frame="month", animation_group="genre", range_y=[-5, 22])
fig.show()

#### 앱스토어 매출

In [51]:
appRank_sales_appstore_19_df.drop(['Unnamed: 0'],axis=1,inplace= True)

tmp = [i for i in list(appRank_sales_appstore_19_df) if 'gen' in i]
sales_a_gen = list(set(np.array(appRank_sales_appstore_19_df[tmp]).reshape(len(tmp)*100,)))
sales_a_gen_dic_19 = []
for i in tmp:
  sales_a_gen_dic_tmp = {key: 0 for key in dict.fromkeys(sales_a_gen).keys()}
  score = 100
  for j in appRank_sales_appstore_19_df.dropna()[i]:
    sales_a_gen_dic_tmp[j] = sales_a_gen_dic_tmp[j]+score
    score -= 1
  sales_a_gen_dic_19.append(sales_a_gen_dic_tmp)

In [52]:
col = []
for i in range(12):
  col.append('19_'+str(i+1))

sales_appstore_19 = pd.DataFrame(sales_a_gen_dic_19)
sales_appstore_19 = sales_appstore_19.T
sales_appstore_19.columns = col
sales_appstore_19 = sales_appstore_19.drop([sales_appstore_19.index[0]])
sales_appstore_19 = sales_appstore_19.reset_index()
sales_appstore_19 = sales_appstore_19.set_index('index')
sales_appstore_19.head()

Unnamed: 0_level_0,19_1,19_2,19_3,19_4,19_5,19_6,19_7,19_8,19_9,19_10,19_11,19_12
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
,0,0,0,0,0,0,0,0,0,0,0,0
보드,149,132,105,88,94,80,70,99,55,45,54,39
아케이드,64,92,139,91,89,82,86,71,77,74,66,97
퍼즐,75,73,104,155,118,117,114,18,30,94,19,28
전략,508,410,462,423,383,417,499,348,499,567,688,620


In [53]:
sales_appstore_19['average'] = 0.000

for i in range(len(sales_appstore_19)):
  total = 0
  for j in range(12):
    total += sales_appstore_19.iloc[i][j]
  sales_appstore_19['average'][i] = total/9

sales_appstore_19 = sales_appstore_19[['average']]
sales_appstore_19 = sales_appstore_19.reset_index()
genre = list(sales_appstore_19['index'])
sales_appstore_19 = sales_appstore_19.set_index('index')
sales_appstore_19.head()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0_level_0,average
index,Unnamed: 1_level_1
,0.0
보드,112.222222
아케이드,114.222222
퍼즐,105.0
전략,647.111111


In [54]:
appRank_sales_appstore_20_df.drop(['Unnamed: 0'], axis = 1, inplace=True)

tmp = [i for i in list(appRank_sales_appstore_20_df) if 'gen' in i]
sales_a_gen = list(set(np.array(appRank_sales_appstore_20_df[tmp]).reshape(len(tmp)*100,)))
sales_a_gen_dic_20 = []
for i in tmp:
  sales_a_gen_dic_tmp = {key: 0 for key in dict.fromkeys(sales_a_gen).keys()}
  score = 100
  for j in appRank_sales_appstore_20_df.dropna()[i]:
    sales_a_gen_dic_tmp[j] = sales_a_gen_dic_tmp[j]+score
    score -= 1
  sales_a_gen_dic_20.append(sales_a_gen_dic_tmp)

In [55]:
col = []
for i in range(6):
  col.append('20_'+str(i+1))

sales_appstore_20 = pd.DataFrame(sales_a_gen_dic_20)
sales_appstore_20 = sales_appstore_20.T
sales_appstore_20.columns = col
sales_appstore_20 = sales_appstore_20.drop([sales_appstore_20.index[0]])
sales_appstore_20 = sales_appstore_20.reset_index()

col = []
for i in range(1,7):
  col.append(str(i)+'월')

for i in range(len(col)):
  sales_appstore_20[col[i]] = 0.000

for i in range(len(sales_appstore_20)):
  base = sales_appstore_20['index'][i]
  if base in genre:
    idx = genre.index(base)
    base = sales_appstore_19.iloc[idx]['average']
  else:
    base = 1.0
  for j in range(2, 8):
    if base == 0:
      value = 0.0
    else:
      value = (float(sales_appstore_20.iloc[i][j]) - base) / base
    sales_appstore_20[col[j-2]][i] = value

sales_appstore_20 = sales_appstore_20.set_index('index')
sales_appstore_20 = sales_appstore_20[['1월','2월','3월','4월','5월','6월']]
sales_appstore_20 = sales_appstore_20.reset_index()
sales_appstore_20.columns = ['genre', '1월','2월','3월','4월','5월','6월']
sales_appstore_20.head()



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Unnamed: 0,genre,1월,2월,3월,4월,5월,6월
0,보드,-0.162376,-0.456436,-0.483168,-0.331683,-0.741584,-1.001447
1,아케이드,0.190661,-0.177043,0.111868,-0.255837,-0.38716,-0.998331
2,퍼즐,-0.409524,-0.704762,-0.942857,-0.857143,-0.904762,-1.0039
3,전략,-0.235062,0.154361,-0.044986,-0.250515,-0.123798,-1.000363
4,전략\n (일 경과),-1.0,-1.0,-1.0,68.0,75.5,-2.5


In [56]:
month = list()
for i in range(1,7):
  month.append(list(sales_appstore_20[str(i)+'월']))
genre_list = list(sales_appstore_20['genre'])

sales_appstore_20_graph = pd.DataFrame()
for i in range(6):
  sales_appstore_20_fig = pd.DataFrame(genre_list,columns = ['genre'])
  sales_appstore_20_fig['value'] = month[i]
  sales_appstore_20_fig['month'] = str(i+1)+'월'
  sales_appstore_20_fig = sales_appstore_20_fig[['genre','month','value']]
  sales_appstore_20_graph = pd.concat([sales_appstore_20_graph,sales_appstore_20_fig], axis = 0)

sales_appstore_20_graph.head()

Unnamed: 0,genre,month,value
0,보드,1월,-0.162376
1,아케이드,1월,0.190661
2,퍼즐,1월,-0.409524
3,전략,1월,-0.235062
4,전략\n (일 경과),1월,-1.0


In [58]:
df = sales_appstore_20_graph

fig = px.bar(df, x="genre", y="value", color="genre",
  animation_frame="month", animation_group="genre", range_y=[-5, 25])
fig.show()

#### 결론 함께 내기!!!