# 政务类App应用商店数据分析项目

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib as mlp
import matplotlib.pyplot as plt
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
from collections import Counter
from pyecharts import options as opts
from pyecharts.charts import Bar
from pyecharts.charts import Line
from pyecharts.charts import Pie
import requests
from pandas import DataFrame

import warnings
warnings.filterwarnings("ignore")

## 全国各（部分）省市政务类app评分排行

In [2]:
# App-store score of each (selected) provincial / municipal government app,
# listed from the highest score to the lowest. Keeping every name next to its
# score makes it hard for the two axes to drift out of alignment.
# NOTE(review): '椰省市' may be a typo for the app name '椰省事' — confirm.
app_scores = [
    ('椰省市', 5.0), ('石家庄一点通', 5.0), ('在成都', 4.8), ('i深圳', 4.3),
    ('随申办市民云', 4.3), ('青e办', 4.1), ('i西安', 4.0), ('云上贵州多彩宝', 4.0),
    ('拉萨政务服务', 4.0), ('皖事办', 3.4), ('哈尔滨政务服务', 3.2), ('郑好办', 3.0),
    ('北京通', 3.0), ('金城办', 3.0), ('爱山东', 2.9), ('e福州', 2.9),
    ('蒙速办', 2.8), ('浙里办', 2.8), ('江苏省政务服务', 2.5), ('津心办', 2.4),
    ('鄂汇办', 2.2), ('广西政务', 2.2), ('我的太原', 2.0), ('闽政通APP', 1.8),
    ('i厦门', 1.8), ('广东政务服务', 1.7), ('沈阳政务服务', 1.6), ('辽事通', 1.6),
    ('灵动长春', 1.4),
]
app_names = [name for name, _ in app_scores]
app_ratings = [score for _, score in app_scores]

allrates = Bar()
allrates.add_xaxis(app_names)
allrates.add_yaxis("评分", app_ratings, category_gap=2, color='orange')
allrates.set_global_opts(
    # Rotate the x-axis labels so the 29 app names do not overlap.
    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=-40)),
    title_opts=opts.TitleOpts(title="全国各（部分）省市政务类app评分排名"),
)

allrates.render_notebook()

## 郑好办APP

In [3]:
# Download the most recent App Store customer reviews of the app with
# id=1502268233 (pages 1-10 of the iTunes RSS feed) and collect them in `zhb`.
flag = list(range(1, 11))  # page numbers requested from the paged review feed
urllist = [
    f"https://itunes.apple.com/rss/customerreviews/page={i}/id=1502268233/sortby=mostrecent/json?l=en&&cc=cn"
    for i in flag
]
rating = []   # star rating, kept as the string returned by the feed
version = []  # app version the review was written for
title = []    # review title
content = []  # review body
for url in urllist:
    # A timeout keeps a stalled connection from hanging the notebook, and
    # raise_for_status() surfaces HTTP errors instead of failing later in .json().
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    # Tolerate a page without an 'entry' list instead of raising KeyError.
    entries = res.json().get('feed', {}).get('entry', [])
    # NOTE(review): the feed may return a lone review as a single object rather
    # than a one-element list — confirm; normalising it is harmless either way.
    if isinstance(entries, dict):
        entries = [entries]
    for entry in entries:
        rating.append(entry['im:rating']['label'])
        version.append(entry['im:version']['label'])
        title.append(entry['title']['label'])
        content.append(entry['content']['label'])
data = {
    'rating': rating,
    'title': title,
    'version': version,
    'content': content,
}
zhb = DataFrame(data)

In [4]:
# Show the scraped review table (columns: rating, title, version, content).
zhb

Unnamed: 0,rating,title,version,content
0,1,垃圾,2.4.2,点界面经常点不动，反馈也无人受理，智能对话一直就是省略号，根本就没用
1,5,好,2.4.2,非常方便快捷 好
2,1,一直都是刷新，根本打不开,2.4.2,已经彻底无语了，开发者们能不能管理一下，每天不是系统维护进不去，就是提示网络不稳定，怎么刷新...
3,1,政府的APP能不能专业点,2.4.2,网络卡顿，根本加载不进去，好几天了无法解决。要这app有毛线用
4,1,垃圾软件,2.4.2,停个车还要下app才能缴费，下了app怎么都注册不成功，点开就卡住，关联支付宝也卡住。
...,...,...,...,...
495,1,打一星都嫌多，技术太差了点吧,1.0,app运行卡顿，经常网络异常，政府app做成这样也是醉了
496,1,垃圾软件，开发者用点心不行吗,1.0,平常用就时不时的有bug，今天有抢优惠券的活动，我等了半个小时准备抢，到时间了直接显示网络异...
497,1,网络异常,1.0,登陆不上，与支付宝关联也登陆不上，完全网络异常，哦呵呵？4G、Wi-Fi都不是网络吗？
498,1,一星不能多了,1.0,十点抢券，不能登录，页面刷新不出来，图标不显示。没有抢券入口。没有这个技术就别弄，别学支付宝...


In [5]:
# List the distinct app versions that appear in the scraped reviews.
zhb["version"].unique()

array(['2.4.2', '2.4.1', '2.4', '2.3', '2.2', '2.1.1', '2.0.1', '1.2.0',
       '1.1.0', '1.0.1', '1.0'], dtype=object)

In [6]:
# List the distinct rating values; they are stored as strings, which is why
# later cells filter with comparisons such as rating == "1".
zhb["rating"].unique()

array(['1', '5', '4', '2', '3'], dtype=object)

### 评分比例

In [7]:
# Split the reviews by star rating (ratings are strings "1".."5").
star1, star2, star3, star4, star5 = (
    zhb[zhb.rating == str(stars)] for stars in range(1, 6)
)

# Bucket the reviews: 4-5 stars positive, 1-2 stars negative, 3 stars neutral.
positv_rating = len(star4) + len(star5)
negativ_rating = len(star2) + len(star1)
neutral_rating = len(star3)

bucket_labels = ['好评（4分和5分）', '差评（1分和2分）', '中评（3分）']
bucket_counts = [positv_rating, negativ_rating, neutral_rating]

pie = Pie()
pie.add("", [[label, count] for label, count in zip(bucket_labels, bucket_counts)])
pie.set_global_opts(title_opts=opts.TitleOpts(title="郑好办App评分分布"))
# Label every slice as "<name>: <count>".
pie.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))

pie.render_notebook()

### 整体评分变化趋势

In [8]:
def total_rating(df, ver):
    """Return the sum of the ratings of all reviews written for one version.

    Args:
        df: review table with a `version` column and a `rating` column whose
            values can be converted with `int()`.
        ver: version label to select (compared with `==` against `version`).

    Returns:
        The sum of the selected ratings after converting each one to int.
    """
    ratings_for_version = df.loc[df.version == ver, "rating"]
    return ratings_for_version.map(int).sum()

In [9]:
# Cumulative average rating after each release: ratingN is the mean rating of
# every review written for the first N versions in the list below.
# The list is ordered from the oldest release to the newest.
# NOTE(review): '2.4.2' appears in zhb["version"].unique() but is not part of
# this trend — confirm whether that omission is intentional.
released_versions = ["1.0", "1.0.1", "1.1.0", "1.2.0", "2.0.1",
                     "2.1.1", "2.2", "2.3", "2.4", "2.4.1"]

cumulative_ratings = []
rating_sum = 0    # running sum of ratings over the versions seen so far
review_count = 0  # running number of reviews over the versions seen so far
for ver in released_versions:
    rating_sum += total_rating(zhb, ver)
    review_count += len(zhb[zhb.version == ver])
    # The feed is live data, so early versions may have no reviews at all;
    # report NaN instead of dividing by zero in that case.
    cumulative_ratings.append(rating_sum / review_count if review_count else float("nan"))

# Keep the individual names that the plotting cell reads.
(rating1, rating2, rating3, rating4, rating5,
 rating6, rating7, rating8, rating9, rating10) = cumulative_ratings


In [10]:
# Plot the cumulative average rating (two decimals) against the app version.
x_data = ["1.0", "1.0.1", "1.1.0", "1.2.0", "2.0.1", "2.1.1", "2.2", "2.3", "2.4", "2.4.1"]
y_data = [
    round(value, 2)
    for value in (rating1, rating2, rating3, rating4, rating5,
                  rating6, rating7, rating8, rating9, rating10)
]

# Value axis with visible tick marks and horizontal split lines.
value_axis = opts.AxisOpts(
    type_="value",
    axistick_opts=opts.AxisTickOpts(is_show=True),
    splitline_opts=opts.SplitLineOpts(is_show=True),
)

line = Line()
line.set_global_opts(
    tooltip_opts=opts.TooltipOpts(is_show=False),
    xaxis_opts=opts.AxisOpts(type_="category"),
    yaxis_opts=value_axis,
)
line.add_xaxis(xaxis_data=x_data)
line.add_yaxis(
    series_name="",
    y_axis=y_data,
    symbol="emptyCircle",
    is_symbol_show=True,
    label_opts=opts.LabelOpts(is_show=True),
)

line.render_notebook()

### 好评及差评关键词

In [11]:
from textrank4zh import TextRank4Keyword, TextRank4Sentence

# Extract keywords from the positive reviews: concatenate the text of all
# 5-star reviews followed by all 4-star reviews, then rank words with TextRank.
positivetext = ''.join(zhb[zhb.rating == "5"].content) + ''.join(zhb[zhb.rating == "4"].content)
positive = TextRank4Keyword()

# In Python 2 the text must be a UTF-8 encoded str or a unicode object;
# in Python 3 it must be UTF-8 encoded bytes or a str.
positive.analyze(text=positivetext, lower=True, window=2)

# Keep the 50 top-ranked keywords as (word, weight) pairs for the word cloud.
positivedata = [
    (keyword.word, keyword.weight)
    for keyword in positive.get_keywords(50, word_min_len=1)
]

# Extract keywords from the negative reviews: same procedure on the text of
# all 1-star reviews followed by all 2-star reviews.
negativetext = ''.join(zhb[zhb.rating == "1"].content) + ''.join(zhb[zhb.rating == "2"].content)
negative = TextRank4Keyword()

# Same text-encoding requirement as for the positive reviews above.
negative.analyze(text=negativetext, lower=True, window=2)

negativedata = [
    (keyword.word, keyword.weight)
    for keyword in negative.get_keywords(50, word_min_len=1)
]

Building prefix dict from the default dictionary ...
Loading model from cache /var/folders/9d/scnjgt611119939j59m_n7nm0000gn/T/jieba.cache
Loading model cost 0.794 seconds.
Prefix dict has been built successfully.


In [12]:
import pyecharts.options as opts
from pyecharts.charts import WordCloud

# Word cloud of the positive-review keywords; the weights in `positivedata`
# are mapped onto font sizes between 16 and 66.
positiv_wordcloud = WordCloud()
positiv_wordcloud.add(
    series_name="好评关键词",
    data_pair=positivedata,
    word_size_range=[16, 66],
)
positiv_wordcloud.set_global_opts(
    title_opts=opts.TitleOpts(
        title="好评关键词",
        title_textstyle_opts=opts.TextStyleOpts(font_size=23),
    ),
    tooltip_opts=opts.TooltipOpts(is_show=True),
)
positiv_wordcloud.render_notebook()

In [13]:
# Word cloud of the negative-review keywords; the weights in `negativedata`
# are mapped onto font sizes between 16 and 66.
negativ_wordcloud = WordCloud()
negativ_wordcloud.add(
    series_name="差评关键词",
    data_pair=negativedata,
    word_size_range=[16, 66],
)
negativ_wordcloud.set_global_opts(
    title_opts=opts.TitleOpts(
        title="差评关键词",
        title_textstyle_opts=opts.TextStyleOpts(font_size=23),
    ),
    tooltip_opts=opts.TooltipOpts(is_show=True),
)
negativ_wordcloud.render_notebook()

### 差评问题分类

In [14]:
# Share of each problem category among the negative reviews. The counts are
# hard-coded, and each share is count / 269 rounded to two decimals.
# NOTE(review): the six counts add up to 268, not 269 — confirm the total.
category1, category2, category3, category4, category5, category6 = (
    round(count / 269, 2) for count in (86, 103, 3, 8, 51, 17)
)

problem_labels = ['服务器网络问题', '功能bug', '功能不全', '办事效率', '其他（无明确说明问题）', '强制推广']
problem_shares = [category1, category2, category3, category4, category5, category6]

pie = Pie()
pie.add("", [[label, share] for label, share in zip(problem_labels, problem_shares)])
pie.set_global_opts(title_opts=opts.TitleOpts(title="差评问题分类"))
# Label every slice as "<name>: <share>".
pie.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))

pie.render_notebook()

In [15]:
# Share of each advantage category among the positive reviews. The counts are
# hard-coded, and each share is count / 220 rounded to two decimals.
category1, category2, category3, category4, category5 = (
    round(count / 220, 2) for count in (163, 3, 5, 46, 3)
)

advantage_labels = ['方便快捷', '服务质量高', '功能多业务广', '其他（无明确说明优点）', '优惠券']
advantage_shares = [category1, category2, category3, category4, category5]

pie = Pie()
pie.add("", [[label, share] for label, share in zip(advantage_labels, advantage_shares)])
pie.set_global_opts(title_opts=opts.TitleOpts(title="好评优点分类"))
# Label every slice as "<name>: <share>".
pie.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))

pie.render_notebook()

## i深圳

In [16]:
# Download the most recent App Store customer reviews of the app with
# id=1363830499 (pages 1-10 of the iTunes RSS feed) and collect them in `isz`.
flag = list(range(1, 11))  # page numbers requested from the paged review feed
urllist = [
    f"https://itunes.apple.com/rss/customerreviews/page={i}/id=1363830499/sortby=mostrecent/json?l=en&&cc=cn"
    for i in flag
]
rating = []   # star rating, kept as the string returned by the feed
title = []    # review title
content = []  # review body
for url in urllist:
    # A timeout keeps a stalled connection from hanging the notebook, and
    # raise_for_status() surfaces HTTP errors instead of failing later in .json().
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    # Tolerate a page without an 'entry' list instead of raising KeyError.
    entries = res.json().get('feed', {}).get('entry', [])
    # NOTE(review): the feed may return a lone review as a single object rather
    # than a one-element list — confirm; normalising it is harmless either way.
    if isinstance(entries, dict):
        entries = [entries]
    for entry in entries:
        rating.append(entry['im:rating']['label'])
        title.append(entry['title']['label'])
        content.append(entry['content']['label'])
data = {
    'rating': rating,
    'title': title,
    'content': content,
}
isz = DataFrame(data)

In [17]:
# Show the scraped review table (columns: rating, title, content).
isz

Unnamed: 0,rating,title,content
0,3,更新后不可用,为什么更新后不可用了呀
1,5,公积金管理,方便，快捷，安全，高效。
2,5,办理居住证,优秀
3,5,便民，方便，省事快捷。,很好
4,5,暂住证,很好，方便
...,...,...,...
495,5,深圳人值得拥有,医社保功能很棒，社康中心绑定变更点几下就办好了，也可以在app里面预约挂号，另外公安局、交警...
496,5,公积金真是太方便了，到账还有通知,公积金联名卡原来可以直接绑定自己已有的银行卡，一直以为需要去银行网点办理，用i深圳App关联...
497,5,查询公积金非常方便,之前查询公积金还要登陆公积金网站，他们对mac系统支持不够好，导致用起来非常不方便，现在好了...
498,5,五星好评,新版本很好用，界面更加漂亮，服务更加齐全了，最关键的是可以根据我的使用习惯自定义常用服务，这...


In [18]:
# List the distinct rating values present in the reviews (stored as strings).
isz["rating"].unique()

array(['3', '5', '1', '4', '2'], dtype=object)

In [19]:
# Split the reviews by star rating (ratings are strings "1".."5").
star1, star2, star3, star4, star5 = (
    isz[isz.rating == str(stars)] for stars in range(1, 6)
)

# Bucket the reviews: 4-5 stars positive, 1-2 stars negative, 3 stars neutral.
positv_rating = len(star4) + len(star5)
negativ_rating = len(star2) + len(star1)
neutral_rating = len(star3)

bucket_labels = ['好评（4分和5分）', '差评（1分和2分）', '中评（3分）']
bucket_counts = [positv_rating, negativ_rating, neutral_rating]

pie = Pie()
pie.add("", [[label, count] for label, count in zip(bucket_labels, bucket_counts)])
pie.set_global_opts(title_opts=opts.TitleOpts(title="i深圳App评分分布"))
# Label every slice as "<name>: <count>".
pie.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))

pie.render_notebook()

### 对i深圳好评进行分析

In [20]:
# Extract keywords from the positive reviews: concatenate the text of all
# 5-star reviews followed by all 4-star reviews, then rank words with TextRank.
positivetext = ''.join(isz[isz.rating == "5"].content) + ''.join(isz[isz.rating == "4"].content)
positive = TextRank4Keyword()

# In Python 2 the text must be a UTF-8 encoded str or a unicode object;
# in Python 3 it must be UTF-8 encoded bytes or a str.
positive.analyze(text=positivetext, lower=True, window=2)

# Keep the 50 top-ranked keywords as (word, weight) pairs for the word cloud.
positivedata = [
    (keyword.word, keyword.weight)
    for keyword in positive.get_keywords(50, word_min_len=1)
]

In [21]:
# Word cloud of the positive-review keywords; the weights in `positivedata`
# are mapped onto font sizes between 16 and 66.
positiv_wordcloud = WordCloud()
positiv_wordcloud.add(
    series_name="好评关键词",
    data_pair=positivedata,
    word_size_range=[16, 66],
)
positiv_wordcloud.set_global_opts(
    title_opts=opts.TitleOpts(
        title="好评关键词",
        title_textstyle_opts=opts.TextStyleOpts(font_size=23),
    ),
    tooltip_opts=opts.TooltipOpts(is_show=True),
)
positiv_wordcloud.render_notebook()

In [22]:
# Share of each advantage category among the positive reviews. The counts are
# hard-coded, and each share is count / 312 rounded to two decimals.
category1 = round(205/312,2)
category2 = round(14/312,2)
category3 = round(30/312,2)
category4 = round(63/312,2)


# Only the four shares computed above belong to this chart. The value list
# previously also named `category5`, a leftover from an earlier cell that zip()
# silently discarded; it is dropped so the cell no longer depends on that state.
pie = (
    Pie()
    .add("", [list(z) for z in zip(['方便快捷','服务质量高','功能多业务广','其他（无明确说明优点）'],
                            [category1,category2,category3,category4])])
    .set_global_opts(title_opts=opts.TitleOpts(title="好评优点分类"))
    # Label every slice as "<name>: <share>".
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
)

pie.render_notebook()

## 浙里办

In [23]:
# Download the most recent App Store customer reviews of the app with
# id=910260096 (pages 1-10 of the iTunes RSS feed) and collect them in `zlb`.
flag = list(range(1, 11))  # page numbers requested from the paged review feed
urllist = [
    f"https://itunes.apple.com/rss/customerreviews/page={i}/id=910260096/sortby=mostrecent/json?l=en&&cc=cn"
    for i in flag
]
rating = []   # star rating, kept as the string returned by the feed
title = []    # review title
content = []  # review body
for url in urllist:
    # A timeout keeps a stalled connection from hanging the notebook, and
    # raise_for_status() surfaces HTTP errors instead of failing later in .json().
    res = requests.get(url, timeout=30)
    res.raise_for_status()
    # Tolerate a page without an 'entry' list instead of raising KeyError.
    entries = res.json().get('feed', {}).get('entry', [])
    # NOTE(review): the feed may return a lone review as a single object rather
    # than a one-element list — confirm; normalising it is harmless either way.
    if isinstance(entries, dict):
        entries = [entries]
    for entry in entries:
        rating.append(entry['im:rating']['label'])
        title.append(entry['title']['label'])
        content.append(entry['content']['label'])
data = {
    'rating': rating,
    'title': title,
    'content': content,
}
zlb = DataFrame(data)

In [24]:
# Show the scraped review table (columns: rating, title, content).
zlb

Unnamed: 0,rating,title,content
0,1,不用解释,劳民伤财
1,1,垃圾软件，羊水破了都只准用这个缴费,垃圾软件，单位为了完成任务，硬要求不准用医保卡只能用浙里办，结果我的死活一付款就闪退，工程师...
2,1,强制要求医院医护人员完成任务,真垃圾，还强制要求医护人员完成任务，不完成扣工资，做得好自然会有人用，整这些恶心的推广！
3,1,垃圾,垃圾至极，压榨医护人员，逼死了，跳楼了
4,1,恶心人的玩意,推广自己不会，让我们上班的人强制给你们推？算啥东西
...,...,...,...
495,1,社保查询太塞心,要查个社保和医保，社保能查出来，医保一直显示“所选参保地未查询到相关信息”，试了好多次都没有...
496,1,政府部门做的app 太难用了,各种问题问题，一言难尽
497,1,很不打不开用不了,始终在界面，进入不了程序。无数次点击，始终在界面。为什么？
498,3,实名以后没有姓名,实名以后没有姓名，导致我用这个账号登录别的网站没有名字，想修改没有修改的地方，app也登录不...


In [25]:
# List the distinct rating values present in the reviews (stored as strings).
zlb["rating"].unique()

array(['1', '5', '4', '2', '3'], dtype=object)

In [26]:
# Split the reviews by star rating (ratings are strings "1".."5").
star1, star2, star3, star4, star5 = (
    zlb[zlb.rating == str(stars)] for stars in range(1, 6)
)

# Bucket the reviews: 4-5 stars positive, 1-2 stars negative, 3 stars neutral.
positv_rating = len(star4) + len(star5)
negativ_rating = len(star2) + len(star1)
neutral_rating = len(star3)

bucket_labels = ['好评（4分和5分）', '差评（1分和2分）', '中评（3分）']
bucket_counts = [positv_rating, negativ_rating, neutral_rating]

pie = Pie()
pie.add("", [[label, count] for label, count in zip(bucket_labels, bucket_counts)])
pie.set_global_opts(title_opts=opts.TitleOpts(title="浙里办App评分分布"))
# Label every slice as "<name>: <count>".
pie.set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))

pie.render_notebook()

### 对浙里办差评进行分析

In [27]:
# Extract keywords from the negative reviews: concatenate the text of all
# 1-star reviews followed by all 2-star reviews, then rank words with TextRank.
negativetext = ''.join(zlb[zlb.rating == "1"].content) + ''.join(zlb[zlb.rating == "2"].content)
negative = TextRank4Keyword()

# In Python 2 the text must be a UTF-8 encoded str or a unicode object;
# in Python 3 it must be UTF-8 encoded bytes or a str.
negative.analyze(text=negativetext, lower=True, window=2)

# Keep the 50 top-ranked keywords as (word, weight) pairs for the word cloud.
negativedata = [
    (keyword.word, keyword.weight)
    for keyword in negative.get_keywords(50, word_min_len=1)
]

In [28]:
# Word cloud of the negative-review keywords; the weights in `negativedata`
# are mapped onto font sizes between 16 and 66.
negativ_wordcloud = WordCloud()
negativ_wordcloud.add(
    series_name="差评关键词",
    data_pair=negativedata,
    word_size_range=[16, 66],
)
negativ_wordcloud.set_global_opts(
    title_opts=opts.TitleOpts(
        title="差评关键词",
        title_textstyle_opts=opts.TextStyleOpts(font_size=23),
    ),
    tooltip_opts=opts.TooltipOpts(is_show=True),
)
negativ_wordcloud.render_notebook()

In [29]:
# Share of each problem category among the negative reviews. The counts are
# hard-coded, and each share is count / 442 rounded to two decimals.
# NOTE(review): the six counts add up to 440, not 442 — confirm the total.
category1 = round(37/442,2)
category2 = round(10/442,2)
category3 = round(120/442,2)
category4 = round(77/442,2)
category5 = round(181/442,2)
category6 = round(15/442,2)


# All six shares are passed so that every label gets a slice: zip() stops at
# the shorter sequence, so leaving out category6 silently dropped the last
# category ('设计逻辑问题') from the chart.
pie = (
    Pie()
    .add("", [list(z) for z in zip(['服务器网络卡顿','服务质量差','功能bug','其他（无明确说明问题）','强制推广','设计逻辑问题'],
                            [category1,category2,category3,category4,category5,category6])])
    .set_global_opts(title_opts=opts.TitleOpts(title="差评问题分类"))
    # Label every slice as "<name>: <share>".
    .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
)

pie.render_notebook()