# 玩转微信：统计好友信息

### 登录网页版微信

In [None]:
import itchat
    
# Log in to the web version of WeChat via itchat. This is presumably
# interactive (e.g. a QR-code scan) — confirm against the itchat docs.
itchat.login()

### 获取好友信息

In [None]:
# Fetch the friend list. update=True presumably forces a refresh from the
# server rather than reusing a cached copy — confirm against the itchat docs.
friends = itchat.get_friends(update=True)
# Inspect the field names available on a record, using the first entry.
friends[0].keys()

可以看到每个好友有以下有效信息：
+ NickName: 昵称；
+ Sex: 性别（0代表未知，1代表男，2代表女）；
+ Province: 省份；
+ City: 城市；
+ Signature: 签名。

提取这些信息做个列表：

In [None]:
import pandas as pd

# Build one row per friend -- [name, gender label, "province-city", signature]
# -- and tally the gender counts that the pie chart cell reads later.
#
# WeChat encodes 'Sex' as 0 = unknown, 1 = male, 2 = female. Only 1 and 2 are
# mapped explicitly; every other value (0, a missing key, anything unexpected)
# is reported as 'Unknown' instead of silently being counted as female.
sex_labels = {1: 'Male', 2: 'Female'}

data = []
for friend in friends:
    gender = sex_labels.get(friend.get('Sex'), 'Unknown')
    # Location is stored as "<Province>-<City>"; the regional-statistics cell
    # splits on the dash again, so this format must not change.
    loc = friend.get('Province', '?') + '-' + friend.get('City', '?')
    data.append([friend.get('NickName', ''), gender, loc, friend.get('Signature', '')])

# Derive the counters from the labels actually stored in the table so the
# chart can never disagree with the 'Gender' column.
genders = [row[1] for row in data]
unknown = genders.count('Unknown')
male = genders.count('Male')
female = genders.count('Female')

wechat = pd.DataFrame(data=data, columns=['Name', 'Gender', 'Location', 'Signature'])

预览前三行：

In [None]:
# Display the first three rows as a quick sanity check of the table.
wechat.head(3)

保存列表信息到Excel文件：

In [None]:
# Write the table to an Excel workbook (relative path, so it lands in the
# current working directory); index=False leaves out the row-index column.
wechat.to_excel('微信好友列表.xlsx', index=False)

### 好友信息统计

In [None]:
# Use plotly to visualize the results
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# Enable plotly's offline rendering inside the notebook. connected=True
# presumably loads plotly.js from a CDN instead of embedding it — confirm
# against the plotly.offline docs.
init_notebook_mode(connected=True)

+ 性别结构

In [None]:
# Pie chart of the gender split: each slice is labelled with its category,
# and hovering shows the category together with its percentage.
gender_pie = go.Pie(
    labels=['Male', 'Female', 'Unknown'],
    values=[male, female, unknown],
    hoverinfo='label+percent',
    textinfo='label',
    textfont={'size': 20},
)
plotdata = [gender_pie]
iplot(plotdata, filename='gender_dist')

+ 地域分布

In [None]:
# Split every "province-city" string back into its parts and collect the
# non-empty provinces and cities for the regional statistics.
loc = wechat['Location'].values
provs, cities = [], []
# For these four municipalities the province name itself is counted as the
# city (presumably because 'City' then holds a district -- confirm).
municipalities = ('北京', '上海', '天津', '重庆')
for location in loc:
    # Split on the FIRST dash only: a plain split('-') unpacked into two names
    # raises ValueError as soon as a place name itself contains a hyphen.
    prov, _sep, city = location.partition('-')
    if prov:
        provs.append(prov)
    if prov in municipalities:
        cities.append(prov)
    elif city:
        cities.append(city)

按频率从高到低排序：

In [None]:
from collections import Counter

# Count how often each city occurs and order the (city, count) pairs from
# most to least frequent; ties keep their first-seen order.
city_counter = Counter(cities).most_common()
# Unzip into parallel lists for the bar chart's x and y axes.
cities_sorted = [name for name, _count in city_counter]
cities_count = [count for _name, count in city_counter]

绘制柱状图：

In [None]:
# Bar chart of friends per city, in the frequency order computed above
# (x: city names, y: matching counts).
city_bar = go.Bar(x=cities_sorted, y=cities_count)
plotdata = [city_bar]

iplot(plotdata, filename='city_dist')

+ 签名词云

词云图能够展示文段中词汇出现的频率：某个词出现的频率越高，它在图中显示的尺寸就越大。

In [None]:
import wordcloud as wc
import pkuseg
import numpy as np
from PIL import Image

# Segment the signatures into words with pkuseg.
# Join the signature strings themselves. str() on the NumPy array would give
# the array's *printed* form instead: brackets, quotes and escape sequences
# leak into the text, and arrays longer than NumPy's print threshold are
# abbreviated with "...", silently dropping most signatures.
sigs = ' '.join(wechat['Signature'].dropna().astype(str))
seg = pkuseg.pkuseg()
sigs_cut = ' '.join(seg.cut(sigs))

stopwords = wc.STOPWORDS.copy()
# Filter out tokens left over from WeChat's emoji codes.
stopwords.update((
    'span',
    'nan',
    'emoji',
    'class',
    'emoji1f334',
    'emoji1f388',
    'emoji1f33a',
    'emoji1f33c',
    'emoji1f633',
    'emoji1f',
))

# Load the mask image; the context manager closes the file handle once the
# pixel data has been copied into the array.
with Image.open('2019.png') as mask_file:
    bg_img = np.array(mask_file.convert('RGB'))
# Turn pure-black pixels (all channels zero) white -- presumably so WordCloud
# treats them as masked-out background; confirm against the wordcloud docs.
bg_img[~np.any(bg_img, axis=2), :] = np.repeat(255, bg_img.shape[2])

# NOTE(review): no font_path is passed; rendering Chinese words may require a
# CJK-capable font -- confirm on the target machine.
wdcld = wc.WordCloud(width=1024, height=768, background_color='white', mask=bg_img,
                     stopwords=stopwords, max_font_size=400, random_state=50)
wdcld.generate_from_text(sigs_cut)
# Colour generator that samples word colours from the mask image.
image_colors = wc.ImageColorGenerator(bg_img)

In [None]:
%matplotlib notebook
import matplotlib.pyplot as plt

# Recolour the cloud with colours sampled from the mask image, then show it
# without axes.
recolored_cloud = wdcld.recolor(color_func=image_colors)
plt.imshow(recolored_cloud, interpolation='bilinear')
plt.axis('off')

### 获取订阅号列表

In [None]:
# Fetch the subscribed official accounts. update=True presumably forces a
# refresh rather than using a cached list — confirm against the itchat docs.
mps = itchat.get_mps(update=True)
# Keep only each account's display name.
mps_list = [mp['NickName'] for mp in mps]