In [1]:
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Tree,TreeMap,Sunburst
import json
import time

In [2]:
#IMDb数据集的数据在imdb.tsv中，请编写程序实现标题类型（字符串）和类型（字符串数组）-包括与标题相关的类型的TREE的可视化
#第一步  读取数据
stime = time.time()
data = pd.read_csv('imdb.tsv', sep='\t')
print("Time for reading: ", time.time()-stime, 's')

Time for reading:  0.24462461471557617 s


In [3]:
#第二步  数据格式整理
data = data[['tconst', 'titleType', 'primaryTitle', 'genres']]
data = data.dropna(subset=['titleType', 'primaryTitle', 'genres'])

stime = time.time()
#构建字典
data_tree = {}
data_treemap = {}
for _, row in data.iterrows():
    titleType = row['titleType']
    PrimaryTitle = row['primaryTitle']
    genres= set(row['genres'].split(','))
    genres_ForMap = {}.fromkeys(row['genres'].split(','), 1)
    if titleType not in data_tree:
        data_tree[titleType] = genres
    else:
        data_tree[titleType].update(set(genres))
    if titleType not in data_treemap:
        data_treemap[titleType] = genres_ForMap
    else:
        for k in set(genres_ForMap.keys())|set(data_treemap[titleType].keys()):
            data_treemap[titleType][k]=genres_ForMap.get(k, 0) + data_treemap[titleType].get(k, 0)
print("Time for category: ", time.time()-stime, 's')

Time for category:  19.050490856170654 s


In [4]:
#第三步  构建树结构数据
stime = time.time()
treeData = [{'name': 'All', 'children':[]}]
for tt,gen in data_tree.items():
    node = {'name' : tt, 'children': [{'name': genre} for genre in gen if genre != '\\N']}
    treeData[0]["children"].append(node)
    
treemapData = [{'name': 'All', 'children':[]}]
sunburstData = []
for tt, gen in data_treemap.items():
    node = {
        'name':tt,
        'children': [{'name':k, 'value':v} for k, v in gen.items() if k != '\\N']
    }
    treemapData[0]['children'].append(node)
    sunburstData.append(node)

    
print('Time for building tree: ', time.time()-stime, 's')

Time for building tree:  0.0019230842590332031 s


In [5]:
#第四步  创建树可视化函数（参数在字典格式中包含treedata,layout,orient,initialTreeDepth）
def buildTree(title, treeData, layout='orthogonal', orient='LR', initialTreeDepth=1):
    tree = (Tree(
                     init_opts=opts.InitOpts(height='850px')
                ).add('', treeData)
                 .set_global_opts(title_opts=opts.TitleOpts(title=title))
                 .set_series_opts(
                     layout=layout,
                     orient=orient,
                     collaps_interval=2,
                     collapsible=True,
                     initialTreeDepth=initialTreeDepth
                 )
           )
    
    return tree

In [6]:
#第五步  可视化树，请注意jupyter的可视化方式不一样
buildTree('IMDB-Tree', treeData).render_notebook()

In [7]:
#构建树图函数
def buildTreeMap(title, data):
    treemap = (
        TreeMap()
            .add(
                'IMDB', data,
                levels=[
                    opts.TreeMapLevelsOpts(
                        treemap_itemstyle_opts=opts.TreeMapItemStyleOpts(
                            border_color="#555", border_width=4, gap_width=4
                        )
                    ),
                    opts.TreeMapLevelsOpts(
                        color_saturation=[0.3, 0.6],
                        treemap_itemstyle_opts=opts.TreeMapItemStyleOpts(
                            border_color_saturation=0.7, gap_width=2, border_width=2
                        )
                    ),  
                    opts.TreeMapLevelsOpts(
                        color_saturation=[0.3, 0.5],
                        treemap_itemstyle_opts=opts.TreeMapItemStyleOpts(
                            border_color_saturation=0.6, gap_width=1
                        )
                    ),  
                    opts.TreeMapLevelsOpts(
                        color_saturation=[0.3, 0.5]
                    ),
                ]
            )
            .set_global_opts(title_opts=opts.TitleOpts(title=title))
        )
    
    return treemap

In [8]:
#第六步  可视化树图，请注意jupyter的可视化方式不一样
buildTreeMap('IMDB-Treemap', treemapData).render_notebook()

In [9]:
#构建旭日图函数
def buildSunburst(title, data):
    sunburst=(
        Sunburst(
            init_opts=opts.InitOpts(height='950px'),
        )
        .add(
            'IMDB', data, radius=[0, '65%'], center=['400px','450px']
        )
        .set_series_opts(
            label_opts=opts.LabelOpts(formatter="{b}:{c}", position="outside")
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(title=title),
        )
    )
    
    return sunburst

In [10]:
#第七步  可视化旭日图，请注意jupyter的可视化方式不一样
buildSunburst('IMDB', sunburstData).render_notebook()