In [1]:
# Import pandas and numpy
import pandas as pd
import numpy as np

# Values 1..9, each labelled with an integer index that is 10 larger (11..19).
data = np.arange(1, 10)
index = data + 10
s = pd.Series(data=data, index=index)
s

11    1
12    2
13    3
14    4
15    5
16    6
17    7
18    8
19    9
dtype: int32

In [2]:
# Five uniform random draws, labelled 'a' through 'e'.
s = pd.Series(data=np.random.rand(5), index=list('abcde'))
s

a    0.751280
b    0.160398
c    0.574098
d    0.593847
e    0.044903
dtype: float64

In [3]:
# No index is supplied, so the values are labelled 0..4 by default.
s = pd.Series(data=np.random.rand(5))
s

0    0.319889
1    0.887660
2    0.052491
3    0.385821
4    0.403192
dtype: float64

In [4]:
# Building a Series from a dict: the keys become the index labels.
d = dict(a=1, b=3, c=5)
pd.Series(data=d)

a    1
b    3
c    5
dtype: int64

In [5]:
# Labels requested in `index` that are missing from `d` ('d' and 'e') come out as NaN.
pd.Series(data=d, index=list('abcde'))

a    1.0
b    3.0
c    5.0
d    NaN
e    NaN
dtype: float64

In [7]:
# Values 1..4 labelled 'a'..'d'; the labels are passed through the `index` keyword.
pd.Series(list(range(1, 5)), index=list('abcd'))

a    1
b    2
c    3
d    4
dtype: int64

In [8]:
# Same values without labels: the index defaults to 0..3.
pd.Series(list(range(1, 5)))

0    1
1    2
2    3
3    4
dtype: int64

In [9]:
# Three uniform random draws labelled 'a', 'b', 'c'.
s = pd.Series(np.random.rand(3), index=['a', 'b', 'c'])
# Positional access goes through .iloc: with a non-integer index, `s[0]` relies on
# the deprecated "integer key treated as a position" fallback of Series.__getitem__.
# The printed label is left unchanged so the cell's output stays the same.
print("s[0]:\n", s.iloc[0])
# Label-based access with [] is unambiguous and needs no change.
print("s['b']:\n", s['b'])

s[0]:
 0.6808780237012945
s['b']:
 0.8039394884981665


In [10]:
# `in` tests membership against the index labels.
print("'a' in s: \n", 'a' in s)
print("'f' in s: \n", 'f' in s)

# Bracket lookup of a label that is not in the index raises KeyError;
# the handler prints the exception class itself.
try:
    missing_value = s['f']
except KeyError:
    print(KeyError)
else:
    print("s['f']: \n", missing_value)

# .get() does not raise for a missing label: it returns None, or the supplied default.
fallback = 'f不在索引列表中'
print("s.get('b'): \n", s.get('b'))
print("s.get('f'):\n", s.get('f'))
print("s.get('f', 'f不在索引列表中'):\n", s.get('f', fallback))

'a' in s: 
 True
'f' in s: 
 False
<class 'KeyError'>
s.get('b'): 
 0.8039394884981665
s.get('f'):
 None
s.get('f', 'f不在索引列表中'):
 f不在索引列表中


In [11]:
# Integer slices on a string-labelled Series are positional (stop position excluded).
print("s[:3]:\n{}".format(s[:3]))
# Label slices include the end label.
print("s[:'c']:\n{}".format(s[:'c']))
print("s[1:3]:\n{}".format(s[1:3]))
# A list of integer positions must go through .iloc: `s[[1, 2]]` on a non-integer
# index relies on the deprecated positional fallback of Series.__getitem__.
# The printed label is left unchanged so the cell's output stays the same.
print("s[[1,2]]:\n{}".format(s.iloc[[1, 2]]))

s[:3]:
a    0.680878
b    0.803939
c    0.543132
dtype: float64
s[:'c']:
a    0.680878
b    0.803939
c    0.543132
dtype: float64
s[1:3]:
b    0.803939
c    0.543132
dtype: float64
s[[1,2]]:
b    0.803939
c    0.543132
dtype: float64


In [12]:
# Build a ten-day time index starting on 2018-09-01 with one timestamp every
# 10 minutes (1440 periods x 10 minutes = 10 days).
rng = pd.date_range(start='9/1/2018', periods=1440, freq='10Min')
rng[0:5]

DatetimeIndex(['2018-09-01 00:00:00', '2018-09-01 00:10:00',
               '2018-09-01 00:20:00', '2018-09-01 00:30:00',
               '2018-09-01 00:40:00'],
              dtype='datetime64[ns]', freq='10T')

In [13]:
# One uniform random value per timestamp. The sample count is taken from the index
# itself so the data and the index cannot drift out of sync (a length mismatch
# makes the Series constructor raise).
ts = pd.Series(np.random.rand(len(rng)), index=rng)
ts.head()

2018-09-01 00:00:00    0.566621
2018-09-01 00:10:00    0.522399
2018-09-01 00:20:00    0.068799
2018-09-01 00:30:00    0.795320
2018-09-01 00:40:00    0.803441
Freq: 10T, dtype: float64

In [14]:
# Number of non-missing observations in the time series.
ts.count()

1440

In [15]:
# First five observations, selected by position.
ts.iloc[:5]

2018-09-01 00:00:00    0.566621
2018-09-01 00:10:00    0.522399
2018-09-01 00:20:00    0.068799
2018-09-01 00:30:00    0.795320
2018-09-01 00:40:00    0.803441
Freq: 10T, dtype: float64

In [16]:
# Every second observation among the first ten positions.
ts.iloc[:10:2]

2018-09-01 00:00:00    0.566621
2018-09-01 00:20:00    0.068799
2018-09-01 00:40:00    0.803441
2018-09-01 01:00:00    0.399131
2018-09-01 01:20:00    0.796323
Freq: 20T, dtype: float64

In [17]:
# Indexing with a date string: select all of 2018-09-06 by label,
# then skip the first 140 rows of that day by position.
ts.loc['9/6/2018'].iloc[140:]

2018-09-06 23:20:00    0.401164
2018-09-06 23:30:00    0.081843
2018-09-06 23:40:00    0.415628
2018-09-06 23:50:00    0.269693
Freq: 10T, dtype: float64

In [18]:
# Indexing with a datetime object: keep everything from 2018-09-09 onwards (label slice),
# then take every 60th row by position.
from datetime import datetime
ts.loc[datetime(2018, 9, 9):].iloc[::60]

2018-09-09 00:00:00    0.343626
2018-09-09 10:00:00    0.998729
2018-09-09 20:00:00    0.187074
2018-09-10 06:00:00    0.875246
2018-09-10 16:00:00    0.037678
Freq: 600T, dtype: float64

In [19]:
# Two single-column frames whose only common names are 'hellof' and 'masonsxu'.
df1 = pd.DataFrame(dict(name=['hellof', 'masonsxu', 'a', 'b', 'c']))
df2 = pd.DataFrame(dict(name=['hellof', 'masonsxu', 'A', 'B', 'C']))
# Join on the explicitly named key column (the default join type is inner).
df1.merge(df2, on='name')

Unnamed: 0,name
0,hellof
1,masonsxu


In [20]:
# Inner join: only names present in both frames survive.
# No `on` is given, so the shared column ('name') is used as the key.
df1.merge(df2, how='inner')

Unnamed: 0,name
0,hellof
1,masonsxu


In [21]:
# Left join: every row of df1 is kept.
df1.merge(df2, how='left')

Unnamed: 0,name
0,hellof
1,masonsxu
2,a
3,b
4,c


In [22]:
# Right join: every row of df2 is kept.
df1.merge(df2, how='right')

Unnamed: 0,name
0,hellof
1,masonsxu
2,A
3,B
4,C


In [23]:
# Outer join: the union of the names from both frames.
df1.merge(df2, how='outer')

Unnamed: 0,name
0,hellof
1,masonsxu
2,a
3,b
4,c
5,A
6,B
7,C


In [25]:
# Tell pyecharts that charts will be displayed in JupyterLab.
# NOTE(review): this is presumably required to happen before the chart classes are
# imported and rendered — keep it ahead of the chart imports; confirm against the pyecharts docs.
from pyecharts.globals import CurrentConfig, NotebookType
CurrentConfig.NOTEBOOK_TYPE = NotebookType.JUPYTER_LAB

# `opts` (option classes) and `ThemeType` (chart themes) are used by the chart cells that follow.
import pyecharts.options as opts
from pyecharts.globals import ThemeType

In [26]:
from pyecharts.charts import Pie

# Read the vote results from CSV.
vote_result = pd.read_csv('data/vote_result.csv')

# The pie series is fed a list of [area name, vote count] pairs.
pie_areas = vote_result['Areas_of_interest'].tolist()
pie_votes = vote_result['Votes'].tolist()
pie_pairs = [[area, votes] for area, votes in zip(pie_areas, pie_votes)]

# Tooltip with centred text: series name on the first line, then "name: value (percent%)".
pie_tooltip = opts.TooltipOpts(
    textstyle_opts=opts.TextStyleOpts(align='center'),
    formatter='{a}' + '<br/>' + '{b}: {c} ({d}%)',
)
pie_title = opts.TitleOpts(
    title="数据可视化-用户感兴趣领域",
    subtitle="以下是读者的投票结果。\n读者对金融、医疗保健、市场3个领域最感兴趣",
    pos_left='center',
)
pie_legend = opts.LegendOpts(orient="vertical", pos_left="2%")

# Same calls as a fluent chain, written as separate statements on the chart object.
pie = Pie(init_opts=opts.InitOpts(theme=ThemeType.DARK))
pie.add('领域名称', pie_pairs, radius=[None, 150], tooltip_opts=pie_tooltip)
pie.set_global_opts(title_opts=pie_title, legend_opts=pie_legend)
pie.set_series_opts(label_opts=opts.LabelOpts(formatter='{b}'))

# NOTE(review): load_javascript() presumably loads the chart's JS assets for JupyterLab
# before the first render — confirm against the pyecharts notebook guide.
pie.load_javascript()
pie.render_notebook()

In [27]:
# Same chart as the previous pie, but with an inner radius of 100 instead of None.
ring_pairs = [
    [area, votes]
    for area, votes in zip(vote_result['Areas_of_interest'].tolist(),
                           vote_result['Votes'].tolist())
]

# Tooltip with centred text: series name on the first line, then "name: value (percent%)".
ring_tooltip = opts.TooltipOpts(
    textstyle_opts=opts.TextStyleOpts(align='center'),
    formatter='{a}' + '<br/>' + '{b}: {c} ({d}%)',
)
ring_title = opts.TitleOpts(
    title="数据可视化-用户感兴趣领域",
    subtitle="以下是读者的投票结果。\n读者对金融、医疗保健、市场3个领域最感兴趣",
    pos_left='center',
)
ring_legend = opts.LegendOpts(orient="vertical", pos_left="2%")

pie = Pie(init_opts=opts.InitOpts(theme=ThemeType.DARK))
pie.add('领域名称', ring_pairs, radius=[100, 150], tooltip_opts=ring_tooltip)
pie.set_global_opts(title_opts=ring_title, legend_opts=ring_legend)
pie.set_series_opts(label_opts=opts.LabelOpts(formatter='{b}'))
pie.render_notebook()

In [29]:
from pyecharts.charts import Bar

# Read the approval-rate table from CSV.
pre_approval_rate = pd.read_csv('data/presidential_approval_rate.csv')

# (legend label, CSV column) for each stacked series, in stacking order.
answer_series = (
    ("支持", 'support'),
    ("反对", 'oppose'),
    ("不发表意见", 'no_opinion'),
)

# X axis: the political issues; all series share stack "1", giving one stacked bar per issue.
bar = Bar(init_opts=opts.InitOpts(theme=ThemeType.DARK))
bar.add_xaxis(pre_approval_rate['political_issue'].tolist())
for answer_label, answer_column in answer_series:
    bar.add_yaxis(answer_label, pre_approval_rate[answer_column].tolist(),
                  stack="1", category_gap="50%")

# Hide per-segment value labels and rotate the category labels by 30 degrees.
bar.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
bar.set_global_opts(
    title_opts=opts.TitleOpts(title="柱状图数据堆叠示例"),
    xaxis_opts=opts.AxisOpts(axislabel_opts=opts.LabelOpts(rotate=30)),
)
bar.render_notebook()

In [30]:
# X axis categories: the three possible answers.
list_support = ['支持', '反对', '不发表意见']

bar = Bar(init_opts=opts.InitOpts(theme=ThemeType.DARK))
bar.add_xaxis(list_support)
# One stacked series per political issue. Rows are iterated directly rather than
# through range(size) + .loc[i, ...], which mixes positional counts with label
# lookups and is only correct while the frame has a default 0..n-1 index.
for row in pre_approval_rate.itertuples(index=False):
    bar.add_yaxis(row.political_issue,
                  # Plain Python ints, in the same order as `list_support`.
                  [int(row.support), int(row.oppose), int(row.no_opinion)],
                  stack="1", category_gap="50%")
# Hide per-segment value labels; centre the title and put a vertical legend on the right.
bar.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
bar.set_global_opts(title_opts=opts.TitleOpts(title="柱状图数据堆叠示例", pos_left='center'),
                    legend_opts=opts.LegendOpts(orient="vertical", pos_right="2%"))
bar.render_notebook()

In [31]:
import os
import json
import codecs
from pyecharts.charts import Tree
# Read the JSON document for the tree chart. The built-in open() with an explicit
# encoding replaces the legacy codecs.open() and matches how the treemap cell
# reads its file.
with open(os.path.join('data', 'GDP_data.json'), 'r', encoding='utf8') as f:
    j = json.load(f)
# The loaded object is wrapped in a one-element list before being handed to Tree.add().
data = [j]
tree = (
    Tree(init_opts=opts.InitOpts(theme=ThemeType.DARK))
    .add("", data)
    .set_global_opts(title_opts=opts.TitleOpts(title="树图"))
)
tree.render_notebook()

In [33]:
from pyecharts.charts import TreeMap

# Read the JSON document for the treemap; it is passed to the chart as loaded.
treemap_path = os.path.join('data', 'GDP_data_1.json')
with open(treemap_path, 'r', encoding='utf8') as f:
    data = json.load(f)

# Same calls as a fluent chain, written as separate statements on the chart object.
treemap = TreeMap(init_opts=opts.InitOpts(theme=ThemeType.DARK))
treemap.add("演示数据", data)
# Show each node's label inside its rectangle.
treemap.set_series_opts(label_opts=opts.LabelOpts(is_show=True, position='inside'))
treemap.set_global_opts(title_opts=opts.TitleOpts(title="矩形树图示例"))
treemap.render_notebook()

In [34]:
from pyecharts.charts import Line

# Read the population-by-age table from CSV.
year_population_age = pd.read_csv('data/us_population_by_age.csv')

# (legend label, CSV column, line colour) for each age band, in stacking order.
age_bands = (
    ('5岁以下', 'year_under5', 'red'),
    ('5岁至19岁', 'year5_19', 'blue'),
    ('20至44岁', 'year20_44', 'green'),
    ('45至64岁', 'year45_64', 'yellow'),
    ('65岁以上', 'year65above', 'orange'),
)

# X axis: the years; every age band is added to the same stack ('1').
line3 = Line(init_opts=opts.InitOpts(theme=ThemeType.DARK))
line3.add_xaxis(year_population_age['year'].tolist())
for band_label, band_column, band_color in age_bands:
    line3.add_yaxis(band_label, year_population_age[band_column].tolist(),
                    color=band_color, stack='1')

# Half-transparent area fill, no per-point labels, and pin markers on each
# series' maximum and minimum.
extreme_markers = opts.MarkPointOpts(
    data=[opts.MarkPointItem(type_='max'), opts.MarkPointItem(type_='min')],
    symbol='pin',
)
line3.set_series_opts(
    areastyle_opts=opts.AreaStyleOpts(opacity=0.5),
    label_opts=opts.LabelOpts(is_show=False),
    markpoint_opts=extreme_markers,
)
line3.set_global_opts(title_opts=opts.TitleOpts(title='面积折线图'))
line3.render_notebook()