In [1]:
import pandas as pd
import numpy as np
import math
import random
from collections import defaultdict
from pyecharts.render import make_snapshot
from pyecharts.charts import Scatter,Grid,Line
from pyecharts.commons.utils import JsCode
from pyecharts import options as opts


In [20]:
def Dijkstra(G,v0,INF=999):
    """
    Compute shortest "effective distances" from ``v0`` to the vertices of ``G``.

    ``G`` is a mapping of mappings: ``G[m][n]`` is the weight of the edge
    m -> n.  Edges whose weight is not strictly positive are ignored; every
    other edge costs ``1 - ln(weight)``.  That cost is non-negative only
    while ``weight <= e``; like any Dijkstra search, the result is not
    guaranteed to be correct if an edge cost is negative.

    Parameters
    ----------
    G : mapping of mappings
        Adjacency structure holding the edge weights.
    v0 : hashable
        Source vertex.
    INF : number, default 999
        Value standing for "unreachable".  Vertices whose distance never
        drops below ``INF`` keep this value in the result.

    Returns
    -------
    dict
        Vertex -> shortest distance from ``v0`` (``dis[v0] == 0``).
    """
    # Tentative distances: everything is "infinitely" far except the source.
    dis = {k: INF for k in G.keys()}
    dis[v0] = 0
    book = set()    # vertices whose distance is final
    minv = v0       # vertex currently being expanded

    while True:
        book.add(minv)
        # .get() keeps a defaultdict input unmodified and treats a vertex
        # without an adjacency entry as having no outgoing edges.
        for w, weight in G.get(minv, {}).items():
            if weight > 0:
                dmn = 1 - math.log(weight)
                # dis.get() lets edges point at vertices that only occur as
                # targets (they are not keys of G) instead of raising KeyError.
                if dis[minv] + dmn < dis.get(w, INF):
                    dis[w] = dis[minv] + dmn

        # Choose the closest vertex that is not final yet.  An explicit flag
        # is used instead of a sentinel label so that no vertex name (such
        # as -1) can be mistaken for "nothing left".
        found = False
        new = INF
        for v, d in dis.items():
            if v not in book and d < new:
                new = d
                minv = v
                found = True
        if not found:
            return dis

In [2]:
#画图函数
def _first_case_scatter(list2, data, corr_dis_time, province,
                        title_top=None, legend_top=15, y_min=5) -> Scatter:
    """
    Shared builder behind the two public scatter-chart functions.

    Parameters
    ----------
    list2 : sequence
        Values placed on the x axis.
    data : pandas.DataFrame
        Columns labelled ``2`` and ``1`` are read, in that order, and each
        row becomes the y payload of one point.
    corr_dis_time : number
        Shown in the subtitle after "相关系数=".
    province : str
        Inserted into the x-axis name.
    title_top : str or None
        ``pos_top`` of the title (``None`` keeps the pyecharts default).
    legend_top : int or str
        ``pos_top`` of the legend.
    y_min : number
        Lower bound of the y axis.
    """
    # JavaScript callback that labels each point with the third entry of its
    # value array -- presumably column 1 of ``data`` once pyecharts has
    # prepended the x value; TODO confirm against the pyecharts version used.
    label_formatter = JsCode(
        "function(params){return params.value[2];}"
    )
    return (
        Scatter()
        .add_xaxis(list2)
        .add_yaxis(
            "首例病例出现时间",
            data[[2,1]].values.tolist(),
            label_opts=opts.LabelOpts(formatter=label_formatter),
        )
        # NOTE(review): effect_opts looks like an EffectScatter setting; it is
        # kept exactly as in the original code.
        .set_series_opts(effect_opts=opts.EffectOpts(symbol_size=5, color="yellow"),)
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="首例病例出现时间与实际距离的关系",
                subtitle="相关系数="+str(corr_dis_time),
                pos_left='center',
                pos_top=title_top,
            ),
            legend_opts=opts.LegendOpts(pos_right=40, pos_top=legend_top),
            xaxis_opts=opts.AxisOpts(
                type_="value",  # the x axis holds continuous values
                min_=0,
                name='距'+province+'实际距离',
                name_location='middle',
            ),
            yaxis_opts=opts.AxisOpts(
                min_=y_min,
                name='出现时间/天',
            ),
        )
    )


def scatter_visualmap_color(list2,data,corr_dis_time,province)->Scatter:
    """
    Scatter chart of first-case time against distance, for standalone use.

    The title keeps its default vertical position, the legend sits 15 px
    from the top and the y axis starts at 5.  See ``_first_case_scatter``
    for the meaning of the parameters.
    """
    return _first_case_scatter(list2, data, corr_dis_time, province,
                               title_top=None, legend_top=15, y_min=5)


def grid_scatter_visualmap_color(list2,data,corr_dis_time,province)->Scatter:
    """
    Variant of ``scatter_visualmap_color`` with title and legend at 48 %.

    Title and legend are both placed at ``pos_top="48%"`` and the y axis
    starts at 0.  See ``_first_case_scatter`` for the meaning of the
    parameters.
    """
    return _first_case_scatter(list2, data, corr_dis_time, province,
                               title_top="48%", legend_top="48%", y_min=0)
def fitting_line(x,y) -> Line:
    """
    Build a Line chart with a single series named "拟合线".

    ``x`` supplies the x-axis values and ``y`` the series values.  The line
    is drawn green, dashed and 4 wide, and point labels are hidden.
    """
    dashed_green = opts.LineStyleOpts(color="green", width=4, type_="dashed")
    no_labels = opts.LabelOpts(is_show=False)
    return (
        Line()
        .add_xaxis(x)
        .add_yaxis("拟合线", y, linestyle_opts=dashed_green, label_opts=no_labels)
    )

<code>[(10, random.randint(1, 100)) for i in range(10)]</code>
*****
<font color='#adacad'>#随机生成序列，包含**十个**元素
#在极坐标系图中，data 的数据类型为元组列表
##元组中第一项为 *ρ*（极径），第二项为 *角度*
******
例如：<code>s=[(graphs.iloc[i][0],graphs.iloc[i][1]) for i in range(0,10)]</code>
s为graphs的前十行前两列</font>

In [None]:
# #测试时间线图，可用于建立疫情地图

# from pyecharts.faker import Faker
# from pyecharts import options as opts
# from pyecharts.charts import Bar, Page, Pie, Timeline


# def timeline_bar() -> Timeline:
#     x = Faker.choose()
#     tl = Timeline()
#     for i in range(2015, 2020):
#         bar = (
#             Bar()
#             .add_xaxis(x)
#             .add_yaxis("商家A", Faker.values())
#             .add_yaxis("商家B", Faker.values())
#             .set_global_opts(title_opts=opts.TitleOpts("某商店{}年营业额".format(i)))
#         )
#         tl.add(bar, "{}年".format(i))
#     return tl
# timeline_bar().render(path='test_timeline.html')

In [3]:
# Day on which each province reported its first case; Hubei is day 0.
# All three tables are whitespace-separated, header-less and GB2312-encoded.
# The separator is a raw string: '\s' inside a normal string literal is an
# invalid escape sequence and triggers a warning on recent Python versions.
_TABLE_OPTS = dict(sep=r'\s+', header=None, encoding='GB2312')

first_day = pd.read_csv('data/流量数据/province.txt', **_TABLE_OPTS)
# NOTE(review): judging by the commented-out cells that write these files,
# province_dmn.txt holds pairwise effective distances and province_gmn.txt
# pairwise geographic distances -- confirm.
dmn = pd.read_csv('data/流量数据/province_dmn.txt', **_TABLE_OPTS)
gmn = pd.read_csv('data/流量数据/province_gmn.txt', **_TABLE_OPTS)

In [4]:
# # 湖北与其他省份的有效距离与实际距离。首例时间
# gmn_hb=pd.Series(gmn.iloc[:,0])
# dmn_hb=pd.Series(dmn.iloc[:,0])
# time=pd.Series(first_day[2])

# d_k,d_b= np.polyfit(dmn_hb, time, 1)
# g_k,g_b=np.polyfit(gmn_hb,time,1)

# #计算以有效距离,实际距离为回归变量的相对残差 
# R_d=0
# for i in range(0,len(dmn_hb)):
#     if(time[i]==0):continue
#     R_d+=((time[i]-dmn_hb[i]*d_k-d_b)/time[i])**2
# #     R_d+=abs(time[i]-dmn_hb[i]*d_k-d_b)/time[i]
# R_g=0
# for i in range(0,len(gmn_hb)):
#     if(time[i]==0):continue
#     R_g+=((time[i]-gmn_hb[i]*g_k-g_b)/time[i])**2
# #     R_g+=abs(time[i]-gmn_hb[i]*g_k-g_b)/time[i]
# R_d, R_g
# #(0.5074412202166079, 0.5102267883303312)
# # 相差不大，猜测是因为数据太少，且两省之间的距离实际上是两省会之间的距离，不够准确

In [21]:
# 湖北省和各省之间在武汉封城前的有效路径
# graphs2=pd.read_csv("data/流量数据/province-flux-before.txt",sep="\s+",header=None)
# pID=pd.read_csv('data/流量数据/provinceID.txt',sep='\s+',header=None,encoding='GB2312')

# d2= defaultdict(defaultdict) #图
# dmn = defaultdict(defaultdict) #有效路径

# for i in range(0,len(pID)):  #插入对角线元素
#     d2[i][i]=0
# for i in range(0,len(graphs2)):
#     d2[graphs2.iloc[i][0]][graphs2.iloc[i][1]]=graphs2.iloc[i][2]

# # 计算有效距离与首例病例出现时间的相关性    
# dis2=Dijkstra(d2,v0=0)
# list2=list(dis2.values())
# dis2=pd.Series(dis2)
# time=pd.Series(first_day[2])
# corr_dis_time=round(dis2.corr(time),5)
# z1 = np.polyfit(dis2, time, 1)  #一次多项式拟合，相当于线性拟合
# # p1 = np.poly1d(z1)
# line=fitting_line([0,10],[z1[1],z1[0]*10+z1[1]])
# # # 生成图片
# graph_wh=scatter_visualmap_color(list2,first_day,corr_dis_time,'湖北省')
# # graph_wh.render(path='test_map_7.html')
# graph_wh.overlap(line)
# graph_wh.render(path='test_map_7_fitting.html')

0.78638

In [None]:
#广东省和各省之间在武汉封城前的有效路径，并计算有效距离与首例病例出现时间的相关性,生成图片
# dis_gd=Dijkstra(d2,v0=3)
# list_gd=list(dis_gd.values())
# dis_gd=pd.Series(dis_gd)
# corr_dis_time_gd=round(dis_gd.corr(time),5)
# graph_gd=grid_scatter_visualmap_color(list_gd,first_day,corr_dis_time_gd,"广东省")
# graph_gd.render(path='test_gd.html')

In [None]:
#将两幅图组合在一起
# grid=(
#     Grid()
#     .add(graph_wh, grid_opts=opts.GridOpts(pos_bottom="60%"))
#     .add(graph_gd, grid_opts=opts.GridOpts(pos_top="60%"))
# )
# grid.render(path="test_wh_gd.html")

In [21]:
#各省之间在武汉封城前的有效路径，输出为txt文件
# for i in range(0,len(pID)):
#     dmn[i]=Dijkstra(d2,v0=i)
    
# dmn=pd.DataFrame(dmn)
# dmn.head()
# dmn.to_csv('data/流量数据/province_dmn.txt',sep='\t',index=False,header=None)

In [7]:
#查询每个省的经纬度并保存为文件province information.txt
# import requests
# #使用高德API-地理/逆地理编码:https://lbs.amap.com/api/webservice/guide/api/georegeo
# def geocodeG(address):
#     key = os.environ['AMAP_API_KEY']  # read the AMap key from the environment (needs `import os`); never hardcode credentials
#     par = {'address': address, 'key': key}
#     base = 'http://restapi.amap.com/v3/geocode/geo'
#     response = requests.get(base, par)
#     answer = response.json()
#     return answer['geocodes'][0]['location']

# lng_lat=[[0 for j in range(0,3)]for i in range(0,len(pID))] #初始化

# for i in range(0,len(pID)):
#     address=pID.iloc[i][1]
#     lng_lat[i][0]=address
#     temp=geocodeG(address).split(',')
#     lng_lat[i][1]=temp[0]
#     lng_lat[i][2]=temp[1]

# lng_lat=pd.DataFrame(lng_lat)
# lng_lat.to_csv('data/流量数据/province information.txt',sep='\t',index=False,header=None)

#根据经纬度计算距离,并输出为province_gmn.txt

# from math import radians, cos, sin, asin, sqrt
# def geodistance(lng1,lat1,lng2,lat2):
#     lng1,lat1,lng2,lat2=map(radians,[float(lng1),float(lat1),float(lng2),float(lat2)])
#     dlon=lng2-lng1
#     dlat=lat2-lat1
#     a=sin(dlat/2)**2+cos(lat1)*cos(lat2)*sin(dlon/2)**2
#     distance=2*asin(sqrt(a))*6371*1000
#     distance=round(distance/1000,3)
#     return distance

# gmn=[[0 for i in range(0,len(pID))] for i in range(0,len(pID))] #实际地理距离初始化

# gmn=[[geodistance(lng_lat.iloc[i][1],lng_lat.iloc[i][2],lng_lat.iloc[j][1],lng_lat.iloc[j][2]) for j in range(0,len(pID))]for i in range(0,len(pID))]

# gmn=pd.DataFrame(gmn)

# gmn.to_csv('data/流量数据/province_gmn.txt',sep='\t',index=False,header=None)
# gmn.head()

In [5]:
# Hubei: chart of each province's first-case day against its geographic
# distance from Hubei, together with their correlation coefficient.
time = pd.Series(first_day[2])   # first-case day per province
# Row 0 of the distance table -- presumably Hubei's row; verify against provinceID.txt.
gmn_hb = pd.Series(gmn.iloc[0])
corr_g_time_hb = round(gmn_hb.corr(time), 5)

# Degree-1 polynomial fit, i.e. a straight line: z1 = [slope, intercept].
z1 = np.polyfit(gmn_hb, time, 1)
_slope, _intercept = z1[0], z1[1]
_x_end = 3000   # the fitted line is drawn from x = 0 to x = 3000
line = fitting_line([0, _x_end], [_intercept, _slope*_x_end + _intercept])

# Render the plain scatter first, then overlay the fitted line and render again.
graph_hb = scatter_visualmap_color(gmn_hb, first_day, corr_g_time_hb, '湖北省')
graph_hb.render(path='test_hb_g.html')
graph_hb.overlap(line)
graph_hb.render(path='test_hb_g_fitting.html')

'I:\\biyesheji\\test_hb_g_fitting.html'

In [None]:
# # 湖北省和各省之间在武汉封城后的有效路径
# graphs3=pd.read_csv("data/流量数据/province-flux-after.txt",sep="\s+",header=None)

# d3= defaultdict(defaultdict)

# for i in range(0,len(pID)):  #插入对角线元素
#     d3[i][i]=0
# for i in range(0,len(graphs3)):
#     d3[graphs3.iloc[i][0]][graphs3.iloc[i][1]]=graphs3.iloc[i][2]
# dis3=Dijkstra(d3,v0=5)
# dis3