In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio
"""
* pio接口设置画布风格模板(pio.templates.default="名称")
? 可用模板：
@ ggplot2,seaborn,simple_white,plotly,plotly_white,plotly_dark
@ presentation,xgridoff,ygridoff,gridon,none
"""

#! 定义函数增加一列表示年月的时间戳
def update_date(df):
    """Pin the month of the two boundary years, modifying ``df`` in place.

    Rows whose ``年份`` equals 2018 get ``月份`` set to 12, and rows whose
    ``年份`` equals 2022 get ``月份`` set to 1. Every other row is left
    untouched. The notebook uses these two years as stand-ins for
    "before 2019" and "after 2021", so they collapse to 2018-12-1 and
    2022-1-1 once a date is built from year and month.

    Args:
        df: DataFrame with a ``年份`` column; ``月份`` is written to.

    Returns:
        None. The frame passed in is mutated.
    """
    pinned_month_by_year = {2018: 12, 2022: 1}
    for year, month in pinned_month_by_year.items():
        df.loc[df["年份"] == year, "月份"] = month
def get_date(df):
    """Build a ``"<year>-<month>-1"`` date string for a single record.

    Intended for ``DataFrame.apply(get_date, axis=1)``, where ``df`` is one
    row, but any mapping with ``年份`` and ``月份`` keys works.

    Both fields are cast to ``int`` before formatting. A row that pandas has
    upcast to float (e.g. year ``2019.0``) would otherwise be rendered as
    ``"2019.0-3-1"``, which is not a valid date string.

    Args:
        df: Row-like object exposing ``df["年份"]`` and ``df["月份"]``.

    Returns:
        str: The first day of the given month, e.g. ``"2019-3-1"``.

    Raises:
        ValueError: If either field cannot be converted to ``int`` (e.g. NaN).
    """
    return f"{int(df['年份'])}-{int(df['月份'])}-1"
#! 对极坐标图的指标进行分类
def make_class(x):
    """Map a bucket label to its ordinal rank (1-7), or 0 if unrecognised.

    The same ranking serves three kinds of labels used in this notebook:
    registered-capital ranges, insured-headcount ranges and founding-period
    ranges. Labels listed in the same row below share a rank.

    Args:
        x: The bucket label to classify.

    Returns:
        int: 1 for the lowest bucket up to 7 for the highest; 0 when ``x``
        matches none of the known labels.
    """
    # Position in this tuple (1-based) is the rank returned for its labels.
    ranked_labels = (
        ("10万以下", "10人以下", "1980年以前"),
        ("10万-100万", "10-50人", "1980-1990"),
        ("100万-200万", "50-100人", "1990-2000"),
        ("200万-500万", "100-500人", "2000-2010"),
        ("500万-1000万", "500-1000人", "2010-2020"),
        ("1000万-5000万", "1000-10000人", "2020以后"),
        ("5000万以上", "10000人以上"),
    )
    for rank, labels in enumerate(ranked_labels, start=1):
        if x in labels:
            return rank
    return 0
"""
? 2019-2021年每月数据统计
@ 2019年以前统一为2018-12-1
@ 2021年以后统一为2022-1-1
"""

'\n? 2019-2021年每月数据统计\n@ 2019年以前统一为2018-12-1\n@ 2021年以后统一为2022-1-1\n'

## 存量企业总数

In [2]:
pio.templates.default = "ggplot2"

# Read Sheet1 and keep only the rows whose 辖区 is not "上海市".
secondDataSheet1 = pd.read_excel(
    "E:\\PythonProject\\Data\\2.xlsx",
    sheet_name="Sheet1",
).loc[lambda frame: frame["辖区"] != "上海市"]

# Box plot of 企业数 per 辖区 on a log-scaled y axis, one colour per 辖区.
px.box(
    data_frame=secondDataSheet1,
    x="辖区",
    y="企业数",
    color="辖区",
    log_y=True,
    color_discrete_sequence=px.colors.qualitative.Light24,
).show()

# Animated bar chart of the same data: one animation frame per distinct
# value of the 年月 column (rendered as text).
px.bar(
    data_frame=secondDataSheet1,
    x="辖区",
    y="企业数",
    color="辖区",
    barmode="group",
    log_y=True,
    animation_frame=secondDataSheet1["年月"].astype(str),
    color_discrete_sequence=px.colors.qualitative.Light24,
).show()

## 存量企业注册资本分布类型统计

In [3]:
pio.templates.default = "presentation"

# Read Sheet2 and drop the rows whose 辖区 is "上海市"
# (presumably the city-wide total row - TODO confirm against the workbook).
# .copy() makes the filtered result an independent frame, so adding the
# "gradient" column below never writes into a slice of the frame that
# read_excel returned.
secondDataSheet2 = pd.read_excel(
    "E:\\PythonProject\\Data\\2.xlsx",
    "Sheet2")
secondDataSheet2 = secondDataSheet2[secondDataSheet2["辖区"] != "上海市"].copy()

# Rank every 注册资本分布 bucket with make_class, then sort ascending so the
# buckets are encountered from the lowest rank to the highest.
secondDataSheet2["gradient"] = secondDataSheet2["注册资本分布"].apply(make_class)
secondDataSheet2 = secondDataSheet2.sort_values(by="gradient", ascending=True)

# Plotly Express only applies color_discrete_sequence when the colour column
# is non-numeric; an integer "gradient" would be drawn with a continuous
# colour scale and the Light24 palette would be ignored. Plot a copy whose
# "gradient" is text so each rank gets its own discrete colour, while
# secondDataSheet2 itself keeps the numeric column.
px.bar_polar(
    secondDataSheet2.assign(gradient=secondDataSheet2["gradient"].astype(str)),
    r="企业数",
    theta="辖区",
    color="gradient",
    color_discrete_sequence=px.colors.qualitative.Light24
)

## 存量企业参保人数存量统计

In [4]:
# No template is set in this cell, so the figure uses whatever
# pio.templates.default currently holds.

# Read Sheet3 and drop the rows whose 辖区 is "上海市"
# (presumably the city-wide total row - TODO confirm against the workbook).
# .copy() makes the filtered result an independent frame, so the in-place
# writes below (update_date and the new columns) never target a slice of the
# frame that read_excel returned.
secondDataSheet3 = pd.read_excel(
    "E:\\PythonProject\\Data\\2.xlsx",
    "Sheet3")
secondDataSheet3 = secondDataSheet3[secondDataSheet3["辖区"] != "上海市"].copy()

# Pin the boundary years' months in place, then build a datetime 年月 column
# from each row's year and month. 年月 is not used by the chart in this cell.
update_date(secondDataSheet3)
secondDataSheet3.loc[:, "年月"] = pd.to_datetime(secondDataSheet3.apply(get_date, axis=1))

# Rank every 参保人数分布 bucket with make_class, then sort ascending so the
# buckets are encountered from the lowest rank to the highest.
secondDataSheet3["gradient"] = secondDataSheet3["参保人数分布"].apply(make_class)
secondDataSheet3 = secondDataSheet3.sort_values(by="gradient", ascending=True)

# Plotly Express only applies color_discrete_sequence when the colour column
# is non-numeric; an integer "gradient" would be drawn with a continuous
# colour scale and the Light24 palette would be ignored. Plot a copy whose
# "gradient" is text so each rank gets its own discrete colour, while
# secondDataSheet3 itself keeps the numeric column.
px.bar_polar(
    secondDataSheet3.assign(gradient=secondDataSheet3["gradient"].astype(str)),
    r="企业数",
    theta="辖区",
    color="gradient",
    color_discrete_sequence=px.colors.qualitative.Light24
)

## 存量企业创建时间存量统计

In [5]:
pio.templates.default = "plotly_dark"

# Read Sheet5 and drop the rows whose 辖区 is "上海市"
# (presumably the city-wide total row - TODO confirm against the workbook).
# .copy() makes the filtered result an independent frame, so the in-place
# writes below (update_date and the new columns) never target a slice of the
# frame that read_excel returned.
secondDataSheet5 = pd.read_excel(
    "E:\\PythonProject\\Data\\2.xlsx",
    "Sheet5")
secondDataSheet5 = secondDataSheet5[secondDataSheet5["辖区"] != "上海市"].copy()

# Pin the boundary years' months in place, then build a datetime 年月 column
# from each row's year and month. 年月 is not used by the chart in this cell.
update_date(secondDataSheet5)
secondDataSheet5.loc[:, "年月"] = pd.to_datetime(secondDataSheet5.apply(get_date, axis=1))

# Rank every 创建时间 bucket with make_class, then sort ascending so the
# buckets are encountered from the lowest rank to the highest.
secondDataSheet5["gradient"] = secondDataSheet5["创建时间"].apply(make_class)
secondDataSheet5 = secondDataSheet5.sort_values(by="gradient", ascending=True)

# Plotly Express only applies color_discrete_sequence when the colour column
# is non-numeric; an integer "gradient" would be drawn with a continuous
# colour scale and the Light24 palette would be ignored. Plot a copy whose
# "gradient" is text so each rank gets its own discrete colour, while
# secondDataSheet5 itself keeps the numeric column.
px.bar_polar(
    secondDataSheet5.assign(gradient=secondDataSheet5["gradient"].astype(str)),
    r="企业数",
    theta="辖区",
    color="gradient",
    color_discrete_sequence=px.colors.qualitative.Light24
)

## 存量企业类型存量统计&&行业类型存量统计

In [6]:
pio.templates.default = "ggplot2"

# Read Sheet4 and keep only the rows whose 辖区 is not "上海市".
secondDataSheet4 = pd.read_excel(
    "E:\\PythonProject\\Data\\2.xlsx",
    sheet_name="Sheet4",
).loc[lambda frame: frame["辖区"] != "上海市"]

# Pin the boundary years' months in place, then turn each row's year/month
# into a datetime stored in the 年月 column.
update_date(secondDataSheet4)
secondDataSheet4.loc[:, "年月"] = pd.to_datetime(
    secondDataSheet4.apply(get_date, axis="columns")
)

# Grouped bars of 企业数 per 辖区, coloured by 企业类型, on a log-scaled
# y axis; one animation frame per distinct 年月 value (rendered as text).
fig3 = px.bar(
    data_frame=secondDataSheet4,
    x="辖区",
    y="企业数",
    color="企业类型",
    barmode="group",
    log_y=True,
    animation_frame=secondDataSheet4["年月"].astype(str),
    color_discrete_sequence=px.colors.qualitative.Light24,
)
fig3.show()

In [7]:
pio.templates.default = "plotly"

#! Enterprise counts broken down by industry type
# Read Sheet6, drop the rows whose 辖区 is "上海市", then keep only the
# remaining rows with more than 1000 enterprises.
secondDataSheet6 = (
    pd.read_excel("E:\\PythonProject\\Data\\2.xlsx", sheet_name="Sheet6")
    .loc[lambda frame: frame["辖区"] != "上海市"]
    .loc[lambda frame: frame["企业数"] > 1000]
)

# Pin the boundary years' months in place, then turn each row's year/month
# into a datetime stored in the 年月 column (not used by the chart below).
update_date(secondDataSheet6)
secondDataSheet6.loc[:, "年月"] = pd.to_datetime(
    secondDataSheet6.apply(get_date, axis="columns")
)

# Two-level sunburst: 辖区 on the inner ring, 国标行业 on the outer ring,
# with sector sizes taken from 企业数.
figN1 = px.sunburst(
    data_frame=secondDataSheet6,
    path=["辖区", "国标行业"],
    values="企业数",
    title="各个辖区各个国标行业的企业数占比",
)
# Label every sector with its name and percentage, fix the canvas size, and
# render the figure.
figN1.update_traces(textinfo="label+percent entry")
figN1.update_layout(autosize=False, height=800, width=1000)
figN1.show()