In [1]:
# 读取腾讯新闻实时统计数据
import json
from datetime import datetime
import requests
import pandas as pd

# Fetch the nationwide real-time totals from the Tencent News endpoint.
# The timeout stops the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() surfaces an HTTP error instead of a confusing JSON
# decoding failure further down.
cn_global_response = requests.get(
    "https://view.inews.qq.com/g2/getOnsInfo?name=wuwei_ww_global_vars",
    timeout=30,
)
cn_global_response.raise_for_status()
cn_global_data = cn_global_response.json()
# The "data" field is a JSON document serialized as a string, hence the second decode.
cn_global = json.loads(cn_global_data["data"])
# Drop the fields that are not kept and rename the remaining ones to the
# dataset's column names. Chaining (instead of inplace=True) builds the frame
# in one expression, so re-running the cell always starts from the raw payload.
cn_global_df = (
    pd.DataFrame(cn_global)
    .drop(columns=["recentTime", "useTotal", "hintWords"])
    .rename(columns={
        "area": "province",
        "confirmCount": "confirmed",
        "suspectCount": "suspected",
        "cure": "cured",
        "deadCount": "dead",
    })
)
# Stamp every row with today's date (local clock) and the country name.
cn_global_df["date"] = datetime.today().strftime('%Y-%m-%d')
cn_global_df["country"] = "中国"
cn_global_df

Unnamed: 0,confirmed,suspected,dead,cured,date,country
0,2846,5796,81,58,2020-01-27,中国


In [2]:
# 读取腾讯新闻实时分地区数据
import json
from datetime import datetime
import requests
import pandas as pd

# Fetch the real-time per-area counts from the Tencent News endpoint.
# The timeout stops the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() surfaces an HTTP error before the body is parsed.
cn_area_response = requests.get(
    "https://view.inews.qq.com/g2/getOnsInfo?name=wuwei_ww_area_counts",
    timeout=30,
)
cn_area_response.raise_for_status()
cn_area_data = cn_area_response.json()
# The "data" field is a JSON document serialized as a string, hence the second decode.
cn_area = json.loads(cn_area_data["data"])
# Rename the feed's fields to the dataset's column names without mutating in place.
cn_area_df = pd.DataFrame(cn_area).rename(columns={
    "area": "province",
    "confirm": "confirmed",
    "suspect": "suspected",
    "heal": "cured",
})
# Stamp every row with today's date (local clock).
cn_area_df["date"] = datetime.today().strftime('%Y-%m-%d')
cn_area_df

Unnamed: 0,country,province,city,confirmed,suspected,dead,cured,date
0,中国,湖北,武汉,698,0,63,42,2020-01-27
1,中国,湖北,黄冈,154,0,4,2,2020-01-27
2,中国,湖北,孝感,100,0,1,0,2020-01-27
3,中国,湖北,荆门,90,0,3,0,2020-01-27
4,中国,湖北,恩施州,25,0,0,0,2020-01-27
...,...,...,...,...,...,...,...,...
296,中国,四川,宜宾,1,0,0,0,2020-01-27
297,中国,四川,凉山,1,0,0,0,2020-01-27
298,中国,陕西,渭南,1,0,0,0,2020-01-27
299,中国,山东,枣庄,2,0,0,0,2020-01-27


In [3]:
# 读取腾讯新闻日统计数据
import json
import requests
import pandas as pd

# Fetch the per-day nationwide counts from the Tencent News endpoint.
# The timeout stops the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() surfaces an HTTP error before the body is parsed.
cn_day_response = requests.get(
    "https://view.inews.qq.com/g2/getOnsInfo?name=wuwei_ww_cn_day_counts",
    timeout=30,
)
cn_day_response.raise_for_status()
cn_day_data = cn_day_response.json()
# The "data" field is a JSON document serialized as a string, hence the second decode.
cn_day = json.loads(cn_day_data["data"])
# Rename the feed's fields to the dataset's column names without mutating in place.
cn_day_df = pd.DataFrame(cn_day).rename(columns={
    "confirm": "confirmed",
    "suspect": "suspected",
    "heal": "cured",
})
# Turn the feed's dot-separated dates into "2020-..." with dashes.
# NOTE(review): the year is hard-coded; entries from any other year would be
# mislabelled as 2020 -- confirm the feed only ever covers 2020.
cn_day_df["date"] = cn_day_df["date"].map(lambda x: "2020-" + x.replace(".", "-"))
cn_day_df["country"] = "中国"
cn_day_df

Unnamed: 0,date,confirmed,suspected,dead,cured,country
0,2020-01-21,440,37,9,25,中国
1,2020-01-13,41,0,1,0,中国
2,2020-01-14,41,0,1,0,中国
3,2020-01-15,41,0,2,5,中国
4,2020-01-16,45,0,2,8,中国
5,2020-01-17,62,0,2,12,中国
6,2020-01-18,198,0,3,17,中国
7,2020-01-19,275,0,4,18,中国
8,2020-01-24,1287,1965,41,38,中国
9,2020-01-23,830,1072,25,34,中国


In [6]:
# 更新数据
import re
import pandas as pd

csv_file = "Wuhan-2019-nCoV.csv"
json_file = "Wuhan-2019-nCoV.json"

# Columns holding names/codes (missing -> empty string) and counts (missing -> 0).
text_columns = ["country", "countryCode", "province", "provinceCode", "city", "cityCode"]
count_columns = ["confirmed", "suspected", "cured", "dead"]

df = pd.read_csv(csv_file)
# Re-join the numeric parts of each stored date with "-", left-padding each part
# to at least two digits, so differently formatted dates compare equal.
df["date"] = df["date"].map(lambda x: "-".join(part.zfill(2) for part in re.split("\\D+", x)))
# Append the freshly fetched frames; later rows win when de-duplicating below.
df = pd.concat([df, cn_area_df, cn_global_df, cn_day_df], sort=False)
# Assign the filled columns back explicitly. Calling fillna(..., inplace=True)
# on df["col"] is chained assignment: it warns on recent pandas and leaves df
# unchanged once Copy-on-Write is active.
for column in text_columns:
    df[column] = df[column].fillna("")
for column in count_columns:
    df[column] = df[column].fillna(0).astype(int)
# Keep only the newest row per (date, country, province, city), then order the rows.
df = df.drop_duplicates(subset=["date", "country", "province", "city"], keep="last")
df = df.sort_values(["date", "country", "province", "city"], na_position="first")
df.to_csv(csv_file, index=False, encoding='utf-8')
df.to_json(json_file, orient="records", force_ascii=False)
df

Unnamed: 0,date,country,countryCode,province,provinceCode,city,cityCode,confirmed,suspected,cured,dead
1,2020-01-13,中国,,,,,,41,0,0,1
2,2020-01-14,中国,,,,,,41,0,0,1
3,2020-01-15,中国,,,,,,41,0,5,2
4,2020-01-16,中国,,,,,,45,0,8,2
5,2020-01-17,中国,,,,,,62,0,12,2
...,...,...,...,...,...,...,...,...,...,...,...
220,2020-01-27,澳大利亚,,,,,,5,0,0,0
216,2020-01-27,美国,,,,,,5,0,0,0
218,2020-01-27,越南,,,,,,2,0,0,0
215,2020-01-27,韩国,,,,,,4,0,0,0


In [41]:
import re
import math
from functools import lru_cache
import pandas as pd

csv_file = "Wuhan-2019-nCoV.csv"

# Lookup tables used by the code-resolution helpers below.
country_code = pd.read_csv("CountryCode.csv")
# Area codes are handled as strings so they can be pattern-matched.
china_area_code = pd.read_csv("ChinaAreaCode.csv").astype({"code": str})
# Disabled alternative: keep only codes ending in "00".
# china_area_code = china_area_code[china_area_code.apply(lambda x: bool(re.match("\\d{4}00$", x.code)), axis=1)]
# Disabled alternative: derive a city-level code by zeroing the last two digits.
# china_area_code["city_code"] = china_area_code["code"].map(lambda x: re.sub("\\d{2}$", "00", x))
china_area_code = china_area_code.assign(
    # A code made of two digits followed by "0000" marks a province-level row.
    is_province=china_area_code["code"].str.match("\\d{2}0000$"),
    # Zero the last four digits to obtain the code of the enclosing province.
    province_code=china_area_code["code"].str.replace("\\d{4}$", "0000", regex=True),
)

@lru_cache(maxsize=128)
def get_country_code(name):
    """Look up the code for a country name in the module-level ``country_code`` table.

    Args:
        name: Country name to search for in the table's ``name`` column.

    Returns:
        The ``code`` value of the first row whose ``name`` equals ``name``,
        or an empty string when no row matches. Results are memoized per name.
    """
    name_matches = country_code["name"].isin([name])
    codes = country_code.loc[name_matches, "code"].values
    return codes[0] if len(codes) > 0 else ""


@lru_cache(maxsize = 32)
def get_china_province_code(name):
    """Resolve a (possibly abbreviated) province name to its area code.

    The lookup uses the module-level ``china_area_code`` table and considers
    only rows flagged ``is_province``. A row matches when its ``name`` contains
    ``name`` as a literal substring.

    Args:
        name: Province name or abbreviation; a falsy value short-circuits.

    Returns:
        The ``code`` of the first matching province row, or an empty string
        when ``name`` is falsy or nothing matches. Results are memoized.
    """
    if not name:
        return ""
    # regex=False: the name is data, not a pattern, so characters such as "(" or
    # "." must not be interpreted as regex syntax. na=False keeps rows with a
    # missing name out of the boolean mask instead of poisoning it with NaN.
    name_matches = china_area_code["name"].str.contains(name, regex=False, na=False)
    codes = china_area_code.loc[china_area_code["is_province"] & name_matches, "code"].values
    if len(codes) > 0:
        return codes[0]
    return ""


# @lru_cache(maxsize = 1024)
def get_china_city_code(province_code, name):
    """Resolve a (possibly abbreviated) city name to its area code within a province.

    The lookup uses the module-level ``china_area_code`` table, restricted to
    rows whose ``province_code`` equals ``province_code``. Two strategies are
    tried in order:

    1. Literal substring: the row's ``name`` contains ``name``.
    2. Fuzzy match: for i = 1 .. len(name) - 1, the last ``i`` characters of
       ``name`` may each be preceded by arbitrary text. The leading characters
       must still appear contiguously at the start of the row's name (e.g.
       "恩施州" is tried as the pattern "恩施.*州"). A fuzzy hit is printed
       so the mapping can be reviewed.

    Args:
        province_code: Province-level code limiting the search.
        name: City name or abbreviation.

    Returns:
        The ``code`` of the first matching row, or an empty string when either
        argument is falsy or nothing matches.
    """
    if not name or not province_code:
        return ""
    # The province filter does not depend on the attempt, so compute it once.
    province_rows = china_area_code.loc[china_area_code["province_code"].isin([province_code])]
    area_names = province_rows["name"]

    # The name is data, not a pattern: search it literally (regex=False) and
    # treat rows with a missing name as non-matching (na=False).
    result = province_rows.loc[area_names.str.contains(name, regex=False, na=False), "code"]
    if len(result) > 0:
        return result.values[0]

    for i in range(1, len(name)):
        # Escape every piece so regex metacharacters in the name stay literal;
        # only the ".*" separators inserted here act as wildcards.
        fuzzy_name = re.escape(name[:-i]) + ".*" + ".*".join(re.escape(ch) for ch in name[-i:])
        result = province_rows.loc[area_names.str.match(fuzzy_name, na=False), "code"]
        if len(result) > 0:
            matched_codes = ",".join(result.values)
            print(f"{province_code} {fuzzy_name} -> {matched_codes}")
            return result.values[0]

    return ""


@lru_cache(maxsize=1024)
def get_china_area_name(code, name):
    """Return the name recorded for ``code`` in ``china_area_code``, else ``name``.

    Args:
        code: Area code to look up; a falsy value skips the lookup.
        name: Fallback returned when ``code`` is falsy or not in the table.

    Returns:
        The ``name`` value of the first row whose ``code`` equals ``code``,
        or the ``name`` argument unchanged. Results are memoized per argument pair.
    """
    if code:
        recorded_names = china_area_code.loc[china_area_code["code"].isin([code]), "name"].values
        if len(recorded_names) > 0:
            return recorded_names[0]
    return name

df = pd.read_csv(csv_file)
# Replace missing names/codes with "" so the lookup helpers receive strings.
# The filled column is assigned back explicitly: fillna(..., inplace=True) on
# df["col"] is chained assignment, which warns on recent pandas and leaves df
# unchanged once Copy-on-Write is active.
for column in ["country", "countryCode", "province", "provinceCode", "city", "cityCode"]:
    df[column] = df[column].fillna("")
# Resolve codes first, then replace each name with the one recorded for its code
# (the original name is kept when no code was found).
df["countryCode"] = df["country"].map(get_country_code)
df["provinceCode"] = df["province"].map(get_china_province_code)
df["province"] = df.apply(lambda x: get_china_area_name(x.provinceCode, x.province), axis = 1)
# City lookup needs the province code resolved above to narrow the search.
df["cityCode"] = df.apply(lambda x: get_china_city_code(x.provinceCode, x.city), axis = 1)
df["city"] = df.apply(lambda x: get_china_area_name(x.cityCode, x.city), axis = 1)
# Written to a scratch file rather than back over csv_file.
df.to_csv("temp.csv", index=False)
# df

530000 德宏.*州 -> 533100
530000 西双版纳.*州 -> 532800
650000 伊犁.*州 -> 654000
460000 陵水.*县 -> 469028
420000 恩施.*州 -> 422800
430000 湘西.*自.*治.*州 -> 433100
520000 黔南.*州 -> 522700
520000 黔西南.*州 -> 522300
500000 石柱.*县 -> 500240
500000 秀山.*县 -> 500241
530000 德宏.*州 -> 533100
530000 西双版纳.*州 -> 532800
650000 伊犁.*州 -> 654000
460000 琼中.*县 -> 469030
460000 陵水.*县 -> 469028
420000 恩施.*州 -> 422800
430000 湘西.*自.*治.*州 -> 433100
520000 黔南.*州 -> 522700
520000 黔西南.*州 -> 522300
500000 石柱.*县 -> 500240
500000 秀山.*县 -> 500241


In [16]:
# Scratch check of the fuzzy pattern shape used for abbreviated area names.
# Disabled call kept for reference:
# get_china_city_code("500000", "石柱县")

# re.match anchors at the start of the string; the cell displays whether it matched.
re.match("石.*柱.*县", "石柱土家族自治县") is not None

True

In [33]:
import math
# Scratch experiment: print the name with 1 .. n-1 trailing characters removed,
# where n is half the name's length rounded up.
s = "黔南州"
n = math.ceil(len(s) / 2)
for i in range(1, n):
    # Same slice as s[:-i], written with an explicit end index.
    print(s[:len(s) - i])

黔南
