In [None]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline

def use_svg_display():
    """Ask the inline Matplotlib backend to render figures as SVG."""
    inline_backend = matplotlib_inline.backend_inline
    inline_backend.set_matplotlib_formats('svg')

def set_figsize(figsize=(3.5, 2.5)):
    """Enable SVG rendering and set Matplotlib's default figure size.

    Args:
        figsize: Value stored in ``plt.rcParams['figure.figsize']``;
            defaults to ``(3.5, 2.5)``.
    """
    use_svg_display()
    plt.rcParams.update({'figure.figsize': figsize})
# set_figsize()

def get_output_dir():
    """Return the ``output`` directory under the current working directory.

    The directory is created when it does not exist yet.

    Returns:
        str: Path of ``<current working directory>/output``.
    """
    output_dir = os.path.join(os.getcwd(), "output")
    # exist_ok=True replaces the isdir()-then-makedirs() pair, which could
    # fail if the directory appeared between the check and the creation.
    os.makedirs(output_dir, exist_ok=True)
    return output_dir


In [None]:
# Top-level factor names; each is used as the prefix of a workbook path
# of the form ./data/<factor>1993-2022.xlsx.
factorl1list = [
    '经济发展度',
    '社会和谐度',
    '生活方便度',
    '环境舒适度',
]
# Cities (row labels) selected from every sheet.
citylist = [
    '北京', '上海', '深圳', '重庆', '武汉',
    '西安', '郑州', '长春', '昆明',
]
# citylist = ['北京', '上海', '深圳', '重庆', '武汉', '西安']
# Sheet (metric) read from each workbook, aligned by position with factorl1list.
metriclist = [
    '人均GDP',
    '住宅商品房价格与人均工资比',
    '每千人口拥有执业（助理）医师数',
    '城市绿化覆盖率',
]
# Number of trailing rows to skip in each sheet, aligned by position with metriclist.
skipfooterlist = [0, 0, 2, 2]

# i=3
# df = pd.ExcelFile(f'./data/{factorl1list[i]}1993-2022.xlsx').parse(sheet_name=metriclist[i], index_col=0, skiprows=3, skipfooter=skipfooterlist[i]).loc[citylist]
# df

In [None]:
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler

# Convert a time series into a supervised-learning table of lagged inputs and targets.
def to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning dataset.

    Args:
        data: Observations in any form ``pd.DataFrame`` accepts (flat or
            nested list, 1-D or 2-D array, Series, DataFrame). Rows are time
            steps, columns are variables.
        n_in: Number of lagged steps (t-n_in ... t-1) emitted as input columns.
        n_out: Number of forecast steps (t ... t+n_out-1) emitted as output columns.
        dropnan: When true, drop every row containing NaN, which includes the
            rows left incomplete by shifting.

    Returns:
        pd.DataFrame: Columns are named ``var<j>(t-<i>)``, ``var<j>(t)`` and
        ``var<j>(t+<i>)``, ordered from the oldest lag to the furthest forecast.
    """
    frame = pd.DataFrame(data)
    # Count variables on the frame itself: the previous type/shape check
    # raised IndexError for 1-D arrays and miscounted nested lists and Series.
    n_vars = frame.shape[1]
    shifted, names = [], []
    # Input sequence (t-n_in, ..., t-1).
    for lag in range(n_in, 0, -1):
        shifted.append(frame.shift(lag))
        names += [f'var{j + 1}(t-{lag})' for j in range(n_vars)]
    # Forecast sequence (t, t+1, ..., t+n_out-1).
    for step in range(n_out):
        shifted.append(frame.shift(-step))
        if step == 0:
            names += [f'var{j + 1}(t)' for j in range(n_vars)]
        else:
            names += [f'var{j + 1}(t+{step})' for j in range(n_vars)]
    # Put it all together.
    agg = pd.concat(shifted, axis=1)
    agg.columns = names
    return agg.dropna() if dropnan else agg


plt.rc('font', family='SimHei')  # font with CJK glyphs so Chinese labels render
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign readable with that font
# plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
# plt.rcParams["legend.title_fontsize"]='x-small'
set_figsize((10, 5))

# Window settings shared by every metric and city.
n_in = 3            # past observations fed to the model as inputs
n_out = 1           # steps predicted per training sample
future_steps = 30   # recursive forecasts generated past the last observation

# One colour per city. plt.get_cmap replaces plt.cm.get_cmap, which was
# removed in Matplotlib 3.9. 'hsv' is cyclic (both ends are close to red), so
# one extra colour is sampled to keep the first and last city distinguishable.
cmap = plt.get_cmap('hsv', len(citylist) + 1)
output_dir = get_output_dir()

for factor, metric, skipfooter in zip(factorl1list, metriclist, skipfooterlist):
    # The context manager closes the workbook handle once the sheet is parsed.
    with pd.ExcelFile(f'./data/{factor}1993-2022.xlsx') as workbook:
        df = workbook.parse(sheet_name=metric, index_col=0, skiprows=3, skipfooter=skipfooter).loc[citylist]

    for cid, city in enumerate(citylist):
        # Row for this city, reversed along the column axis
        # (presumably the sheet lists years newest-first -- confirm against the data).
        series = df.loc[city, :].values[::-1]

        # Scale to [0, 1]; the scaler is refitted per city so that
        # inverse_transform maps back to this city's own range.
        scaler = MinMaxScaler(feature_range=(0, 1))
        scaled = scaler.fit_transform(series.reshape(-1, 1))

        # Sliding windows: n_in lag columns followed by the target column.
        data = to_supervised(scaled, n_in, n_out)

        # Plain arrays with a 1-D target: avoids scikit-learn's column-vector
        # DataConversionWarning and the feature-name warning raised when a
        # model fitted on a DataFrame later predicts on an ndarray.
        X = data.iloc[:, :n_in].to_numpy()
        y = data.iloc[:, n_in].to_numpy()  # n_out == 1, so the target is the column at index n_in

        model = MLPRegressor(hidden_layer_sizes=(1000,), activation='tanh', max_iter=20000, random_state=2, tol=1e-9)
        model.fit(X, y)

        # In-sample predictions, mapped back to the original scale.
        predictions = scaler.inverse_transform(model.predict(X).reshape(-1, 1))

        # Recursive forecast: start from the last n_in observed (scaled)
        # values and feed every prediction back in as the newest input.
        last_n_values = list(y[-n_in:])
        predictions_future = []
        for _ in range(future_steps):
            y_pred = model.predict(np.array(last_n_values).reshape(1, -1))
            predictions_future.append(y_pred[0])
            last_n_values = last_n_values[1:] + [y_pred[0]]

        # Map the forecasts back to the original scale.
        predictions_future = scaler.inverse_transform(np.array(predictions_future).reshape(-1, 1))

        # The first n_in observations have no prediction (they only serve as
        # inputs to the first window), so they are skipped to align both curves.
        plt.plot(series[n_in:], 'o-', linewidth=2, label=f'{city}', color=cmap(cid), alpha=0.5)
        plt.plot(np.vstack((predictions, predictions_future)), '^-', linewidth=2, label=f'{city}_预测', color=cmap(cid), alpha=0.5)

    plt.ylabel(metric)
    plt.legend(loc=(1.02, 0.02), fontsize='medium')
    plt.savefig(os.path.join(output_dir, f'{metric}_趋势预测.pdf'), bbox_inches='tight')
    # plt.show()
    plt.clf()  # start the next metric on an empty figure


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

In [None]:
import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline
def use_svg_display():
    """Ask the inline Matplotlib backend to render figures as SVG."""
    inline_backend = matplotlib_inline.backend_inline
    inline_backend.set_matplotlib_formats('svg')
def set_figsize(figsize=(3.5, 2.5)):
    """Enable SVG rendering and set Matplotlib's default figure size.

    Args:
        figsize: Value stored in ``plt.rcParams['figure.figsize']``;
            defaults to ``(3.5, 2.5)``.
    """
    use_svg_display()
    plt.rcParams.update({'figure.figsize': figsize})
# Apply the default figure size for the cells that follow.
set_figsize()

In [None]:
# Top-level factor names; each is used as the prefix of a workbook path
# of the form ./data/<factor>1993-2022.xlsx.
factorl1list = [
    '经济发展度',
    '社会和谐度',
    '生活方便度',
    '环境舒适度',
]

In [None]:
# Economic-development workbook: list its sheet names.
# The context manager closes the workbook handle once the names are read.
with pd.ExcelFile(f'./data/{factorl1list[0]}1993-2022.xlsx') as workbook:
    sheets_jjfz = workbook.sheet_names
sheets_jjfz

In [None]:
# Social-harmony workbook: list its sheet names.
# The context manager closes the workbook handle once the names are read.
with pd.ExcelFile(f'./data/{factorl1list[1]}1993-2022.xlsx') as workbook:
    sheets_shhx = workbook.sheet_names
sheets_shhx

In [None]:
# Living-convenience workbook: list its sheet names.
# The context manager closes the workbook handle once the names are read.
with pd.ExcelFile(f'./data/{factorl1list[2]}1993-2022.xlsx') as workbook:
    sheets_shfb = workbook.sheet_names
sheets_shfb

In [None]:
# Environmental-comfort workbook: list its sheet names.
# The context manager closes the workbook handle once the names are read.
with pd.ExcelFile(f'./data/{factorl1list[3]}1993-2022.xlsx') as workbook:
    sheets_hjss = workbook.sheet_names
sheets_hjss

In [None]:
# ExcelFile.parse(sheet_name=0, header=0, names=None, index_col=None, usecols=None, converters=None,
#                 true_values=None, false_values=None, skiprows=None, nrows=None, na_values=None,
#                 parse_dates=False, date_parser=_NoDefault.no_default, date_format=None, thousands=None,
#                 comment=None, skipfooter=0, dtype_backend=_NoDefault.no_default, **kwds)
# https://pandas.pydata.org/docs/reference/api/pandas.ExcelFile.parse.html

# Load the '城市GDP' sheet: the first column becomes the index, the first 3
# rows and the last 2 rows of the sheet are skipped. The context manager
# closes the workbook handle once the sheet has been parsed.
with pd.ExcelFile('./data/经济发展度1993-2022.xlsx') as workbook:
    GDP = workbook.parse(sheet_name='城市GDP', index_col=0, skiprows=3, skipfooter=2)
GDP


In [None]:
city = '昆明'   # other cities tried: '北京', '西安', '上海', '武汉', '重庆'
# GDP is parsed from the '城市GDP' sheet, so the axis label must name that
# metric; the previous label '人均GDP' described data that is not plotted here.
# Metric names listed by the original author:
# '城市GDP', '人均GDP', '第三产业增加值', '社会消费品零售总额', '年末户籍人口'
metric = '城市GDP'

plt.rc('font', family='SimHei')  # font with CJK glyphs so Chinese labels render
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign readable with that font
# plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
set_figsize((10, 5))

# Plot the selected city's row, reversed along the column axis so it lines up
# with the reversed column labels used for the ticks below.
plt.plot(GDP.loc[city, :].values[::-1], 'o-', linewidth=2, color='r', label=city, alpha=0.5)
plt.ylabel(metric)
print(GDP.columns[::-1])
print(len(GDP.columns))
plt.xticks(ticks=range(len(GDP.columns)), labels=GDP.columns[::-1], rotation=45)
plt.legend()
plt.show()


In [None]:
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler

# Scaler mapping the series into the [0, 1] interval; kept in `scaler` so
# predictions can be mapped back to the original scale later.
scaler = MinMaxScaler(feature_range=(0, 1))

# Selected city's row, reversed along the column axis and shaped as a single column.
raw_column = GDP.loc[city, :].values[::-1].reshape(-1, 1)
data = scaler.fit_transform(raw_column)

# Convert a time series into a supervised-learning table of lagged inputs and targets.
def to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning dataset.

    Args:
        data: Observations in any form ``pd.DataFrame`` accepts (flat or
            nested list, 1-D or 2-D array, Series, DataFrame). Rows are time
            steps, columns are variables.
        n_in: Number of lagged steps (t-n_in ... t-1) emitted as input columns.
        n_out: Number of forecast steps (t ... t+n_out-1) emitted as output columns.
        dropnan: When true, drop every row containing NaN, which includes the
            rows left incomplete by shifting.

    Returns:
        pd.DataFrame: Columns are named ``var<j>(t-<i>)``, ``var<j>(t)`` and
        ``var<j>(t+<i>)``, ordered from the oldest lag to the furthest forecast.
    """
    frame = pd.DataFrame(data)
    # Count variables on the frame itself: the previous type/shape check
    # raised IndexError for 1-D arrays and miscounted nested lists and Series.
    n_vars = frame.shape[1]
    shifted, names = [], []
    # Input sequence (t-n_in, ..., t-1).
    for lag in range(n_in, 0, -1):
        shifted.append(frame.shift(lag))
        names += [f'var{j + 1}(t-{lag})' for j in range(n_vars)]
    # Forecast sequence (t, t+1, ..., t+n_out-1).
    for step in range(n_out):
        shifted.append(frame.shift(-step))
        if step == 0:
            names += [f'var{j + 1}(t)' for j in range(n_vars)]
        else:
            names += [f'var{j + 1}(t+{step})' for j in range(n_vars)]
    # Put it all together.
    agg = pd.concat(shifted, axis=1)
    agg.columns = names
    return agg.dropna() if dropnan else agg

# Use the previous 3 observations to predict the next one.
n_in = 3
n_out = 1
data = to_supervised(data, n_in, n_out)

# Split into inputs (the n_in lag columns) and target (the remaining column).
X, y = data.iloc[:, :n_in], data.iloc[:, n_in:]

# Define the model.
model = MLPRegressor(hidden_layer_sizes=(1000,), activation='tanh', max_iter=20000, random_state=2, tol=1e-9)

# Fit on plain arrays with a 1-D target: a column-vector y triggers
# scikit-learn's DataConversionWarning, and fitting on a DataFrame makes every
# later model.predict(ndarray) call warn about missing feature names.
model.fit(X.to_numpy(), y.to_numpy().ravel())

# In-sample predictions on the training windows (these are not future values).
predictions = model.predict(X.to_numpy())

# Map the predictions back to the original scale.
predictions = scaler.inverse_transform(predictions.reshape(-1, 1))

# Print the predictions.
# print(predictions)

In [None]:
plt.rc('font', family='SimHei')  # font with CJK glyphs so Chinese labels render
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign readable with that font
# plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
set_figsize((10, 5))

# The first n_in observations have no prediction (they only serve as inputs to
# the first window), so they are skipped. Using n_in instead of a literal 3
# keeps both curves and the tick labels aligned if the window size changes.
observed = GDP.loc[city, :].values[::-1][n_in:]
plt.plot(observed, 'o-', linewidth=2, color='r', label=f'{city}', alpha=0.5)
plt.plot(predictions, '^-', linewidth=2, color='b', label=f'{city}_预测', alpha=0.5)
# plt.xlabel('xlabel')
plt.ylabel('GDP')
plt.xticks(range(len(observed)), GDP.columns[::-1][n_in:], rotation=45)
plt.legend()
plt.show()

In [None]:
# Number of steps to forecast beyond the last observation.
future_steps = 30
predictions_future = []
# Seed the window with the last n_in observed (scaled) values. They live in
# the target column, which sits right after the n_in lag columns, i.e. at
# position n_in (previously hard-coded as 3 and described as the first column).
last_n_values = list(data.iloc[-n_in:, n_in])
for _ in range(future_steps):
    X_pred = np.array(last_n_values).reshape(1, -1)
    y_pred = model.predict(X_pred)
    predictions_future.append(y_pred[0])

    # Slide the window: drop the oldest value and append the new prediction.
    last_n_values = last_n_values[1:] + [y_pred[0]]

# Map the forecasts back to the original scale.
predictions_future = scaler.inverse_transform(np.array(predictions_future).reshape(-1, 1))

# Print the forecasts.
print(predictions_future)

In [None]:
plt.rc('font', family='SimHei')  # font with CJK glyphs so Chinese labels render
plt.rcParams['axes.unicode_minus'] = False  # keep the minus sign readable with that font
# plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
set_figsize((10, 5))

# Skip the first n_in observations (they have no prediction) so the observed
# curve lines up with the in-sample predictions; the forecasts extend past it.
# Using n_in instead of a literal 3 keeps the alignment if the window changes.
observed = GDP.loc[city, :].values[::-1][n_in:]
plt.plot(observed, 'o-', linewidth=2, color='r', label=f'{city}', alpha=0.5)
plt.plot(np.vstack((predictions, predictions_future)), '^-', linewidth=2, color='b', label=f'{city}_预测', alpha=0.5)
# plt.xlabel('xlabel')
plt.ylabel('GDP')
plt.legend()
plt.show()