In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn import tree
import requests
import io
import ipywidgets as widgets
output = widgets.Output()
from IPython.display import display, clear_output, Javascript
from random import randrange
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import warnings
import time
warnings.filterwarnings('ignore')

In [2]:
"""本資料共計風速5個欄位；風向4個欄位
逐十分鐘一筆
統計資料區間 = 2021/08/01 00:00:01 ~ 2021/10/01 00:00:00


欄位代碼解說：
．儀器類型+高度_N分鐘統計數據
．WS = Wind Speed（風速計，單位：m/s = 公尺/秒）
．WD = Wind Direction（風向計，單位：度）
．10mAVG = 10分鐘平均值

例如：WS95A_10mAVG ，即代表「95公尺高度風速計A的10分鐘統計資料」
※僅95公尺風速計有2支，所以最後會多帶有A、B之尾碼 """;

In [3]:
# Read the wind measurements from the demo file that sits next to the notebook.
wind_demo_path = './wind_demo.txt'
data = pd.read_csv(wind_demo_path)

In [4]:
# Base sensor columns used to build the model inputs.
# Order matters: the first entry is later taken as the prediction target.
# Un-comment a line to include that sensor as well.
feature_list = [
    # --- wind speed (WS) ---
    'WS10_10mAVG',
    # 'WS30_10mAVG',
    # 'WS50_10mAVG',
    # 'WS95A_10mAVG',
    # 'WS95B_10mAVG',
    # --- wind direction (WD) ---
    'WD10_10mAVG',
    # 'WD30_10mAVG',
    # 'WD50_10mAVG',
    # 'WD95_10mAVG',
]

In [6]:
# Correlation matrix of the two 95 m wind-speed columns (A and B).
ws95_columns = ['WS95A_10mAVG', 'WS95B_10mAVG']
data[ws95_columns].corr()

Unnamed: 0,WS95A_10mAVG,WS95B_10mAVG
WS95A_10mAVG,1.0,0.998621
WS95B_10mAVG,0.998621,1.0


In [None]:
## Test period: number of trailing rows held out for evaluation
test_period = 30

## Moving-window size: lags 0 .. moving_window_size - 1 are generated per feature
moving_window_size = 360

# Build every lagged series first and attach them with a single concat.
# Inserting the columns one by one (len(feature_list) * moving_window_size
# separate assignments) fragments the DataFrame; pandas reports that with a
# PerformanceWarning, which this notebook's blanket warnings filter hides.
lagged_columns = {
    f"{x_col}_{lag}": data[x_col].shift(lag)
    for x_col in feature_list
    for lag in range(moving_window_size)
}

# Drop lag columns left over from a previous run of this cell so a re-run does
# not create duplicate column names, then remove every row that contains a NaN
# (this includes the first moving_window_size - 1 rows, whose lags are undefined).
# NOTE: `data` is still overwritten here, so re-running this cell without
# reloading the file trims further rows from the start each time.
data = pd.concat(
    [
        data.drop(columns=list(lagged_columns), errors="ignore"),
        pd.DataFrame(lagged_columns, index=data.index),
    ],
    axis=1,
).dropna()

# Fail early with a clear message instead of fitting on an empty training set.
if len(data) <= test_period:
    raise ValueError(
        f"Only {len(data)} complete rows remain after building "
        f"{moving_window_size} lags; need more than test_period={test_period}."
    )

# Chronological split: the last `test_period` rows are the test set.
train_data = data.iloc[:-test_period, :]
test_data = data.iloc[-test_period:, :]

In [None]:
# Predictor columns: every lagged copy of every base feature, starting at lag 1
# so the lag-0 (current) value is never used as an input.
x_column_list = [
    f"{base_col}_{step}"
    for base_col in feature_list
    for step in range(1, moving_window_size)
]

# The target is the first base feature.
y_column = feature_list[0]

In [None]:
# Split each partition into predictors (x) and a single-column target frame (y).
train_x, train_y = train_data[x_column_list], train_data[[y_column]]
test_x, test_y = test_data[x_column_list], test_data[[y_column]]

In [None]:
# import matplotlib.pylab as plt


# plt.figure(figsize=(40, 20))
# plt.plot(data[feature])
# plt.legend()
# plt.show()

In [None]:
tree_max_depth = 4
# Fixed seed: scikit-learn permutes the candidate features at every split, so
# when several splits are equally good the chosen one can differ between runs.
# Pinning random_state makes the fitted tree (and every plot below) reproducible.
tree_random_state = 42
model = DecisionTreeRegressor(
    max_depth=tree_max_depth,
    random_state=tree_random_state,
)

In [None]:
# Fit on plain NumPy arrays; the fitted estimator is the cell's displayed value.
model.fit(train_x.to_numpy(), train_y.to_numpy())

In [None]:
# Predict the target for the held-out rows.
test_features = test_x.to_numpy()
test_prediction = model.predict(test_features)

In [None]:
# Actual target and model prediction side by side, re-indexed 0..n-1 for plotting.
display_data = (
    test_y
    .reset_index(drop=True)
    .assign(pred=test_prediction)
)

In [None]:
# Plot every column of display_data (actual target and prediction) with a legend,
# axis labels and a title so the figure can be read on its own.
fig, ax = plt.subplots(figsize=(10, 4))
plotted_lines = ax.plot(display_data)
ax.legend(plotted_lines, display_data.columns)
ax.set_xlabel('test sample index')
ax.set_ylabel(y_column)
ax.set_title('Actual vs. predicted over the test period')
plt.show()

In [None]:
# Rank the fitted tree's feature importances and show the largest ones as a pie chart.
top_k = 50
feature_importances = pd.DataFrame(
    {'value': model.feature_importances_},
    index=train_x.columns,
).sort_values(by='value', ascending=False)
# Expose the column names as a regular column so plotly can use them as labels.
feature_importances['name'] = feature_importances.index
fig = px.pie(
    feature_importances.head(top_k),
    values='value',
    names='name',
    title=f'前{top_k}個重要的特徵',
)
fig.show()

In [None]:
from sklearn.metrics import mean_squared_error

In [None]:
# The name `df` is not defined by any earlier cell, so evaluating it raised
# NameError on a fresh kernel. Report the hold-out mean squared error instead,
# using the metric imported in the previous cell.
mean_squared_error(test_y[y_column], test_prediction)

In [None]:
import numpy

# NOTE: this cell used to run `import plotly.graph_objects as px`, which rebound
# `px` (plotly.express from the first cell) to graph_objects. After that, cells
# that call `px.pie(...)` failed when re-run. The figure is now built through the
# `go` alias that the first cell already provides, so `px` keeps its meaning.

# Seed NumPy's global RNG and draw two demo arrays of 100 integers in [1, 100].
# They are not used by the figure below.
np.random.seed(42)

random_x = np.random.randint(1, 101, 100)
random_y = np.random.randint(1, 101, 100)

x = ['A', 'B', 'C', 'D']

# Two bar traces that share the same categories.
plot = go.Figure(
    data=[
        go.Bar(name='Data 1', x=x, y=[100, 200, 500, 673]),
        go.Bar(name='Data 2', x=x, y=[56, 123, 982, 213]),
    ]
)

# Add a horizontal row of buttons (not a dropdown). Each button sets the
# visibility of the two traces and updates the figure title.
plot.update_layout(
    updatemenus=[
        dict(
            type="buttons",
            direction="left",
            buttons=[
                dict(
                    label="Both",
                    method="update",
                    args=[{"visible": [True, True]}, {"title": "Both"}],
                ),
                dict(
                    label="Data 1",
                    method="update",
                    args=[{"visible": [True, False]}, {"title": "Data 1"}],
                ),
                dict(
                    label="Data 2",
                    method="update",
                    args=[{"visible": [False, True]}, {"title": "Data 2"}],
                ),
            ],
        )
    ]
)

plot.show()
