In [2]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [2]:
%pip install statsmodels --upgrade

You should consider upgrading via the '/Users/enrico/Documents/Faculdade/TCC Local/Projeto/.venv/bin/python -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
from methods.ar import ar, arima
from methods.naive import average_forecast, all_true, all_false
from methods.lstm import lstm
from methods.mlp import mlp
from methods.mlp import mlp_keras
from methods.cnn import cnn
from methods.classifiers import knn, gaussian, random_forest
from methods.runner import Runner
import functools


In [None]:
# ar -> n_steps = 2, 4, 8
# arima -> n_steps = 2, 4, 8; order = 2, 4, 8; differencing = 1, 2, 3
# KNN -> n_steps = 2, 4, 8; n_neighbours = 2, 4, 8; distances = 1, 2, 3, 4, 5; 
# MLP -> n_steps = 2, 4, 8; epochs = 50, 100, 200; layers = 1, 2, 3; cells = 50, 100, 200
# LSTM -> n_steps = 2, 4, 8; epochs = 50, 100, 200; layers = 1, 2, 3; cells = 50, 100, 200

models = {}
epochs = [50, 100, 200]
layers = [1, 2, 3]
cells = [50, 100, 200]
orders = [2, 4, 8]
differencing = [1, 2, 3]
n_neighbours = [2, 4, 8]
distances = [1, 2, 3, 4, 5]

for order in orders:
	for diff in differencing:
		models[f"ARIMA_{order}_{diff}"] = functools.partial(arima, order=order, differencing=diff)

for n in n_neighbours:
	for d in distances:
		models[f"KNN_{n}_{d}"] = functools.partial(knn, n_neighbors=n, distance=d)

for epoch in epochs:
	for layer in layers:
		for cell in cells:
			models[f"MLP_{epoch}_{layer}_{cell}"] = functools.partial(mlp_keras, epochs=epoch, layers=layer, cells=cell)

for epoch in epochs:
	for layer in layers:
		for cell in cells:
			models[f"LSTM_{epoch}_{layer}_{cell}"] = functools.partial(lstm, epochs=epoch, layers=layer, cells=cell)

In [18]:
models = {
	"Naive": average_forecast,
	"All True": all_true,
	"All False": all_false,
	# "AR": ar,
	# "ARIMA": arima,
	"MLP": mlp,
	"KNN": knn,
	# "Gaussian": gaussian,
	# "RandomForest": random_forest,
	# "MLP Keras": mlp_keras,
	"CNN": cnn,
	"LSTM": lstm,
	# "Stacked LSTM": stacked_lstm
}

In [20]:
models = {}
models["MLP_200_3_200"] = functools.partial(mlp_keras, epochs=10, layers=3, cells=200)
models["LSTM_100_1_100"] = functools.partial(lstm, cells=50, layers=2, epochs=10, verbose=0)

epoch = 10
filter = 32
kernel_size = 2
n = 8
d = 1

models[f"KNN_{n}_{d}"] = functools.partial(knn, n_neighbors=n, distance=d)
models[f"CNN_{epoch}_{filter}_{kernel_size}"] = functools.partial(cnn, epochs=epoch, kernel_size=kernel_size, filters=filter)
# models["LSTM_100_1_100"] = functools.partial(lstm, cells=50, layers=1, epochs=10, verbose=1)
# models["All True"] = all_true
# models["All False"] = all_false
# models["AVG_FORECAST"] = average_forecast

In [21]:
def callback(self):
	print(self)

runner = Runner(models, callback = callback)
# runner.run_all("accuracy", delta_separator = 0.056)
runner.run_all("accuracy", min_validation_size = 4)
# runner.run("accuracy")


# runner.run_transfer("accuracy")
# runner.continue_run_transfer("202205022349", "accuracy")
# runner.continue_run_transfer("202205011401", "accuracy")

{'mse': 0, 'accuracy': 0.6666666666666666, 'roc': 0.8, 'fmeasure': 0.7083333333333334, 'periods': 6}
{'mse': 0, 'accuracy': 0.5, 'roc': 0.7, 'fmeasure': 0.5428571428571429, 'periods': 6}
{'mse': 0, 'accuracy': 0.5, 'roc': 0.3, 'fmeasure': 0.5555555555555555, 'periods': 6}
{'mse': 0, 'accuracy': 1.0, 'roc': 1.0, 'fmeasure': 1.0, 'periods': 6}
Errors: 0


In [4]:
import pandas as pd
results = pd.read_csv('results/202205312027.csv')

results.head()

Unnamed: 0,y,y_pred,y_label,time,file,model
0,0,0,202102,0.839018,All.csv,LSTM_100_1_100
1,0,0,202103,0.839018,All.csv,LSTM_100_1_100
2,1,0,202104,0.839018,All.csv,LSTM_100_1_100
3,0,0,202008,0.839018,All.csv,LSTM_100_1_100
4,0,0,202009,0.839018,All.csv,LSTM_100_1_100


In [7]:
from methods.preprocess import Preprocess

df = results.copy()
preprocess = Preprocess()
models = list(df.model.unique())
accuracies = {}

for model in models:
	df_model = df[df["model"] == model]
	y = df_model["y"].tolist()
	y_pred = df_model["y_pred"].tolist()

	metrics = preprocess.evaluate(y, y_pred)
	accuracies[model] = metrics["fmeasure"]


print(sorted(accuracies.items(), key=lambda x: x[1], reverse=True))

{'LSTM_100_1_100': [0.6451612903225806, 0.5674761935023236, 0.5464821981256976]}


In [21]:
import os
import pandas as pd

folder = "data"
files = sorted(os.listdir(folder))

greater = None
lower = None
greater_country = None
lower_country = None


for file in files:
	df = pd.read_csv(os.path.join(folder, file), index_col='Period').sort_index()
	df = runner.get_df(df)

	if df.shape[0] < 10:
		continue

	local_greater = float(df.loc[df.value.idxmax()])
	local_lower = float(df.loc[df.value.idxmin()])

	if greater is None or local_greater > greater:
		greater = local_greater
		greater_country = file
	
	if lower is None or local_lower < lower:
		lower = local_lower
		lower_country = file


print(lower, lower_country)
print(greater, greater_country)



8794.0 Bermuda.csv
328170728965.0 United Rep. of Tanzania.csv


In [None]:
328,170,728,965.0