<a href="https://colab.research.google.com/github/boazcosta/Flor-de-Mandacaru/blob/master/Boaz_Sousa_Desafio_RNP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>



* Discente: Boaz de Sousa Costa
* Disciplina: Tópicos Avançados em Redes de Computadores
* Data Challenge CT-Mon/RNP






In [None]:
# Mount Google Drive at /content/drive (requires the Google Colab runtime,
# which provides the `google.colab` package).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Bibliotecas e Pacotes que Utilizei
import os
import json
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.model_selection import train_test_split
from glob import glob
import csv
from sklearn.model_selection import train_test_split, cross_val_score

In [None]:
# Locations of the challenge data inside the mounted Google Drive.
# NOTE(review): these are absolute paths tied to one user's Drive layout —
# adjust them when running elsewhere.
test_path = '/content/drive/MyDrive/open-data-challenge-ct-mon-rnp/Test'
train_path = '/content/drive/MyDrive/open-data-challenge-ct-mon-rnp/Train'

In [None]:
# Every directory matching <train_path>/dash/*/* ; the training loop reads the
# last two path components as the client and server labels.
dash_paths = glob(os.path.join(train_path, 'dash', '*', '*'))
# Integer code assigned to each location label and used as a model feature.
# NOTE(review): codes repeat ('ba'/'ce' -> 0, 'rj'/'df' -> 1); presumably the
# clients (ba, rj) and servers (ce, df, es, pi) are numbered independently —
# confirm this is intended.
clientes_servidores = {'ba': 0, 'rj': 1, 'ce': 0, 'df': 1, 'es': 2, 'pi': 3}

In [None]:
# Load the raw DASH measurements of one client/server directory
def process_dash(path):
    """Read every ``*.jsonl`` file in ``path`` and return its DASH records.

    Each line is parsed as a JSON object with ``timestamp`` (epoch seconds)
    and ``rate`` keys. The final line of every file is skipped
    (NOTE(review): presumably a trailing incomplete/summary record — confirm).

    Returns:
        list[dict]: one ``{'timestamp': pandas.Timestamp, 'rate': value}``
        entry per parsed line, in file/line order.
    """
    records = []
    for jsonl_file in glob(os.path.join(path, '*.jsonl')):
        with open(jsonl_file, 'r') as handle:
            raw_lines = handle.readlines()
        # Everything except the last line of the file is parsed.
        for raw in raw_lines[:-1]:
            entry = json.loads(raw)
            records.append({
                'timestamp': pd.to_datetime(entry['timestamp'], unit='s'),
                'rate': entry['rate'],
            })
    return records

In [None]:
# Aggregate raw DASH records into 5-minute statistics
def aggregate_dash(dash_data):
    """Resample DASH records into 5-minute bins with the mean and std of ``rate``.

    Args:
        dash_data: sequence of ``{'timestamp': ..., 'rate': ...}`` records,
            as produced by ``process_dash``.

    Returns:
        pandas.DataFrame indexed by the 5-minute bin start, with MultiIndex
        columns ``('rate', 'mean')`` and ``('rate', 'std')``. Bins where either
        statistic is NaN (e.g. empty bins, or a single sample so the sample
        std is undefined) are dropped. An empty input yields an empty frame
        with the same columns instead of raising ``KeyError``.
    """
    if len(dash_data) == 0:
        # pd.DataFrame([]) has no 'timestamp' column, so set_index would fail;
        # return an empty frame with the shape callers expect.
        return pd.DataFrame(
            index=pd.DatetimeIndex([], name='timestamp'),
            columns=pd.MultiIndex.from_product([['rate'], ['mean', 'std']]),
            dtype=float,
        )

    return (
        pd.DataFrame(dash_data)
        .set_index('timestamp')
        .sort_index()  # resample needs a monotonically ordered time index
        .resample('5min')
        .agg({'rate': ['mean', 'std']})
        .dropna()
    )

In [None]:
# Build the training set from every client/server directory.
# Each sample is a block of 12 consecutive 5-minute bins: the first 10 bins
# provide the features, bins 11 and 12 provide the targets.
X = []
y = []

for path in dash_paths:
    # The last two path components are the client and server labels.
    cliente, servidor = path.split('/')[-2:]

    dash_serie_5min = aggregate_dash(process_dash(path))

    # Non-overlapping blocks of 12 bins; a trailing partial block is ignored.
    for start in range(0, len(dash_serie_5min) - 11, 12):
        group = dash_serie_5min.iloc[start:start + 12]
        mean_col = group[('rate', 'mean')]
        std_col = group[('rate', 'std')]

        X.append([
            clientes_servidores[cliente],
            clientes_servidores[servidor],
            mean_col.iloc[:10].mean(),  # average of the 10 bin means
            std_col.iloc[:10].mean(),   # average of the 10 bin stds
            mean_col.iloc[9],           # most recent bin mean
            std_col.iloc[9],            # most recent bin std
        ])
        y.append([
            mean_col.iloc[10],  # mean, first bin after the feature window
            std_col.iloc[10],   # std, first bin after the feature window
            mean_col.iloc[11],  # mean, second bin after the feature window
            std_col.iloc[11],   # std, second bin after the feature window
        ])

In [None]:
# Convert the accumulated feature and target lists into NumPy arrays
X_np, y_np = np.array(X), np.array(y)

In [None]:
# Hold out 5% of the samples for a final check; the fixed random_state makes
# the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X_np, y_np, test_size=0.05, random_state=42)

In [None]:
# Random forest hyperparameters. The same estimator object is re-fitted once
# per target column by the cells below.
# NOTE(review): n_estimators=3 and min_samples_leaf=162 are unusual values —
# presumably the outcome of manual tuning; confirm before reusing.
rf_regressor = RandomForestRegressor(
    n_estimators=3,
    max_features=0.3,
    max_depth=15,
    min_samples_split=2,
    min_samples_leaf=162,
    random_state=67

)

In [None]:
# 5-fold cross-validation on the training split, one target column at a time.
# scikit-learn reports MAPE negated, so the sign is flipped before printing.
for col, target in enumerate(["Mean Rate (5min)", "Std Rate (5min)", "Mean Rate (10min)", "Std Rate (10min)"]):
    fold_scores = cross_val_score(
        rf_regressor,
        X_train,
        y_train[:, col],
        cv=5,
        scoring="neg_mean_absolute_percentage_error",
    )
    mean_mape = -np.mean(fold_scores)
    print(f"Cross-validation MAPE for {target}: {mean_mape:.10f}")

Cross-validation MAPE for Mean Rate (5min): 0.1176683202
Cross-validation MAPE for Std Rate (5min): 0.0902586340
Cross-validation MAPE for Mean Rate (10min): 0.3937239932
Cross-validation MAPE for Std Rate (10min): 0.2147305568


In [None]:
# Fit on the training split only and report MAPE on the held-out split,
# one target column at a time (the estimator is re-fitted for each target).
target_labels = ("Mean Rate (5min)", "Std Rate (5min)", "Mean Rate (10min)", "Std Rate (10min)")
for col, target in enumerate(target_labels):
    fitted = rf_regressor.fit(X_train, y_train[:, col])  # fit() returns the estimator itself
    mape = mean_absolute_percentage_error(y_test[:, col], fitted.predict(X_test))
    print(f"MAPE {target}: {mape:.10f}")

MAPE Mean Rate (5min): 0.0785841175
MAPE Std Rate (5min): 0.0768569738
MAPE Mean Rate (10min): 0.0763323598
MAPE Std Rate (10min): 0.0760710438


In [None]:
# Build the feature matrix for the challenge test files.
# The six features must be computed exactly like the training features:
#   [client code, server code, mean of window means, mean of window stds,
#    last window mean, last window std]
# with the per-window std being the sample std (ddof=1), which is what pandas
# uses when the training bins are aggregated.
# NOTE(review): assumes each entry of data['dash'] holds the raw rates of one
# 5-minute window — confirm against the test file schema.
features = []
submission = []

# Sorted so the submission row order is reproducible across runs.
Test_jsons = sorted(glob(os.path.join(test_path, "*.json")))
for test in Test_jsons:
    with open(test, 'r') as file:
        data = json.load(file)

    # Submission id = file name up to the first dot.
    submission.append([os.path.basename(test).split('.')[0]])

    rates_mean = []
    rates_std = []
    for dash in data['dash']:
        rate = np.asarray(dash['rate'], dtype=float)
        rates_mean.append(rate.mean())
        # Sample std is undefined for a single value; fall back to 0.0 (what the
        # population std returns) so the feature row never contains NaN.
        rates_std.append(rate.std(ddof=1) if rate.size > 1 else 0.0)

    features.append([
        clientes_servidores[data['cliente']],
        clientes_servidores[data['servidor']],
        np.mean(rates_mean),
        np.mean(rates_std),  # mean of window stds, matching the training feature
        rates_mean[-1],
        rates_std[-1],
    ])

features_np = np.array(features)

In [None]:
# Predict the four targets for the test files.
# The estimator is re-fitted on the training split for each target column and
# the predictions are collected as one column per target (shape: n_files x 4).
predictions = np.column_stack([
    rf_regressor.fit(X_train, y_train[:, i]).predict(features_np)
    for i in range(4)
])

# Rebuild every row from its id alone instead of appending in place, so that
# re-running this cell does not keep adding prediction columns to `submission`.
submission = [
    row[:1] + list(pred_row)
    for row, pred_row in zip(submission, predictions)
]

In [None]:
# Write the submission file: a header row followed by one row per test file
header = ["id", "mean_1", "stdev_1", "mean_2", "stdev_2"]

with open("output_f11.csv", mode="w", newline="", encoding="utf-8") as csv_file:
    csv.writer(csv_file).writerows([header, *submission])

print("Arquivo de submissão criado com sucesso!")

Arquivo de submissão criado com sucesso!
