# Addestramento Modello per Previsione Produzione Olio d'Oliva

Questo notebook utilizza le funzioni modularizzate per:
1. Caricare e preprocessare i dati meteorologici
2. Preparare i dati per il training
3. Configurare e addestrare il modello

In [1]:
#!apt-get update
#!apt-get install graphviz -y

!pip install tensorflow
!pip install numpy
!pip install pandas

!pip install keras
!pip install scikit-learn
!pip install matplotlib
!pip install joblib
!pip install pyarrow
!pip install fastparquet
!pip install scipy
!pip install seaborn
!pip install tqdm
!pip install pydot
!pip install tensorflow-io
!pip install pvlib

Collecting tqdm
  Downloading tqdm-4.67.0-py3-none-any.whl.metadata (57 kB)
Downloading tqdm-4.67.0-py3-none-any.whl (78 kB)
Installing collected packages: tqdm
Successfully installed tqdm-4.67.0
Collecting pydot
  Downloading pydot-3.0.2-py3-none-any.whl.metadata (10 kB)
Downloading pydot-3.0.2-py3-none-any.whl (35 kB)
Installing collected packages: pydot
Successfully installed pydot-3.0.2
Collecting tensorflow-io
  Downloading tensorflow_io-0.37.1-cp39-cp39-macosx_12_0_arm64.whl.metadata (14 kB)
Downloading tensorflow_io-0.37.1-cp39-cp39-macosx_12_0_arm64.whl (31.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.8/31.8 MB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m0m
[?25hInstalling collected packages: tensorflow-io
Successfully installed tensorflow-io-0.37.1
Collecting pvlib
  Downloading pvlib-0.11.1-py3-none-any.whl.metadata (2.8 kB)
Downloading pvlib-0.11.1-py3-none-any.whl (29.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import tensorflow as tf
import keras

# Report the library versions and GPU visibility of the running kernel.
print(f"Keras version: {keras.__version__}")
print(f"TensorFlow version: {tf.__version__}")
print(f"CUDA available: {tf.test.is_built_with_cuda()}")
print(f"GPU devices: {tf.config.list_physical_devices('GPU')}")

# GPU configuration: enable memory growth on every visible GPU.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Best effort: report the RuntimeError and let the notebook continue.
        print(e)

In [None]:
# Simple smoke test to verify that the GPU actually works
def test_gpu(matrix_size=10000):
    """Multiply two random square matrices on the first GPU and report the outcome.

    Args:
        matrix_size: Side length of the two random square matrices
            (defaults to 10000, the size originally hard-coded here).

    Returns:
        A human-readable status string: success only when a GPU is visible
        and the result tensor was actually placed on it.
    """
    print("TensorFlow version:", tf.__version__)
    print("\nDispositivi disponibili:")
    print(tf.config.list_physical_devices())

    # Without a visible GPU there is nothing to test; do not request '/GPU:0'
    # and do not report a success that never happened.
    if not tf.config.list_physical_devices('GPU'):
        return "Test non eseguito: nessuna GPU disponibile."

    # Create and multiply two tensors on the GPU
    with tf.device('/GPU:0'):
        a = tf.random.normal([matrix_size, matrix_size])
        b = tf.random.normal([matrix_size, matrix_size])
        c = tf.matmul(a, b)

    print("\nShape del risultato:", c.shape)
    print("Device del tensore:", c.device)

    # Confirm the result really lives on a GPU before declaring success.
    if 'GPU' not in c.device.upper():
        return "Test fallito: il calcolo non è stato eseguito sulla GPU."
    return "Test completato con successo!"


test_gpu()

In [None]:
# Imports necessari
from src.data.data_loader import load_weather_data, load_olive_varieties
from src.data.data_processor import prepare_solar_data, prepare_transformer_data
from src.features.weather_features import add_solar_features, add_environmental_features
from src.features.temporal_features import add_time_features
from src.models.training import train_transformer, setup_transformer_training
from src.utils.helpers import get_optimal_workers
from src.visualization.plots import plot_correlation_matrix
import pandas as pd
import os

## 1. Caricamento e Preparazione Dati

In [None]:
# Presumably the shared random seed for later steps; this cell only defines it.
random_state_value = 42

# Directory layout, all relative to the local ./kaggle folder.
base_dir = './kaggle'
input_dir = f'{base_dir}/input'
working_dir = f'{base_dir}/working'
working_data_dir = f'{working_dir}/data'
data_models_dir = f'{working_data_dir}/models'

# Create the output directories (no error if they already exist).
for output_dir in (working_dir, working_data_dir, data_models_dir):
    os.makedirs(output_dir, exist_ok=True)

weather_path = f'{input_dir}/olive-oil/weather_data.parquet'
varieties_path = f'{input_dir}/olive-oil/variety_olive_oil_production.csv'

# Load the weather data
weather_data = load_weather_data(weather_path, start_year=2010)

# Load the olive variety data
olive_varieties = load_olive_varieties(varieties_path)

print(f"Shape dati meteo: {weather_data.shape}")
print(f"Shape dati olive: {olive_varieties.shape}")

## 2. Feature Engineering

In [None]:
# Enrich the weather data step by step: temporal, then solar, then environmental features.
weather_data = add_time_features(weather_data)
weather_data = add_solar_features(weather_data)
weather_data = add_environmental_features(weather_data)

# Fixed part of the model input: explicitly named columns.
base_features = [
    'temp', 'tempmin', 'tempmax', 'humidity', 'cloudcover',
    'windspeed', 'pressure', 'visibility',
    'hour_sin', 'hour_cos', 'month_sin', 'month_cos',
    'day_of_year_sin', 'day_of_year_cos',
    'temp_humidity', 'temp_cloudcover', 'visibility_cloudcover',
    'clear_sky_factor', 'day_length',
    'temp_1h_lag', 'cloudcover_1h_lag', 'humidity_1h_lag',
    'temp_rolling_mean_6h', 'cloudcover_rolling_mean_6h',
]

# Variable part: every column whose name contains one of these markers
# (presumably one-hot encoded categories - TODO confirm).
dynamic_markers = ('season_', 'time_period_')
dynamic_features = [
    col for col in weather_data.columns
    if any(marker in col for marker in dynamic_markers)
]

# Full feature list to use
features = base_features + dynamic_features

print(f"Numero totale di feature: {len(features)}")

## 3. Analisi delle Correlazioni

In [None]:
# Correlation matrix over the input features plus the three solar columns.
correlation_columns = features + ['solarradiation', 'solarenergy', 'uvindex']
plot_correlation_matrix(
    weather_data[correlation_columns],
    title='Correlazioni tra Feature Meteorologiche',
)

## 4. Preparazione Dati per il Training

In [None]:
# Prepare the model data from the weather frame and the feature list.
X_scaled, scaler_X, y_scaled, scaler_y, data_after_2010 = prepare_solar_data(weather_data, features)

# Prepare the transformer data: three (data, targets) splits plus the scalers.
(
    (train_data, train_targets),
    (val_data, val_targets),
    (test_data, test_targets),
    scalers,
) = prepare_transformer_data(data_after_2010, olive_varieties)

# Report the temporal/static input shapes of each split.
print("\nShape dei dati:")
for split_label, split in (
    ("Training", train_data),
    ("Validation", val_data),
    ("Test", test_data),
):
    print(f"{split_label} - Temporal: {split['temporal'].shape}, Static: {split['static'].shape}")

## 5. Training del Modello

In [None]:
# Transformer training: hyperparameters and the save name are grouped in one place.
training_options = dict(
    epochs=150,
    batch_size=64,
    save_name='weather_transformer',
)

model, history = train_transformer(
    train_data=train_data,
    train_targets=train_targets,
    val_data=val_data,
    val_targets=val_targets,
    **training_options,
)

## 6. Valutazione del Modello

In [None]:
from src.utils.metrics import calculate_real_error, evaluate_model_performance

# Names given to the model outputs when reporting errors.
target_columns = ['solarradiation', 'solarenergy', 'uvindex']

# Compute the real-scale errors on the test split.
# NOTE(review): scaler_y comes from prepare_solar_data, while the model is trained
# on the prepare_transformer_data splits (which return their own `scalers`);
# confirm that scaler_y is the right scaler for these targets.
percentage_errors, absolute_errors = calculate_real_error(
    model, test_data, test_targets, scaler_y,
    target_names=target_columns,
)

# Evaluate model performance on the test split.
metrics = evaluate_model_performance(model, test_data, test_targets, 'test')

## 7. Visualizzazione dei Risultati

In [None]:
from src.visualization.plots import (
    plot_production_trends,
    plot_correlation_matrix
)

# Column labels for the prediction table.
prediction_columns = ['solarradiation', 'solarenergy', 'uvindex']

# Predict on the test split and map the outputs back through scaler_y.
# NOTE(review): scaler_y comes from prepare_solar_data, not from the transformer
# data preparation; confirm it matches the model's targets.
predictions = model.predict(test_data)
predictions_real = scaler_y.inverse_transform(predictions)

# Build a DataFrame with the predictions
results_df = pd.DataFrame(predictions_real, columns=prediction_columns)

# Correlation plot across the predicted columns
plot_correlation_matrix(results_df, title='Correlazioni tra Predizioni')

# Trend plot of the predictions
plot_production_trends(results_df)