In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
import warnings
import itertools


In [2]:
# Data collected from https://www.cpa.unicamp.br/graficos
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
historical_average = [25.2, 25.3, 24.7, 22.3, 19.4, 18.2, 18.3, 20.4, 22.7, 24.1, 24.0, 24.9]
average_2017 = [26.00, 26.00, 23.00, 23.00, 20.00, 18.00, 17.00, 21.00, 23.00, 25.00, 22.00, 25.00]
average_2018 = [24.18, 25.39, 26.13, 22.30, 19.18, 21.40, 18.51, 19.88, 21.87, 23.62, 24.00, 24.00]
average_2019 = [26.58, 26.0, 24.86, 23.5, 20.68, 19.23, 17.41, 21.14, 24.5, 25.73, 24.9, 24.9]
average_2020_partial = [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 26.12, 26.13, 23.44, 25.15]
average_2021 = [25.41, 25.3, 24.19, 21.34, 18.99, 20.07, 20.2, 22.78, 24.95, 23.43, 23.51, 23.46]
average_2022 = [25.27, 24.99, 24.84, 21.84, 17.48, 18.4, 18.96, 20.46, 22.04, 24.4, 22.0, 24.56]
average_2023 = [24.91, 24.96, 25.14, 21.51, 19.49, 18.34, 19.41, 22.82, 24.54, 25.27, 26.02, 26.18]
average_2024 = [25.84, 25.46, 26.12, 24.05, 22.0, 21.04, 20.08, 19.9, 25.87, 26.42, 24.17, 25.16]
average_2025_partial = [25.0, 26.08, 25.03, 23.04, 18.4, 16.05, 18.9, 19.1, np.nan, np.nan, np.nan, np.nan]

The data of average_2017 and average_2018 were inserted to support the imputation of missing temperature values in early 2020 and to generate a robust forecast for the final months of 2025.

In [3]:
# Ignoring warnings for cleaner output
warnings.filterwarnings("ignore")
# Combining all data into a single list
complete_temperatures = average_2019 + average_2020_partial + average_2021 + average_2022 + average_2023 + average_2024 + average_2025_partial
complete_dates = pd.date_range(start='2017-01', periods=len(complete_temperatures), freq='ME')
time_series = pd.Series(complete_temperatures, index=complete_dates)

# Parameter Optimization (Grid Search)
p = d = q = range(0, 2)
P = D = Q = range(0, 2)
s = 12

pdq = list(itertools.product(p, d, q))
seasonal_pdq = [(x[0], x[1], x[2], s) for x in list(itertools.product(P, D, Q))]

best_aic = float('inf')
best_parameters = None

for param in pdq:
    for param_seasonal in seasonal_pdq:
        try:
            sarima_model = SARIMAX(time_series,
                                    order=param,
                                    seasonal_order=param_seasonal,
                                    enforce_stationarity=False,
                                    enforce_invertibility=False)
            result = sarima_model.fit(disp=False)

            if result.aic < best_aic:
                best_aic = result.aic
                best_parameters = (param, param_seasonal)
        except Exception as e:
            continue

# Adicionando a verificação de erro aqui
if best_parameters is None:
    print("Erro: Nenhum conjunto de parâmetros SARIMA válido foi encontrado.")
else:
    print(f"Best SARIMA parameters found:: {best_parameters} with AIC of {best_aic:.2f}")

    # Training the model with the best parameters
    optimized_model = SARIMAX(time_series,
                               order=best_parameters[0],
                               seasonal_order=best_parameters[1],
                               enforce_stationarity=False,
                               enforce_invertibility=False)
    optimized_result = optimized_model.fit(disp=False)

Best SARIMA parameters found:: ((0, 1, 1), (1, 1, 1, 12)) with AIC of 189.51


### 2020 - Temperature data for the first 8 months was imputed using time series analysis.

In [4]:
    # Generating the new forecasts
    forecast_2020_df = optimized_result.get_prediction(start='2020-01-31', end='2020-08-31')
    forecast_2020 = forecast_2020_df.predicted_mean.round(2).tolist()

    average_2020 = forecast_2020 + average_2020_partial[8:]

    # Displaying t e new results
    print("\nAverage_2020 with new values imputed for the first 8 months of 2020:")
    print(average_2020)


Average_2020 with new values imputed for the first 8 months of 2020:
[24.52, 24.75, 23.75, 22.06, 19.42, 18.6, 17.72, 21.31, 26.12, 26.13, 23.44, 25.15]


### 2025 - Temperature data for the forecast for the last 4 months.

In [5]:
# Previsão para os últimos 4 meses de 2025
forecast_2025 = result.forecast(steps=4).round(2).tolist()
average_2025 = average_2025_partial[:8] + forecast_2025

print("\nAverage 2025 with new values forecasted for the last 4 months of 2025:")
print(average_2025)


Average 2025 with new values forecasted for the last 4 months of 2025:
[25.0, 26.08, 25.03, 23.04, 18.4, 16.05, 18.9, 19.1, 24.07, 24.18, 23.78, 21.3]


In [6]:
# Mapping the data for each year
temperature_data = {
    2019: average_2019,
    2020: average_2020,
    2021: average_2021,
    2022: average_2022,
    2023: average_2023,
    2024: average_2024,
    2025: average_2025,
}

# Criating DataFrame
df_temperatures = pd.DataFrame(temperature_data)

# Setting months as the row index
df_temperatures.index = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

# Displaying tables
df_temperatures.head(12)



Unnamed: 0,2019,2020,2021,2022,2023,2024,2025
Jan,26.58,24.52,25.41,25.27,24.91,25.84,25.0
Feb,26.0,24.75,25.3,24.99,24.96,25.46,26.08
Mar,24.86,23.75,24.19,24.84,25.14,26.12,25.03
Apr,23.5,22.06,21.34,21.84,21.51,24.05,23.04
May,20.68,19.42,18.99,17.48,19.49,22.0,18.4
Jun,19.23,18.6,20.07,18.4,18.34,21.04,16.05
Jul,17.41,17.72,20.2,18.96,19.41,20.08,18.9
Aug,21.14,21.31,22.78,20.46,22.82,19.9,19.1
Sep,24.5,26.12,24.95,22.04,24.54,25.87,24.07
Oct,25.73,26.13,23.43,24.4,25.27,26.42,24.18


In [7]:
# Dictionary with data from the years to facilitate access
years_data = temperature_data

# Generating individual graphs

for current_year in range(2020, 2026):
    last_year = current_year - 1
    
    plt.figure(figsize=(10, 6))
    
    # Plotting the previous year
    plt.plot(months, temperature_data[last_year], marker='o', label=f'Average {last_year}', color='black')
    plt.plot(historical_average, marker='o', label=f'Historical Average 1990-2024', color='red', alpha=0.4)
    
    # Plotting the current year, with treatment for the imputed/predicted data
    if current_year == 2020:
        #2020: Dashed line for imputation (Jan-Aug) and solid line for actual (Sep-Dec)
        plt.plot(months[:8], years_data[current_year][:8], marker='o', label=f'Average {current_year} (Imputed)', color='blue', linestyle=':')
        plt.plot(months[7:], years_data[current_year][7:], marker='o', color='blue', linestyle='-')
       
    elif current_year == 2025:
        #2025: Solid line for actual data (Jan-Aug) and dashed for forecast (Sep-Dec)
        plt.plot(months[:8], years_data[current_year][:8], marker='o', label=f'Average {current_year} (Forecast)', color='blue', linestyle='-')
        plt.plot(months[7:], years_data[current_year][7:], marker='o', color='blue', linestyle=':')
        
    else:
        # Other years: Solid line
        plt.plot(months, years_data[current_year], marker='o', label=f'Average {current_year}', color='blue')
       
                
    plt.title(f'Temperature Comparison: {current_year} vs. {last_year}')
    plt.xlabel('Month')
    plt.ylabel('Temperature (°C)')
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.ylim(0, 40)
    plt.legend(loc='upper center', bbox_to_anchor=(0.5, 1.15), ncol=3)
    plt.tight_layout(rect=[0, 0.05, 1, 0.95])

    # Save the graph to a file with a dynamic name
    file_name = f'comparison_temperature_{current_year}_vs_{last_year}.png'
    plt.savefig(file_name)
    print(f'Saved chart: {file_name}')

    plt.close() # Close the figure to avoid overloading memory


Saved chart: comparison_temperature_2020_vs_2019.png
Saved chart: comparison_temperature_2021_vs_2020.png
Saved chart: comparison_temperature_2022_vs_2021.png
Saved chart: comparison_temperature_2023_vs_2022.png
Saved chart: comparison_temperature_2024_vs_2023.png
Saved chart: comparison_temperature_2025_vs_2024.png
