# Implementazione di un algoritmo di previsione delle serie storiche

Importiamo le librerie necessarie per svolgere il progetto

In [59]:
import pandas as pd
import numpy as np

Acquisiamo i dati in input

In [60]:
df = pd.read_csv("https://raw.githubusercontent.com/iantomasinicola/17dicembre/refs/heads/main/Dati%20input/Forecast_ets_example.csv",
            skiprows = 2,
            sep = ";")

Visualizziamo le prime 5 righe

In [61]:
df.head(5)

Unnamed: 0,Date,Airport Passengers
0,2009-01-01,2644539
1,2009-02-01,2359800
2,2009-03-01,2925918
3,2009-04-01,3024973
4,2009-05-01,3177100


Modifico le impostazioni di visualizzazione dei dati su pandas

In [62]:
pd.options.display.float_format = '{:.0f}'.format

Creo le medie mobili centrate

In [63]:
df["Media_mobile_centrata"] = df["Airport Passengers"].rolling(window=12,center=True).mean()
df.head(12)

Unnamed: 0,Date,Airport Passengers,Media_mobile_centrata
0,2009-01-01,2644539,
1,2009-02-01,2359800,
2,2009-03-01,2925918,
3,2009-04-01,3024973,
4,2009-05-01,3177100,
5,2009-06-01,3419595,
6,2009-07-01,3649702,3111578.0
7,2009-08-01,3650668,3123322.0
8,2009-09-01,3191526,3136286.0
9,2009-10-01,3249428,3151289.0


Calcolo la componente di stagionalità e casualità

In [64]:
df["stagionalita_casualita"] = df["Airport Passengers"]-df["Media_mobile_centrata"]
df.head(12)

Unnamed: 0,Date,Airport Passengers,Media_mobile_centrata,stagionalita_casualita
0,2009-01-01,2644539,,
1,2009-02-01,2359800,,
2,2009-03-01,2925918,,
3,2009-04-01,3024973,,
4,2009-05-01,3177100,,
5,2009-06-01,3419595,,
6,2009-07-01,3649702,3111578.0,538124.0
7,2009-08-01,3650668,3123322.0,527346.0
8,2009-09-01,3191526,3136286.0,55240.0
9,2009-10-01,3249428,3151289.0,98139.0


Converto la colonna Data nel tipo date

In [65]:
df["Date"] = pd.to_datetime(df["Date"])

Creo una colonna che estrapola il mese della data

In [66]:
df["mese"]=df["Date"].dt.month
df.head(12)

Unnamed: 0,Date,Airport Passengers,Media_mobile_centrata,stagionalita_casualita,mese
0,2009-01-01,2644539,,,1
1,2009-02-01,2359800,,,2
2,2009-03-01,2925918,,,3
3,2009-04-01,3024973,,,4
4,2009-05-01,3177100,,,5
5,2009-06-01,3419595,,,6
6,2009-07-01,3649702,3111578.0,538124.0,7
7,2009-08-01,3650668,3123322.0,527346.0,8
8,2009-09-01,3191526,3136286.0,55240.0,9
9,2009-10-01,3249428,3151289.0,98139.0,10


Calcolo le stagionalità

In [67]:
df_stagionalita = df.groupby(by="mese",
           as_index=False, 
           dropna=False)["stagionalita_casualita"].mean().rename(columns={"stagionalita_casualita":"stagionalita"})
df_stagionalita

Unnamed: 0,mese,stagionalita
0,1,-433144
1,2,-692841
2,3,-154764
3,4,-115615
4,5,146648
5,6,378445
6,7,535543
7,8,539138
8,9,85406
9,10,135183


Combino i due dataframe

In [68]:
df = pd.merge(df, df_stagionalita, on = "mese")
df.head(12)

Unnamed: 0,Date,Airport Passengers,Media_mobile_centrata,stagionalita_casualita,mese,stagionalita
0,2009-01-01,2644539,,,1,-433144
1,2009-02-01,2359800,,,2,-692841
2,2009-03-01,2925918,,,3,-154764
3,2009-04-01,3024973,,,4,-115615
4,2009-05-01,3177100,,,5,146648
5,2009-06-01,3419595,,,6,378445
6,2009-07-01,3649702,3111578.0,538124.0,7,535543
7,2009-08-01,3650668,3123322.0,527346.0,8,539138
8,2009-09-01,3191526,3136286.0,55240.0,9,85406
9,2009-10-01,3249428,3151289.0,98139.0,10,135183


Calcoliamo i passeggeri destagionalizzati

In [69]:
df["passeggeri_destagionalizzati"] = df["Airport Passengers"]-df["stagionalita"]

Trasformo l'indice in una colonna

In [70]:
df = df.reset_index(names="Id")
df.head(12)

Unnamed: 0,Id,Date,Airport Passengers,Media_mobile_centrata,stagionalita_casualita,mese,stagionalita,passeggeri_destagionalizzati
0,0,2009-01-01,2644539,,,1,-433144,3077683
1,1,2009-02-01,2359800,,,2,-692841,3052641
2,2,2009-03-01,2925918,,,3,-154764,3080682
3,3,2009-04-01,3024973,,,4,-115615,3140588
4,4,2009-05-01,3177100,,,5,146648,3030452
5,5,2009-06-01,3419595,,,6,378445,3041150
6,6,2009-07-01,3649702,3111578.0,538124.0,7,535543,3114159
7,7,2009-08-01,3650668,3123322.0,527346.0,8,539138,3111530
8,8,2009-09-01,3191526,3136286.0,55240.0,9,85406,3106120
9,9,2009-10-01,3249428,3151289.0,98139.0,10,135183,3114245


Calcolo la retta di regressione

In [72]:
m, q = np.polyfit(df.query("passeggeri_destagionalizzati.notna()")["Id"], 
                  df.query("passeggeri_destagionalizzati.notna()")["passeggeri_destagionalizzati"], 
                  1)
print(m,q)

14292.583169939895 3022157.5068898285


Calcolo il trend

In [74]:
df["Trend"]= df["Id"]*m + q 
df.head(5)

Unnamed: 0,Id,Date,Airport Passengers,Media_mobile_centrata,stagionalita_casualita,mese,stagionalita,passeggeri_destagionalizzati,Trend
0,0,2009-01-01,2644539,,,1,-433144,3077683,3022158
1,1,2009-02-01,2359800,,,2,-692841,3052641,3036450
2,2,2009-03-01,2925918,,,3,-154764,3080682,3050743
3,3,2009-04-01,3024973,,,4,-115615,3140588,3065035
4,4,2009-05-01,3177100,,,5,146648,3030452,3079328


Calcolo delle previsioni

In [77]:
df["previsione"]=df["Trend"]+df["stagionalita"]
df.head(12)

Unnamed: 0,Id,Date,Airport Passengers,Media_mobile_centrata,stagionalita_casualita,mese,stagionalita,passeggeri_destagionalizzati,Trend,previsione
0,0,2009-01-01,2644539,,,1,-433144,3077683,3022158,2589013
1,1,2009-02-01,2359800,,,2,-692841,3052641,3036450,2343609
2,2,2009-03-01,2925918,,,3,-154764,3080682,3050743,2895979
3,3,2009-04-01,3024973,,,4,-115615,3140588,3065035,2949420
4,4,2009-05-01,3177100,,,5,146648,3030452,3079328,3225975
5,5,2009-06-01,3419595,,,6,378445,3041150,3093620,3472065
6,6,2009-07-01,3649702,3111578.0,538124.0,7,535543,3114159,3107913,3643456
7,7,2009-08-01,3650668,3123322.0,527346.0,8,539138,3111530,3122206,3661343
8,8,2009-09-01,3191526,3136286.0,55240.0,9,85406,3106120,3136498,3221904
9,9,2009-10-01,3249428,3151289.0,98139.0,10,135183,3114245,3150791,3285974


Visualizzo le previsioni

In [78]:
df.query("`Airport Passengers`.isna()")[["Date","previsione"]]

Unnamed: 0,Date,previsione
57,2013-10-01,3972018
58,2013-11-01,3662726
59,2013-12-01,3707564
60,2014-01-01,3446568
61,2014-02-01,3201164
62,2014-03-01,3753534
