In [1]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from keras.models import Sequential, Model
from keras.layers import Dense, LSTM, Input, Bidirectional, Concatenate
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from skimpy import *
import seaborn as sns
import matplotlib.pyplot as plt
import random
from sklearn.metrics import mean_absolute_error, mean_squared_error
from scipy.interpolate import CubicSpline
from tensorflow.keras.callbacks import EarlyStopping
from math import radians, sin, cos, sqrt, atan2

num_stations=3
data = pd.read_csv("meteofrance.csv", sep = ";")
ss = np.unique(data['NUM_POSTE'].values)
colonnes_a_garder = ['AAAAMMJJHH',  ' T', 'NUM_POSTE', 'LAT', 'LON']
df = data.loc[:,colonnes_a_garder].copy()  # Création d'une copie
df.loc[:, 'AAAAMMJJHH'] = pd.to_datetime(df['AAAAMMJJHH'], format='%Y%m%d%H')
df.rename(columns={
    'AAAAMMJJHH' : 'date',
    ' T': 'temperature'
}, inplace=True)
df1 = df.loc[df['NUM_POSTE'] == 20004002]

start_column = 7
max_hole=6
N=10
tab_hole=[1,2,3,4,5,6]
column='temperature'

def haversine(lat1, lon1, lat2, lon2):
    R = 6371.0  # Rayon de la Terre en kilomètres
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat / 2)**2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon / 2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    distance = R * c
    return distance

main_station = df.loc[df['NUM_POSTE'] == 20004002].copy()
unique_stations = df.drop_duplicates(subset=['NUM_POSTE'])
lat1, lon1 = main_station['LAT'].values[0], main_station['LON'].values[0]
unique_stations['distance'] = unique_stations.apply(lambda row: haversine(lat1, lon1, row['LAT'], row['LON']), axis=1)
neighbor_stations = unique_stations[unique_stations['NUM_POSTE'] != 20004002].sort_values('distance').head(num_stations)
neighbor_stations = np.unique(neighbor_stations['NUM_POSTE'].values)
#print(neighbor_stations)
i=1
for station in neighbor_stations:
    station_data = df.loc[df['NUM_POSTE'] == station, ['date', 'temperature']].rename(columns={'temperature': f'temperature_station{i}'})
    i=i+1
    df1 = df1.merge(station_data, on='date', how='left')
    
combined_temps = np.concatenate([df1['temperature'].values] + [df1[f'temperature_station{j}'].values for j in range(1, i)])
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(combined_temps.reshape(-1, 1))

df1['temperature_scaled'] = scaler.transform(df1[['temperature']])
for j in range(1, i):
    df1[f'temperature_scaled{j}'] = scaler.transform(df1[[f'temperature_station{j}']])
rows_with_nan = df1[df1.isna().any(axis=1)].index.tolist()
rows_with_nan.insert(0, -1)
rows_with_nan.append(len(df1))
df1.interpolate(method='linear', inplace=True)
print(df1)

2024-12-10 09:32:20.111497: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


                     date  temperature  NUM_POSTE     LAT       LON  \
0     2023-01-01 00:00:00         11.0   20004002  41.918  8.792667   
1     2023-01-01 01:00:00         10.4   20004002  41.918  8.792667   
2     2023-01-01 02:00:00          9.9   20004002  41.918  8.792667   
3     2023-01-01 03:00:00          9.6   20004002  41.918  8.792667   
4     2023-01-01 04:00:00          9.7   20004002  41.918  8.792667   
...                   ...          ...        ...     ...       ...   
12311 2024-05-27 23:00:00         17.6   20004002  41.918  8.792667   
12312 2024-05-28 00:00:00         16.0   20004002  41.918  8.792667   
12313 2024-05-28 01:00:00         15.4   20004002  41.918  8.792667   
12314 2024-05-28 02:00:00         15.0   20004002  41.918  8.792667   
12315 2024-05-28 03:00:00         13.5   20004002  41.918  8.792667   

       temperature_station1  temperature_station2  temperature_station3  \
0                      14.6                  11.0                  11.9 

In [2]:
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True, verbose=1)
data = df1[['temperature']]  
scaler = MinMaxScaler(feature_range=(0, 1))
data_scaled = scaler.fit_transform(data)

def create_dataset(dataset, look_back=24):
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):
        a = dataset[i:(i + look_back), 0]  # Utilisation de la seule colonne
        dataX.append(a)
        dataY.append(dataset[i + look_back, 0])  # Utilisation de la même colonne pour y
    return np.array(dataX), np.array(dataY)


X, y = create_dataset(data_scaled, look_back=24)
print(y)
X = X.reshape((X.shape[0], X.shape[1], 1))
model = Sequential()
model.add(LSTM(50, input_shape=(X.shape[1], X.shape[2])))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mean_squared_error')
trainX, testX, trainY, testY = train_test_split(X, y, test_size=0.2, random_state=42)
model.fit(trainX, trainY, epochs=50, batch_size=64,callbacks=[early_stop], validation_data=(testX, testY), verbose=2)

[0.2757732  0.24226804 0.23969072 ... 0.42525773 0.41494845 0.37628866]
Epoch 1/50
154/154 - 3s - 19ms/step - loss: 0.0204 - val_loss: 0.0077
Epoch 2/50
154/154 - 1s - 8ms/step - loss: 0.0058 - val_loss: 0.0028
Epoch 3/50
154/154 - 1s - 8ms/step - loss: 0.0021 - val_loss: 0.0023
Epoch 4/50
154/154 - 1s - 8ms/step - loss: 0.0018 - val_loss: 0.0017
Epoch 5/50
154/154 - 1s - 8ms/step - loss: 0.0016 - val_loss: 0.0015
Epoch 6/50
154/154 - 1s - 8ms/step - loss: 0.0015 - val_loss: 0.0013
Epoch 7/50
154/154 - 1s - 8ms/step - loss: 0.0013 - val_loss: 0.0012
Epoch 8/50
154/154 - 1s - 8ms/step - loss: 0.0011 - val_loss: 9.4583e-04
Epoch 9/50
154/154 - 1s - 8ms/step - loss: 0.0011 - val_loss: 9.1496e-04
Epoch 10/50
154/154 - 1s - 8ms/step - loss: 9.5297e-04 - val_loss: 8.0290e-04
Epoch 11/50
154/154 - 1s - 8ms/step - loss: 8.4454e-04 - val_loss: 7.3696e-04
Epoch 12/50
154/154 - 1s - 8ms/step - loss: 7.9788e-04 - val_loss: 8.0862e-04
Epoch 13/50
154/154 - 1s - 8ms/step - loss: 7.8789e-04 - val_los

<keras.src.callbacks.history.History at 0x7f02200d21d0>

In [3]:
model.save("forecast_model.h5")

