In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import PoissonRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
import geopandas as gpd
from shapely.geometry import Point
from shapely.ops import nearest_points

In [113]:
%load_ext kedro.ipython

The kedro.ipython extension is already loaded. To reload it, use:
  %reload_ext kedro.ipython


In [114]:
df_incidents = catalog.load('preprocessed_incidents')
df_incidents.head()

Unnamed: 0,FIRE_STATION_ID,CREATION_DATE,INCIDENT_CATEGORY,AREA_ID,INCIDENT_NBR,INCIDENT_TYPE,LATITUDE,LONGITUDE,UNITS,AREA,CREATION_TIME
0,18,2005-01-01,Sans incendie,21,1,Inondation,45.620274,-73.619956,1,Montréal-Nord,00:03:47
1,10,2005-01-01,Alarmes-incendies,18,2,Alarme privé ou locale,45.494087,-73.582587,7,Ville-Marie / Parc Jean-Drapeau / Centre-Sud,00:03:56
2,72,2005-01-01,Alarmes-incendies,13,3,Alarme privé ou locale,45.484406,-73.693038,6,St-Laurent,00:03:57
3,13,2005-01-01,Sans incendie,23,4,Inondation,45.541383,-73.545944,1,Mercier / Hochelaga-Maisonneuve,00:05:01
4,18,2005-01-01,Sans incendie,21,5,Inondation,45.611304,-73.63244,1,Montréal-Nord,00:06:20


In [115]:
# Select only the fire incidents
df_fires = df_incidents[(df_incidents['INCIDENT_CATEGORY'] == 'Autres incendies') | (df_incidents['INCIDENT_CATEGORY'] == 'Incendies de bâtiments')]
df_fires.head()

Unnamed: 0,FIRE_STATION_ID,CREATION_DATE,INCIDENT_CATEGORY,AREA_ID,INCIDENT_NBR,INCIDENT_TYPE,LATITUDE,LONGITUDE,UNITS,AREA,CREATION_TIME
14,22,2005-01-01,Autres incendies,20,15,Déchets en feu,45.585124,-73.566324,4,St-Léonard,00:27:42
90,47,2005-01-01,Autres incendies,22,92,Déchets en feu,45.534898,-73.594995,1,Rosemont / Petite-Patrie,05:21:26
127,43,2005-01-01,Autres incendies,21,129,Feu de cheminée *,45.570738,-73.650221,4,Ahuntsic / Cartierville,13:17:42
133,15,2005-01-01,Incendies de bâtiments,15,135,10-22 avec feu,45.474804,-73.561068,7,Sud-Ouest,14:24:03
137,41,2005-01-01,Incendies de bâtiments,20,139,10-22 avec feu,45.525359,-73.621536,8,Villeray / St-Michel / Parc Extension,14:31:47


In [87]:
# Define the grid size in degrees (approximately 500 meters at Montreal's latitude)
grid_size = 0.0045

# Compute grid coordinates for each incident
df_fires.loc[:,'grid_lat'] = np.floor(df_fires['LATITUDE'] / grid_size) * grid_size
df_fires.loc[:,'grid_long'] = np.floor(df_fires['LONGITUDE'] / grid_size) * grid_size

# Group data by grid coordinates to count the number of incidents per grid cell
grid_fire_counts = df_fires.groupby(['grid_lat', 'grid_long']).size().reset_index(name='count')

In [88]:
# Prepare the data
X = grid_fire_counts[['grid_lat', 'grid_long']]
y = grid_fire_counts['count']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Poisson regression model
poisson_model = PoissonRegressor()
poisson_model.fit(X_train, y_train)

# Predict on the testing set
y_pred = poisson_model.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, y_pred)
rmse = (mean_squared_error(y_test, y_pred))**0.5

print("Mean Absolute Error (MAE):", mae)
print("Root Mean Squared Error (RMSE):", rmse)

Mean Absolute Error (MAE): 25.238569997115313
Root Mean Squared Error (RMSE): 37.03099590954711


In [89]:
# Convert to datetime and handle errors
df_fires['CREATION_DATE'] = pd.to_datetime(df_fires['CREATION_DATE'], errors='coerce')

# Verify the conversion
print(df_fires['CREATION_DATE'].dtype)

datetime64[ns]


In [90]:
# Load and prepare data
df_fires.loc[:, 'CREATION_DATE'] = pd.to_datetime(df_fires['CREATION_DATE'])
df_fires.loc[:, 'day_of_week'] = df_fires['CREATION_DATE'].dt.dayofweek
df_fires.loc[:, 'month'] = df_fires['CREATION_DATE'].dt.month

# Group data by grid and temporal features
group_columns = ['grid_lat', 'grid_long', 'day_of_week', 'month']
grid_fire_counts = df_fires.groupby(group_columns).size().reset_index(name='count')

# Prepare the data
X = grid_fire_counts.drop(columns=['count'])
y = grid_fire_counts['count']

# Define categorical features for one-hot encoding
categorical_features = ['day_of_week', 'month']
column_transformer = ColumnTransformer([
    ('cat', OneHotEncoder(), categorical_features)
], remainder='passthrough')

# Create a pipeline with transformation and regression model
pipeline = Pipeline(steps=[
    ('preprocessor', column_transformer),
    ('regressor', PoissonRegressor())
])

In [91]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
pipeline.fit(X_train, y_train)

# Predict and evaluate
y_pred = pipeline.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
rmse = (mean_squared_error(y_test, y_pred))**0.5

print("Enhanced Model - Mean Absolute Error (MAE):", mae)
print("Enhanced Model - Root Mean Squared Error (RMSE):", rmse)

Enhanced Model - Mean Absolute Error (MAE): 0.6816190644165078
Enhanced Model - Root Mean Squared Error (RMSE): 0.9241021771086967


In [45]:
# Load and prepare data (assuming it's already loaded as 'fire_data')
df_fires.loc[:, 'CREATION_DATE'] = pd.to_datetime(df_fires['CREATION_DATE'])
df_fires.loc[:, 'day_of_week'] = df_fires['CREATION_DATE'].dt.dayofweek
df_fires.loc[:, 'month'] = df_fires['CREATION_DATE'].dt.month

# Group data by grid and temporal features
group_columns = ['grid_lat', 'grid_long', 'day_of_week', 'month']
grid_fire_counts = df_fires.groupby(group_columns).size().reset_index(name='count')

# Adding synthetic features for demonstration
grid_fire_counts['simulated_urban_density'] = np.random.rand(len(grid_fire_counts))  # Simulated urban density
grid_fire_counts['simulated_weather_index'] = np.random.rand(len(grid_fire_counts))  # Simulated weather condition

# Prepare the data
X = grid_fire_counts.drop(columns=['count'])
y = grid_fire_counts['count']

# Define categorical features for one-hot encoding
categorical_features = ['day_of_week', 'month']
column_transformer = ColumnTransformer([
    ('cat', OneHotEncoder(), categorical_features)
], remainder='passthrough')

# Create a pipeline with transformation and Random Forest model
pipeline_rf = Pipeline(steps=[
    ('preprocessor', column_transformer),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])


In [46]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
pipeline_rf.fit(X_train, y_train)

# Predict and evaluate
y_pred_rf = pipeline_rf.predict(X_test)
mae_rf = mean_absolute_error(y_test, y_pred_rf)
rmse_rf = np.sqrt(mean_squared_error(y_test, y_pred_rf))

print("Random Forest Model - Mean Absolute Error (MAE):", mae_rf)
print("Random Forest Model - Root Mean Squared Error (RMSE):", rmse_rf)

Random Forest Model - Mean Absolute Error (MAE): 0.5975838758137206
Random Forest Model - Root Mean Squared Error (RMSE): 0.8512077342872101


In [116]:
# Generate synthetic weather data
periods = 7000

weather_data = pd.DataFrame({
    'date': pd.date_range(start='2005-01-01', periods=periods, freq='D'),
    'max_temp': np.random.uniform(-10, 30, periods),
    'min_temp': np.random.uniform(-15, 25, periods),
    'precipitation': np.random.uniform(0, 20, periods),
    'wind_speed': np.random.uniform(0, 15, periods)
})

# Generate synthetic fire station data
fire_stations = pd.DataFrame({
    'LATITUDE': np.random.uniform(45.4, 45.7, 10),
    'LONGITUDE': np.random.uniform(-73.9, -73.5, 10)
})

# Generate synthetic land use data
land_use = pd.DataFrame({
    'LATITUDE': np.random.uniform(45.4, 45.7, 50000),
    'LONGITUDE': np.random.uniform(-73.9, -73.5, 50000),
    'land_use_type': np.random.choice(['residential', 'commercial', 'industrial', 'park'], 50000)
})

# Convert to GeoDataFrames
df_fires.loc[:,'geometry'] = [Point(xy) for xy in zip(df_fires.LONGITUDE, df_fires.LATITUDE)]
fire_data_gdf = gpd.GeoDataFrame(df_fires, geometry='geometry')

fire_stations['geometry'] = [Point(xy) for xy in zip(fire_stations.LONGITUDE, fire_stations.LATITUDE)]
fire_stations_gdf = gpd.GeoDataFrame(fire_stations, geometry='geometry')

land_use['geometry'] = [Point(xy) for xy in zip(land_use.LONGITUDE, land_use.LATITUDE)]
land_use_gdf = gpd.GeoDataFrame(land_use, geometry='geometry')

In [117]:
# Define the grid size in degrees (approximately 500 meters at Montreal's latitude)
grid_size = 0.0045

# Ensure datetime conversion and clean data
df_fires['CREATION_DATE'] = pd.to_datetime(df_fires['CREATION_DATE'], errors='coerce')
df_fires = df_fires.dropna(subset=['CREATION_DATE'])
df_fires['date'] = df_fires['CREATION_DATE'].dt.date
weather_data['date'] = pd.to_datetime(weather_data['date']).dt.date

# Merge fire data with weather data
df_fires = pd.merge(df_fires, weather_data, on='date', how='left')

# Calculate proximity to nearest fire station
fire_data_gdf = gpd.GeoDataFrame(df_fires, geometry=gpd.points_from_xy(df_fires.LONGITUDE, df_fires.LATITUDE))
fire_stations_gdf = gpd.GeoDataFrame(fire_stations, geometry=gpd.points_from_xy(fire_stations.LONGITUDE, fire_stations.LATITUDE))
nearest_station = fire_data_gdf.geometry.apply(lambda x: nearest_points(x, fire_stations_gdf.unary_union)[1])
fire_data_gdf['distance_to_station'] = fire_data_gdf.geometry.distance(nearest_station)

# Include land use data
land_use_gdf = gpd.GeoDataFrame(land_use, geometry=gpd.points_from_xy(land_use.LONGITUDE, land_use.LATITUDE))
fire_data_gdf = gpd.sjoin(fire_data_gdf, land_use_gdf[['geometry', 'land_use_type']], how='left', op='intersects')

# Aggregate fire incident counts by grid and temporal features
fire_data_gdf['grid_lat'] = np.floor(fire_data_gdf['LATITUDE'] / grid_size) * grid_size
fire_data_gdf['grid_long'] = np.floor(fire_data_gdf['LONGITUDE'] / grid_size) * grid_size
fire_data_gdf['day_of_week'] = fire_data_gdf['CREATION_DATE'].dt.dayofweek
fire_data_gdf['month'] = fire_data_gdf['CREATION_DATE'].dt.month

group_columns = ['grid_lat', 'grid_long', 'day_of_week', 'month']
grid_fire_counts = fire_data_gdf.groupby(group_columns).size().reset_index(name='count')

# Merge additional features
enhanced_features = fire_data_gdf[['grid_lat', 'grid_long', 'day_of_week', 'month',
                                   'max_temp', 'precipitation', 'distance_to_station', 'land_use_type']].drop_duplicates()
enhanced_fire_data = pd.merge(grid_fire_counts, enhanced_features, on=['grid_lat', 'grid_long', 'day_of_week', 'month'], how='left')

# Aggregate historical fire frequency by grid cell
#historical_frequency = fire_data_gdf.groupby(['grid_lat', 'grid_long']).size().reset_index(name='historical_frequency')
#enhanced_fire_data = pd.merge(enhanced_fire_data, historical_frequency, on=['grid_lat', 'grid_long'], how='left')

In [118]:
fire_data_gdf.head()

Unnamed: 0,FIRE_STATION_ID,CREATION_DATE,INCIDENT_CATEGORY,AREA_ID,INCIDENT_NBR,INCIDENT_TYPE,LATITUDE,LONGITUDE,UNITS,AREA,...,min_temp,precipitation,wind_speed,distance_to_station,index_right,land_use_type,grid_lat,grid_long,day_of_week,month
0,22,2005-01-01,Autres incendies,20,15,Déchets en feu,45.585124,-73.566324,4,St-Léonard,...,8.834197,10.557957,0.487509,0.047241,,,45.585,-73.5705,5,1
1,47,2005-01-01,Autres incendies,22,92,Déchets en feu,45.534898,-73.594995,1,Rosemont / Petite-Patrie,...,8.834197,10.557957,0.487509,0.046224,,,45.531,-73.5975,5,1
2,43,2005-01-01,Autres incendies,21,129,Feu de cheminée *,45.570738,-73.650221,4,Ahuntsic / Cartierville,...,8.834197,10.557957,0.487509,0.063394,,,45.567,-73.6515,5,1
3,15,2005-01-01,Incendies de bâtiments,15,135,10-22 avec feu,45.474804,-73.561068,7,Sud-Ouest,...,8.834197,10.557957,0.487509,0.042741,,,45.4725,-73.5615,5,1
4,41,2005-01-01,Incendies de bâtiments,20,139,10-22 avec feu,45.525359,-73.621536,8,Villeray / St-Michel / Parc Extension,...,8.834197,10.557957,0.487509,0.041492,,,45.522,-73.6245,5,1


In [119]:
print(fire_data_gdf.describe())


       FIRE_STATION_ID                  CREATION_DATE       AREA_ID  \
count     89857.000000                          89857  89857.000000   
mean         38.102563  2013-10-04 22:45:26.011328768      9.718642   
min           3.000000            2005-01-01 00:00:00      0.000000   
25%          20.000000            2009-01-03 00:00:00      4.000000   
50%          37.000000            2013-04-08 00:00:00      8.000000   
75%          55.000000            2018-06-10 00:00:00     14.000000   
max          79.000000            2023-12-31 00:00:00     25.000000   
std          20.491920                            NaN      6.804593   

        INCIDENT_NBR      LATITUDE     LONGITUDE         UNITS      max_temp  \
count   89857.000000  89857.000000  89857.000000  89857.000000  89857.000000   
mean    52254.166164     45.526405    -73.621876      4.269985     10.014235   
min         1.000000     45.402687    -73.983189      1.000000     -9.985120   
25%     28212.000000     45.481329    -7

In [120]:
# Percentage of missing values for each feature
print(fire_data_gdf.isnull().mean() * 100)

FIRE_STATION_ID          0.0
CREATION_DATE            0.0
INCIDENT_CATEGORY        0.0
AREA_ID                  0.0
INCIDENT_NBR             0.0
INCIDENT_TYPE            0.0
LATITUDE                 0.0
LONGITUDE                0.0
UNITS                    0.0
AREA                     0.0
CREATION_TIME            0.0
geometry                 0.0
date                     0.0
max_temp                 0.0
min_temp                 0.0
precipitation            0.0
wind_speed               0.0
distance_to_station      0.0
index_right            100.0
land_use_type          100.0
grid_lat                 0.0
grid_long                0.0
day_of_week              0.0
month                    0.0
dtype: float64


In [111]:
enhanced_fire_data['land_use_type'].unique()

[1;35marray[0m[1m([0m[1m[[0mnan[1m][0m, [33mdtype[0m=[35mobject[0m[1m)[0m

In [121]:
# Prepare the data
X = enhanced_fire_data[['grid_lat', 'grid_long', 'day_of_week', 'month', 'max_temp', 'precipitation', 'distance_to_station', 'land_use_type']]
y = enhanced_fire_data['count']
y = (grid_counts['is_fire'] > 0).astype(int)  # Binary target: whether there was a fire or not


# Define categorical features for one-hot encoding
categorical_features = ['day_of_week', 'month', 'land_use_type']
numerical_features = ['grid_lat', 'grid_long', 'max_temp', 'precipitation', 'distance_to_station']

# Create transformers for numerical and categorical features
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean'))
])

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine transformers into a preprocessor
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Create a pipeline with preprocessing and Random Forest model
pipeline_rf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
pipeline_rf.fit(X_train, y_train)

# Predict and evaluate
y_pred_rf = pipeline_rf.predict(X_test)
mae_rf = mean_absolute_error(y_test, y_pred_rf)
rmse_rf = np.sqrt(mean_squared_error(y_test, y_pred_rf))

print("Random Forest Model - Mean Absolute Error (MAE):", mae_rf)
print("Random Forest Model - Root Mean Squared Error (RMSE):", rmse_rf)

Random Forest Model - Mean Absolute Error (MAE): 0.7118915277075963
Random Forest Model - Root Mean Squared Error (RMSE): 0.9263510028333124


In [122]:
enhanced_fire_data.head()

Unnamed: 0,grid_lat,grid_long,day_of_week,month,count,max_temp,precipitation,distance_to_station,land_use_type
0,45.4005,-73.9575,0,1,1,20.563667,16.810872,0.114917,
1,45.4005,-73.9575,0,11,1,25.328385,17.472852,0.112547,
2,45.4005,-73.9575,1,8,1,20.002359,19.497581,0.114913,
3,45.4005,-73.9575,1,11,2,17.986185,11.069764,0.114917,
4,45.4005,-73.9575,1,11,2,1.990366,19.84652,0.114917,


In [123]:
# Create lagged features for time series analysis
def create_lagged_features(df, group_columns, lag_column, max_lag):
    for lag in range(1, max_lag + 1):
        df[f'{lag_column}_lag_{lag}'] = df.groupby(group_columns)[lag_column].shift(lag)
    return df

# Create lagged features for fire counts and weather conditions
max_lag = 3
lagged_fire_data = create_lagged_features(enhanced_fire_data, ['grid_lat', 'grid_long'], 'count', max_lag)
lagged_fire_data = create_lagged_features(lagged_fire_data, ['grid_lat', 'grid_long'], 'max_temp', max_lag)
lagged_fire_data = create_lagged_features(lagged_fire_data, ['grid_lat', 'grid_long'], 'precipitation', max_lag)

In [124]:
lagged_fire_data.head()

Unnamed: 0,grid_lat,grid_long,day_of_week,month,count,max_temp,precipitation,distance_to_station,land_use_type,count_lag_1,count_lag_2,count_lag_3,max_temp_lag_1,max_temp_lag_2,max_temp_lag_3,precipitation_lag_1,precipitation_lag_2,precipitation_lag_3
0,45.4005,-73.9575,0,1,1,20.563667,16.810872,0.114917,,,,,,,,,,
1,45.4005,-73.9575,0,11,1,25.328385,17.472852,0.112547,,1.0,,,20.563667,,,16.810872,,
2,45.4005,-73.9575,1,8,1,20.002359,19.497581,0.114913,,1.0,1.0,,25.328385,20.563667,,17.472852,16.810872,
3,45.4005,-73.9575,1,11,2,17.986185,11.069764,0.114917,,1.0,1.0,1.0,20.002359,25.328385,20.563667,19.497581,17.472852,16.810872
4,45.4005,-73.9575,1,11,2,1.990366,19.84652,0.114917,,2.0,1.0,1.0,17.986185,20.002359,25.328385,11.069764,19.497581,17.472852


In [125]:
# Create additional temporal features such as rolling averages
def create_rolling_features(df, group_columns, feature_column, window_size):
    df[f'{feature_column}_rolling_mean_{window_size}'] = df.groupby(group_columns)[feature_column].transform(lambda x: x.rolling(window_size, min_periods=1).mean())
    return df

# Create rolling averages for fire counts and weather conditions
window_size = 3
rolling_fire_data = create_rolling_features(lagged_fire_data, ['grid_lat', 'grid_long'], 'count', window_size)
rolling_fire_data = create_rolling_features(rolling_fire_data, ['grid_lat', 'grid_long'], 'max_temp', window_size)
rolling_fire_data = create_rolling_features(rolling_fire_data, ['grid_lat', 'grid_long'], 'precipitation', window_size)

# Drop rows with NaN values generated by lagging and rolling
rolling_fire_data = rolling_fire_data.dropna()


In [126]:
rolling_fire_data.head()

Unnamed: 0,grid_lat,grid_long,day_of_week,month,count,max_temp,precipitation,distance_to_station,land_use_type,count_lag_1,...,count_lag_3,max_temp_lag_1,max_temp_lag_2,max_temp_lag_3,precipitation_lag_1,precipitation_lag_2,precipitation_lag_3,count_rolling_mean_3,max_temp_rolling_mean_3,precipitation_rolling_mean_3


In [57]:
# Prepare the data
X = rolling_fire_data.drop(columns=['count'])
y = rolling_fire_data['count']

# Check for missing values before splitting
print(X.isnull().sum())

# Define categorical features for one-hot encoding
categorical_features = ['day_of_week', 'month', 'land_use_type']
numerical_features = [col for col in X.columns if col not in categorical_features]

# Create transformers for numerical and categorical features
numerical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean'))
])

categorical_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine transformers into a preprocessor
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features)
    ])

# Create a pipeline with preprocessing and Random Forest model
pipeline_rf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42))
])

# Check the number of samples
print("Number of samples:", len(X))

# If there are not enough samples, adjust the lag/rolling window sizes or handle missing values differently
if len(X) > 0:
    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train the model
    pipeline_rf.fit(X_train, y_train)

    # Predict and evaluate
    y_pred_rf = pipeline_rf.predict(X_test)
    mae_rf = mean_absolute_error(y_test, y_pred_rf)
    rmse_rf = np.sqrt(mean_squared_error(y_test, y_pred_rf))

    print("Random Forest Model with Time Series Features - Mean Absolute Error (MAE):", mae_rf)
    print("Random Forest Model with Time Series Features - Root Mean Squared Error (RMSE):", rmse_rf)
else:
    print("Not enough samples to train the model. Please adjust the lag/rolling window sizes or handle missing values differently.")

grid_lat                        0.0
grid_long                       0.0
day_of_week                     0.0
month                           0.0
max_temp                        0.0
precipitation                   0.0
distance_to_station             0.0
land_use_type                   0.0
count_lag_1                     0.0
count_lag_2                     0.0
count_lag_3                     0.0
max_temp_lag_1                  0.0
max_temp_lag_2                  0.0
max_temp_lag_3                  0.0
precipitation_lag_1             0.0
precipitation_lag_2             0.0
precipitation_lag_3             0.0
count_rolling_mean_3            0.0
max_temp_rolling_mean_3         0.0
precipitation_rolling_mean_3    0.0
count_rolling_mean_1            0.0
max_temp_rolling_mean_1         0.0
precipitation_rolling_mean_1    0.0
dtype: float64
Number of samples: 0
Not enough samples to train the model. Please adjust the lag/rolling window sizes or handle missing values differently.
