In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

# Read the cleaned historical precipitation dataset.
data = pd.read_csv('/content/sri_lanka_precipitation_cleaned.csv')

# Column the model learns to predict.
target = 'precipitation_sum'

# Model inputs, one per line. The order is kept stable because later
# cells re-select these same columns from the forecast file.
features = [
    # categorical location label
    'city',
    # air temperature
    'temperature_2m_max',
    'temperature_2m_min',
    'temperature_2m_mean',
    # apparent temperature
    'apparent_temperature_max',
    'apparent_temperature_min',
    'apparent_temperature_mean',
    # radiation, wind and evapotranspiration
    'shortwave_radiation_sum',
    'windspeed_10m_max',
    'windgusts_10m_max',
    'winddirection_10m_dominant',
    'et0_fao_evapotranspiration',
    # station position
    'latitude',
    'longitude',
    'elevation',
    # rain duration and weather category code
    'precipitation_hours',
    'weathercode',
    # calendar columns
    'year',
    'month',
]

# Feature matrix and target vector.
X, y = data[features], data[target]

# Preprocessing: one-hot encode the two categorical columns and pass every
# other feature through unchanged.
# - handle_unknown='ignore': a city/weathercode that was not present when the
#   encoder was fitted is encoded as all zeros instead of raising ValueError
#   in transform() (e.g. on the forecast file).
# - sparse_threshold=0: always return a dense array, because StandardScaler
#   with mean centering cannot operate on a sparse matrix.
column_transformer = ColumnTransformer([
    ('city_encoder', OneHotEncoder(handle_unknown='ignore'), ['city']),
    ('weathercode_encoder', OneHotEncoder(handle_unknown='ignore'), ['weathercode'])
], remainder='passthrough', sparse_threshold=0)

scaler = StandardScaler()

# Split the raw rows BEFORE fitting any preprocessing so that no statistic of
# the held-out rows (encoder categories, scaler mean/variance) leaks into
# training. The row selection is driven by the row count, test_size and
# random_state, so the same rows end up in the test set as when splitting an
# already-transformed array of the same length.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the encoder and scaler on the training rows only, then apply the
# fitted transformations to the test rows.
X_train = scaler.fit_transform(column_transformer.fit_transform(X_train_raw))
X_test = scaler.transform(column_transformer.transform(X_test_raw))

# Full-dataset views kept under their original names for any later cell that
# uses them; they are produced with the train-fitted transformers.
X_transformed = column_transformer.transform(X)
X_scaled = scaler.transform(X_transformed)

# Fit a 100-tree random forest with a fixed seed (fit() returns the estimator).
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows.
predictions = rf.predict(X_test)

# Error metrics on the test split; RMSE is derived from MSE.
mae = mean_absolute_error(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, predictions)

# Report each metric on its own line.
for metric_label, metric_value in (
    ("Mean Absolute Error", mae),
    ("Mean Squared Error", mse),
    ("Root Mean Squared Error", rmse),
    ("R^2 Score", r2),
):
    print(f"{metric_label}: {metric_value}")

# Read the forecast inputs, parse the timestamp column, and derive the
# 'year' and 'month' columns that are part of the feature list.
forecast_data = (
    pd.read_csv('/content/last_10_days_weather_data.csv')
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
    .assign(
        year=lambda frame: frame['time'].dt.year,
        month=lambda frame: frame['time'].dt.month,
    )
)

# Same feature columns as used for training.
X_forecast = forecast_data.loc[:, features]

# Re-use the already-fitted encoder and scaler (transform only, no refit).
X_forecast_transformed = column_transformer.transform(X_forecast)
X_forecast_scaled = scaler.transform(X_forecast_transformed)

# Run the trained random forest on the forecast rows.
forecast_predictions = rf.predict(X_forecast_scaled)

# Attach the model output as a new column next to the inputs.
forecast_data = forecast_data.assign(predicted_precipitation=forecast_predictions)


Mean Absolute Error: 0.7875217420845223
Mean Squared Error: 9.230744168025547
Root Mean Squared Error: 3.038213976668784
R^2 Score: 0.9116852343362055


# Average Predicted Precipitation per City over the Test Set (20% of the Dataset)



In [5]:
# Reshape the long forecast table to wide form: one row per city, one column
# per timestamp, cells holding the predicted precipitation.
daily_precipitation_per_city = (
    forecast_data
    .set_index(['city', 'time'])['predicted_precipitation']
    .unstack('time')
)


In [6]:
# Work on a copy so the loaded dataset itself is left untouched.
data_with_city = data.copy()

# Re-create the test partition on the full frame (all original columns),
# using the same test_size and random_state as the modelling split.
_, X_test_original = train_test_split(data_with_city, test_size=0.2, random_state=42)

# The predictions are a plain array, so they are attached by position. Check
# not only that the lengths agree but also that this partition contains the
# same rows, in the same order, as the targets the model was evaluated on.
assert len(predictions) == len(X_test_original), \
    "number of predictions does not match the re-created test split"
assert X_test_original.index.equals(y_test.index), \
    "re-created test split is not row-aligned with the evaluated test set"

# Build a new frame with the prediction column instead of writing into the
# object returned by the split (avoids SettingWithCopyWarning).
X_test_original = X_test_original.assign(predicted_precipitation=predictions)

# Mean predicted precipitation per city over the test rows.
grouped_predictions = X_test_original.groupby('city')['predicted_precipitation'].mean()

# Print the per-city averages
print(grouped_predictions)




city
Athurugiriya                 6.743186
Badulla                      5.640986
Bentota                      6.952230
Colombo                      7.530518
Galle                        5.472978
Gampaha                      6.926449
Hambantota                   3.265523
Hatton                       7.615066
Jaffna                       3.030826
Kalmunai                     3.830339
Kalutara                     6.910860
Kandy                        6.292459
Kesbewa                      7.441196
Kolonnawa                    7.139093
Kurunegala                   5.384287
Mabole                       6.892312
Maharagama                   6.675352
Mannar                       3.435466
Matale                       5.638719
Matara                       5.894616
Moratuwa                     6.966922
Mount Lavinia                6.907740
Negombo                      6.878022
Oruwala                      7.033505
Pothuhera                    5.283162
Puttalam                     5.207516
Ratnapu

In [7]:
# Print every test-set prediction without numpy's "..." summarisation.
# The threshold override is scoped to this block so the global numpy print
# settings are not permanently changed for the rest of the notebook.
with np.printoptions(threshold=np.inf):
    print(predictions)

[3.36000e-01 0.00000e+00 4.87800e+00 3.39900e+00 3.46700e+00 0.00000e+00
 1.17690e+01 1.88140e+01 1.46890e+01 0.00000e+00 2.98000e-01 4.12300e+00
 1.25560e+01 7.92000e-01 0.00000e+00 9.26300e+00 1.66300e+01 1.50440e+01
 8.58500e+00 3.51900e+00 6.29000e+00 0.00000e+00 0.00000e+00 4.47400e+00
 7.30000e-01 2.83500e+00 0.00000e+00 2.60700e+00 1.16700e+00 3.21000e-01
 3.81900e+00 0.00000e+00 0.00000e+00 1.09780e+01 1.93260e+01 4.18900e+00
 6.20000e+00 0.00000e+00 1.79200e+00 0.00000e+00 9.72800e+00 0.00000e+00
 1.38360e+01 2.08600e+00 9.65000e-01 3.91000e-01 8.90000e-01 8.71500e+00
 0.00000e+00 0.00000e+00 0.00000e+00 0.00000e+00 6.20800e+00 1.08510e+01
 0.00000e+00 2.07250e+01 1.02000e-01 2.22200e+00 5.73000e-01 7.01200e+00
 1.33000e+00 2.02550e+01 2.09000e-01 1.91000e+00 4.43000e-01 0.00000e+00
 3.65000e-01 8.48800e+00 2.23000e-01 6.49500e+00 0.00000e+00 0.00000e+00
 1.48430e+01 2.48900e+00 0.00000e+00 4.68000e-01 8.11800e+00 4.40800e+00
 5.23100e+00 0.00000e+00 6.00000e-01 0.00000e+00 2.

# Predicted Precipitation for the Next 10 Days (based on the weather data for those 10 days, excluding precipitation)

In [8]:
# Show the wide table of predicted precipitation (rows: city, columns: time)
# that was produced by pivoting forecast_data.
print(daily_precipitation_per_city)

time                       2023-06-08  2023-06-09  2023-06-10  2023-06-11  \
city                                                                        
Athurugiriya                    9.204      10.324       7.513       8.459   
Badulla                         0.189       0.182       1.119       0.235   
Bentota                         9.265      12.707       8.171      17.354   
Colombo                         8.856      14.835       8.330      20.163   
Galle                           7.103       4.725       3.849       4.535   
Gampaha                         9.017      10.545       7.464       8.452   
Hambantota                      0.155       0.140       0.000       0.179   
Hatton                          2.479       4.752       2.498       2.566   
Jaffna                          0.164       0.000       0.000       0.000   
Kalmunai                        0.000       1.103       0.000       0.000   
Kalutara                        9.422      13.582       8.241      17.233   