In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

# Load the dataset
data = pd.read_csv('/content/sri_lanka_precipitation_cleaned.csv')

# Selecting relevant features
features = ['city', 'temperature_2m_max', 'temperature_2m_min', 'temperature_2m_mean',
            'apparent_temperature_max', 'apparent_temperature_min', 'apparent_temperature_mean',
            'shortwave_radiation_sum', 'windspeed_10m_max', 'windgusts_10m_max',
            'winddirection_10m_dominant', 'et0_fao_evapotranspiration', 'latitude', 'longitude', 'elevation',
            'precipitation_hours', 'weathercode', 'year', 'month']
target = 'precipitation_sum'

# Splitting the dataset into features and target
X = data[features]
y = data[target]

# Define a column transformer for One-Hot Encoding
column_transformer = ColumnTransformer([
    ('city_encoder', OneHotEncoder(), ['city']),
    ('weathercode_encoder', OneHotEncoder(), ['weathercode'])
], remainder='passthrough')

# Apply the transformations
X_transformed = column_transformer.fit_transform(X)

# Scaling the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_transformed)

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Training the Random Forest model
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Making predictions and evaluating the model
predictions = rf.predict(X_test)
mae = mean_absolute_error(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, predictions)



# Print evaluation metrics
print(f"Mean Absolute Error: {mae}")
print(f"Mean Squared Error: {mse}")
print(f"Root Mean Squared Error: {rmse}")
print(f"R^2 Score: {r2}")

# Load the forecast data
forecast_data = pd.read_csv('/content/modified_precipitation_data.csv')

# Select the same features as used for training
X_forecast = forecast_data[features]

# Apply the same transformations as the training data
X_forecast_transformed = column_transformer.transform(X_forecast)
X_forecast_scaled = scaler.transform(X_forecast_transformed)

# Predict using the RandomForestRegressor model
forecast_predictions = rf.predict(X_forecast_scaled)

# Add predictions to the forecast data
forecast_data['predicted_precipitation'] = forecast_predictions

# If 'time' column exists in forecast_data, you can organize predictions by day and city
# daily_precipitation_per_city = forecast_data.pivot(index='city', columns='time', values='predicted_precipitation')


Mean Absolute Error: 0.6018018888096935
Mean Squared Error: 2.7779525132929437
Root Mean Squared Error: 1.6667190864968648
R^2 Score: 0.943466027487109


ValueError: ignored

# Average over the Predicted Dataset (20% of Dataset)



In [None]:
daily_precipitation_per_city = forecast_data.pivot(index='city', columns='time', values='predicted_precipitation')


In [None]:
# data_with_city = data.copy()  # Ensure 'city' is a column in this DataFrame

# # Perform the same split as you did initially
# _, X_test_original = train_test_split(data_with_city, test_size=0.2, random_state=42)

# # Ensure the length of 'predictions' matches the number of rows in 'X_test_original'
# assert len(predictions) == len(X_test_original)

# # Add the predictions to the test dataset
# X_test_original['predicted_precipitation'] = predictions

# # Now, group the data by 'city' and display the predictions
# grouped_predictions = X_test_original.groupby('city')['predicted_precipitation']

# # Print the grouped predictions
# print(grouped_predictions)


data_with_city = data.copy()  # Ensure 'city' is a column in this DataFrame

# Perform the same split as you did initially
_, X_test_original = train_test_split(data_with_city, test_size=0.2, random_state=42)

# Ensure the length of 'predictions' matches the number of rows in 'X_test_original'
assert len(predictions) == len(X_test_original)

# Add the predictions to the test dataset
X_test_original['predicted_precipitation'] = predictions

# Now, group the data by 'city' and display the predictions
grouped_predictions = X_test_original.groupby('city')['predicted_precipitation'].mean()

# Print the grouped predictions
print(grouped_predictions)




In [None]:
np.set_printoptions(threshold=np.inf)

# Now print the predictions
print(predictions)

# Predicted Precipitation for Next 10 Days (based on data from next 10 days excluding precipitation)
::

In [None]:
print(daily_precipitation_per_city)