# Import Required Libraries
Import the necessary libraries, including pandas, numpy, matplotlib, seaborn, and scikit-learn.

In [1]:
# Import the necessary libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load and Explore the Dataset
Load the historical climate data into a pandas DataFrame and perform exploratory data analysis (EDA) to understand the data.

In [None]:
# Load and Explore the Dataset

# CSV with the historical climate records, resolved relative to the notebook's
# working directory. The previous literal '.csv' had no file stem.
DATA_PATH = 'historical_climate_data.csv'

# Load the historical climate data into a pandas DataFrame
data = pd.read_csv(DATA_PATH)

# Only the last expression of a cell is rendered automatically, so intermediate
# tables are shown explicitly with display() (provided by the Jupyter kernel).
display(data.head())

# Column dtypes and non-null counts (info() prints its report itself)
data.info()

# Summary statistics of the dataset
display(data.describe())

# Check for missing values in the dataset
missing_values = data.isnull().sum()
print("Missing values in each column:\n", missing_values)

# One histogram (with KDE overlay) per climate variable:
# (column name, figure title, x-axis label)
distribution_plots = [
    ('temperature_anomaly', 'Distribution of Temperature Anomalies', 'Temperature Anomaly'),
    ('precipitation', 'Distribution of Precipitation Patterns', 'Precipitation'),
    ('sea_level_change', 'Distribution of Sea Level Changes', 'Sea Level Change'),
]
for column, title, xlabel in distribution_plots:
    plt.figure(figsize=(10, 6))
    sns.histplot(data[column], kde=True)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel('Frequency')
    plt.show()

# Correlation matrix over the numeric columns only: corr() cannot handle
# non-numeric columns (and raises on them in recent pandas versions).
plt.figure(figsize=(12, 8))
correlation_matrix = data.select_dtypes(include='number').corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', linewidths=0.5)
plt.title('Correlation Matrix')
plt.show()

FileNotFoundError: [Errno 2] No such file or directory: 'historical_climate_data.csv'

# Preprocess the Data
Handle missing values, normalize/standardize the data, and encode categorical variables if necessary.

In [None]:
# Preprocess the Data

# Columns that are standardized below
SCALED_COLUMNS = ['temperature_anomaly', 'precipitation', 'sea_level_change']

# Fill missing values with the mean of the respective column.
# numeric_only=True restricts the means to numeric columns, so a non-numeric
# column does not make mean() raise; such columns are left as they are.
data = data.fillna(data.mean(numeric_only=True))

# Standardize the selected columns with scikit-learn's StandardScaler
scaler = StandardScaler()
scaled_values = scaler.fit_transform(data[SCALED_COLUMNS])

# Wrap the scaled array in a DataFrame that reuses data's index, so each row of
# scaled_data stays aligned with the corresponding row of data.
scaled_data = pd.DataFrame(scaled_values, columns=SCALED_COLUMNS, index=data.index)

# Encode categorical variables if necessary (assuming there are no categorical variables in this dataset)
# If there were categorical variables, we would use pd.get_dummies or similar methods to encode them

# Display the first few rows of the preprocessed dataset
scaled_data.head()

# Feature Engineering
Create new features from the existing data that may help improve the model's performance.

In [None]:
# Feature Engineering

# Source column -> prefix used in the names of its engineered columns
FEATURE_PREFIXES = {
    'temperature_anomaly': 'temp_anomaly',
    'precipitation': 'precipitation',
    'sea_level_change': 'sea_level_change',
}
ROLLING_WINDOW = 12  # number of rows averaged by each rolling mean

# Rolling mean of the ROLLING_WINDOW rows *before* the current one. The series
# is shifted by one row first so the window never contains the current value,
# which the modelling cells use as the prediction target.
rolling_features = {
    f'{prefix}_rolling_mean': data[column].shift(1).rolling(window=ROLLING_WINDOW).mean()
    for column, prefix in FEATURE_PREFIXES.items()
}

# Lag features: the value of the previous row (shifted by 1 month)
lag_features = {
    f'{prefix}_lag_1': data[column].shift(1)
    for column, prefix in FEATURE_PREFIXES.items()
}

# Rolling-mean columns are added first, then the lag columns
engineered_features = {**rolling_features, **lag_features}

# Add the new columns and drop only the rows whose engineered values are NaN
# (the leading rows without enough history). Restricting dropna to these
# columns keeps rows that merely have a gap in some unrelated column.
# NOTE: re-running this cell on its own output trims the leading rows again;
# re-run from the loading cell to start from the full dataset.
data = data.assign(**engineered_features).dropna(subset=list(engineered_features))

# Display the first few rows of the dataset with new features
data.head()

# Split the Data into Training and Testing Sets
Split the dataset into training and testing sets to evaluate the model's performance on unseen data.

In [None]:
# Split the Data into Training and Testing Sets

TARGET_COLUMNS = ['temperature_anomaly', 'precipitation', 'sea_level_change']
ENGINEERED_COLUMNS = [
    'temp_anomaly_rolling_mean', 'precipitation_rolling_mean', 'sea_level_change_rolling_mean',
    'temp_anomaly_lag_1', 'precipitation_lag_1', 'sea_level_change_lag_1',
]

# Full feature frame (same columns and order as before) and the three targets
X = data[TARGET_COLUMNS + ENGINEERED_COLUMNS]
y_temp_anomaly = data['temperature_anomaly']
y_precipitation = data['precipitation']
y_sea_level_change = data['sea_level_change']

# A model must not receive the very column it is asked to predict: with the
# target among the features the fit is trivial and the test scores are
# meaningless. Each target therefore gets its own feature frame without it.
# NOTE(review): if the *_rolling_mean window includes the current row, those
# columns still carry part of the target -- confirm in the feature cell.
X_temp = X.drop(columns=['temperature_anomaly'])
X_precip = X.drop(columns=['precipitation'])
X_sea = X.drop(columns=['sea_level_change'])

# Identical test_size and random_state give all three splits the same rows.
# NOTE(review): train_test_split shuffles by default; if the rows are
# time-ordered, a chronological split (shuffle=False) may be more appropriate.

# Split the data into training and testing sets for temperature anomaly prediction
X_train_temp, X_test_temp, y_train_temp, y_test_temp = train_test_split(
    X_temp, y_temp_anomaly, test_size=0.2, random_state=42)

# Split the data into training and testing sets for precipitation prediction
X_train_precip, X_test_precip, y_train_precip, y_test_precip = train_test_split(
    X_precip, y_precipitation, test_size=0.2, random_state=42)

# Split the data into training and testing sets for sea level change prediction
X_train_sea, X_test_sea, y_train_sea, y_test_sea = train_test_split(
    X_sea, y_sea_level_change, test_size=0.2, random_state=42)

# Display the shapes of the training and testing sets
print("Temperature Anomaly - Training set shape:", X_train_temp.shape, y_train_temp.shape)
print("Temperature Anomaly - Testing set shape:", X_test_temp.shape, y_test_temp.shape)
print("Precipitation - Training set shape:", X_train_precip.shape, y_train_precip.shape)
print("Precipitation - Testing set shape:", X_test_precip.shape, y_test_precip.shape)
print("Sea Level Change - Training set shape:", X_train_sea.shape, y_train_sea.shape)
print("Sea Level Change - Testing set shape:", X_test_sea.shape, y_test_sea.shape)

# Train Machine Learning Models
Train different machine learning models (e.g., Linear Regression, Random Forest, Gradient Boosting) on the training data.

In [None]:
# Train Machine Learning Models


def _fit_and_report(model, X_train, y_train, X_test, y_test, label):
    """Fit ``model`` on the training split and report its test-set MSE and R2.

    Parameters
    ----------
    model : estimator
        Unfitted regressor exposing ``fit`` and ``predict``; it is fitted in place.
    X_train, y_train
        Training features and target passed to ``model.fit``.
    X_test, y_test
        Held-out features and target used for the reported scores.
    label : str
        Prefix of the two printed lines, e.g. ``"Random Forest - Precipitation"``.

    Returns
    -------
    tuple
        ``(y_pred, mse, r2)``: the test-set predictions, their mean squared
        error and their R2 score.
    """
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"{label} - MSE:", mse)
    print(f"{label} - R2 Score:", r2)
    return y_pred, mse, r2


# Later cells refer to the fitted models, predictions and scores by name, so
# every result is bound to its own explicit variable.

# --- Temperature anomaly ---
lr_temp = LinearRegression()
y_pred_temp_lr, mse_temp_lr, r2_temp_lr = _fit_and_report(
    lr_temp, X_train_temp, y_train_temp, X_test_temp, y_test_temp,
    "Linear Regression - Temperature Anomaly")

rf_temp = RandomForestRegressor(random_state=42)
y_pred_temp_rf, mse_temp_rf, r2_temp_rf = _fit_and_report(
    rf_temp, X_train_temp, y_train_temp, X_test_temp, y_test_temp,
    "Random Forest - Temperature Anomaly")

gb_temp = GradientBoostingRegressor(random_state=42)
y_pred_temp_gb, mse_temp_gb, r2_temp_gb = _fit_and_report(
    gb_temp, X_train_temp, y_train_temp, X_test_temp, y_test_temp,
    "Gradient Boosting - Temperature Anomaly")

# --- Precipitation ---
lr_precip = LinearRegression()
y_pred_precip_lr, mse_precip_lr, r2_precip_lr = _fit_and_report(
    lr_precip, X_train_precip, y_train_precip, X_test_precip, y_test_precip,
    "Linear Regression - Precipitation")

rf_precip = RandomForestRegressor(random_state=42)
y_pred_precip_rf, mse_precip_rf, r2_precip_rf = _fit_and_report(
    rf_precip, X_train_precip, y_train_precip, X_test_precip, y_test_precip,
    "Random Forest - Precipitation")

gb_precip = GradientBoostingRegressor(random_state=42)
y_pred_precip_gb, mse_precip_gb, r2_precip_gb = _fit_and_report(
    gb_precip, X_train_precip, y_train_precip, X_test_precip, y_test_precip,
    "Gradient Boosting - Precipitation")

# --- Sea level change ---
lr_sea = LinearRegression()
y_pred_sea_lr, mse_sea_lr, r2_sea_lr = _fit_and_report(
    lr_sea, X_train_sea, y_train_sea, X_test_sea, y_test_sea,
    "Linear Regression - Sea Level Change")

rf_sea = RandomForestRegressor(random_state=42)
y_pred_sea_rf, mse_sea_rf, r2_sea_rf = _fit_and_report(
    rf_sea, X_train_sea, y_train_sea, X_test_sea, y_test_sea,
    "Random Forest - Sea Level Change")

gb_sea = GradientBoostingRegressor(random_state=42)
y_pred_sea_gb, mse_sea_gb, r2_sea_gb = _fit_and_report(
    gb_sea, X_train_sea, y_train_sea, X_test_sea, y_test_sea,
    "Gradient Boosting - Sea Level Change")

# Evaluate the Models
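Evaluate the performance of the trained models on the testing data using appropriate metrics (e.g., RMSE, MAE, $R^2$ score). MSE and $R^2$ are printed alongside training in the previous section; this section adds the mean absolute error (MAE).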

In [None]:
# Evaluate the Models

# Import additional necessary libraries
from sklearn.metrics import mean_absolute_error

# Mean absolute error on the test split, one value per (target, model) pair.

# Temperature anomaly
mae_temp_lr = mean_absolute_error(y_test_temp, y_pred_temp_lr)
mae_temp_rf = mean_absolute_error(y_test_temp, y_pred_temp_rf)
mae_temp_gb = mean_absolute_error(y_test_temp, y_pred_temp_gb)

# Precipitation
mae_precip_lr = mean_absolute_error(y_test_precip, y_pred_precip_lr)
mae_precip_rf = mean_absolute_error(y_test_precip, y_pred_precip_rf)
mae_precip_gb = mean_absolute_error(y_test_precip, y_pred_precip_gb)

# Sea level change
mae_sea_lr = mean_absolute_error(y_test_sea, y_pred_sea_lr)
mae_sea_rf = mean_absolute_error(y_test_sea, y_pred_sea_rf)
mae_sea_gb = mean_absolute_error(y_test_sea, y_pred_sea_gb)

# Report the scores grouped by target, and by model within each target
mae_report = (
    ("Linear Regression - Temperature Anomaly - MAE:", mae_temp_lr),
    ("Random Forest - Temperature Anomaly - MAE:", mae_temp_rf),
    ("Gradient Boosting - Temperature Anomaly - MAE:", mae_temp_gb),
    ("Linear Regression - Precipitation - MAE:", mae_precip_lr),
    ("Random Forest - Precipitation - MAE:", mae_precip_rf),
    ("Gradient Boosting - Precipitation - MAE:", mae_precip_gb),
    ("Linear Regression - Sea Level Change - MAE:", mae_sea_lr),
    ("Random Forest - Sea Level Change - MAE:", mae_sea_rf),
    ("Gradient Boosting - Sea Level Change - MAE:", mae_sea_gb),
)
for caption, mae_value in mae_report:
    print(caption, mae_value)

# Make Predictions
Use the trained models to make predictions on new data. In this example, the first five rows of each test set stand in for new, unseen observations.

In [None]:
# Make Predictions

# The first five rows of each test split serve as the example "new" inputs
new_data_temp = X_test_temp.head(5)
new_data_precip = X_test_precip.head(5)
new_data_sea = X_test_sea.head(5)

# Temperature anomaly: one prediction array per trained model
predictions_temp_lr, predictions_temp_rf, predictions_temp_gb = (
    fitted_model.predict(new_data_temp) for fitted_model in (lr_temp, rf_temp, gb_temp)
)
print("Linear Regression - Temperature Anomaly Predictions:", predictions_temp_lr)
print("Random Forest - Temperature Anomaly Predictions:", predictions_temp_rf)
print("Gradient Boosting - Temperature Anomaly Predictions:", predictions_temp_gb)

# Precipitation: one prediction array per trained model
predictions_precip_lr, predictions_precip_rf, predictions_precip_gb = (
    fitted_model.predict(new_data_precip) for fitted_model in (lr_precip, rf_precip, gb_precip)
)
print("Linear Regression - Precipitation Predictions:", predictions_precip_lr)
print("Random Forest - Precipitation Predictions:", predictions_precip_rf)
print("Gradient Boosting - Precipitation Predictions:", predictions_precip_gb)

# Sea level change: one prediction array per trained model
predictions_sea_lr, predictions_sea_rf, predictions_sea_gb = (
    fitted_model.predict(new_data_sea) for fitted_model in (lr_sea, rf_sea, gb_sea)
)
print("Linear Regression - Sea Level Change Predictions:", predictions_sea_lr)
print("Random Forest - Sea Level Change Predictions:", predictions_sea_rf)
print("Gradient Boosting - Sea Level Change Predictions:", predictions_sea_gb)

# Visualize the Results
Create visualizations to compare the predicted values with the actual values and to understand the model's performance.

In [None]:
# Visualize the Results


def _plot_predicted_vs_actual(y_true, y_pred, quantity, model_name):
    """Scatter predicted against actual test values with a y = x reference line.

    Parameters
    ----------
    y_true
        Actual test-set values; must provide ``min()`` and ``max()``
        (the pandas Series produced by the split cell do).
    y_pred
        Predictions for the same rows, in the same order as ``y_true``.
    quantity : str
        Name of the predicted quantity, used in the title and axis labels.
    model_name : str
        Name of the model, shown in parentheses in the title.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(y_true, y_pred, color='blue', label='Predicted')
    # Red reference line where predicted == actual. Its two end points are
    # enough; tracing every unsorted test value only retraces the same diagonal.
    bounds = [y_true.min(), y_true.max()]
    ax.plot(bounds, bounds, color='red', label='Actual')
    ax.set_title(f'{quantity} Prediction ({model_name})')
    ax.set_xlabel(f'Actual {quantity}')
    ax.set_ylabel(f'Predicted {quantity}')
    ax.legend()
    plt.show()


# (actual test values, quantity name, [(model name, predictions), ...])
plot_specs = [
    (y_test_temp, 'Temperature Anomaly', [
        ('Linear Regression', y_pred_temp_lr),
        ('Random Forest', y_pred_temp_rf),
        ('Gradient Boosting', y_pred_temp_gb),
    ]),
    (y_test_precip, 'Precipitation', [
        ('Linear Regression', y_pred_precip_lr),
        ('Random Forest', y_pred_precip_rf),
        ('Gradient Boosting', y_pred_precip_gb),
    ]),
    (y_test_sea, 'Sea Level Change', [
        ('Linear Regression', y_pred_sea_lr),
        ('Random Forest', y_pred_sea_rf),
        ('Gradient Boosting', y_pred_sea_gb),
    ]),
]

# One figure per (quantity, model) pair, grouped by quantity
for y_true, quantity, model_predictions in plot_specs:
    for model_name, y_pred in model_predictions:
        _plot_predicted_vs_actual(y_true, y_pred, quantity, model_name)