In [None]:
!pip install rasterio

Collecting rasterio
  Downloading rasterio-1.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting cligj>=0.5 (from rasterio)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Collecting click-plugins (from rasterio)
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl.metadata (6.4 kB)
Downloading rasterio-1.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Installing collected packages: cligj, click-plugins, affine, rasterio
Successfully installed affine-2.4.0 click-plugins-1.1.1 cligj-0.7.2 rasterio-1.4.1


In [None]:
import requests
import os
import rasterio
import geopandas as gpd
from shapely.geometry import mapping
import matplotlib.pyplot as plt
from rasterio.plot import show
from rasterio.warp import calculate_default_transform, reproject, Resampling
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [None]:
# Every yearly grid lives under the same CloudFront prefix; only the product
# tag and the year change from file to file.
_BUDGET_URL_TEMPLATE = (
    'https://dljsq618eotzp.cloudfront.net/oco2-mip-co2budget-yeargrid-v1/'
    'pilot_topdown_{product}_CO2_Budget_grid_v1_{year}.tif'
)
_BUDGET_YEARS = range(2015, 2021)  # 2015 through 2020 inclusive


def _budget_urls(product):
    """Return the six yearly GeoTIFF URLs for one budget product tag."""
    return [
        _BUDGET_URL_TEMPLATE.format(product=product, year=year)
        for year in _BUDGET_YEARS
    ]


# Crop CO₂ Budget Grid URLs
crop_co2_budget_urls = _budget_urls('Crop')

# Fossil Fuel CO₂ Budget Grid URLs
ff_co2_budget_urls = _budget_urls('FF')

# Land Carbon Loss CO₂ Budget Grid URLs
land_carbon_loss_urls = _budget_urls('IS_dC_loss')

In [None]:
# Root folder for everything that gets downloaded
base_dir = 'CO2_Budget_TIF_Files'

# One subfolder per budget category
crop_dir = os.path.join(base_dir, 'Crop_CO2_Budget')
ff_dir = os.path.join(base_dir, 'FF_CO2_Budget')
land_carbon_loss_dir = os.path.join(base_dir, 'Land_Carbon_Loss_CO2_Budget')

# Create the root first, then each category folder; existing folders are left alone.
for directory in (base_dir, crop_dir, ff_dir, land_carbon_loss_dir):
    os.makedirs(directory, exist_ok=True)

# Function to download files
def download_file(url, save_path, timeout=60, chunk_size=1024 * 1024):
    """Stream the resource at ``url`` into the file ``save_path``.

    Parameters
    ----------
    url : str
        Address to fetch with an HTTP GET request.
    save_path : str
        Destination file; it is created or overwritten.
    timeout : float, optional
        Seconds passed to ``requests.get`` as its ``timeout`` so a stalled
        connection cannot hang the notebook indefinitely.
    chunk_size : int, optional
        Number of bytes requested per streamed chunk.

    Raises
    ------
    Whatever ``response.raise_for_status()`` raises for an unsuccessful HTTP
    status. In that case ``save_path`` is not opened at all.
    """
    # The context manager releases the connection even if writing fails.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Check the status before touching the disk so that an HTTP error
        # page is never stored under a .tif name.
        response.raise_for_status()
        with open(save_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:  # skip empty keep-alive chunks
                    file.write(chunk)
    print(f"Downloaded {save_path}")


In [None]:
# Fetch every yearly grid into the folder of its category, in the order
# crop, fossil fuel, land carbon loss.
download_plan = [
    (crop_co2_budget_urls, crop_dir),
    (ff_co2_budget_urls, ff_dir),
    (land_carbon_loss_urls, land_carbon_loss_dir),
]

for urls, target_dir in download_plan:
    for url in urls:
        # Keep the remote file name so the year stays recoverable from it.
        file_name = os.path.basename(url)
        save_path = os.path.join(target_dir, file_name)
        download_file(url, save_path)

Downloaded CO2_Budget_TIF_Files/Crop_CO2_Budget/pilot_topdown_Crop_CO2_Budget_grid_v1_2015.tif
Downloaded CO2_Budget_TIF_Files/Crop_CO2_Budget/pilot_topdown_Crop_CO2_Budget_grid_v1_2016.tif
Downloaded CO2_Budget_TIF_Files/Crop_CO2_Budget/pilot_topdown_Crop_CO2_Budget_grid_v1_2017.tif
Downloaded CO2_Budget_TIF_Files/Crop_CO2_Budget/pilot_topdown_Crop_CO2_Budget_grid_v1_2018.tif
Downloaded CO2_Budget_TIF_Files/Crop_CO2_Budget/pilot_topdown_Crop_CO2_Budget_grid_v1_2019.tif
Downloaded CO2_Budget_TIF_Files/Crop_CO2_Budget/pilot_topdown_Crop_CO2_Budget_grid_v1_2020.tif
Downloaded CO2_Budget_TIF_Files/FF_CO2_Budget/pilot_topdown_FF_CO2_Budget_grid_v1_2015.tif
Downloaded CO2_Budget_TIF_Files/FF_CO2_Budget/pilot_topdown_FF_CO2_Budget_grid_v1_2016.tif
Downloaded CO2_Budget_TIF_Files/FF_CO2_Budget/pilot_topdown_FF_CO2_Budget_grid_v1_2017.tif
Downloaded CO2_Budget_TIF_Files/FF_CO2_Budget/pilot_topdown_FF_CO2_Budget_grid_v1_2018.tif
Downloaded CO2_Budget_TIF_Files/FF_CO2_Budget/pilot_topdown_FF_CO2

# **2. Pre-Processing**

In [None]:
# Function to preprocess GeoTIFF files and extract data
def preprocess_geotiff(file_path, nodata=-9999.0):
    """Flatten band 1 of a yearly GeoTIFF into a long-format DataFrame.

    Parameters
    ----------
    file_path : str
        Path to a GeoTIFF whose file name ends in ``_<year>.<ext>``
        (for example ``..._grid_v1_2015.tif``).
    nodata : float or None, optional
        Fill value to treat as missing when the file does not declare one
        itself. Pass ``None`` to disable this fallback.

    Returns
    -------
    pandas.DataFrame
        One row per pixel, in row-major raster order, with columns
        ``Longitude`` and ``Latitude`` (x/y of the pixel centre in the
        raster's coordinate system), ``CO2`` (float64, NaN where the pixel
        holds the fill value) and ``Year``.

    Raises
    ------
    ValueError
        If the trailing part of the file name is not an integer year.
    """
    with rasterio.open(file_path) as dataset:
        band = dataset.read(1).astype('float64')  # first band, float so NaN fits
        transform = dataset.transform
        declared_nodata = dataset.nodata

    # Replace the fill value by NaN so downstream dropna() can remove it.
    sentinel = nodata if declared_nodata is None else declared_nodata
    if sentinel is not None and not np.isnan(sentinel):
        band[band == sentinel] = np.nan

    # Pixel-centre coordinates from the affine transform:
    #   x = c + a * col + b * row,   y = f + d * col + e * row
    # meshgrid returns (rows, cols) arrays, so ravel() below walks them in the
    # same row-major order as the band values.
    rows, cols = band.shape
    col_centres, row_centres = np.meshgrid(np.arange(cols) + 0.5,
                                           np.arange(rows) + 0.5)
    x_coords = transform.c + transform.a * col_centres + transform.b * row_centres
    y_coords = transform.f + transform.d * col_centres + transform.e * row_centres

    # Parse the year from the file name only, so underscores or dots in the
    # directory part of the path cannot interfere.
    year = int(os.path.basename(file_path).split('_')[-1].split('.')[0])

    return pd.DataFrame({
        'Longitude': x_coords.ravel(),
        'Latitude': y_coords.ravel(),
        'CO2': band.ravel(),
        'Year': year
    })

# Folders holding the downloaded grids, one per category
crop_dir, ff_dir, land_carbon_loss_dir = (
    'CO2_Budget_TIF_Files/Crop_CO2_Budget',
    'CO2_Budget_TIF_Files/FF_CO2_Budget',
    'CO2_Budget_TIF_Files/Land_Carbon_Loss_CO2_Budget',
)

# Start from an empty frame; the per-category cells below append to it
merged_data = pd.DataFrame()

In [None]:
# Process Crop CO₂ Budget Grid
# Read every GeoTIFF in the folder (other directory entries are skipped),
# label it, and append all of them with a single concat instead of
# re-copying the growing frame once per file.
crop_frames = [
    preprocess_geotiff(os.path.join(crop_dir, file_name)).assign(Source='Crop')
    for file_name in sorted(os.listdir(crop_dir))
    if file_name.lower().endswith(('.tif', '.tiff'))
]
merged_data = pd.concat([merged_data, *crop_frames], ignore_index=True)


In [None]:
# Process Fossil Fuel CO₂ Budget Grid
# Read every GeoTIFF in the folder (other directory entries are skipped),
# label it, and append all of them with a single concat instead of
# re-copying the growing frame once per file.
ff_frames = [
    preprocess_geotiff(os.path.join(ff_dir, file_name)).assign(Source='Fossil_Fuel')
    for file_name in sorted(os.listdir(ff_dir))
    if file_name.lower().endswith(('.tif', '.tiff'))
]
merged_data = pd.concat([merged_data, *ff_frames], ignore_index=True)

In [None]:
# Process Land Carbon Loss CO₂ Budget Grid
# Read every GeoTIFF in the folder (other directory entries are skipped),
# label it, and append all of them with a single concat instead of
# re-copying the growing frame once per file.
land_carbon_loss_frames = [
    preprocess_geotiff(os.path.join(land_carbon_loss_dir, file_name)).assign(Source='Land_Carbon_Loss')
    for file_name in sorted(os.listdir(land_carbon_loss_dir))
    if file_name.lower().endswith(('.tif', '.tiff'))
]
merged_data = pd.concat([merged_data, *land_carbon_loss_frames], ignore_index=True)

In [None]:
# Drop any rows with missing CO₂ values.
# Missing pixels can appear as the fill value -9999.0 rather than NaN, in
# which case dropna() alone removes nothing; filter on both.
NODATA_VALUE = -9999.0
has_value = merged_data['CO2'].notna() & (merged_data['CO2'] != NODATA_VALUE)
merged_data = merged_data.loc[has_value]


In [None]:
# Inspect the merged long-format table: one row per pixel, year and source.
merged_data

Unnamed: 0,Longitude,Latitude,CO2,Year,Source
0,-180.0,-90.000000,-9999.0,2015,Crop
1,-180.0,-88.994413,-9999.0,2015,Crop
2,-180.0,-87.988827,-9999.0,2015,Crop
3,-180.0,-86.983240,-9999.0,2015,Crop
4,-180.0,-85.977654,-9999.0,2015,Crop
...,...,...,...,...,...
1166395,180.0,85.977654,-9999.0,2020,Land_Carbon_Loss
1166396,180.0,86.983240,-9999.0,2020,Land_Carbon_Loss
1166397,180.0,87.988827,-9999.0,2020,Land_Carbon_Loss
1166398,180.0,88.994413,-9999.0,2020,Land_Carbon_Loss


In [None]:
# **2. Prepare Data for LSTM**
# Pivot to one row per (pixel, source) with one CO2 column per year
pivot_data = merged_data.pivot_table(
    index=['Longitude', 'Latitude', 'Source'],
    columns='Year',
    values='CO2',
).reset_index()

# Use only the years as columns. The labels may be NumPy integers rather than
# Python ints depending on how pandas builds the column index, so accept both.
years = sorted(col for col in pivot_data.columns if isinstance(col, (int, np.integer)))

# A series with a missing year would push NaN through the scaler and the
# network, so keep only rows that have a value for every year.
pivot_data = pivot_data.dropna(subset=years).reset_index(drop=True)
co2_data = pivot_data[years].values

In [None]:
# Normalize the CO₂ data
# Every column holds the same quantity for a different year, so fit ONE
# min/max over all values instead of one per column. Per-column scaling would
# put each year on its own scale, and a scaler fitted on several columns
# cannot inverse-transform a single column of predictions.
scaler = MinMaxScaler()
co2_scaled = scaler.fit_transform(co2_data.reshape(-1, 1)).reshape(co2_data.shape)

In [None]:
# Reshape the data for LSTM: (samples, time steps, features)
n_steps = 5  # Number of years to look back
if co2_scaled.shape[1] <= n_steps:
    raise ValueError(
        f"Need more than {n_steps} yearly columns to build a training window, "
        f"got {co2_scaled.shape[1]}"
    )

X, y = [], []
for i in range(n_steps, co2_scaled.shape[1]):
    X.append(co2_scaled[:, i - n_steps:i])  # n_steps years before column i, all series
    y.append(co2_scaled[:, i])              # the value in column i, all series

# Each list entry covers every series for one window position. Join them along
# the sample axis (np.array would add a leading "window" axis instead) and add
# the trailing feature axis: X is (samples, n_steps, 1), y is (samples,).
X = np.concatenate(X, axis=0)[..., np.newaxis]
y = np.concatenate(y, axis=0)

---

# **3. Modeling**

In [None]:
# **3. Build and Train LSTM Model**
# Define the LSTM model: one recurrent layer followed by a single linear
# output unit (a regression on the next scaled CO2 value).
# The input shape is declared with an explicit Input object; passing
# `input_shape=` to the first layer triggers a Keras deprecation warning.
model = Sequential([
    tf.keras.Input(shape=(n_steps, X.shape[2])),
    LSTM(100, activation='relu'),
    Dense(1),
])
model.summary()

  super().__init__(**kwargs)


In [None]:
# Compile the model with mean squared error as the loss and as the reported
# metric (this is a regression, so no accuracy is computed)
model.compile(optimizer='adam', loss='mse', metrics=['mean_squared_error'])

# Train the model with a smaller batch size.
# validation_split holds back part of the samples so that `history` also
# records 'val_loss'. NOTE(review): Keras takes the held-out samples from the
# end of X/y before shuffling — confirm that slice is representative.
history = model.fit(X, y, epochs=10, verbose=1, batch_size=8, validation_split=0.2)

Epoch 1/10


In [None]:
# Score the fitted network on the same X and y arrays it was given for training
evaluation = model.evaluate(X, y, verbose=1)
loss, mse = evaluation  # [loss, mean_squared_error], in compile() order
print(f'Mean Squared Error: {mse}')

In [None]:
# **4. Predict CO₂ for Next 5 Years**
# Roll the model forward one year at a time, feeding each prediction back in
# as the newest time step.
n_future_years = 5

# Last n_steps years of every series, in the (samples, time steps, features)
# layout an LSTM consumes.
last_sequence = co2_scaled[:, -n_steps:].reshape(-1, n_steps, 1)
future_co2_predictions = []

for _ in range(n_future_years):
    prediction = model.predict(last_sequence).reshape(-1, 1, 1)
    future_co2_predictions.append(prediction[:, 0, 0])
    # Drop the oldest year and append the new prediction along the time axis.
    last_sequence = np.concatenate([last_sequence[:, 1:, :], prediction], axis=1)

# Rescale the predictions back to the original scale.
# MinMaxScaler maps x -> x * scale_ + min_. The network's target is the last
# scaled column, so undo the scaling with that column's parameters; for a
# scaler fitted on a single column these are its only parameters.
scaled_predictions = np.stack(future_co2_predictions, axis=0)  # (n_future_years, n_series)
future_co2_predictions_rescaled = (
    (scaled_predictions - scaler.min_[-1]) / scaler.scale_[-1]
).reshape(-1, 1)

# Convert to DataFrame: one row per series and future year. Rows are ordered
# year by year, and within a year they follow the row order of pivot_data
# (co2_scaled was built from pivot_data row for row).
future_years = range(years[-1] + 1, years[-1] + 1 + n_future_years)
n_series = co2_scaled.shape[0]
future_predictions_df = pd.DataFrame({
    'Longitude': np.tile(pivot_data['Longitude'].to_numpy(), n_future_years),
    'Latitude': np.tile(pivot_data['Latitude'].to_numpy(), n_future_years),
    'Source': np.tile(pivot_data['Source'].to_numpy(), n_future_years),
    'Predicted_CO2': future_co2_predictions_rescaled.ravel(),
    'Year': np.repeat(np.array(future_years), n_series),
})

In [None]:
# Inspect the table of predicted CO2 values for the future years.
future_predictions_df

In [None]:
# Plot training history to visualize the loss over time
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Training Loss')
# 'val_loss' is only recorded when fit() was given validation data; plotting
# it unconditionally raises KeyError otherwise.
if 'val_loss' in history.history:
    ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_title('Model Loss Over Epochs')
ax.set_ylabel('Loss (MSE)')
ax.set_xlabel('Epoch')
ax.legend()
plt.show()