# Feature Engineering Notebook

In [2]:
# Import libraries
import pandas as pd
import numpy as np

In [3]:
# Read the cleaned dataset from the processed-data folder into the working
# frame `df`, which every later cell in this notebook builds on.
data_path = './data/processed/geo_df.csv'
df = pd.read_csv(filepath_or_buffer=data_path)

In [4]:
# Ratio features normalised by agricultural land ('agland').
#
# All three features share the same denominator. pandas does not raise on
# division by zero: it yields +/-inf (or NaN for 0/0), and those infinities
# would be written to the output CSV and rejected by most downstream
# estimators. Zeros in the denominator are therefore mapped to NaN so the
# affected rows come out as ordinary missing values.
agland_nonzero = df['agland'].replace(0, np.nan)

# 1. Population Density: population per unit of agricultural land
df['population_density'] = df['population'] / agland_nonzero

# 2. Agricultural Intensity: yield (tonnes) per unit of agricultural land
df['yield_per_area'] = df['yield(tonnes)'] / agland_nonzero

# 3. Pesticide Usage Intensity
# Working assumption: heavier pesticide use may correlate with areas prone to
# erosion through soil degradation.
# NOTE(review): the column name 'pestUse(kg/ha)' suggests it is already a
# per-hectare rate, so dividing by 'agland' again may double-normalise it.
# Confirm the intended units.
df['pest_intensity'] = df['pestUse(kg/ha)'] / agland_nonzero

In [7]:
# 4. Climate Zone (Categorical Feature)
# Bucket each row by absolute latitude so both hemispheres share the same bands.
# pd.cut builds right-closed intervals by default, which leaves the first bin
# open at 0; include_lowest=True closes it so rows lying exactly on the equator
# (|Latitude| == 0) are labelled 'Tropical' instead of becoming NaN.
df['climate_zone'] = pd.cut(
    df['Latitude'].abs(),
    bins=[0, 23.5, 45, 66.5, 90],
    labels=['Tropical', 'Subtropical', 'Temperate', 'Polar'],
    include_lowest=True,
)

In [9]:
# 5. Decade for Time-Based Trends
# pd.to_datetime interprets bare numbers as nanoseconds since the Unix epoch,
# so a numeric 'Year' column (e.g. 1995) would collapse to 1970 for every row.
# Use numeric years directly and parse only date-like (string/datetime) values.
year_values = df['Year']
if not pd.api.types.is_numeric_dtype(year_values):
    year_values = pd.to_datetime(year_values).dt.year

# Floor each year to the start of its decade (e.g. 2023 -> 2020).
df['decade'] = (year_values // 10) * 10

In [13]:
# Eyeball the derived decade column (pandas truncates the repr to head/tail).
df.loc[:, 'decade']

0       2020
1       2020
2       2020
3       2020
4       2020
        ... 
9825    2020
9826    2020
9827    2020
9828    2020
9829    2020
Name: decade, Length: 9830, dtype: int32

In [14]:
# 6. Save Feature-Engineered Data
# The destination is defined once so the confirmation message always reports
# the path that was actually written.
output_path = './data/processed/feature_engineered_data.csv'

# index=False keeps the DataFrame index out of the CSV.
df.to_csv(output_path, index=False)
print(f"Feature-engineered data saved to '{output_path}'")

Feature-engineered data saved to './data/processed/feature_engineered_data.csv'
