In [93]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import xgboost as xgb
import matplotlib.pyplot as plt
import seaborn as sns


In [94]:
# Read the raw dataset; the path is resolved relative to the working directory
data = pd.read_csv(filepath_or_buffer='data.csv')

In [95]:
# Hand-built suitability score: a weighted sum of rainfall (0.3), soil
# suitability (0.4) and wildlife benefit (0.2), minus 0.1 per million residents.
# NOTE(review): rainfall is in raw inches while the other positive terms are
# 0-1 scores, so rainfall presumably dominates the sum - confirm this is intended.
data['afforestation_score'] = (
    data['Average Annual Rainfall (inches)'].mul(0.3)
    .add(data['Soil Suitability (0 to 1)'].mul(0.4))
    .add(data['Wildlife Benefit Potential (0 to 1)'].mul(0.2))
    .sub(data['Population'].div(1000000).mul(0.1))
)

In [141]:
# Min-max rescale the raw score onto [0, 1] ...
data["afforestation_score"] = data["afforestation_score"].pipe(
    lambda score: (score - score.min()) / (score.max() - score.min())
)
# ... then label a row 1 when its rescaled score is strictly above 0.6, else 0
data["good_for_afforestation"] = data["afforestation_score"].gt(0.6).astype(int)

In [97]:
# Columns handed to the model, in the order the scaler and classifier see them
features = [
    'Average Annual Rainfall (inches)',
    'Soil Suitability (0 to 1)',
    'Wildlife Benefit Potential (0 to 1)',
    'Population',
]

In [158]:
# Target is the thresholded label; inputs are the columns listed in `features`
y = data['good_for_afforestation']
X = data[features]

# Hold out 15% of the rows for evaluation; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply it to both partitions
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [159]:
# Eyeball the labels stored under index keys 0..60, one value per line
for label in (y[row] for row in range(61)):
    print(label)

0
0
0
1
0
0
0
1
0
0
1
1
0
0
0
0
0
0
1
1
1
1
1
1
1
0
0
1
0
0
0
1
1
0
1
1
1
1
1
1
1
0
0
0
0
1
1
1
1
1
1
1
0
0
0
0
1
0
0
0
0


In [160]:
# Binary-logistic gradient-boosted trees: 100 estimators, max depth 4,
# learning rate 0.1, seeded for repeatability
xgb_model = xgb.XGBClassifier(
    **{
        "objective": "binary:logistic",
        "n_estimators": 100,
        "max_depth": 4,
        "learning_rate": 0.1,
        "random_state": 42,
    }
)
xgb_model.fit(X_train_scaled, y_train)


In [161]:
# Predict class labels for the held-out, scaled test rows
xgb_predictions = xgb_model.predict(X=X_test_scaled)

In [162]:
# Report held-out accuracy followed by the per-class precision/recall table
print(
    "\nXGBoost Model Results:",
    f"Accuracy: {accuracy_score(y_test, xgb_predictions):.4f}",
    "\nClassification Report:",
    classification_report(y_test, xgb_predictions),
    sep="\n",
)




XGBoost Model Results:
Accuracy: 0.9000

Classification Report:
              precision    recall  f1-score   support

           0       0.83      1.00      0.91         5
           1       1.00      0.80      0.89         5

    accuracy                           0.90        10
   macro avg       0.92      0.90      0.90        10
weighted avg       0.92      0.90      0.90        10



In [180]:
# Function to get afforestation suitability by state
def get_afforestation_locations(state, model, features, scaler, df=None):
    """
    Rank the cities of one state that the model classifies as suitable.

    Parameters
    ----------
    state : str
        State name to look up. Matching against the "State" column ignores
        surrounding whitespace and letter case, so free-typed input such as
        " oregon" still finds "Oregon".
    model : object
        Classifier exposing ``predict`` and ``predict_proba``.
    features : list of str
        Column names passed, in this order, to ``scaler.transform``.
    scaler : object
        Fitted transformer exposing ``transform``.
    df : pandas.DataFrame, optional
        Table to search. It must contain "State", "City" and every column in
        ``features``. When omitted, the module-level ``data`` frame is used,
        which is what the original implementation always did.

    Returns
    -------
    pandas.DataFrame or str
        A frame with the columns "City" and "Probability" (probability of
        class 1), sorted by descending probability and restricted to rows
        predicted as class 1. A plain message string is returned instead when
        the state has no rows or none of its rows is predicted as class 1.
    """
    # Fall back to the notebook-level frame only when no table is supplied, so
    # the function can be exercised without relying on hidden kernel state.
    source = data if df is None else df

    # Normalise both sides of the comparison; non-string cells (e.g. NaN)
    # never match.
    wanted = str(state).strip().casefold()
    state_mask = source["State"].map(
        lambda name: isinstance(name, str) and name.strip().casefold() == wanted
    ).astype(bool)
    state_data = source[state_mask].copy()

    if state_data.empty:
        return f"No data available for {state}."

    # Apply the same scaling that was used at training time before predicting
    X_state_scaled = scaler.transform(state_data[features])

    # Keep the hard label and the class-1 probability next to each city
    state_data["Prediction"] = model.predict(X_state_scaled)
    state_data["Probability"] = model.predict_proba(X_state_scaled)[:, 1]

    # Only rows predicted as class 1 count as suitable
    good_locations = state_data[state_data["Prediction"] == 1]

    if good_locations.empty:
        return f"No suitable locations found for afforestation in {state}."

    # Return city names with their probabilities, most confident first
    return good_locations[["City", "Probability"]].sort_values(by="Probability", ascending=False)



In [181]:
# Basic Input-Output system
def main():
    """Prompt for a state name and print the suitable cities found for it.

    The lookup is delegated to ``get_afforestation_locations`` using the
    notebook-level ``xgb_model``, ``features`` and ``scaler``. A string result
    is printed verbatim as a status message; otherwise one line is printed per
    city with its probability formatted to four decimals.
    """
    chosen_state = input("Enter the state you want to check for afforestation suitability: ")
    outcome = get_afforestation_locations(chosen_state, xgb_model, features, scaler)

    # A plain string is a status message (e.g. "No data available"), not a table
    if isinstance(outcome, str):
        print(outcome)
        return

    print(f"Suitable locations for afforestation in {chosen_state}:")
    for city, probability in zip(outcome["City"], outcome["Probability"]):
        print(f"- {city} (Probability: {probability:.4f})")


In [182]:
def predict_afforestation_suitability(model, fitted_scaler=None):
    """Prompt for one location's feature values and classify its suitability.

    Four values are read from standard input via ``input``: rainfall in
    inches, soil suitability, wildlife potential and population. Each is
    parsed with ``float``, so non-numeric input raises ``ValueError``.

    Parameters
    ----------
    model : object
        Classifier exposing ``predict`` and ``predict_proba``.
    fitted_scaler : object, optional
        Fitted transformer exposing ``transform``. When omitted, the
        module-level ``scaler`` is used, which is what the original
        implementation always did.

    Returns
    -------
    tuple
        ``(suitability, probability_percent)`` where ``suitability`` is
        "Good" when the predicted class is 1 and "Not Good" otherwise, and
        ``probability_percent`` is the class-1 probability multiplied by 100.
    """
    # Prefer an explicitly supplied scaler; fall back to the notebook global
    active_scaler = scaler if fitted_scaler is None else fitted_scaler

    # Collect the feature values for the new location, in training column order
    rainfall = float(input("Enter rainfall in inches"))
    soil_suitability = float(input("Enter soil suitability (0 to 1)"))
    wildlife_potential = float(input("Enter wildlife potential (0 to 1)"))
    population = float(input("Enter population"))
    values = [[rainfall, soil_suitability, wildlife_potential, population]]

    # A scaler fitted on a DataFrame records its column names in
    # `feature_names_in_`; feeding it a bare array makes scikit-learn warn
    # about missing feature names. Reuse the recorded names when available.
    column_names = getattr(active_scaler, "feature_names_in_", None)
    if column_names is not None:
        new_location = pd.DataFrame(values, columns=list(column_names))
    else:
        new_location = np.array(values)

    # Scale the features
    new_location_scaled = active_scaler.transform(new_location)

    # Make prediction
    prediction = model.predict(new_location_scaled)[0]
    probability = model.predict_proba(new_location_scaled)[0][1]

    suitability = "Good" if prediction == 1 else "Not Good"

    return suitability, probability*100


In [184]:
# Run the interactive state lookup; this cell waits for keyboard input
main()

Enter the state you want to check for afforestation suitability: Oregon
Suitable locations for afforestation in Oregon:
- Eugene (Probability: 0.9578)
- Salem (Probability: 0.9578)
- Corvallis (Probability: 0.9578)


In [179]:
# Interactive single-location check: prompts for the four feature values and
# evaluates to a (suitability label, class-1 probability in percent) tuple
predict_afforestation_suitability(xgb_model)

KeyboardInterrupt: Interrupted by user