<a href="https://colab.research.google.com/github/hamidi311/Population-Collapse-Analysis/blob/main/Collapse_Predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

# Fetch the labelled dataset published in the project's GitHub repository
url = "https://raw.githubusercontent.com/hamidi311/Population-Collapse-Analysis/main/ML_model/labeled_population_data.csv"
feature_cols = ['Fertility Rate', 'Population Over 65']

# Keep rows from 2010 onward, narrow to the modelling columns, and discard
# every row that has a missing value in any of them
df = (
    pd.read_csv(url)
    .loc[lambda frame: frame['Year'] >= 2010, feature_cols + ['Status']]
    .dropna()
    .copy()
)

# Turn the textual status (e.g. 'Collapsing') into an integer class id
le = LabelEncoder()
df['status_encoded'] = le.fit_transform(df['Status'])

# Feature matrix and target vector
X = df[feature_cols]
y = df['status_encoded']

# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit a random forest with default hyper-parameters on the training portion
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Score the held-out rows and print per-class precision / recall / F1.
# NOTE(review): 'Status' appears to be produced by fixed thresholds on these
# same two features (see the labelling cell that writes
# labeled_population_data.csv), so a near-perfect report would mostly show the
# forest recovering that rule -- confirm before reading it as predictive skill.
y_pred = model.predict(X_test)
print("Evaluation Report:")
report = classification_report(y_test, y_pred, target_names=le.classes_)
print(report)

# Prediction function using trained model
def predict_status(fertility_rate, pop_over_65):
    """Classify one (fertility rate, share over 65) observation.

    Builds a single-row frame with the feature column names
    'Fertility Rate' and 'Population Over 65', runs it through the
    module-level ``model``, and decodes the predicted class id with the
    module-level ``le``.

    Args:
        fertility_rate: Value placed in the 'Fertility Rate' column.
        pop_over_65: Value placed in the 'Population Over 65' column.

    Returns:
        The decoded status label for that observation.
    """
    column_names = ['Fertility Rate', 'Population Over 65']
    single_row = pd.DataFrame([[fertility_rate, pop_over_65]], columns=column_names)

    class_id = model.predict(single_row)[0]
    decoded = le.inverse_transform([class_id])
    return decoded[0]

# Interactive input
def _read_float(prompt, minimum=None, maximum=None):
    """Prompt repeatedly until a finite number inside the allowed range is typed.

    Args:
        prompt: Text shown to the user by ``input``.
        minimum: Smallest accepted value, or None for no lower bound.
        maximum: Largest accepted value, or None for no upper bound.

    Returns:
        The accepted value as a float.
    """
    import math  # local import so this cell does not depend on another cell's imports

    while True:
        raw = input(prompt).strip()
        try:
            value = float(raw)
        except ValueError:
            # float() rejects empty or non-numeric text; ask again instead of crashing
            print(f"'{raw}' is not a valid number, please try again.")
            continue

        too_low = minimum is not None and value < minimum
        too_high = maximum is not None and value > maximum
        # float() happily parses 'nan' and 'inf', which are meaningless here
        if not math.isfinite(value) or too_low or too_high:
            print(f"{raw} is outside the accepted range, please try again.")
            continue
        return value


def interactive_prediction():
    """Ask for a country and its two indicators, then print the predicted status.

    The country name is only echoed back in the result line (surrounding
    whitespace is trimmed). The fertility rate must be a finite number >= 0
    and the over-65 share a percentage between 0 and 100; invalid entries are
    re-prompted rather than raising ``ValueError``. The prediction itself is
    delegated to ``predict_status``.
    """
    print("\n📍 Population Collapse Risk Predictor (ML-powered)")
    country = input("Enter Country Name: ").strip()
    fertility_rate = _read_float("Enter Fertility Rate: ", minimum=0)
    pop_over_65 = _read_float("Enter Population Over 65 (%): ", minimum=0, maximum=100)

    prediction = predict_status(fertility_rate, pop_over_65)
    print(f"\n🔍 Prediction for {country}: {prediction}")

# Run interactive CLI
# Calling this at cell level makes the cell wait on input() prompts
# (country name, fertility rate, share over 65), so an unattended
# "Run All" pauses here until the values are typed in.
interactive_prediction()


Evaluation Report:
                       precision    recall  f1-score   support

At Risk of collapsing       1.00      1.00      1.00       187
           Collapsing       1.00      1.00      1.00       134
              Growing       1.00      1.00      1.00       351

             accuracy                           1.00       672
            macro avg       1.00      1.00      1.00       672
         weighted avg       1.00      1.00      1.00       672


📍 Population Collapse Risk Predictor (ML-powered)
Enter Country Name: algeria 
Enter Fertility Rate: 1
Enter Population Over 65 (%): 1

🔍 Prediction for algeria : At Risk of collapsing


In [19]:
import pandas as pd

# Read the raw indicator table; the file is looked up relative to the working
# directory, so adjust the name/path here if yours differs
df = pd.read_csv("predi_model_data.csv")

# Restrict to 2010-onward rows and to the columns the labelling step needs,
# then drop any row with a missing value in one of those columns
is_recent = df['Year'] >= 2010
needed_columns = ['Year', 'Country Name', 'Fertility Rate', 'Population Over 65']
df = df.loc[is_recent, needed_columns].dropna().copy()

# Apply label logic
def label_status(row, fertility_threshold=2.1, over_65_threshold=15):
    """Assign a population status label to one row of indicators.

    Two signals are evaluated: fertility strictly below
    ``fertility_threshold`` and over-65 share strictly above
    ``over_65_threshold``. Both signals -> 'Collapsing'; exactly one ->
    'At Risk of collapsing'; neither -> 'Growing'.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexable by
            'Fertility Rate' and 'Population Over 65'.
        fertility_threshold: Fertility cut-off; defaults to 2.1
            (presumably replacement-level fertility -- confirm).
        over_65_threshold: Cut-off for the over-65 share; defaults to 15,
            in the same unit as the 'Population Over 65' column.

    Returns:
        One of 'Collapsing', 'At Risk of collapsing' or 'Growing'.
    """
    # Evaluate each signal once; values exactly at a threshold do not trigger it
    low_fertility = row['Fertility Rate'] < fertility_threshold
    aged_population = row['Population Over 65'] > over_65_threshold

    if low_fertility and aged_population:
        return 'Collapsing'
    if low_fertility or aged_population:
        return 'At Risk of collapsing'
    return 'Growing'

# Label every row, then attach the result as the 'Status' column
status_labels = df.apply(label_status, axis=1)
df['Status'] = status_labels

# Persist the labelled table (without the index column) for the modelling cell
output_path = "labeled_population_data.csv"
df.to_csv(output_path, index=False)
print("✅ Data labeled and saved as labeled_population_data.csv")


✅ Data labeled and saved as labeled_population_data.csv
