In [2]:
import pandas as pd

# Directory that holds the raw CSV splits
BASE_PATH = "../data/raw/"

# One file per split, listed in the order they are unpacked below
_SPLIT_FILES = (
    "disaster_response_messages_training.csv",
    "disaster_response_messages_test.csv",
    "disaster_response_messages_validation.csv",
)

# df = training split, dft = test split, dfv = validation split
df, dft, dfv = (pd.read_csv(BASE_PATH + file_name) for file_name in _SPLIT_FILES)


  df  = pd.read_csv(BASE_PATH + "disaster_response_messages_training.csv")


In [3]:
# Severity labels ordered from least to most severe; these are the same four
# strings that severity_label() returns.
SEVERITY_LABELS = ["low", "medium", "high", "critical"]


In [4]:
# Starting severity score for each disaster type. compute_severity_score()
# falls back to 1.5 for any disaster type that is not listed here.
DISASTER_BASE_SCORE = {
    "earthquake": 3.0,
    "fire": 2.8,
    "flood": 2.5,
    "landslide": 2.6,
    "storm": 2.2,
    "accident": 2.0,
    "other": 1.5
}


In [5]:
# Keyword tiers used by compute_severity_score(). Matching is a plain substring
# test on the lower-cased message, and every keyword that matches adds its
# tier's weight to the score.

# Each match adds 2.0
CRITICAL_KEYWORDS = [
    "dead", "killed", "collapsed", "trapped",
    "buried", "missing", "rescue"
]

# Each match adds 1.5
HIGH_KEYWORDS = [
    "injured", "bleeding", "destroyed",
    "fire spreading", "flood rising"
]

# Each match adds 1.0
MEDIUM_KEYWORDS = [
    "urgent", "emergency", "help",
    "evacuation", "damage"
]

# Each match adds 0.5 (these still raise the score, just by the least)
LOW_KEYWORDS = [
    "minor", "small fire", "water logging",
    "power cut"
]


In [6]:
def intensity_score(text):
    """Return a bonus reflecting how emphatic the message looks.

    Three independent signals each contribute a fixed amount:

    * more than 30% of all characters are uppercase  -> 0.5
    * at least two exclamation marks                 -> 0.3
    * more than 20 whitespace-separated words        -> 0.4

    Returns 0 when no signal fires.
    """
    uppercase_count = sum(ch.isupper() for ch in text)
    # max(..., 1) keeps the ratio defined for an empty message
    uppercase_ratio = uppercase_count / max(len(text), 1)

    signals = (
        (uppercase_ratio > 0.3, 0.5),
        (text.count("!") >= 2, 0.3),
        (len(text.split()) > 20, 0.4),
    )

    bonus = 0
    for fired, weight in signals:
        if fired:
            bonus += weight
    return bonus


In [7]:
def compute_severity_score(text, disaster_type):
    """Compute the raw severity score for a message.

    The score starts at the base value for ``disaster_type`` (1.5 when the
    type is unknown), gains a fixed weight for every keyword found as a
    substring of the lower-cased text, and finally gains the emphasis bonus
    from ``intensity_score``. The result is rounded to two decimals.
    """
    lowered = text.lower()
    total = DISASTER_BASE_SCORE.get(disaster_type, 1.5)

    # Tiers are applied from most to least severe; every matching keyword
    # in a tier contributes that tier's weight.
    keyword_tiers = (
        (CRITICAL_KEYWORDS, 2.0),
        (HIGH_KEYWORDS, 1.5),
        (MEDIUM_KEYWORDS, 1.0),
        (LOW_KEYWORDS, 0.5),
    )
    for keywords, weight in keyword_tiers:
        for keyword in keywords:
            if keyword in lowered:
                total += weight

    # The emphasis bonus looks at the original text because it needs the casing
    total += intensity_score(text)
    return round(total, 2)


In [8]:
def severity_label(score):
    """Map a severity score to its label.

    Scores below 2.0 are "low", below 3.5 "medium", below 5.0 "high",
    and everything else is "critical".
    """
    # (exclusive upper bound, label), checked in ascending order
    upper_bounds = (
        (2.0, "low"),
        (3.5, "medium"),
        (5.0, "high"),
    )
    for bound, label in upper_bounds:
        if score < bound:
            return label
    return "critical"


In [9]:
def severity_pipeline(message, disaster_type):
    """Score a message and attach the matching label (no context cap).

    NOTE: a later cell defines ``severity_pipeline`` again with an extra
    context-cap step. Once that cell has run, it replaces this version.
    """
    raw_score = compute_severity_score(message, disaster_type)
    return {
        "severity": severity_label(raw_score),
        "severity_score": raw_score,
    }


In [10]:
# Phrases describing low-impact situations
LOW_IMPACT_CONTEXT = [
    "water logging", "power cut", "traffic", "road blocked"
]


def apply_context_cap(score, text, disaster_type):
    """Limit the score of earthquake messages that describe low-impact events.

    When ``disaster_type`` is "earthquake" and the lower-cased text contains
    any LOW_IMPACT_CONTEXT phrase, the score is capped at 2.0 (the bottom of
    the "medium" band). In every other case the score is returned unchanged.
    """
    lowered = text.lower()
    is_earthquake = disaster_type == "earthquake"

    if is_earthquake and any(phrase in lowered for phrase in LOW_IMPACT_CONTEXT):
        return min(score, 2.0)
    return score


In [11]:
def severity_pipeline(message, disaster_type):
    """Run the full severity flow: score, context cap, then label.

    Returns a dict with the label under "severity" and the capped score,
    rounded to two decimals, under "severity_score".
    """
    capped_score = apply_context_cap(
        compute_severity_score(message, disaster_type),
        message,
        disaster_type,
    )

    result = {"severity": severity_label(capped_score)}
    result["severity_score"] = round(capped_score, 2)
    return result


In [12]:
# Smoke test on a low-impact flood report. apply_context_cap() only caps
# messages whose disaster type is "earthquake", so this score is not capped.
test_msg = "Water logging reported on main road after rain"
test_disaster = "flood"

severity_pipeline(test_msg, test_disaster)


{'severity': 'medium', 'severity_score': 3.0}

In [13]:
# Label every test message, treating each one as a "flood" report
results = [
    severity_pipeline(message, "flood")["severity"]
    for message in dft["message"]
]

pd.Series(results).value_counts()


medium      2137
high         436
critical      56
Name: count, dtype: int64

### 🔢 Severity Score Ranges (Feature-3)

| **Severity Score** | **Severity Label** | **Interpretation** |
|-------------------|--------------------|--------------------|
| **0.0 – 1.9** | **Low** | Minor incident, low impact, no immediate danger |
| **2.0 – 3.4** | **Medium** | Noticeable impact, needs attention |
| **3.5 – 4.9** | **High** | Serious situation, response required |
| **≥ 5.0** | **Critical** | Life-threatening, urgent emergency |


> **Note:** Severity scores are computed using disaster type, urgency keywords, and message intensity signals. These thresholds are empirically chosen based on observed score distributions.


In [14]:
# Severity score treated as the top of the scale by normalize_severity();
# scores above it normalize to 1.0.
SEVERITY_MAX_SCORE = 7.0


In [15]:
# Weight of each component in the priority score computed by
# priority_pipeline(); the four weights add up to 1.0.
PRIORITY_WEIGHTS = {
    "severity": 0.40,
    "reliability": 0.30,
    "confidence": 0.20,
    "urgency": 0.10
}

# Substrings counted by urgency_score(); each one found adds 0.2,
# and the total is capped at 1.0.
URGENCY_KEYWORDS = [
    "urgent", "asap", "immediately",
    "rescue", "help", "trapped"
]

In [16]:
def normalize_severity(severity_score, max_score=SEVERITY_MAX_SCORE):
    """
    Normalize a severity score to the [0, 1] range.

    Parameters
    ----------
    severity_score : float
        Raw severity score.
    max_score : float
        Score that maps to 1.0. Must be non-zero; zero raises
        ZeroDivisionError.

    Returns
    -------
    float
        ``severity_score / max_score`` clamped to [0.0, 1.0].
    """
    ratio = severity_score / max_score
    # Clamp on both sides so a negative score cannot pull the weighted
    # priority score below the contribution of the other components.
    return max(0.0, min(ratio, 1.0))

In [17]:
def reliability_score(is_fake, model_confidence):
    """
    Turn the Feature-2 output into a reliability score.

    A message flagged as fake scores 0.0. Otherwise the score is the model
    confidence, limited to the [0.0, 1.0] interval.
    """
    if not is_fake:
        upper_bounded = min(model_confidence, 1.0)
        return max(0.0, upper_bounded)
    return 0.0


In [18]:

def urgency_score(text):
    """
    Score how urgent a message sounds.

    Every URGENCY_KEYWORDS entry found as a substring of the lower-cased
    text adds 0.2; the result never exceeds 1.0.
    """
    lowered = text.lower()
    matched = [keyword for keyword in URGENCY_KEYWORDS if keyword in lowered]

    total = 0.0
    for _ in matched:
        total += 0.2

    return min(total, 1.0)


In [19]:
def priority_level(score):
    """
    Translate a priority score into its label.

    Scores below 0.25 are "low", below 0.50 "medium", below 0.75 "high",
    and everything else is "very_high".
    """
    # (exclusive upper bound, label), checked in ascending order
    upper_bounds = (
        (0.25, "low"),
        (0.50, "medium"),
        (0.75, "high"),
    )
    for bound, label in upper_bounds:
        if score < bound:
            return label
    return "very_high"

In [20]:
def priority_pipeline(
    message,
    severity_score,
    disaster_confidence,
    is_fake,
    reliability_confidence
):
    """
    Combine severity, reliability, classifier confidence and urgency into
    one priority score.

    Each component is multiplied by its PRIORITY_WEIGHTS entry and the four
    terms are added; the sum is rounded to three decimals.
    ``disaster_confidence`` is used as given, without clamping.

    Returns a dict with "priority_score" and the matching "priority_level".
    """
    severity_term = PRIORITY_WEIGHTS["severity"] * normalize_severity(severity_score)
    reliability_term = PRIORITY_WEIGHTS["reliability"] * reliability_score(
        is_fake, reliability_confidence
    )
    urgency_term = PRIORITY_WEIGHTS["urgency"] * urgency_score(message)
    confidence_term = PRIORITY_WEIGHTS["confidence"] * disaster_confidence

    combined = round(
        severity_term + reliability_term + confidence_term + urgency_term,
        3,
    )

    return {
        "priority_score": combined,
        "priority_level": priority_level(combined),
    }

In [21]:
# Example call: a non-fake report with a high severity score whose text
# contains three urgency keywords ("trapped", "urgent", "rescue").
priority_pipeline(
    message="People trapped after building collapse, urgent rescue needed",
    severity_score=6.2,
    disaster_confidence=0.92,
    is_fake=False,
    reliability_confidence=0.91
)

{'priority_score': 0.871, 'priority_level': 'very_high'}

In [23]:
# Inspect the column names of the test split
dft.columns


Index(['id', 'split', 'message', 'original', 'genre', 'related', 'PII',
       'request', 'offer', 'aid_related', 'medical_help', 'medical_products',
       'search_and_rescue', 'security', 'military', 'child_alone', 'water',
       'food', 'shelter', 'clothing', 'money', 'missing_people', 'refugees',
       'death', 'other_aid', 'infrastructure_related', 'transport',
       'buildings', 'electricity', 'tools', 'hospitals', 'shops',
       'aid_centers', 'other_infrastructure', 'weather_related', 'floods',
       'storm', 'fire', 'earthquake', 'cold', 'other_weather',
       'direct_report'],
      dtype='object')

In [24]:
# Indicator columns checked, in this order, by extract_disaster_type();
# the first column equal to 1 decides the disaster type of a row.
DISASTER_COLUMNS = [
    "earthquake",
    "fire",
    "floods",
    "storm",
    "cold",
    "other_weather"
]


In [25]:
def extract_disaster_type(row):
    """Return the disaster type of a row from its indicator columns.

    DISASTER_COLUMNS is scanned in order and the first column that is
    present in ``row`` with a value of 1 wins. "floods" is reported as
    "flood"; a row with no flagged column is reported as "other".
    """
    flagged = next(
        (col for col in DISASTER_COLUMNS if col in row and row[col] == 1),
        None,
    )
    if flagged is None:
        return "other"
    # normalize naming
    return "flood" if flagged == "floods" else flagged


In [26]:
# Derive one disaster_type label per row from the indicator columns
dft["disaster_type"] = dft.apply(extract_disaster_type, axis=1)


In [28]:
# Number of rows per derived disaster type
dft["disaster_type"].value_counts()


disaster_type
other            1898
flood             236
earthquake        227
storm             169
other_weather      63
cold               25
fire               11
Name: count, dtype: int64

In [32]:
# Check whether dft already has a severity_score column
print("severity_score" in dft.columns)


False


In [33]:
def _row_severity_score(row):
    """Return the severity score for one row's message and disaster type."""
    outcome = severity_pipeline(row["message"], row["disaster_type"])
    return outcome["severity_score"]


dft["severity_score"] = dft.apply(_row_severity_score, axis=1)


In [34]:
# Preview the computed severity scores next to their inputs
dft[["message", "disaster_type", "severity_score"]].head()


Unnamed: 0,message,disaster_type,severity_score
0,UN reports Leogane 80-90 destroyed. Only Hospi...,other,3.0
1,We are at Gressier we needs assistance right a...,flood,3.5
2,"Delmas 33 in Silo, need water.",other,1.5
3,"SOS SOS, please provide police officers on the...",other,1.5
4,"I am a driver, a mechanic ,. I want to help",other,2.5


In [35]:
import numpy as np

# For priority testing: stand-in values for the inputs priority_pipeline()
# expects besides the message and the severity score.
# A seeded generator keeps the synthetic confidences, and therefore the
# priority scores derived from them, identical across "Restart & Run All".
RANDOM_SEED = 42
rng = np.random.default_rng(RANDOM_SEED)

dft["is_fake"] = False
dft["disaster_confidence"] = rng.uniform(0.6, 0.95, size=len(dft))
dft["reliability_confidence"] = rng.uniform(0.7, 0.95, size=len(dft))


In [36]:
def _row_priority(row):
    """Run priority_pipeline on one row of dft and return its result dict."""
    return priority_pipeline(
        message=row["message"],
        severity_score=row["severity_score"],
        disaster_confidence=row["disaster_confidence"],
        is_fake=row["is_fake"],
        reliability_confidence=row["reliability_confidence"],
    )


dft["priority_output"] = dft.apply(_row_priority, axis=1)

# Unpack the two fields of each result dict into their own columns
for field in ("priority_score", "priority_level"):
    dft[field] = dft["priority_output"].apply(lambda output, key=field: output[key])



In [37]:
# Preview the priority results next to the severity scores
dft[["message", "severity_score", "priority_score", "priority_level"]].head()


Unnamed: 0,message,severity_score,priority_score,priority_level
0,UN reports Leogane 80-90 destroyed. Only Hospi...,3.0,0.607,high
1,We are at Gressier we needs assistance right a...,3.5,0.608,high
2,"Delmas 33 in Silo, need water.",1.5,0.487,medium
3,"SOS SOS, please provide police officers on the...",1.5,0.474,medium
4,"I am a driver, a mechanic ,. I want to help",2.5,0.622,high
