In [1]:
import re
import torch
import torch.nn.functional as F
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
import spacy

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load a pre-trained DistilBERT sequence-classification model and its tokenizer.
# NOTE(review): the checkpoint name indicates SST-2 (English sentiment) fine-tuning,
# not disaster data; the pipeline demo later in this notebook prints a 'NEGATIVE'
# label for the same checkpoint. Confirm whether a disaster-specific checkpoint
# was intended here.
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = DistilBertTokenizer.from_pretrained(MODEL_NAME)
model = DistilBertForSequenceClassification.from_pretrained(MODEL_NAME) # classify_tweet reads output index 1 as "Disaster" and index 0 as "Non-Disaster"

In [3]:
# Load spaCy's "en_core_web_sm" pipeline. extract_info uses its named entities
# (label GPE) for locations; disaster types come from the keyword list, not spaCy.
nlp = spacy.load("en_core_web_sm")

In [4]:
# Lower-case terms that extract_info looks for to tag the disaster type of a tweet
disaster_keywords = [
    "earthquake",
    "flood",
    "hurricane",
    "tornado",
    "wildfire",
    "tsunami",
    "landslide",
    "cyclone",
    "storm",
    "volcano",
    "drought",
]

In [5]:
def preprocess_tweet(text):
    """Normalise a raw tweet into lower-case, space-separated alphanumeric words.

    Steps: lower-case the text, drop URLs (anything starting with ``http`` or
    ``www`` up to the next whitespace), drop every character that is not an
    ASCII letter, digit or whitespace, then collapse all whitespace runs
    (including tabs and newlines) to a single space.

    Args:
        text: The raw tweet as a string.

    Returns:
        The cleaned string with no leading/trailing whitespace; may be empty.
    """
    text = text.lower()
    text = re.sub(r'http\S+|www\S+', '', text)  # Remove URLs
    # Keep whitespace here so words separated by a newline or tab are not
    # glued together when the special characters are removed.
    text = re.sub(r'[^a-z0-9\s]', '', text)  # Remove special characters
    # Collapse the gaps left behind by removed URLs / punctuation-only tokens.
    text = re.sub(r'\s+', ' ', text)
    return text.strip()

In [6]:
def classify_tweet(text, threshold=0.7, margin=0.2):
    """Label a single piece of text as "Disaster" or "Non-Disaster".

    The text is tokenised (truncated to 512 tokens), passed through the
    module-level ``model``, and the logits are turned into probabilities with
    a softmax. Only the first row of the output is read, so ``text`` is
    expected to be one string rather than a batch.

    NOTE(review): index 1 is treated as the "Disaster" class. The checkpoint
    loaded in this notebook is named as an SST-2 sentiment model, so index 1
    is presumably the POSITIVE sentiment label -- confirm the model choice.

    Args:
        text: The (pre-processed) tweet text.
        threshold: Minimum probability of class 1 required for "Disaster".
        margin: Minimum amount by which class 1 must exceed class 0.

    Returns:
        "Disaster" if both the threshold and the margin are satisfied,
        otherwise "Non-Disaster".
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
    # Pure inference: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)
    probs = F.softmax(outputs.logits, dim=1)  # Get probabilities
    disaster_prob = probs[0][1].item()
    non_disaster_prob = probs[0][0].item()

    # Ensure disaster_prob is significantly higher than non_disaster_prob
    if disaster_prob > threshold and disaster_prob > non_disaster_prob + margin:
        return "Disaster"
    return "Non-Disaster"

In [8]:
def extract_info(text):
    """Extract location names and disaster keywords from a piece of text.

    Locations are the spaCy entities labelled ``GPE`` in ``text``. Disaster
    types are the words of ``text`` that appear in ``disaster_keywords``;
    matching is case-insensitive and ignores surrounding punctuation, so
    "Earthquake!" and "flood," are recognised. Repeated keywords are kept,
    in order of appearance.

    Args:
        text: The text to analyse.

    Returns:
        A dict with keys ``"locations"`` and ``"disaster_type"``. Each value
        is a list of strings, or a "No ... detected" message string when
        nothing was found.
    """
    doc = nlp(text)
    locations = [ent.text for ent in doc.ents if ent.label_ == "GPE"]  # GPE = Geopolitical Entity
    known_keywords = set(disaster_keywords)
    # Split on anything that is not a letter/digit so attached punctuation
    # and capitalisation do not hide a keyword.
    words = re.findall(r"[a-z0-9]+", text.lower())
    disaster_type = [word for word in words if word in known_keywords]  # Match keywords in text
    return {
        "locations": locations if locations else "No location detected",
        "disaster_type": disaster_type if disaster_type else "No disaster type detected"
    }

In [9]:
def analyze_tweet(tweet):
    """Classify a tweet and, if it is a disaster tweet, extract details.

    The tweet is cleaned with ``preprocess_tweet`` and classified with
    ``classify_tweet``. For tweets classified as "Disaster", disaster keywords
    are matched on the cleaned text, while locations are extracted from the
    original tweet: cleaning lower-cases the text and strips punctuation,
    which removes the cues the NER model uses to recognise place names.

    Args:
        tweet: The raw tweet text.

    Returns:
        A dict with keys ``"classification"``, ``"locations"`` and
        ``"disaster_type"``. The last two are "N/A" for non-disaster tweets.
    """
    cleaned_tweet = preprocess_tweet(tweet)
    classification = classify_tweet(cleaned_tweet)
    if classification != "Disaster":
        return {"classification": classification, "locations": "N/A", "disaster_type": "N/A"}

    extracted_info = extract_info(cleaned_tweet)
    # Re-run entity extraction on the untouched tweet so capitalisation and
    # punctuation are available to the NER model.
    extracted_info["locations"] = extract_info(tweet)["locations"]
    return {"classification": classification, **extracted_info}

In [10]:
# Sample inputs: two disaster-related tweets and one trivial message
test_tweet1 = "Breaking: A 6.5 earthquake hit Los Angeles! Stay safe!"
test_tweet2 = "hi"
test_tweet3 = "There's a hurricane warning for Florida. Be prepared!"

# Run the full pipeline on each sample, in order, and show the result dict
for sample_tweet in (test_tweet1, test_tweet2, test_tweet3):
    print(analyze_tweet(sample_tweet))

: 

In [76]:
from transformers import pipeline

# Run the SST-2 DistilBERT checkpoint through the high-level
# text-classification pipeline to inspect the labels it produces.
classifier = pipeline("text-classification", model="distilbert/distilbert-base-uncased-finetuned-sst-2-english")

# One-word probe input; the recorded output labels it 'NEGATIVE' (score ~0.996).
tweet = "no"
result = classifier(tweet)  # list of {'label': ..., 'score': ...} dicts
print(result)

Device set to use cpu


[{'label': 'NEGATIVE', 'score': 0.9964459538459778}]


In [1]:
from transformers import pipeline

# Load a pre-trained NER model (or your custom NER model for location extraction).
# aggregation_strategy="simple" merges word-piece tokens and B-/I- tags into
# whole entities, so multi-token places come back as one string instead of
# fragments.
# NOTE: the last recorded run of this cell failed with a RuntimeError because
# Keras 3 is installed; the error message recommends `pip install tf-keras`.
ner = pipeline("ner", model="dslim/bert-base-NER", aggregation_strategy="simple")

# Example disaster-related text
text = "Flooding reported in Jakarta, Indonesia."

# Extract named entities (locations)
entities = ner(text)
# With aggregation enabled each result carries an 'entity_group' (e.g. 'LOC')
# rather than a per-token 'entity' tag such as 'B-LOC'.
locations = [entity['word'] for entity in entities if entity['entity_group'] == 'LOC']
print("Locations:", locations)


  from .autonotebook import tqdm as notebook_tqdm


RuntimeError: Failed to import transformers.models.bert.modeling_tf_bert because of the following error (look up to see its traceback):
Your currently installed version of Keras is Keras 3, but this is not yet supported in Transformers. Please install the backwards-compatible tf-keras package with `pip install tf-keras`.