Data Collection from Twitter

In [None]:
import time
import tweepy
import pandas as pd

# Authenticate against the Twitter API v2.
# SECURITY: credentials must never be committed to the notebook. The bearer
# token is read from the TWITTER_BEARER_TOKEN environment variable, falling
# back to an interactive (non-echoing) prompt. The token that used to be
# hardcoded here has been exposed and should be revoked and regenerated.
import os
from getpass import getpass

bearer_token = os.environ.get("TWITTER_BEARER_TOKEN") or getpass("Twitter bearer token: ")
client = tweepy.Client(bearer_token=bearer_token)

# Search query: English tweets mentioning "disaster relief", retweets excluded.
query = "disaster relief -is:retweet lang:en"

# CSV file setup
csv_filename = "tweets.csv"

# Function to save tweets
def save_tweets(tweets, filename=None):
    """Append the tweets of one API response to a CSV file.

    Parameters
    ----------
    tweets : object
        Response-like object exposing a ``data`` attribute: an iterable of
        tweet objects with ``created_at``, ``id`` and ``text`` attributes,
        or ``None``/empty when there is nothing to save.
    filename : str, optional
        Destination CSV path. Defaults to the notebook-level ``csv_filename``.

    Returns
    -------
    None
        Rows are appended to the file; nothing is written for empty input.
    """
    import os  # local import keeps this definition self-contained

    if filename is None:
        filename = csv_filename

    rows = [[tweet.created_at, tweet.id, tweet.text] for tweet in (tweets.data or [])]
    if not rows:
        # Nothing to persist; avoid creating a header-only file.
        return

    df = pd.DataFrame(rows, columns=["Timestamp", "Tweet ID", "Tweet Text"])

    # Append to CSV (without overwriting). The header is written only when the
    # file does not exist yet or is empty, using the public os.path API
    # instead of pandas' internal pd.io.common helper.
    write_header = not os.path.exists(filename) or os.path.getsize(filename) == 0
    df.to_csv(filename, mode="a", index=False, header=write_header)
    print(f"Saved {len(df)} tweets to {filename}")

# Fetch tweets at regular intervals
POLL_INTERVAL_SECONDS = 300        # pause between polls (5 minutes)
RATE_LIMIT_BACKOFF_SECONDS = 900   # pause after a rate-limit response (15 minutes)

# ID of the newest tweet already saved. It is sent as `since_id` so each poll
# only returns tweets newer than the last batch; without it the same recent
# tweets would be appended to the CSV again on every iteration.
newest_seen_id = None

while True:
    try:
        search_kwargs = {
            "query": query,
            "max_results": 10,
            "tweet_fields": ["created_at"],
        }
        if newest_seen_id is not None:
            search_kwargs["since_id"] = newest_seen_id

        tweets = client.search_recent_tweets(**search_kwargs)

        if tweets.data:
            save_tweets(tweets)
            # The response metadata reports the newest ID of this page.
            newest_seen_id = (tweets.meta or {}).get("newest_id", newest_seen_id)
        else:
            print("No new tweets found.")

        print("Waiting 5 minutes before next fetch...\n")
        time.sleep(POLL_INTERVAL_SECONDS)

    except tweepy.TooManyRequests:
        print("Rate limit hit. Waiting for 15 minutes...")
        time.sleep(RATE_LIMIT_BACKOFF_SECONDS)

    except KeyboardInterrupt:
        # Interrupting the kernel is the only way out of this loop; leave it
        # cleanly instead of surfacing a traceback.
        print("Stopped tweet collection.")
        break


In [None]:
import pandas as pd
import re

# Read the raw tweets collected by the previous cell
df = pd.read_csv(filepath_or_buffer="tweets.csv")

# Basic text cleaning function
def clean_text(text):
    """Normalize a raw tweet into lowercase alphanumeric words.

    The input is coerced to ``str`` first. URLs and @mentions are removed,
    ``#`` symbols are stripped (the hashtag word itself is kept), every
    remaining character that is not an ASCII letter, digit or whitespace is
    dropped, whitespace runs collapse to one space, and the result is
    lowercased and trimmed.
    """
    # (pattern, replacement) pairs, applied in order.
    substitutions = (
        (r"http\S+", ""),            # URLs
        (r"@\w+", ""),               # mentions
        (r"#", ""),                  # hash symbol only
        (r"[^A-Za-z0-9\s]", ""),     # special characters
        (r"\s+", " "),               # extra whitespace
    )
    cleaned = str(text)
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.lower().strip()

# Store the normalized text next to the original tweet text
df["Cleaned_Text"] = df["Tweet Text"].map(clean_text)

# Save to new CSV
df.to_csv(path_or_buf="cleaned_tweets.csv", index=False)
print("Preprocessing complete. Saved to 'cleaned_tweets.csv'")


In [None]:
# Reload the cleaned tweets and inspect them.
df = pd.read_csv("cleaned_tweets.csv")
# Only a cell's last expression is rendered automatically, so the preview
# must be displayed explicitly or it is silently discarded.
display(df.head())
df.info()

# ✅ Step 3: Sentiment & Emotion Classification

In [None]:
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline

# Cleaned tweets produced by the preprocessing cell
df = pd.read_csv(filepath_or_buffer="cleaned_tweets.csv")

# Sentiment classifier (Twitter-tuned RoBERTa checkpoint)
sentiment_model = pipeline(
    task="sentiment-analysis",
    model="cardiffnlp/twitter-roberta-base-sentiment",
)

# Emotion classifier; only the top prediction is requested.
# NOTE(review): `return_all_scores` appears to be deprecated in favour of
# `top_k` in recent transformers releases — confirm before upgrading.
emotion_model = pipeline(
    task="text-classification",
    model="nateraw/bert-base-uncased-emotion",
    return_all_scores=False,
)


In [None]:
# Tweets whose cleaned text is empty are written to CSV as empty fields and
# come back from read_csv as NaN (a float). The pipelines only accept strings,
# so normalize the inputs before classifying.
cleaned_texts = df["Cleaned_Text"].fillna("").astype(str)

# Apply sentiment analysis (label of the top prediction)
df["Sentiment"] = cleaned_texts.apply(lambda x: sentiment_model(x)[0]['label'])

# Apply emotion classification
# Access the 'label' from the first element of the list returned by emotion_model
df["Emotion"] = cleaned_texts.apply(lambda x: emotion_model(x)[0]['label'])

# Save results
df.to_csv("analyzed_tweets.csv", index=False)
print("Sentiment and emotion classification done. Saved to 'analyzed_tweets.csv'")

In [None]:
# Reload the analyzed tweets and preview the first rows
df = pd.read_csv(filepath_or_buffer="analyzed_tweets.csv")
df.head(5)

In [None]:
# prompt: Make a pie chart or bar graph for Sentiment: Positive / Neutral / Negative
# Emotion: Joy / Anger / Sadness / etc.

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the tweets annotated with sentiment and emotion
df = pd.read_csv("analyzed_tweets.csv")

# Sentiment: share of each label as a pie chart
sentiment_counts = df['Sentiment'].value_counts()
fig, ax = plt.subplots(figsize=(8, 6))
ax.pie(sentiment_counts, labels=sentiment_counts.index, autopct='%1.1f%%', startangle=90)
ax.set_title('Sentiment Distribution of Tweets')
plt.show()

# Emotion: number of tweets per label as a bar chart
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(x='Emotion', data=df, ax=ax)
ax.set_title('Emotion Distribution of Tweets')
ax.set_xlabel('Emotion')
ax.set_ylabel('Number of Tweets')
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
plt.show()


In [None]:
# Print column names, dtypes and non-null counts of the current `df`.
df.info()

# 🔥 Step 4: Urgency Detection using Zero-Shot Classification

In [None]:
# Zero-shot classifier used to assign urgency labels without fine-tuning
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)


In [None]:
# Labels the zero-shot classifier chooses between for every tweet
candidate_labels = [
    "urgent",
    "informational",
    "misinformation",
    "irrelevant",
]


In [None]:
def classify_urgency(text, empty_label="irrelevant"):
    """Return the top zero-shot label for one tweet.

    Parameters
    ----------
    text : str
        Cleaned tweet text.
    empty_label : str, optional
        Label returned without calling the model when ``text`` is not a
        string or is blank. Missing values read back from CSV arrive as NaN
        floats, and blank text carries nothing to classify.

    Returns
    -------
    str
        The highest-ranked entry of ``candidate_labels``, or ``empty_label``.
    """
    if not isinstance(text, str) or not text.strip():
        return empty_label
    result = classifier(text, candidate_labels)
    return result['labels'][0]  # Take top label

# Label every cleaned tweet with its top urgency category
df["Urgency_Label"] = df["Cleaned_Text"].map(classify_urgency)


In [None]:
# Persist the fully classified tweets for the dashboard
df.to_csv(path_or_buf="final_tweets_classified.csv", index=False)
print("Urgency classification complete. Saved to 'final_tweets_classified.csv'")


In [None]:
# Reload the final dataset and preview the first rows
df = pd.read_csv(filepath_or_buffer="final_tweets_classified.csv")
df.head(5)

In [None]:
%%writefile app.py
import streamlit as st
import pandas as pd

st.set_page_config(layout="wide")
st.title("🌍 Real-Time Disaster Tweet Monitoring Dashboard")

@st.cache_data(ttl=60)
def load_data():
    df = pd.read_csv("final_tweets_classified.csv")
    return df

df = load_data()

col1, col2 = st.columns(2)

with col1:
    st.subheader("📈 Sentiment Distribution")
    sentiment_count = df['Sentiment'].value_counts()
    st.bar_chart(sentiment_count)

with col2:
    st.subheader("😶 Emotion Distribution")
    emotion_count = df['Emotion'].value_counts()
    st.bar_chart(emotion_count)

st.subheader("🚨 Urgency Classification")
urgency_count = df['Urgency_Label'].value_counts()
st.bar_chart(urgency_count)

st.subheader("🔍 Sample Classified Tweets")
st.dataframe(df[['Tweet Text', 'Sentiment', 'Emotion', 'Urgency_Label']].head(10))


In [None]:
!ngrok config add-authtoken 2vg6TKE6mK8PjXNKLTreD8Gax3t_3zyEMYGmKnmFJnrSNyhkG


In [None]:
from pyngrok import ngrok
import threading
import time

# Start the Streamlit app in a separate thread
def run():
    # IPython shell escape: launches the dashboard written to app.py and
    # redirects both stdout and stderr to logs.txt.
    !streamlit run app.py &> logs.txt

# Run the launcher on a background thread so the rest of this cell can continue.
thread = threading.Thread(target=run)
thread.start()
time.sleep(5)  # Wait for streamlit to spin up (fixed delay, no readiness check)

# Open the public URL using ngrok
# NOTE(review): 8501 is presumably the port Streamlit listens on by default — confirm.
url = ngrok.connect(8501)
print("Streamlit app is live at:", url)


Validation steps for:

Sentiment

Emotion

Urgency

In [None]:

# Hand-annotated ground truth, keyed by tweet ID.
# Each value is a (sentiment, emotion, urgency) tuple; the trailing comment on
# every entry records the annotator's reasoning.
# NOTE(review): the comments treat 'LABEL_1' as "Positive", but the model card
# of cardiffnlp/twitter-roberta-base-sentiment reportedly maps LABEL_0 to
# negative, LABEL_1 to neutral and LABEL_2 to positive — confirm, and relabel
# the positive entries if so.
manual_labels = {
    1911415120518848659: ('LABEL_1', 'joy', 'informational'),    # Positive, hopeful relief, sharing info
    1911414915434237964: ('LABEL_1', 'joy', 'urgent'),           # Positive, hopeful relief, urgent tone
    1911414810266292620: ('LABEL_0', 'anger', 'informational'),  # Negative, denied relief, sharing news
    1911414690908864816: ('LABEL_1', 'joy', 'urgent'),           # Positive, hopeful relief, urgent tone
    1911414444631957835: ('LABEL_1', 'joy', 'informational'),    # Positive, hopeful relief, sharing info
    1911413975780110636: ('LABEL_1', 'joy', 'informational'),    # Positive, factual about FEMA, neutral joy
    1911413867676106767: ('LABEL_0', 'anger', 'informational'),  # Negative, angry accusations
    1911413830522974485: ('LABEL_1', 'joy', 'informational'),    # Positive, hopeful relief
    1911413443917132003: ('LABEL_0', 'anger', 'informational'),  # Negative, angry about ignored message
    1911413217152106578: ('LABEL_1', 'joy', 'informational'),    # Positive, hopeful relief
    1911451307468357738: ('LABEL_1', 'joy', 'informational'),    # Positive, proud of RSS efforts
    1911450778855735502: ('LABEL_0', 'anger', 'informational'),  # Negative, defensive and critical
    1911447273680085474: ('LABEL_0', 'anger', 'informational'),  # Negative, confrontational challenge
    1911446655120867634: ('LABEL_0', 'sadness', 'informational'),# Negative, clarifying relief source
    1911446202777747564: ('LABEL_1', 'joy', 'informational'),    # Positive, optimistic about AI
    1911445185487155410: ('LABEL_0', 'anger', 'urgent'),         # Negative, mocking denial of relief
    1911444903516602879: ('LABEL_0', 'sadness', 'urgent'),       # Negative, personal distress
    1911444543569871294: ('LABEL_1', 'joy', 'informational'),    # Positive, questioning funds positively
    1911440425430167745: ('LABEL_0', 'anger', 'informational'),  # Negative, critical of denial
    1911456421117374810: ('LABEL_0', 'anger', 'urgent'),         # Negative, urgent about denied relief
    1911456400179339717: ('LABEL_0', 'anger', 'informational'),  # Negative, critical of policy
    1911455280879055353: ('LABEL_0', 'anger', 'informational'),  # Negative, critical of policy
    1911455124423160248: ('LABEL_0', 'anger', 'informational'),  # Negative, correcting misconception
    1911454606279815383: ('LABEL_0', 'anger', 'informational'),  # Negative, critical of policy
    1911454368395591853: ('LABEL_0', 'anger', 'informational'),  # Negative, debunking conspiracy
    1911461133183000651: ('LABEL_0', 'sadness', 'urgent'),       # Negative, urgent tech failure
    1911460770510229767: ('LABEL_0', 'anger', 'informational'),  # Negative, angry defense
    1911460523549286636: ('LABEL_0', 'anger', 'informational'),  # Negative, sarcastic critique
    1911466915693764938: ('LABEL_1', 'sadness', 'urgent'),       # Positive, offering relief, sad context
    1911466234723311800: ('LABEL_0', 'anger', 'misinformation'), # Negative, angry conspiracy
    1911466014547603960: ('LABEL_0', 'sadness', 'informational'),# Negative, observational, neutral sadness
    1911465530445451376: ('LABEL_0', 'anger', 'informational'),  # Negative, critical of unit
    1911465460383523115: ('LABEL_0', 'anger', 'informational'),  # Negative, critical of inaction
    1911465161883267478: ('LABEL_0', 'anger', 'informational'),  # Negative, critical priorities
    1911465144770760866: ('LABEL_1', 'joy', 'informational'),    # Positive, listing welfare services
    1911464640824893520: ('LABEL_1', 'sadness', 'urgent'),       # Positive, urgent call to help, sad context
    1911462675898589404: ('LABEL_1', 'joy', 'informational'),    # Positive, clarifying FEMA policy
    1911462540728586708: ('LABEL_0', 'sadness', 'informational'),# Negative, questioning relevance
}

# Spread each (sentiment, emotion, urgency) annotation into its own column.
# Tweets without a manual annotation get None in all three columns.
_unlabelled = (None, None, None)
_truth_columns = ["True_Sentiment", "True_Emotion", "True_Urgency_Label"]
for _position, _column in enumerate(_truth_columns):
    df[_column] = df["Tweet ID"].map(
        lambda x, i=_position: manual_labels.get(x, _unlabelled)[i]
    )

# Keep only the tweets that were manually annotated
df_eval = df.dropna(subset=_truth_columns)

# Side-by-side preview of predictions and ground truth
_comparison_columns = [
    "Tweet ID",
    "Sentiment", "True_Sentiment",
    "Emotion", "True_Emotion",
    "Urgency_Label", "True_Urgency_Label",
]
df_eval[_comparison_columns].head()


In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

def plot_confusion_matrix(y_true, y_pred, labels, title):
    """Draw a confusion-matrix heatmap of predictions against ground truth.

    Parameters
    ----------
    y_true, y_pred : array-like
        Ground-truth and predicted labels, aligned element by element.
    labels : list
        Label values indexing the matrix rows (true) and columns (predicted),
        in display order.
    title : str
        Text appended to "Confusion Matrix: " in the figure title.
    """
    counts = confusion_matrix(y_true, y_pred, labels=labels)
    fig, ax = plt.subplots(figsize=(6, 5))
    sns.heatmap(
        counts,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=labels,
        yticklabels=labels,
        ax=ax,
    )
    ax.set_title(f'Confusion Matrix: {title}')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    fig.tight_layout()
    plt.show()

def _observed_labels(y_true, y_pred):
    """Return the sorted union of the labels present in either series.

    Building the label list from the ground truth alone (or from a fixed
    subset) makes confusion_matrix silently ignore every sample whose
    prediction is a class missing from that list, which hides exactly the
    errors the matrix is meant to expose.
    """
    return sorted(set(y_true.dropna()) | set(y_pred.dropna()))


# Sentiment: includes any label the model predicts (e.g. LABEL_2), not only
# the two labels used in the manual annotations.
sentiment_labels = _observed_labels(df_eval["True_Sentiment"], df_eval["Sentiment"])
plot_confusion_matrix(
    df_eval["True_Sentiment"],
    df_eval["Sentiment"],
    labels=sentiment_labels,
    title="Sentiment"
)

# Emotion: includes predicted emotions that never occur in the annotations.
emotion_labels = _observed_labels(df_eval["True_Emotion"], df_eval["Emotion"])
plot_confusion_matrix(
    df_eval["True_Emotion"],
    df_eval["Emotion"],
    labels=emotion_labels,
    title="Emotion"
)

# Urgency: includes predicted categories that never occur in the annotations.
urgency_labels = _observed_labels(df_eval["True_Urgency_Label"], df_eval["Urgency_Label"])
plot_confusion_matrix(
    df_eval["True_Urgency_Label"],
    df_eval["Urgency_Label"],
    labels=urgency_labels,
    title="Urgency Label"
)


In [None]:
!pip install flask-ngrok transformers


In [None]:
# Replace YOUR_AUTHTOKEN below with your own ngrok token
!./ngrok authtoken 2vinwMFJRzrJWrtM1aIFker41Qo_3poE11V7v2SBtPQnv9nym


In [None]:
from flask import Flask, request, jsonify
from threading import Thread
from transformers import pipeline

# Flask application serving the tweet classifier
app = Flask(__name__)

# Zero-shot model and the categories it chooses between
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)
categories = [
    "Distress",
    "Informational",
    "Misinformation",
]

@app.route('/')
def home():
    """Landing route: return a plain-text message confirming the API is up."""
    return "Tweet Classifier API is live!"

@app.route('/classify', methods=['POST'])
def classify():
    """Classify one tweet posted as JSON.

    Expects a JSON object body with a non-blank string field ``text``.

    Returns
    -------
    JSON ``{"label": str, "confidence": float}`` holding the top category and
    its score rounded to three decimals, or ``{"error": ...}`` with HTTP 400
    when the body is not a JSON object or carries no usable text.
    """
    # silent=True makes a missing or malformed JSON body yield None instead of
    # raising, so every bad request is answered with the same JSON 400 error
    # rather than an AttributeError and HTTP 500.
    data = request.get_json(silent=True)
    tweet = data.get("text") if isinstance(data, dict) else None

    if not isinstance(tweet, str) or not tweet.strip():
        return jsonify({"error": "No tweet text provided"}), 400

    result = classifier(tweet, categories)
    return jsonify({
        "label": result["labels"][0],
        "confidence": round(result["scores"][0], 3)
    })

# NOTE: this definition replaces the Streamlit launcher of the same name
# defined in an earlier cell of this notebook.
def run():
    # Serve the Flask app on all interfaces, port 5000 (the call blocks).
    app.run(host='0.0.0.0', port=5000)

# Start Flask in background thread
Thread(target=run).start()


In [None]:
!pip install pyngrok

In [None]:
import os
import time
from pyngrok import ngrok

# Expose the local Flask server (port 5000) through an ngrok tunnel
public_url = ngrok.connect(5000)
print(f"🚀 Public URL: {public_url}")


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Download the zero-shot checkpoint and store both parts in ./my_model
model_name = "facebook/bart-large-mnli"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Tokenizer first, then the model weights, into the same directory
for _artifact in (tokenizer, model):
    _artifact.save_pretrained("my_model")


In [None]:
import shutil

# Zip the contents of ./my_model into my_model.zip
shutil.make_archive(base_name="my_model", format="zip", root_dir="my_model")


In [None]:
from google.colab import files
# Hand the zipped model to the browser for download (Colab helper).
files.download("my_model.zip")
