In [None]:
# Install the notebook's dependencies. The %pip magic (rather than a bare
# `pip`) installs into the environment of the kernel running this notebook.
%pip install tweepy geopandas shapely pandas

### Use this cell to search tweets worldwide (no geographic filtering)

In [None]:
import tweepy
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

import os

# === Configuration ===
# Twitter API Bearer Token. Prefer setting the TWITTER_BEARER_TOKEN environment
# variable so the secret never ends up saved inside the notebook; the
# placeholder below is only used when that variable is not set.
bearer_token = os.environ.get("TWITTER_BEARER_TOKEN", "your_bearer_token_here")

# Initialize the client; wait_on_rate_limit=True makes tweepy sleep and retry
# when the API rate limit is hit instead of raising an error.
client = tweepy.Client(bearer_token=bearer_token, wait_on_rate_limit=True)

# Define your query: tweets containing "flood" OR "banjir", excluding retweets.
# Note: the query itself does not restrict results to any country.
query = '(flood OR banjir) -is:retweet'

# Define your time range in ISO 8601 format (UTC); note that recent search only supports past 7 days
# Adjust these dates as needed (they must fall within the last 7 days at run time)
start_time = "2025-03-01T00:00:00Z"
end_time   = "2025-03-04T00:00:00Z"

# === Searching Tweets with Geo Information ===
# Request tweet fields (created_at and geo), expansions for geo.place_id, and place fields (including bounding box).
# Only a single page of results is requested (at most 100 tweets).
tweets = client.search_recent_tweets(
    query=query,
    tweet_fields=['created_at', 'geo'],
    expansions=['geo.place_id'],
    place_fields=['full_name', 'geo'],
    start_time=start_time,
    end_time=end_time,
    max_results=100  # Adjust as needed (max allowed per request is 100)
)

if tweets.data is None:
    # Just report and let the cell finish. Calling exit() inside Jupyter shuts
    # the kernel down (losing all notebook state) instead of ending the cell.
    print("No tweets found with the given query and time range.")
else:
    # Build a mapping from place_id to place info
    places = {}
    if tweets.includes and "places" in tweets.includes:
        places = {place.id: place for place in tweets.includes["places"]}

    # Process each tweet to extract location info; tweets without a usable
    # place bounding box are skipped silently.
    data = []
    for tweet in tweets.data:
        if not (tweet.geo and "place_id" in tweet.geo):
            continue
        place = places.get(tweet.geo["place_id"])
        if place is None or not place.geo or "bbox" not in place.geo:
            continue

        bbox = place.geo["bbox"]  # Format: [west_long, south_lat, east_long, north_lat]
        # Use the centroid of the place's bounding box as the tweet location
        lon = (bbox[0] + bbox[2]) / 2
        lat = (bbox[1] + bbox[3]) / 2
        data.append({
            "id": tweet.id,
            "date": tweet.created_at,
            "content": tweet.text,
            "latitude": lat,
            "longitude": lon,
            "place": place.full_name
        })

    # Create a DataFrame from the extracted data
    df = pd.DataFrame(data)
    print(f"Found {len(df)} tweets with location data.")

    # === Creating a GeoDataFrame and Exporting ===
    if not df.empty:
        # Build all point geometries in one vectorized call
        df['geometry'] = gpd.points_from_xy(df['longitude'], df['latitude'])
        gdf = gpd.GeoDataFrame(df, geometry='geometry', crs="EPSG:4326")  # WGS84

        # NOTE: these are the same output file names used by the
        # Indonesia-only cell, so running both cells overwrites the files.
        geojson_path = "tweets_flood_indonesia.geojson"
        shapefile_path = "tweets_flood_indonesia.shp"

        # The Shapefile format has no datetime field type, so the timestamp is
        # written as text in the Shapefile copy to keep the time of day.
        gdf_for_shapefile = gdf.assign(date=gdf['date'].astype(str))

        # Export to GeoJSON and Shapefile for QGIS
        exports = (
            ("GeoJSON", geojson_path, gdf, {"driver": "GeoJSON"}),
            ("Shapefile", shapefile_path, gdf_for_shapefile, {}),
        )
        for label, path, frame, options in exports:
            try:
                frame.to_file(path, **options)
            except PermissionError as err:
                # Happens when the existing output file is locked by another
                # process; report it and still attempt the remaining export.
                print(
                    f"Could not write {path}: the file is in use by another program "
                    f"(for example, it may be open in QGIS). Close it and re-run this cell. [{err}]"
                )
            else:
                print(f"{label} saved to {path}")
    else:
        print("No tweets with geo information were found.")


### Use this cell to search for tweets inside a given coordinate extent (the example uses the bounding box of Indonesia)

In [None]:
import tweepy
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

import os

# === Configuration ===
# Twitter API Bearer Token. Prefer setting the TWITTER_BEARER_TOKEN environment
# variable so the secret never ends up saved inside the notebook; the
# placeholder below is only used when that variable is not set.
bearer_token = os.environ.get("TWITTER_BEARER_TOKEN", "insert_your_bearer_token_here")

# Initialize the client; wait_on_rate_limit=True makes tweepy sleep and retry
# when the API rate limit is hit instead of raising an error.
client = tweepy.Client(bearer_token=bearer_token, wait_on_rate_limit=True)

# Define your query: tweets containing "flood" OR "banjir", excluding retweets.
# Note: the query itself is not limited to Indonesia; the geographic filtering
# is applied afterwards on each tweet's computed coordinates.
query = '(flood OR banjir) -is:retweet'

# Define your time range in ISO 8601 format (UTC)
start_time = "2025-03-01T00:00:00Z"
end_time   = "2025-03-04T00:00:00Z"

# === Helper: Check if coordinates are in Indonesia ===
def in_indonesia(lat, lon):
    """Return True when (lat, lon) lies inside a rough bounding box of Indonesia.

    The box spans latitude -11 to 6 and longitude 95 to 141, with all four
    edges inclusive. It is only an approximation of the country's extent.
    """
    # Reject on latitude first; longitude is only checked when latitude fits.
    if not (-11 <= lat <= 6):
        return False
    return 95 <= lon <= 141

# === Searching Tweets with Geo Information ===
# Request tweet fields (created_at and geo), expansions for geo.place_id, and
# place fields (including bounding box). Only a single page of results is
# requested (at most 100 tweets).
tweets = client.search_recent_tweets(
    query=query,
    tweet_fields=['created_at', 'geo'],
    expansions=['geo.place_id'],
    place_fields=['full_name', 'geo'],
    start_time=start_time,
    end_time=end_time,
    max_results=100  # Maximum allowed per request; adjust as needed
)

if tweets.data is None:
    # Just report and let the cell finish. Calling exit() inside Jupyter shuts
    # the kernel down (losing all notebook state) instead of ending the cell.
    print("No tweets found with the given query and time range.")
else:
    # Build a mapping from place_id to place info
    places = {}
    if tweets.includes and "places" in tweets.includes:
        places = {place.id: place for place in tweets.includes["places"]}

    # Keep only tweets whose place bounding-box centroid falls inside the
    # Indonesia extent; every rejected tweet is reported with the reason.
    data = []
    for tweet in tweets.data:
        if not (tweet.geo and "place_id" in tweet.geo):
            print(f"Tweet {tweet.id} does not include geo information.")
            continue

        place = places.get(tweet.geo["place_id"])
        if place is None or not place.geo or "bbox" not in place.geo:
            print(f"Tweet {tweet.id} has no valid bounding box info.")
            continue

        bbox = place.geo["bbox"]  # Format: [west_long, south_lat, east_long, north_lat]
        # Use the centroid of the place's bounding box as the tweet location
        lon = (bbox[0] + bbox[2]) / 2
        lat = (bbox[1] + bbox[3]) / 2

        # Filter out points that are not in Indonesia
        if not in_indonesia(lat, lon):
            print(f"Filtered out tweet {tweet.id} - computed coordinates ({lat}, {lon}) not in Indonesia.")
            continue

        data.append({
            "id": tweet.id,
            "date": tweet.created_at,
            "content": tweet.text,
            "latitude": lat,
            "longitude": lon,
            "place": place.full_name
        })

    # Create a DataFrame from the extracted data
    df = pd.DataFrame(data)
    print(f"Found {len(df)} tweets with location data in Indonesia.")

    # === Creating a GeoDataFrame and Exporting ===
    if not df.empty:
        # Build all point geometries in one vectorized call
        df['geometry'] = gpd.points_from_xy(df['longitude'], df['latitude'])
        gdf = gpd.GeoDataFrame(df, geometry='geometry', crs="EPSG:4326")  # WGS84

        geojson_path = "tweets_flood_indonesia.geojson"
        shapefile_path = "tweets_flood_indonesia.shp"

        # The Shapefile format has no datetime field type, so the timestamp is
        # written as text in the Shapefile copy to keep the time of day.
        gdf_for_shapefile = gdf.assign(date=gdf['date'].astype(str))

        # Export to GeoJSON and Shapefile for QGIS
        exports = (
            ("GeoJSON", geojson_path, gdf, {"driver": "GeoJSON"}),
            ("Shapefile", shapefile_path, gdf_for_shapefile, {}),
        )
        for label, path, frame, options in exports:
            try:
                frame.to_file(path, **options)
            except PermissionError as err:
                # Happens when the existing output file is locked by another
                # process; report it and still attempt the remaining export.
                print(
                    f"Could not write {path}: the file is in use by another program "
                    f"(for example, it may be open in QGIS). Close it and re-run this cell. [{err}]"
                )
            else:
                print(f"{label} saved to {path}")
    else:
        print("No tweets with geo information in Indonesia were found.")


Rate limit exceeded. Sleeping for 724 seconds.


Tweet 1896712184765223096 does not include geo information.
Tweet 1896712171759022586 does not include geo information.
Tweet 1896712167790948626 does not include geo information.
Tweet 1896712158907650522 does not include geo information.
Tweet 1896712156265292115 does not include geo information.
Tweet 1896712149080420607 does not include geo information.
Tweet 1896712145854996875 does not include geo information.
Tweet 1896712136132612335 does not include geo information.
Tweet 1896712135612530907 does not include geo information.
Tweet 1896712121695739996 does not include geo information.
Tweet 1896712114167030107 does not include geo information.
Tweet 1896712072609890353 does not include geo information.
Tweet 1896712066754621620 does not include geo information.
Tweet 1896712060647735452 does not include geo information.
Tweet 1896711999616385190 does not include geo information.
Tweet 1896711994184700013 does not include geo information.
Tweet 1896711985145995330 does not inclu

PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'tweets_flood_indonesia.geojson'