In [1]:
import json
import warnings

import pandas as pd

from TwitterAPI import TwitterAPI

# NOTE(review): this silences *every* warning category for the whole kernel,
# including any warnings pandas would emit from the cells below. Consider
# narrowing the filter to a specific category once the notebook is stable.
warnings.filterwarnings("ignore")

In [2]:
# Read the API settings from the JSON config file that sits next to the notebook tree.
with open("../config/TwitterAPI.json") as f:
    config = json.load(f)

# The "main" entry of the config is handed to the TwitterAPI wrapper as-is.
api = TwitterAPI(config["main"])

In [3]:
# Search window forwarded to get_tweet_geo (start, end).
start_date, end_date = "20230203", "20230301"

# Search expression forwarded to api.search_tweets: the `has:geo` term combined
# with an OR-group of quoted keywords.
query = 'has:geo ("Derail" OR "Vinyl chloride" OR "Train Crash" OR "Ohio Train" OR "Derailment")'


def get_tweet_geo(start_date: str, end_date: str, query: str) -> pd.DataFrame:
    """Search tweets and reshape the result into a Date-indexed table.

    Uses the module-level ``api`` object (created in an earlier cell) both to
    run the search and to clean the tweet text.

    Parameters
    ----------
    start_date, end_date:
        Forwarded to ``api.search_tweets`` as ``start_time`` / ``end_time``.
        The accepted format is defined by that wrapper, not by this function.
    query:
        Search expression forwarded to ``api.search_tweets``.

    Returns
    -------
    pd.DataFrame
        One row per tweet, indexed by ``Date`` (``created_at`` formatted as
        ``%Y%m%d``) and sorted by that index. Only the capitalised columns
        produced here are kept: ``Id``, ``AuthorId``, ``Timestamp``,
        ``PlaceId``, ``Text``, ``Retweet``, ``Reply``, ``Like``, ``Quote``
        and ``Impression`` (in the order they appear in the frame).

    Notes
    -----
    Rows with a missing value in *any* column of the raw search result are
    dropped before processing. The code assumes the raw result has the columns
    ``created_at``, ``geo``, ``text`` and ``public_metrics``, with ``geo`` and
    ``public_metrics`` holding dict-like values.
    """
    params = {"geo": "place_id"}
    df_raw = api.search_tweets(query, params=params, start_time=start_date, end_time=end_date)

    # Take an explicit copy: adding columns to the bare result of dropna()
    # triggers SettingWithCopyWarning on pandas versions without Copy-on-Write.
    df_tweets = df_raw.dropna(axis=0).copy()

    df_tweets["Timestamp"] = pd.to_datetime(df_tweets["created_at"])
    df_tweets["Date"] = df_tweets["Timestamp"].dt.strftime("%Y%m%d")
    df_tweets["PlaceId"] = df_tweets["geo"].apply(lambda geo: geo["place_id"])
    df_tweets["Text"] = api.parse_tweet(df_tweets["text"])

    # Output column -> key inside each tweet's public_metrics mapping.
    metric_keys = {
        "Retweet": "retweet_count",
        "Reply": "reply_count",
        "Like": "like_count",
        "Quote": "quote_count",
        "Impression": "impression_count",
    }
    for column, key in metric_keys.items():
        # Bind `key` as a default argument so each lambda keeps its own metric.
        df_tweets[column] = df_tweets["public_metrics"].apply(
            lambda metrics, key=key: metrics[key]
        )

    df_tweets = df_tweets.rename(columns={"author_id": "AuthorId", "id": "Id"})

    # Keep only the capitalised (derived/renamed) columns. `isupper()` on the
    # first character rejects labels starting with digits or underscores and
    # is safe for empty or non-string labels.
    columns = [col for col in df_tweets.columns if str(col)[:1].isupper()]
    return df_tweets[columns].set_index(["Date"]).sort_index()

# Run the search with the parameters defined above; the result is indexed by Date.
df_tweets = get_tweet_geo(
    start_date=start_date,
    end_date=end_date,
    query=query,
)

In [6]:
# Persist the table. Note: despite the .csv extension the file is written
# tab-separated, so it must be read back with sep="\t".
df_tweets.to_csv(
    path_or_buf="../data/twitter/GeoTweets.csv",
    sep="\t",
    index_label="Date",
)