In [1]:
# Dependencies
from TwitterAPI import TwitterAPI
import json
import pandas as pd

In [2]:
# Login using credentials stored in auth.json file

# Define authentication file path
auth_path = 'data/auth.json'

# Read auth.json file
with open(auth_path, 'r') as file:
    auth_dict = json.load(file)

# Execute login
api = TwitterAPI(**auth_dict)

# Verify credentials
req = api.request('account/verify_credentials')
assert req.status_code == 200, 'Authentication failed!'

In [3]:
# Retrieve tweets for @FeminismInIndia
res = api.request('search/tweets', {
    'q': 'from:FeminismInIndia',
    'tweet_mode': 'extended'
})

# Print response status
print('Response status: {:d}'.format(res.status_code))
print()

# Store retireved tweets
tweets = [tweet for tweet in res]
tweets

Response status: 200



[{'created_at': 'Wed Dec 25 09:00:01 +0000 2019',
  'id': 1209760698558865410,
  'id_str': '1209760698558865410',
  'full_text': 'देश के इस अराजक माहौल में, इस अघोषित आपातकाल में, हम उत्तेजित हैं, क्रोधित हैं, दुखी हैं। ऐसी और भी कई ढेर सारी भावनाओं ने हमारे भीतर एक कौतुहल मचा दिया है।\n\n#CAAProtest #MentalHealth \n\nhttps://t.co/uo6EdwmM9C',
  'truncated': False,
  'display_text_range': [0, 209],
  'entities': {'hashtags': [{'text': 'CAAProtest', 'indices': [158, 169]},
    {'text': 'MentalHealth', 'indices': [170, 183]}],
   'symbols': [],
   'user_mentions': [],
   'urls': [{'url': 'https://t.co/uo6EdwmM9C',
     'expanded_url': 'https://hindi.feminisminindia.com/2019/12/25/care-mental-health-during-protests-hindi/',
     'display_url': 'hindi.feminisminindia.com/2019/12/25/car…',
     'indices': [186, 209]}]},
  'metadata': {'iso_language_code': 'hi', 'result_type': 'recent'},
  'source': '<a href="https://about.twitter.com/products/tweetdeck" rel="nofollow">TweetDeck</a>',
  'in_

In [15]:
# Get entities
entities = []
for i, tweet in enumerate(tweets):
    # Retrieve hashtags
    for j, hashtag in enumerate(tweet.get('entities', {}).get('hashtags', [])):
        entities.append({
            # Id of the current tweet
            'tweet_id_str': tweet.get('id'),
            # Tweet type (e.g. hashtag)
            'type': 'hashtag',
            # Add actual hashtag text
            'text': hashtag.get('text')
        })
        
# Show entities
print('Entities retrieved:')
print(entities)

Entities retrieved:
[{'tweet_id_str': 1209760698558865410, 'type': 'hashtag', 'text': 'CAAProtest'}, {'tweet_id_str': 1209760698558865410, 'type': 'hashtag', 'text': 'MentalHealth'}, {'tweet_id_str': 1209753143518433281, 'type': 'hashtag', 'text': 'media'}, {'tweet_id_str': 1209753143518433281, 'type': 'hashtag', 'text': 'sexualviolence'}, {'tweet_id_str': 1209753143518433281, 'type': 'hashtag', 'text': 'LGBTQIA'}, {'tweet_id_str': 1209738044246351873, 'type': 'hashtag', 'text': 'queer'}, {'tweet_id_str': 1209722944559042562, 'type': 'hashtag', 'text': 'Manusmriti'}, {'tweet_id_str': 1209722944559042562, 'type': 'hashtag', 'text': 'ManusmritiDahanDiwas'}, {'tweet_id_str': 1209692745813483520, 'type': 'hashtag', 'text': 'Section144'}, {'tweet_id_str': 1209692745813483520, 'type': 'hashtag', 'text': 'IndianGovernment'}, {'tweet_id_str': 1209692745813483520, 'type': 'hashtag', 'text': 'IndiaRejectsCAA_NRC'}, {'tweet_id_str': 1209692745813483520, 'type': 'hashtag', 'text': 'CAAProtest'}, {

In [16]:
# Save entities into Pandas DataFrame object
entities = pd.DataFrame(entities)
entities.head()

Unnamed: 0,tweet_id_str,type,text
0,1209760698558865410,hashtag,CAAProtest
1,1209760698558865410,hashtag,MentalHealth
2,1209753143518433281,hashtag,media
3,1209753143518433281,hashtag,sexualviolence
4,1209753143518433281,hashtag,LGBTQIA


In [17]:
# Save entities to file
entities.to_csv('data/database/entities.csv')

In [18]:
# Define attributes which will be kept from retrieved tweets
kept_attr = ('created_at', 'id_str', 'text', 'truncated', 'geo', 'coordinates',
             'place', 'retweet_count', 'favourite_count', 'in_reply_to_status_id_str', 
             'in_reply_to_user_id_str', 'lang')

# Apply filter
for i, tweet in enumerate(tweets):
    # Handle text
    tweet['text'] = tweet['full_text' if 'full_text' in set(tweet.keys()) else 'text']
    # Substitute i-th tweet
    tweets[i] = {k: tweet.get(k, None) for k in kept_attr}
    # Show i-th tweet
    print(tweets[i])
    print()

{'created_at': 'Wed Dec 25 09:00:01 +0000 2019', 'id_str': '1209760698558865410', 'full_text': 'देश के इस अराजक माहौल में, इस अघोषित आपातकाल में, हम उत्तेजित हैं, क्रोधित हैं, दुखी हैं। ऐसी और भी कई ढेर सारी भावनाओं ने हमारे भीतर एक कौतुहल मचा दिया है।\n\n#CAAProtest #MentalHealth \n\nhttps://t.co/uo6EdwmM9C', 'text': 'देश के इस अराजक माहौल में, इस अघोषित आपातकाल में, हम उत्तेजित हैं, क्रोधित हैं, दुखी हैं। ऐसी और भी कई ढेर सारी भावनाओं ने हमारे भीतर एक कौतुहल मचा दिया है।\n\n#CAAProtest #MentalHealth \n\nhttps://t.co/uo6EdwmM9C', 'truncated': False, 'geo': None, 'coordinates': None, 'place': None, 'retweet_count': 0, 'favourite_count': None, 'in_reply_to_status_id_str': None, 'in_reply_to_user_id_str': None, 'lang': 'hi'}

{'created_at': 'Wed Dec 25 08:30:00 +0000 2019', 'id_str': '1209753143518433281', 'full_text': 'Why is the #media quiet about #sexualviolence against members of the #LGBTQIA community? @MAARnews tells us about current media coverage of these incidents. \n\nhttps://t

In [19]:
# Store tweets into Pandas DataFrame object
tweets = pd.DataFrame(tweets)
tweets.head()

Unnamed: 0,created_at,id_str,full_text,text,truncated,geo,coordinates,place,retweet_count,favourite_count,in_reply_to_status_id_str,in_reply_to_user_id_str,lang
0,Wed Dec 25 09:00:01 +0000 2019,1209760698558865410,"देश के इस अराजक माहौल में, इस अघोषित आपातकाल म...","देश के इस अराजक माहौल में, इस अघोषित आपातकाल म...",False,,,,0,,,,hi
1,Wed Dec 25 08:30:00 +0000 2019,1209753143518433281,Why is the #media quiet about #sexualviolence ...,Why is the #media quiet about #sexualviolence ...,False,,,,1,,,,en
2,Wed Dec 25 07:30:00 +0000 2019,1209738044246351873,"The future is #queer , and so was this year!\n...","The future is #queer , and so was this year!\n...",False,,,,2,,,,en
3,Wed Dec 25 06:30:00 +0000 2019,1209722944559042562,#Manusmriti was unjust towards “Untouchables” ...,#Manusmriti was unjust towards “Untouchables” ...,False,,,,5,,,,en
4,Wed Dec 25 04:30:00 +0000 2019,1209692745813483520,""" #Section144 is a piece of colonial-era legis...",""" #Section144 is a piece of colonial-era legis...",False,,,,5,,,,en


In [20]:
# Save tweets to file
tweets.to_csv('data/database/tweets.csv')