In [None]:


import json
import os

import pandas as pd
import requests


# Read the Twitter API bearer token from the TWITTER_BEARER_TOKEN environment
# variable so the secret does not have to be saved inside the notebook.
# If the variable is not set, the placeholder below is used; replace it with a
# real token only for local runs and never commit it.
bearer_token = os.environ.get('TWITTER_BEARER_TOKEN', 'YourBearerTokenHere')

# Authorization header sent with every request to the API.
headers = {'Authorization': 'Bearer ' + bearer_token}



In [None]:
# --- Search configuration ----------------------------------------------------
n = 500                           # The total number of tweets we want to store
max_results = 100                 # The number of tweets to pull per request; must be between 10 and 100
total_retrieved = 0               # Number of tweets stored so far; the loop stops once this reaches n
next_token = ""                   # Must be empty on first iteration
search_term = "I%20%20(depression%20OR%20anxiety%20OR%20depressed)"             # Already URL-encoded. To form an advanced query, see here: https://twitter.com/search-advanced?lang=en
since_id = "1440409756280934400"  # The id of the oldest tweet you want to retrieve
min_followers = 10                # Tweets whose author has fewer followers than this are skipped

search_url = 'https://api.twitter.com/2/tweets/search/recent'

# These are the extra parameters we add to the querystring of every request;
# we won't store them all though; just want you to see what's possible
extra_params = (
  '&tweet.fields=attachments,public_metrics,text,author_id'
  '&expansions=attachments.media_keys,author_id'
  '&user.fields=created_at,description,entities,id,location,name,profile_image_url,protected,public_metrics,url,username,verified,withheld'
  '&media.fields=media_key,type,url'
)

# Columns of the final DataFrame, in order; the tweet id becomes the index.
value_columns = ['retweets', 'likes', 'follower_count', 'url', 'text', 'bio', 'pfp_url']

# tweet id -> list of column values. Collecting rows in a dict and building the
# DataFrame once at the end avoids growing the DataFrame one row at a time, and
# a tweet id seen twice overwrites its earlier row instead of duplicating it.
rows = {}

# stop when we have n results
while total_retrieved < n:

  # the first time through the loop, we do not need the next_token parameter
  url = f'{search_url}?query={search_term}&max_results={max_results}&since_id={since_id}'
  if next_token != "":
    url += f'&next_token={next_token}'
  url += extra_params

  # make the request to the Twitter API Recent Search endpoint
  try:
    response = requests.request("GET", url, headers=headers, timeout=30)
  except requests.RequestException as error:
    # Network problem or timeout: keep what was collected so far and stop.
    print(f'Request failed: {error}')
    break

  # Anything other than 200 (bad token, rate limit, ...) carries no tweets;
  # show the response body and stop instead of failing on a missing key.
  if response.status_code != 200:
    print(response.text)
    break

  try:
    json_data = json.loads(response.text)
  except ValueError:
    # The body was not valid JSON; show it and stop rather than re-using the
    # previous page's data.
    print(response.text)
    break

  # A page without a 'data' key has no tweets, so there is nothing more to read.
  if 'data' not in json_data:
    break

  # Index the expanded users and media of this page for direct lookup.
  includes = json_data.get('includes', {})
  users_by_id = {user['id']: user for user in includes.get('users', [])}
  media_by_key = {media['media_key']: media for media in includes.get('media', [])}

  for tweet in json_data['data']:
    # A page can hold more tweets than we still need.
    if total_retrieved >= n:
      break

    # get profile info; skip the tweet when its author was not returned, so
    # values from a previously processed tweet are never re-used
    user = users_by_id.get(tweet.get('author_id'))
    if user is None:
      continue

    # only keep tweets whose author has at least min_followers followers
    follower_count = user['public_metrics']['followers_count']
    if follower_count < min_followers:
      continue

    # Find out if there is media; only the first media key is considered.
    # 'blank' marks a tweet without any media; a tweet whose first media item
    # is not a photo (or is missing from the includes) gets an empty string.
    media_keys = tweet.get('attachments', {}).get('media_keys', [])
    if not media_keys:
      image_url = 'blank'
    else:
      media = media_by_key.get(media_keys[0], {})
      image_url = media.get('url', "") if media.get('type') == 'photo' else ""

    rows[tweet['id']] = [
      tweet['public_metrics']['retweet_count'],
      tweet['public_metrics']['like_count'],
      follower_count,
      image_url,
      tweet['text'],
      user.get('description', ""),
      user.get('profile_image_url', ""),
    ]

    # Count every stored tweet so the loop really stops at n records.
    total_retrieved = len(rows)

  # keep track of where to start next time, but quit if there are no more results
  next_token = json_data.get('meta', {}).get('next_token', "")
  if next_token == "":
    break

# Build the DataFrame in one step, indexed by tweet id
df = pd.DataFrame(
  list(rows.values()),
  index=pd.Index(list(rows.keys()), name='id'),
  columns=value_columns,
)

print(f'Number of records:\t{len(df)}')
df.to_csv('twitter.csv')
df.head()

Number of records:	16093


Unnamed: 0_level_0,retweets,likes,follower_count,url,text,bio,pfp_url
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1442945558935527427,0,0,44,blank,Wish I could go back to being bored when I'm h...,"🏳️‍⚧️ Trans Fem, Lesbian!!!!\n\nLock Up, Shut ...",https://pbs.twimg.com/profile_images/143553419...
1442945548311400449,0,0,14861,blank,folks prolly think i act hollywood or i’m stuc...,Mop da Goat 🐐 nineonetwo🍑 LATELY❤️‍🩹 OUT NOW l...,https://pbs.twimg.com/profile_images/141654004...
1442945545606029312,87314,0,12,blank,RT @aly__dixon: doctor at my check up asked me...,game dev student. art amateur. meme connoiseur...,https://pbs.twimg.com/profile_images/143575837...
1442945545480278016,0,0,1440,blank,Ok i dead ass need some fucking cuddles &amp; ...,virgo baby ♍️ mental health counselor🦋🌈 singer...,https://pbs.twimg.com/profile_images/144278963...
1442945544817549314,0,0,118,blank,@kimoimichi @BeeBookMagic now i can't sleep cu...,Wuji / 20+ / heavy rt and random tweets / geck...,https://pbs.twimg.com/profile_images/144061695...


In [None]:
# Show (number of rows, number of columns) of the collected tweets as a quick sanity check.
df.shape

(16093, 7)

In [None]:
# Lower-case substrings whose presence in a bio flags it as mentioning pronouns.
pronoun_list = ['he/him', 'she/her', 'they/them', 'pronouns', 'they/he', 'he/they', 'they/him', 'him/they', 'they/she', 'she/they', 'they/her', 'her/they' ,'zie/', '/zie', '/ze', 'ze/' , 'queer']


def bio_has_pronouns(bio, markers):
  """Return 1 if any of `markers` occurs in `bio`, otherwise 0.

  The comparison is case-insensitive on the bio side: the bio is lower-cased
  once and each marker is searched for as a plain substring, so markers are
  expected to be lower-case already.

  Args:
    bio: The profile description to inspect. Anything that is not a string
      (for example None or NaN for a missing bio) counts as "no pronouns".
    markers: Iterable of lower-case substrings to look for.

  Returns:
    1 when at least one marker is found, 0 otherwise (also for an empty
    `markers`).
  """
  if not isinstance(bio, str):
    return 0
  lower_bio = bio.lower()
  return int(any(marker in lower_bio for marker in markers))


# One 0/1 flag per row of df, in row order.
bio_list = [bio_has_pronouns(bio, pronoun_list) for bio in df['bio']]

print(bio_list)

[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 

In [None]:
# Attach the 0/1 pronoun flags as a new column. bio_list was built by iterating
# df['bio'], so it has one entry per row in row order.
df['pronouns_in_bio'] = bio_list


In [None]:
# Display the DataFrame to inspect the newly added pronouns_in_bio column.
df

Unnamed: 0_level_0,retweets,likes,follower_count,url,text,bio,pfp_url,pronouns_in_bio
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1442945558935527427,0,0,44,blank,Wish I could go back to being bored when I'm h...,"🏳️‍⚧️ Trans Fem, Lesbian!!!!\n\nLock Up, Shut ...",https://pbs.twimg.com/profile_images/143553419...,0
1442945548311400449,0,0,14861,blank,folks prolly think i act hollywood or i’m stuc...,Mop da Goat 🐐 nineonetwo🍑 LATELY❤️‍🩹 OUT NOW l...,https://pbs.twimg.com/profile_images/141654004...,1
1442945545606029312,87314,0,12,blank,RT @aly__dixon: doctor at my check up asked me...,game dev student. art amateur. meme connoiseur...,https://pbs.twimg.com/profile_images/143575837...,0
1442945545480278016,0,0,1440,blank,Ok i dead ass need some fucking cuddles &amp; ...,virgo baby ♍️ mental health counselor🦋🌈 singer...,https://pbs.twimg.com/profile_images/144278963...,0
1442945544817549314,0,0,118,blank,@kimoimichi @BeeBookMagic now i can't sleep cu...,Wuji / 20+ / heavy rt and random tweets / geck...,https://pbs.twimg.com/profile_images/144061695...,0
...,...,...,...,...,...,...,...,...
1442831535611260933,21,0,565,blank,RT @JulieMc04581017: My life changed 4 years a...,Maga American back the military no dms,https://pbs.twimg.com/profile_images/144190299...,0
1442831519295291396,0,0,11,blank,Hi . I Just want to say sorry EXO for not givi...,I just Hit the LOTTO !,https://pbs.twimg.com/profile_images/142582815...,0
1442831502706974721,1,0,386,blank,@verynormalgrl no unfortunately i was a weeb c...,jeems bayou rat | he/they,https://pbs.twimg.com/profile_images/142169708...,1
1442831500118949895,1,20,4123,blank,me when im extremely depressed: maybe i should...,23. he/him. gay & taken. bit of a doomer,https://pbs.twimg.com/profile_images/143741105...,1


In [None]:
# Save the DataFrame, including the pronouns_in_bio column, to finalTwitter.csv.
df.to_csv('finalTwitter.csv')