In [1]:
import numpy as np
import pandas as pd
import tweepy
import os

### Connect to the Twitter API

In [2]:
# API credentials are read from environment variables rather than being
# hard-coded in the notebook. Indexing os.environ raises KeyError if a
# variable is not set.
CONSUMER_KEY = os.environ['CONSUMER_KEY']
CONSUMER_SECRET = os.environ['CONSUMER_SECRET']
ACCESS_TOKEN = os.environ['ACCESS_TOKEN']
ACCESS_TOKEN_SECRET = os.environ['ACCESS_TOKEN_SECRET']
# NOTE(review): BEARER_TOKEN is not used by the cells visible here — confirm
# whether it is still needed.
BEARER_TOKEN = os.environ['BEARER_TOKEN']

In [4]:
# Build the OAuth handler from the consumer key/secret, attach the access
# token pair, then create the `api` client that `scrape` uses below.
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
# NOTE(review): wait_on_rate_limit=True presumably makes tweepy wait for the
# rate-limit window to reset instead of raising — confirm against tweepy docs.
api = tweepy.API(auth, wait_on_rate_limit=True)

### Extracting Data

In [5]:
def scrape(words, date_since, numtweet):
    """Return an iterator over English tweets matching a search query.

    Parameters
    ----------
    words : str
        Search query passed to the API as ``q``.
    date_since : str or None
        Earliest date to include, formatted ``YYYY-MM-DD``. It is appended
        to the query as a ``since:`` search operator. A falsy value disables
        the date filter.
    numtweet : int
        Maximum number of tweets the returned iterator will yield.

    Returns
    -------
    Iterator of tweet objects, as produced by ``tweepy.Cursor(...).items()``.
    Nothing is requested from the API until the iterator is consumed.
    """
    # `since_id` expects a numeric tweet ID, not a date, so the date filter
    # has to be expressed through the query's `since:` operator instead.
    query = f'{words} since:{date_since}' if date_since else words

    tweets = tweepy.Cursor(
        api.search_tweets,
        q=query,
        lang='en',
        tweet_mode='extended',  # full, untruncated text in `full_text`
    ).items(numtweet)

    return tweets

In [6]:
# Ask for at most 100 tweets matching 'MahsaAmini', passing '2022-09-30'
# as the `date_since` argument.
tweets = scrape(words='MahsaAmini', date_since='2022-09-30', numtweet=100)


In [7]:
# Materialise the cursor once. dict.fromkeys drops repeated tweets (as the
# former set comprehension did) while keeping the order in which the API
# returned them, and the result really is a list, as the name says.
list_tweets = list(dict.fromkeys(tweets))

In [32]:
# Start from an empty frame that has one column per field collected for
# each tweet; rows are added by a later cell.
df = pd.DataFrame(
    data={},
    columns=[
        'username', 'description', 'location',
        'following', 'followers', 'totaltweets',
        'retweetcount', 'text', 'hashtags',
    ],
)
df

Unnamed: 0,username,description,location,following,followers,totaltweets,retweetcount,text,hashtags


In [33]:
def concater(df,
             username, description, location, following,
             followers, totaltweets, retweetcount,
             text=None, hashtags=None):
    """Return a new frame consisting of `df` followed by one extra row.

    The extra row is built from the keyword values, using the column names
    'username', 'description', 'location', 'following', 'followers',
    'totaltweets', 'retweetcount', 'text' and 'hashtags'. `df` itself is
    not modified. The new row keeps its own index label (0), so repeated
    calls produce duplicate index labels.
    """
    record = dict(
        username=username,
        description=description,
        location=location,
        following=following,
        followers=followers,
        totaltweets=totaltweets,
        retweetcount=retweetcount,
        text=text,
        hashtags=hashtags,
    )
    new_row = pd.DataFrame([record])
    return pd.concat([df, new_row])

In [34]:
# Collect one plain dict per tweet and build the frame in a single step.
# Concatenating inside the loop copies the whole frame on every iteration,
# and re-running such a cell appends the same rows again.
records = []
for tweet in list_tweets:
    # A retweet carries the original tweet on `retweeted_status`; for any
    # other tweet the text is on the tweet itself. Falling back to the tweet
    # keeps the text of non-retweets, which would otherwise be stored as None.
    source = getattr(tweet, 'retweeted_status', tweet)
    text = getattr(source, 'full_text', None)

    records.append({
        'username': tweet.user.screen_name,
        'description': tweet.user.description,
        'location': tweet.user.location,
        'following': tweet.user.friends_count,
        'followers': tweet.user.followers_count,
        'totaltweets': tweet.user.statuses_count,
        'retweetcount': tweet.retweet_count,
        'text': text,
        'hashtags': tweet.entities['hashtags'],
    })

# Reuse the column layout of the existing (empty) frame so the column order
# is kept even when no tweets were returned.
df = pd.DataFrame(records, columns=df.columns)

In [35]:
# Renumber the rows 0..n-1. The length is taken from the frame itself,
# because a hard-coded 100 raises ValueError whenever the search returns a
# different number of tweets.
df.index = range(len(df))

In [36]:
# Write the collected rows to a CSV file in the current working directory.
df.to_csv('./twitterData.csv')

In [37]:
# Preview the first rows of the collected data.
df.head()

Unnamed: 0,username,description,location,following,followers,totaltweets,retweetcount,text,hashtags
0,Yasminjederzeit,,,27,11,91,4818,".@nytimes What’s going on, guys?\n#IranProtests #SharifUniversity #IranRevolution #MahsaAmini ht...","[{'text': 'IranProtests', 'indices': [50, 63]}, {'text': 'SharifUniversity', 'indices': [64, 81]..."
1,mohammdali1988,EX-Student of Tehran University- I active for a #freeIran \nPro-PMOI/MEK https://t.co/4WCealWJY3...,,663,305,44620,35,"October 3 - Tehran, #Iran \nFerdows Boulevard\nHigh school students chanting: ""Death to the dict...","[{'text': 'Iran', 'indices': [38, 43]}, {'text': 'IranProtests2022', 'indices': [119, 136]}]"
2,Sauroniops1,,,189,17,3506,2770,This the image &amp; voice of the new Iran! Highschool girls singing #ShervinHajipour's song wit...,"[{'text': 'ShervinHajipour', 'indices': [88, 104]}]"
3,Rose_zhina,,,3,11,647,20,Pt.1\nRIP @Coolio who inspired me to make this video\n#MahsaAmini #Oplran #مهسا_امینی #اعتصابات_...,"[{'text': 'MahsaAmini', 'indices': [68, 79]}, {'text': 'Oplran', 'indices': [80, 87]}, {'text': ..."
4,ChewyBirdd,ایران🖤,,261,33,2695,759,They are killing the classmates of the first American woman ever to have won a Fields Medal. Ser...,[]


In [38]:
# Treat a follower count of 0 as missing. `np.nan` is used because the
# `np.NaN` alias was removed in NumPy 2.0 and raises AttributeError there.
df['followers'] = df['followers'].apply(
    lambda x: np.nan if x == 0 else x)

In [39]:
# Show the frame after zero follower counts were replaced with NaN.
df

Unnamed: 0,username,description,location,following,followers,totaltweets,retweetcount,text,hashtags
0,Yasminjederzeit,,,27,11.0,91,4818,".@nytimes What’s going on, guys?\n#IranProtests #SharifUniversity #IranRevolution #MahsaAmini ht...","[{'text': 'IranProtests', 'indices': [50, 63]}, {'text': 'SharifUniversity', 'indices': [64, 81]..."
1,mohammdali1988,EX-Student of Tehran University- I active for a #freeIran \nPro-PMOI/MEK https://t.co/4WCealWJY3...,,663,305.0,44620,35,"October 3 - Tehran, #Iran \nFerdows Boulevard\nHigh school students chanting: ""Death to the dict...","[{'text': 'Iran', 'indices': [38, 43]}, {'text': 'IranProtests2022', 'indices': [119, 136]}]"
2,Sauroniops1,,,189,17.0,3506,2770,This the image &amp; voice of the new Iran! Highschool girls singing #ShervinHajipour's song wit...,"[{'text': 'ShervinHajipour', 'indices': [88, 104]}]"
3,Rose_zhina,,,3,11.0,647,20,Pt.1\nRIP @Coolio who inspired me to make this video\n#MahsaAmini #Oplran #مهسا_امینی #اعتصابات_...,"[{'text': 'MahsaAmini', 'indices': [68, 79]}, {'text': 'Oplran', 'indices': [80, 87]}, {'text': ..."
4,ChewyBirdd,ایران🖤,,261,33.0,2695,759,They are killing the classmates of the first American woman ever to have won a Fields Medal. Ser...,[]
...,...,...,...,...,...,...,...,...,...
95,mohammdali1988,EX-Student of Tehran University- I active for a #freeIran \nPro-PMOI/MEK https://t.co/4WCealWJY3...,,663,305.0,44620,28,"October 3 - Urmia, northwest #Iran \nUrmia University students protesting and chanting: ""Politic...","[{'text': 'Iran', 'indices': [47, 52]}]"
96,theAnt_Man1,"U.S. Army 🪖 vet,love my dogs \nDonald Dumpf and Putin are scum.\n 🌊 👋Vote blue 🌊🌊 Atheist ⚛️","Nevada, USA",723,342.0,4609,963,They are shooting students at Sharif University in Tehran. \n\nPlease let the world know! \n\n#M...,"[{'text': 'MahsaAmini', 'indices': [102, 113]}, {'text': 'ZhinaAmini', 'indices': [114, 125]}]"
97,sooriiaannaa,,,53,4.0,1424,760,They are killing the classmates of the first American woman ever to have won a Fields Medal. Ser...,[]
98,lonlyowl_Iran,,,23,,210,2050,"Dear @POTUS administration, if you send billions of dollars to prop up this murderous regime, hi...",[]


___