In [2]:

####################
#Author: brandon chiazza
#version: 1.0
#purpose: to call a twitter api and return results
#documentation: https://developer.twitter.com/en/docs
#####################

import pandas as pd
import requests
import json
import base64
import s3fs # documentation: https://s3fs.readthedocs.io/en/latest/
import time
import sys
sys.path.append('/opt/anaconda3/lib/')
import twitter_keys #this is a custom reference module to a package containing twitter keys

%config IPCompleter.greedy=True


# Build the HTTP Basic credential for the token request:
# base64("<client_key>:<client_secret>"), carried as ASCII text in the header.
key_secret = '{}:{}'.format(twitter_keys.client_key, twitter_keys.client_secret).encode('ascii')
b64_encoded_key = base64.b64encode(key_secret).decode('ascii')

#identify base url and oauth token path
base_url = 'https://api.twitter.com/' #base url for authentication (the search cell builds its url from it too)
auth_url = '{}oauth2/token'.format(base_url)

#share header information -- encoding is ascii
auth_headers = {
    'Authorization': 'Basic {}'.format(b64_encoded_key),
    'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8'
}

#pass clientcredentials
auth_data = {
    'grant_type': 'client_credentials'
}

#send authentication using requests - POST request
# requests has no default timeout; without one this cell can hang forever
# if the API never answers. 30 seconds is generous for a token request.
auth_resp = requests.post(auth_url, headers=auth_headers, data=auth_data, timeout=30)

# Stop here with a clear HTTPError on a 4xx/5xx answer instead of failing
# later with a KeyError when 'access_token' is read from the body.
auth_resp.raise_for_status()

#check response status. 200 = OK
auth_resp.status_code




200

In [3]:

#Keys in data response are token_type (bearer) and access_token (your access token)
auth_payload = auth_resp.json() # parse the response body once and reuse it
print(auth_payload.keys())

access_token = auth_payload['access_token']


# every search call authenticates with the bearer token obtained above
search_headers = {
    'Authorization': 'Bearer {}'.format(access_token)
}

#enter search parameters. This looks for "Pizza" in the most recent English-language tweets
query_params = {
    'q': 'Pizza', #search term
    'result_type': 'recent', #most recent tweets rather than popular ones
    'count': 100, #tweets per request; 100 is the maximum the standard search endpoint returns per call
    'lang': 'en' #filters by english language only
}


#identify search url path and save 
search_url = '{}1.1/search/tweets.json'.format(base_url)


#run search using get request
# timeout keeps the cell from hanging indefinitely if the API never answers
search_resp = requests.get(search_url, headers=search_headers, params=query_params, timeout=30)

# Raise a clear HTTPError on a 4xx/5xx answer (bad token, rate limit, ...)
# instead of failing later when 'statuses' is read from the body.
search_resp.raise_for_status()

#check status code of GET request
search_resp.status_code


dict_keys(['token_type', 'access_token'])


200

In [4]:
# decode the search response and print one tweet as a sanity check
twitter_data = search_resp.json()

# the one-element slice limits the preview to the first tweet (nothing is
# printed when no tweets came back); drop the slice to print every tweet
for tweet in twitter_data['statuses'][:1]:
    print(tweet['text'] + '\n')

Now, my favorite pizza is 
Quattro Formaggi with plenty of Honey on the Side🍯
Besides,I will surely take some if th… https://t.co/XyLh74m2TI



In [5]:
# load the returned tweets into a data frame
df = pd.DataFrame(data=twitter_data['statuses'])

# preview the first ten rows to verify the import
df.head(n=10)

Unnamed: 0,created_at,id,id_str,text,truncated,entities,metadata,source,in_reply_to_status_id,in_reply_to_status_id_str,...,favorite_count,favorited,retweeted,lang,retweeted_status,extended_entities,possibly_sensitive,quoted_status_id,quoted_status_id_str,quoted_status
0,Sun Mar 21 15:25:40 +0000 2021,1373657067911749636,1373657067911749636,"Now, my favorite pizza is \nQuattro Formaggi w...",True,"{'hashtags': [], 'symbols': [], 'user_mentions...","{'iso_language_code': 'en', 'result_type': 're...","<a href=""http://twitter.com/download/android"" ...",1.373657e+18,1.3736569715937157e+18,...,0,False,False,en,,,,,,
1,Sun Mar 21 15:25:40 +0000 2021,1373657064959004678,1373657064959004678,RT @Mikethewander1: #LaurenBoebertIsSoDumb Sh...,False,{'hashtags': [{'text': 'LaurenBoebertIsSoDumb'...,"{'iso_language_code': 'en', 'result_type': 're...","<a href=""http://twitter.com/download/android"" ...",,,...,0,False,False,en,{'created_at': 'Sun Mar 21 15:24:32 +0000 2021...,,,,,
2,Sun Mar 21 15:25:38 +0000 2021,1373657056394219520,1373657056394219520,Pizza is kind of expensive for just 8 small sl...,False,"{'hashtags': [], 'symbols': [], 'user_mentions...","{'iso_language_code': 'en', 'result_type': 're...","<a href=""http://twitter.com/download/iphone"" r...",,,...,0,False,False,en,,,,,,
3,Sun Mar 21 15:25:35 +0000 2021,1373657045795287041,1373657045795287041,RT @TMNT: get rocked with the shell-shocked pi...,False,"{'hashtags': [], 'symbols': [], 'user_mentions...","{'iso_language_code': 'en', 'result_type': 're...","<a href=""http://twitter.com/download/android"" ...",,,...,0,False,False,en,{'created_at': 'Sun Mar 21 14:43:43 +0000 2021...,"{'media': [{'id': 1373646482105065474, 'id_str...",False,,,
4,Sun Mar 21 15:25:33 +0000 2021,1373657038513930246,1373657038513930246,@m_drez Pizza. Party. Every. Day! 🍕,False,"{'hashtags': [], 'symbols': [], 'user_mentions...","{'iso_language_code': 'en', 'result_type': 're...","<a href=""http://twitter.com/download/iphone"" r...",1.373387e+18,1.3733868745684664e+18,...,0,False,False,en,,,,,,
5,Sun Mar 21 15:25:33 +0000 2021,1373657035083030529,1373657035083030529,@iss_emmy My stuffed crust pizza face down on ...,False,"{'hashtags': [], 'symbols': [], 'user_mentions...","{'iso_language_code': 'en', 'result_type': 're...","<a href=""http://twitter.com/download/android"" ...",1.373657e+18,1.3736567564050268e+18,...,0,False,False,en,,,,,,
6,Sun Mar 21 15:25:32 +0000 2021,1373657033807912964,1373657033807912964,@markstrot @BarstoolFund @EatChicagoPizza @sto...,True,"{'hashtags': [], 'symbols': [], 'user_mentions...","{'iso_language_code': 'en', 'result_type': 're...","<a href=""https://mobile.twitter.com"" rel=""nofo...",1.372962e+18,1.372961547853521e+18,...,0,False,False,en,,,,,,
7,Sun Mar 21 15:25:29 +0000 2021,1373657021883498496,1373657021883498496,RT @StellaArtois: Retweet with #StellaMutual #...,False,"{'hashtags': [{'text': 'StellaMutual', 'indice...","{'iso_language_code': 'en', 'result_type': 're...","<a href=""https://twitter-experiment-001.heroku...",,,...,0,False,False,en,{'created_at': 'Wed Mar 17 16:31:41 +0000 2021...,,,,,
8,Sun Mar 21 15:25:28 +0000 2021,1373657014925135873,1373657014925135873,@Mirror_Ayako Amazing pizza and local craft be...,False,"{'hashtags': [], 'symbols': [], 'user_mentions...","{'iso_language_code': 'en', 'result_type': 're...","<a href=""https://about.twitter.com/products/tw...",1.373639e+18,1.3736387562810368e+18,...,0,False,False,en,,"{'media': [{'id': 1373656923275415555, 'id_str...",False,,,
9,Sun Mar 21 15:25:20 +0000 2021,1373656984042364930,1373656984042364930,RT @SUHOWorldPromo: Reply with Pizza's flavor ...,False,"{'hashtags': [{'text': 'StanWorld', 'indices':...","{'iso_language_code': 'en', 'result_type': 're...","<a href=""https://mobile.twitter.com"" rel=""nofo...",,,...,0,False,False,en,{'created_at': 'Sun Mar 21 15:22:00 +0000 2021...,,,,,


In [6]:
# write the data frame to an s3 bucket as a timestamped csv file
#prepare csv file name
# S3 URLs use a double slash after the scheme (s3://bucket/key); with a single
# slash, s3fs/fsspec versions that only strip "s3://" treat "s3:" as the bucket.
filename = 's3://lab-03/'#specify location of s3://{my-bucket}/
groupname= 'Group_2_' #name of your group
# NOTE: this variable would shadow the stdlib `datetime` module if that module
# were ever imported in this notebook; the name is kept so later cells that
# reference it keep working.
datetime = time.strftime("%Y%m%d%H%M%S") #timestamp in local time, e.g. 20210321112553
filenames3 = "%s%s%s.csv"%(filename,groupname,datetime) #name of the filepath and csv file



#to_csv(None) returns the csv as a str; encode it explicitly as UTF-8 so
#non-ASCII tweet text (emoji, accents) is written as well-defined bytes.
byte_encoded_df = df.to_csv(None).encode('utf-8')
s3 = s3fs.S3FileSystem(anon=False) #anon=False: authenticated (non-anonymous) access
with s3.open(filenames3, 'wb') as file:
    file.write(byte_encoded_df) #writes byte-encoded file to s3 location

#print success message
print("Successfull uploaded file to location:"+str(filenames3))


Successfull uploaded file to location:s3:/lab-03/Group_2_20210321112553.csv
