# Streaming Images from Twitter And Saving them to Disk

In [4]:
import requests 
import json 
import pandas as pd
import urllib
import os
from os import path
from datetime import datetime as dt

# imports the twitter_secrets python file in which we store the twitter API keys
from twitter_secrets import twitter_secrets as ts

def create_headers(bearer_token):
    headers = {"Authorization": "Bearer {}".format(bearer_token)}
    return headers
        
def set_rules(headers, delete, bearer_token, rules):
    payload = {"add": rules}
    response = requests.post(
        "https://api.twitter.com/2/tweets/search/stream/rules",
        headers=headers,
        json=payload,
    )
    if response.status_code != 201:
        raise Exception(
            "Cannot add rules (HTTP {}): {}".format(response.status_code, response.text)
        )
    print(json.dumps(response.json()))
    
def get_rules(headers, bearer_token):
    response = requests.get(
        "https://api.twitter.com/2/tweets/search/stream/rules", headers=headers
    )
    if response.status_code != 200:
        raise Exception(
            "Cannot get rules (HTTP {}): {}".format(response.status_code, response.text)
        )
    print(json.dumps(response.json()))
    return response.json()

def delete_all_rules(headers, bearer_token, rules):
    if rules is None or "data" not in rules:
        return None

    ids = list(map(lambda rule: rule["id"], rules["data"]))
    payload = {"delete": {"ids": ids}}
    response = requests.post(
        "https://api.twitter.com/2/tweets/search/stream/rules",
        headers=headers,
        json=payload
    )
    if response.status_code != 200:
        raise Exception(
            "Cannot delete rules (HTTP {}): {}".format(
                response.status_code, response.text
            )
        )
    print(json.dumps(response.json()))

def get_stream(headers, set, bearer_token, expansions, fields, save_to_disk, save_path):
    data = []
    response = requests.get(
        "https://api.twitter.com/2/tweets/search/stream" + expansions + fields, headers=headers, stream=True,
    )
    print(response.status_code)
    if response.status_code != 200:
        raise Exception(
            "Cannot get stream (HTTP {}): {}".format(
                response.status_code, response.text
            )
        )
    i = 0
    for response_line in response.iter_lines():
        i += 1
        if i == max_results:
            break
        else:
            json_response = json.loads(response_line)
            #print(json.dumps(json_response, indent=4, sort_keys=True))
            try:
                save_tweets(json_response)
                if save_to_disk == True:
                    save_media_to_disk(json_response, save_path)
            except (json.JSONDecodeError, KeyError) as err:
                # In case the JSON fails to decode, we skip this tweet
                print(f"{i}/{max_results}: ERROR: encountered a problem with a line of data... \n")
                continue
                
def save_tweets(tweet):
    #print(json.dumps(tweet, indent=4, sort_keys=True))
    data = tweet['data']
    includes = tweet['includes']
    media = includes['media']
    for line in media:
        tweet_list.append([data['id'], line['url']])  
        
def save_media_to_disk(tweet, save_path):
    data = tweet['data']
    #print(json.dumps(data, indent=4, sort_keys=True))
    includes = tweet['includes']
    media = includes['media']
    for line in media:
        media_url = line['url']
        media_key = line['media_key']
        pic = urllib.request.urlopen(media_url)
        file_path = save_path + "/" + media_key + ".jpg"
        try:
            with open(file_path, 'wb') as localFile:
                localFile.write(pic.read())
            tweet_list.append(media_key, media_url)
        except Exception as e:
            print('exception when saving media url ' + media_url + ' to path: ' + file_path)
            if path.exists(file_path):
                print("path exists")
    
def createDir(save_path):
    try:
        os.makedirs(save_path)
    except OSError:
        print ("Creation of the directory %s failed" % save_path)
        if path.exists(savepath):
            print("file already exists")
    else:
        print ("Successfully created the directory %s " % save_path)
        
# save to disk
save_to_disk = True
 
if save_to_disk == True: 
    # detect the current working directory and print it
    base_path = os.getcwd()
    print ("The current working directory is %s" % base_path)
    img_dir = '/twitter/downloaded_media/'
    # the write path in which the data will be stored. If it does not yet exist, it will be created
    now = dt.now()
    dt_string = now.strftime("%d%m%Y-%H%M%S")# ddmmYY-HMS
    save_path = base_path + img_dir + dt_string
    createDir(save_path)
    
# the max number of tweets that will be returned
max_results = 10

# You can adjust the rules if needed
search_rules = [
    {"value": "dog has:images", "tag": "dog pictures", "lang": "en"},
]

media_fields = "&media.fields=duration_ms,height,media_key,preview_image_url,public_metrics,type,url,width"
expansions = "?expansions=attachments.media_keys"
tweet_list = []

bearer_token = ts.BEARER_TOKEN
headers = create_headers(bearer_token)
rules = get_rules(headers, bearer_token)
delete = delete_all_rules(headers, bearer_token, rules)
set = set_rules(headers, delete, bearer_token, search_rules)
get_stream(headers, set, bearer_token, expansions, media_fields, save_to_disk, save_path)

df = pd.DataFrame (tweet_list, columns = ['tweetid', 'preview_image_url'])
df

The current working directory is C:\Users\Flo\relataly-public-python-tutorials
Successfully created the directory C:\Users\Flo\relataly-public-python-tutorials/twitter/downloaded_media/04012021-001439 
{"data": [{"id": "1345870715111809030", "value": "dog has:images", "tag": "dog pictures"}], "meta": {"sent": "2021-01-03T23:14:40.672Z"}}
{"meta": {"sent": "2021-01-03T23:14:41.956Z", "summary": {"deleted": 1, "not_deleted": 0}}}
{"data": [{"value": "dog has:images", "tag": "dog pictures", "id": "1345871235985584128"}], "meta": {"sent": "2021-01-03T23:14:43.456Z", "summary": {"created": 1, "not_created": 0, "valid": 1, "invalid": 0}}}
200
exception when saving media url https://pbs.twimg.com/media/Eq1-uCFXYAEHo0u.jpg to path: C:\Users\Flo\relataly-public-python-tutorials/twitter/downloaded_media/04012021-001439/3_1345871192952102913.jpg
path exists
exception when saving media url https://pbs.twimg.com/media/Eq1nlPKW4AM94gZ.jpg to path: C:\Users\Flo\relataly-public-python-tutorials/twitte

Unnamed: 0,tweetid,preview_image_url
0,1345871202225696770,https://pbs.twimg.com/media/Eq1-uCFXYAEHo0u.jpg
1,1345871208877871107,https://pbs.twimg.com/media/Eq1nlPKW4AM94gZ.jpg
2,1345871208877871107,https://pbs.twimg.com/media/Eq1nlb4XcAE71NH.jpg
3,1345871210635259904,https://pbs.twimg.com/media/Eq1qn8KW4AA7CAM.png
4,1345871210635259904,https://pbs.twimg.com/media/Eq1qv-qXAAEgEUO.png
5,1345871213273481217,https://pbs.twimg.com/media/Eq1ZGbLXAAQTsfl.jpg
6,1345871219711746050,https://pbs.twimg.com/media/EqwQFCdXYAIezhF.jpg
7,1345871234890747904,https://pbs.twimg.com/media/Eq1-u3jVEAAtxs-.jpg
8,1345871234890747904,https://pbs.twimg.com/media/Eq1-vO1VkAI4B1A.jpg
9,1345871234890747904,https://pbs.twimg.com/media/Eq1-vtiUwAEA3e3.jpg
