# Introduction to the notion of digital text corpus: get data
Examples are base on Twitter API. A valid Twitter developer API Key is needed to run the examples (see [Twitter Developers Website](https://developer.twitter.com/en) for details).

The following examples use `pip install python-twitter`

In [1]:
import twitter
import json

In [2]:
with open('secret/twitter.json', 'r') as keysfile:
    keys = json.load(keysfile)

In [3]:
api = twitter.Api(
    consumer_key=keys['consumer_key'], 
    consumer_secret=keys['consumer_secret'], 
    access_token_key=keys['access_token_key'], 
    access_token_secret=keys['access_token_secret'])

In [4]:
def get_timeline(screen, count=200, iterations=3):
    statuses = api.GetUserTimeline(screen_name=screen, count=count)
    s = dict([(s.id, s.AsDict()) for s in statuses])
    ids = sorted(s.keys())
    for i in range(iterations-1):
        statuses = api.GetUserTimeline(screen_name=screen, count=count, max_id=ids[0])
        for status in statuses:
            s[status.id] = status.AsDict()
        ids = sorted(s.keys())
    return s

In [5]:
corriere = get_timeline(screen='Corriere', count=150, iterations=3)

In [6]:
repubblica = get_timeline(screen='repubblica', count=150, iterations=3)

In [7]:
sole = get_timeline(screen='sole24ore', count=150, iterations=3)

In [8]:
data = {}
for dataset in [corriere, repubblica, sole]:
    for k, v in dataset.items():
        data[k] = v

In [9]:
len(data)

1344

In [10]:
with open('data/twitter-news.json', 'w') as out:
    json.dump(data, out)