In [1]:
import csv
import json
import click
import requests
from time import sleep
from datetime import datetime, timedelta

In [2]:
# Proxy map handed to requests: both plain-HTTP and HTTPS traffic are routed
# through the SOCKS5 proxy listening on 127.0.0.1:1080.
# NOTE(review): "socks5://" URLs need requests' optional SOCKS support
# (PySocks) to be installed -- confirm the environment provides it.
proxies = {
    'http': "socks5://127.0.0.1:1080",
    'https': "socks5://127.0.0.1:1080",
}

In [3]:
def get_response(url, timeout=30):
    """Issue a GET request for ``url`` through the configured proxies.

    Args:
        url: Absolute URL to fetch.
        timeout: Seconds to wait for the server before giving up. requests
            applies no timeout by default, so without it a stalled
            connection would block the notebook indefinitely.

    Returns:
        The response object returned by ``requests.get``.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    return requests.get(url, proxies=proxies, timeout=timeout)

In [4]:
# Base URL that every Medium endpoint built in this notebook is appended to.
MEDIUM = 'https://medium.com'

In [5]:
# Strip the leading "])}while(1);</x>" guard and turn the JSON into a Python dictionary.
def clean_json_from(response):
    """Parse the JSON body of ``response``, dropping the guard prefix if present.

    Args:
        response: Any object exposing the body as a ``text`` string.

    Returns:
        The decoded JSON document.

    Raises:
        json.JSONDecodeError: If what remains after the prefix is not valid JSON.
    """
    guard = '])}while(1);</x>'
    # Leading whitespace is irrelevant to json.loads, so drop it before
    # looking for the guard.
    text = response.text.lstrip()
    # Only remove the guard when it really is a prefix; a plain replace()
    # would also eat the same character sequence inside a JSON string value.
    if text.startswith(guard):
        text = text[len(guard):]
    return json.loads(text)

In [6]:
def fetch_user_id_by(username):
    """Return the Medium user ID found in the JSON profile of ``username``.

    Prints a progress line, requests ``/@<username>?format=json`` and reads
    ``payload.user.userId`` from the decoded document.
    """
    print('Retrieving user ID...')

    profile_url = ''.join([MEDIUM, '/@', username, '?format=json'])
    profile = clean_json_from(get_response(profile_url))

    return profile['payload']['user']['userId']

In [7]:
user_id = fetch_user_id_by('explorewo')

Retrieving user ID...


In [8]:
user_id

'5a2e47aa48be'

In [9]:
def fetch_following_usernames_by(user_id):
    """Collect the usernames of every account that ``user_id`` follows.

    Walks the paginated ``/following`` endpoint: the first request carries no
    cursor, each later request passes the ``to`` cursor taken from the
    previous page's ``paging.next`` entry.

    Args:
        user_id: Medium user ID string.

    Returns:
        List of usernames in the order the pages returned them.
    """
    print('Retrieving usernames from Followings...')

    base_url = MEDIUM + '/_/api/users/' + user_id + '/following'
    next_id = None
    usernames = []

    while True:
        if next_id:
            # Not the first page: continue from the cursor.
            url = base_url + '?limit=8&to=' + next_id
        else:
            # First page of the followings list.
            url = base_url

        payload = clean_json_from(get_response(url))['payload']
        usernames.extend(user['username'] for user in payload['value'])

        try:
            next_id = payload['paging']['next']['to']
        except (KeyError, TypeError):
            # A missing (or null) "next"/"to" entry marks the last page.
            # Only these lookup failures end the loop; anything else
            # (Ctrl-C, network errors, real bugs) must propagate.
            break

        if not next_id:
            # An empty cursor would re-request the first page forever.
            break

    return usernames

In [10]:
# Bind the result to `following_usernames`: the cells below that store it,
# preview it and fetch the latest posts all read it under that name.
following_usernames = fetch_following_usernames_by(user_id)

Retrieving usernames from Followings...


In [20]:
# Append a list as a single row to <file_name>.csv (no timestamp is added).
def store_to_csv_0(a_list, file_name):
    """Append ``a_list`` as one CSV row to ``file_name + '.csv'``.

    Args:
        a_list: Iterable of values; each element becomes one field of the row.
        file_name: Target path without extension; ``.csv`` is appended.
    """
    # newline='' lets the csv module control line endings itself; without it
    # platforms that translate "\n" end up with blank lines between rows.
    with open(file_name + '.csv', 'a', newline='') as file:
        csv.writer(file).writerow(a_list)

In [14]:
store_to_csv_0(following_usernames, 'following_usernames')

In [15]:
following_usernames[:3]

['Stanford', 'dzungnguyen.hcm', 'quincylarson']

In [16]:
def fetch_latest_post_ids_by(usernames):
    """Gather the IDs of the latest posts of every user in ``usernames``.

    For each username the ``/latest?format=json`` page is fetched and the
    ``id`` of every entry under ``payload.references.Post`` is collected.
    Users whose page has no such section contribute nothing.

    Args:
        usernames: Iterable of Medium usernames.

    Returns:
        Flat list of post IDs.
    """
    print('Retrieving the latest posts...')

    post_ids = []
    for username in usernames:
        url = MEDIUM + '/@' + username + '/latest?format=json'
        response_dict = clean_json_from(get_response(url))

        try:
            posts = response_dict['payload']['references']['Post']
        except (KeyError, TypeError):
            # No "Post" section on the page: treat as "no posts".
            # Other errors are deliberately left to propagate.
            posts = None

        if posts:
            post_ids.extend(post['id'] for post in posts.values())

    return post_ids

In [17]:
post_ids = fetch_latest_post_ids_by(following_usernames)

Retrieving the latest posts...


In [18]:
len(post_ids)
post_ids[:3]

['239fdc841f06', 'bffd2ee9efc', 'd23dd6b6f08b']

In [21]:
store_to_csv_0(post_ids, 'post_ids')

In [22]:
def fetch_responses_of_each_post_by(post_ids):
    """Fetch a summary of the first response of every post in ``post_ids``.

    Args:
        post_ids: Iterable of Medium post ID strings.

    Returns:
        List of dicts with the keys ``created_at``, ``recommends`` and
        ``creatorId``, one per post that has at least one response.
    """
    print('Retrieving the post responses...')

    responses = []
    for post_id in post_ids:
        url = MEDIUM + '/_/api/posts/' + post_id + '/responses'
        response_dict = clean_json_from(get_response(url))

        value = response_dict['payload']['value']
        if value:
            # NOTE(review): only the first entry of the response list is
            # kept -- confirm that dropping the remaining ones is intended.
            first = value[0]
            responses.append(dict(created_at=first['createdAt'],
                                  recommends=first['virtuals']['recommends'],
                                  creatorId=first['creatorId']))

        # This is the most intensive operation for the Medium servers, so
        # pause after every request -- including posts without responses,
        # which previously skipped the delay.
        sleep(0.5)

    return responses

In [23]:
post_responses = fetch_responses_of_each_post_by(post_ids)

Retrieving the post responses...


In [28]:
post_responses[0]

{'created_at': 1524963770759, 'creatorId': '4b0b0e57293d', 'recommends': 0}

In [25]:
store_to_csv_0(post_responses, 'post_responses')

In [34]:
def is_high_recommend(response, recommend_min):
    """Tell whether ``response`` reached at least ``recommend_min`` recommends.

    Args:
        response: Mapping with a ``recommends`` entry.
        recommend_min: Minimum number of recommends required.

    Returns:
        ``True`` or ``False`` (never ``None``).
    """
    return response['recommends'] >= recommend_min


In [35]:
def is_recent(response, days=30):
    """Tell whether ``response`` was created within the last ``days`` days.

    Args:
        response: Mapping whose ``created_at`` entry is a timestamp in
            milliseconds (it is divided by 1000 before conversion).
        days: Size of the look-back window in days; defaults to 30.

    Returns:
        ``True`` or ``False`` (never ``None``).
    """
    limit_date = datetime.now() - timedelta(days=days)
    # fromtimestamp() expects seconds.
    create_date = datetime.fromtimestamp(response['created_at'] / 1000)

    return create_date >= limit_date

In [36]:
resp = post_responses[0]
resp['creatorId'], resp['recommends'], resp['created_at'] / 1000

('4b0b0e57293d', 0, 1524963770.759)

In [37]:
def fetch_user_ids_from(responses, recommend_min):
    """Return the ``creatorId`` of every response passing both filters.

    A response is kept when ``is_recent`` and ``is_high_recommend`` are both
    truthy for it; the order of ``responses`` is preserved.
    """
    print('Retrieving user IDs from the responses...')

    # The inner list makes sure both predicates run for every response,
    # recency first, before the verdicts are combined.
    return [
        entry['creatorId']
        for entry in responses
        if all([is_recent(entry), is_high_recommend(entry, recommend_min)])
    ]

In [38]:
user_ids = fetch_user_ids_from(post_responses, 1)

Retrieving user IDs from the responses...


In [39]:
user_ids

['b0d99ee145a2', '1304e27c5729', 'c4a45bce0bc1']

In [42]:
def fetch_usernames_by(user_ids):
    """Resolve each ID in ``user_ids`` to its username, one request per ID.

    The username is read from ``payload.value.username`` of the decoded
    ``/_/api/users/<id>`` document; the result keeps the input order.
    """
    print('Retrieving usernames of interesting users...')

    def _username_of(uid):
        # One lookup against the user endpoint.
        profile = clean_json_from(get_response(MEDIUM + '/_/api/users/' + uid))
        return profile['payload']['value']['username']

    return [_username_of(uid) for uid in user_ids]

In [43]:
fetch_usernames_by(user_ids)

Retrieving usernames of interesting users...


['thalesfrigo', 'maicon_wagner', 'andrejglauser']

In [44]:
# Add list of interesting users to the interesting_users.csv and add a timestamp
def store_to_csv(interesting_users_list):
    """Append a timestamped row of usernames to ``interesting_users.csv``.

    The row starts with the current local time formatted as
    ``YYYY-MM-DD HH:MM:SS`` followed by the given usernames.

    Args:
        interesting_users_list: Iterable of usernames. It is not modified.
    """
    now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    # Build a fresh row instead of insert()-ing into the argument, so the
    # caller's list does not silently gain a timestamp element.
    row = [now] + list(interesting_users_list)

    # newline='' lets the csv module control line endings itself.
    with open('interesting_users.csv', 'a', newline='') as file:
        csv.writer(file).writerow(row)

In [45]:
store_to_csv(['thalesfrigo', 'maicon_wagner', 'andrejglauser'])

In [36]:
# put them all together
def fetch_interesting_users_by(username, recommend_min):
    """Run the whole pipeline for ``username`` and return the usernames found.

    Steps: user ID -> followed usernames -> their latest post IDs ->
    responses to those posts -> IDs of responders passing the filters
    (using ``recommend_min``) -> usernames of those responders.
    """
    print('Looking for interesting users for %s...' % username)

    followed = fetch_following_usernames_by(fetch_user_id_by(username))
    responses = fetch_responses_of_each_post_by(fetch_latest_post_ids_by(followed))
    candidate_ids = fetch_user_ids_from(responses, recommend_min)

    return fetch_usernames_by(candidate_ids)