In [1]:
%matplotlib inline

import tweepy as tw
import json
import pandas as pd
import numpy as np

from collections import defaultdict, Counter
import os
from IPython.display import clear_output

import networkx as nx


import matplotlib.pyplot as plt
import seaborn as sns

from io import StringIO
from pydotplus import graph_from_dot_data
import matplotlib.image as mpimg

In [2]:
# Global seaborn defaults for every figure in this notebook:
# the "poster" plotting context and the "ticks" axes style.
sns.set_context("poster")
sns.set_style("ticks")

In [3]:
# Directory (relative to the notebook) that holds data and config files.
DATA_DIR = "../data"

# JSON file in which the Twitter API credentials are stored.
TWITTER_CONFIG_FILE = os.path.join(DATA_DIR, "twitter_config.json")

## Twitter Access Tokens

Before proceeding, you need a Twitter application of your own. Create one by following the steps on the [Twitter App Creation](https://dev.twitter.com/oauth/overview/application-owner-access-tokens) page.

Make sure you have the following details of your Twitter application readily available:
* 'access_token'
* 'access_token_secret'
* 'consumer_key'
* 'consumer_secret'

When prompted by the code below, enter the value of each item exactly as it is shown in your Twitter application.


In [4]:
# One-time setup: when no credentials file exists yet, prompt for every key
# listed in the sample config and save the answers to TWITTER_CONFIG_FILE.
if not os.path.isfile(TWITTER_CONFIG_FILE):
    # The sample file supplies the set of keys that must be filled in.
    with open(os.path.join(DATA_DIR, "twitter_config.sample.json")) as fp:
        creds = json.load(fp)
    for k in sorted(creds.keys()):
        # Strip stray whitespace that often sneaks in when pasting tokens.
        v = input("Enter %s:\t" % k).strip()
        creds[k] = v
    # Deliberately do NOT print `creds`: it holds API secrets, and anything
    # echoed here could end up stored in the notebook's saved output.
    with open(TWITTER_CONFIG_FILE, "w") as fp:
        json.dump(creds, fp, indent=4, sort_keys=True)
    # Wipe the prompts (and the values typed into them) from the cell output.
    clear_output()
    print("Printed credentials to file %s" % TWITTER_CONFIG_FILE)

In [5]:
# Read the stored credentials back from the JSON config file.
with open(TWITTER_CONFIG_FILE) as fp:
    creds = json.load(fp)
# Show only the key names as a sanity check; the secret values are not printed.
print(creds.keys())

dict_keys(['consumer_key', 'consumer_secret', 'access_token', 'access_token_secret'])


In [6]:
# Build the OAuth handler from the consumer key pair, then attach the
# access token pair loaded from the config file.
auth = tw.OAuthHandler(creds["consumer_key"], creds["consumer_secret"])
auth.set_access_token(creds["access_token"], creds["access_token_secret"])

# API client configured to wait (and notify) on rate limits and to retry
# failed requests: retry_count=5, retry_delay=100.
api = tw.API(
    auth,
    wait_on_rate_limit=True,
    wait_on_rate_limit_notify=True,
    retry_count=5,
    retry_delay=100,
)

print("Tweepy ready for search")

Tweepy ready for search


In [7]:
# Collect user-search results for the query "urbana illinois",
# requesting 20 users per page over `num_pages` pages.
# NOTE(review): page numbers start at 0 here -- confirm how the endpoint
# treats page 0 vs. page 1; if they coincide, `users` contains duplicates.
num_pages = 50
users = []
for i in range(num_pages):
    users += api.search_users(q="urbana illinois", count=20, page=i)
len(users)

1000

In [8]:
# Spot-check: screen name of the first search result.
users[0].screen_name

'Illinois_Alma'

In [9]:
# Spot-check: friends_count of the first search result.
users[0].friends_count

8664

In [10]:
# Keep only the users whose friends_count is at most 100.
users_100_friends = [
    candidate
    for candidate in users
    if candidate.friends_count <= 100
]
len(users_100_friends)

287

In [14]:
# friend_matrix: user screen name -> list of that user's friends' screen names.
# all_friends:   friend screen name -> friend user object (first one seen wins).
friend_matrix = {}
all_friends = {}
# Only the first `num_users` of the filtered users are processed.
num_users = 1
for user in users_100_friends[:num_users]:
    friends = []
    for friend in tw.Cursor(api.friends, user_id=user.id).items():
        # Remember each friend object once so its profile can be looked up later.
        all_friends.setdefault(friend.screen_name, friend)
        friends.append(friend.screen_name)
    # Record the list once, after the loop, so that a user with zero friends
    # still gets an (empty) entry instead of being missing from the mapping.
    friend_matrix[user.screen_name] = friends
    print("User {} found {} friends".format(user.screen_name, len(friends)))

User dannydanr found 25 friends


In [15]:
# Inspect the mapping: user screen name -> list of friend screen names.
friend_matrix

{'dannydanr': ['nitish_gup',
  'Kordjamshidi',
  'zehavoc',
  'hadar_shemtov',
  'AI2LAB',
  'chrmanning',
  'dropshot_lob',
  'NexLP_Inc',
  'Twitter',
  'rogerfederer',
  'etzioni',
  'stanfordnlp',
  'riedelcastro',
  'darren_cahill',
  'haldaume3',
  'bgtennisnation',
  'Martina',
  'PHShriver',
  'JAIR_Editor',
  'rothella',
  'edoroth',
  'paulkrugman',
  'usopen',
  'BarackObama',
  'roamnoth']}

In [16]:
# NOTE: `user` is the loop variable left over from the friend-collection loop,
# i.e. the last user that loop processed; this cell depends on that cell
# having been run first.
user.description

'Founder Professor of Computer Science at the University of Illinois at Urbana-Champaign.'

In [17]:
def get_user_friends_desc_vector(user):
    """Count the words used in the profile descriptions of a user's friends.

    Each friend's description is lower-cased and split on whitespace; no
    punctuation stripping or other normalisation is applied.

    Parameters
    ----------
    user : object with a ``screen_name`` attribute
        Must be a key of the global ``friend_matrix``.

    Returns
    -------
    collections.Counter
        Mapping of token -> number of occurrences across all friends'
        descriptions. Empty when the user has no friends recorded.

    Raises
    ------
    KeyError
        If the user is not in ``friend_matrix`` or a friend is missing from
        ``all_friends``.
    """
    word_counts = Counter()
    for friend in friend_matrix[user.screen_name]:
        # A missing (None) description is treated as empty text.
        desc = all_friends[friend].description or ""
        # Update incrementally rather than concatenating lists with sum(),
        # which re-copies the accumulated list for every friend.
        word_counts.update(desc.lower().split())
    return word_counts

In [18]:
# Word counts over the friends' descriptions of the first user in users_100_friends.
word_counts = get_user_friends_desc_vector(users_100_friends[0])

In [19]:
# Display the resulting token counts.
word_counts

Counter({'#ai': 1,
         '#ai,': 1,
         '#machinelearning': 1,
         '#machinelearning,': 1,
         '#usopen': 1,
         '&': 7,
         '(@chrmanning,': 1,
         '(ai2);': 1,
         '+': 1,
         '--linkedin--': 1,
         '...': 1,
         '19': 1,
         '2018': 1,
         '27': 1,
         '3': 1,
         '9,': 1,
         ':),used': 1,
         '=automatic': 1,
         '@bloomsburyai,': 1,
         '@factmata.': 1,
         '@jurafsky': 1,
         '@msftresearch;': 1,
         '@nytopinion.': 1,
         '@penn': 1,
         '@percyliang)': 1,
         '@stanford.': 1,
         '@stanfordnlp.': 1,
         '@uclmr,': 1,
         '@umd_lsc,': 1,
         '@umdclip;': 1,
         '@umdcs': 1,
         'a': 2,
         'about': 2,
         'about.': 1,
         'abroad': 1,
         'actionable': 1,
         'advances': 1,
         'advisor': 1,
         'afl': 1,
         'age': 1,
         'ai': 1,
         'ailen': 1,
         'all': 1,
         'al