In [None]:
# Library installations

# !pip install praw
# !pip install tweepy

In [None]:
# Library imports

# Python Reddit API Wrapper
import praw

# Twitter API client
import tweepy

import pandas as pd 

# Introduction to Social Media Data

Data from social media platforms are more important than ever. However, fact-checking is also more important than ever as misinformation, hate speech, bots, and trolling become more and more prevalent. [Reddit](https://www.reddit.com/) and [Twitter](https://twitter.com/) are two of the most popular social media websites.

# Reddit

Let's follow this tutorial to see how you can connect to Reddit's [API](https://en.wikipedia.org/wiki/Application_programming_interface): 

https://pythonprogramming.net/introduction-python-reddit-api-wrapper-praw-tutorial/

[The praw documentation is also very useful](https://praw.readthedocs.io/en/latest/getting_started/quick_start.html)!

1. Visit https://www.reddit.com/
2. Click Sign Up/Log In to create an account or log in
3. Click the drop-down menu next to your username and select "Visit Old Reddit"
4. Click Preferences --> Apps --> create an app (at the bottom of the page)
5. Click the "script" radio button
6. Give your project a name and description
7. Enter an "about URL" if you choose (such as your project name)
8. Enter http://localhost:8080 for the "redirect uri"

# 1. Authenticate!

Create an instance and add your information: 

- client_id = the code under your project name in the upper-left
- client_secret = the "secret" value listed for your app (keep it private)
- password = your password
- user_agent = put something like: 'dhe 1.0 by /u/dh_example'
- username = your username

# Create a Reddit API instance

In [None]:
# We need these 5 things - let's overwrite them with our own!
# Collect the five credentials in one place, then hand them to praw together.
reddit_credentials = {
    'client_id': '???',
    'client_secret': '???',
    'password': '???',
    'user_agent': '???',
    'username': '???',
}

reddit = praw.Reddit(**reddit_credentials)

In [None]:
# Check out a subreddit
# (pass the subreddit's name without the "r/" prefix)
subreddit = reddit.subreddit('BlackLivesMatter')

In [None]:
# Display the subreddit's `description_html` attribute as the cell output
subreddit.description_html

In [None]:
# Get the "hot" listing of the BlackLivesMatter subreddit
hot_blm = subreddit.hot()
# Display the returned object itself; later cells iterate over it to see the posts
hot_blm

In [None]:
# Check out reddit methods
# reddit.

In [None]:
# Check out subreddit methods
# subreddit.

In [None]:
# Iterate to get the object IDs
# `hot_blm` comes from the earlier `subreddit.hot()` cell, so run that cell first.
# NOTE(review): the listing is presumably consumed by this loop - re-create
# `hot_blm` before iterating again (the following cells do exactly that).
for submission in hot_blm:
    print(submission.id)

In [None]:
# Return just the first 5 and print their titles
# A fresh listing is requested here, capped with `limit`, rather than reusing
# the `hot_blm` object from the previous cells.
hot_blm = subreddit.hot(limit = 5)
for submission in hot_blm:
    print(submission.title)

In [None]:
# Define a blank dictionary to store the metadata.
# Layout: {comment_id: [comment_body, {reply_id: [reply_ups, reply_body]}]}
conversedict = {}

# One summary line is printed per (non-stickied) submission
summary_template = 'Title: {}, ups: {}, downs: {}, Have we visited?: {}'

# Get more information
hot_blm = subreddit.hot(limit = 5)
for submission in hot_blm:
    # Stickied posts are skipped entirely
    if submission.stickied:
        continue

    print(summary_template.format(submission.title,
                                  submission.ups,
                                  submission.downs,
                                  submission.visited))

    # Resolve the comment forest, then walk every comment in it
    submission.comments.replace_more(limit=0)
    for comment in submission.comments.list():
        # Only the first sighting of a comment is recorded
        if comment.id in conversedict:
            continue

        conversedict[comment.id] = [comment.body, {}]

        # A comment whose parent is not the submission is a reply to another
        # comment: file it under that parent's entry as well
        if comment.parent() != submission.id:
            parent = str(comment.parent())
            conversedict[parent][1][comment.id] = [comment.ups, comment.body]

In [None]:
# Show every stored comment that has more than one reply, followed by its replies
for post_id, (message, replies) in conversedict.items():
    # Comments with zero or one reply are not shown
    if len(replies) <= 1:
        continue

    print('Original Message: {}'.format(message))
    print(35*'_')
    print('Replies:')
    for reply, reply_details in replies.items():
        # Each entry is the [ups, body] pair stored for that reply
        print(reply_details)

In [None]:
# Define a blank list for export to data frame
reddit_output = []

# Same filter as the previous cell: only comments with more than one reply
for post_id, (message, replies) in conversedict.items():
    if len(replies) <= 1:
        continue

    print('Original Message: {}'.format(message))
    print(35*'_')
    print('Replies:')
    # Collect the [ups, body] pair of every reply instead of printing it
    reddit_output.extend(replies.values())

In [None]:
# View the output of the variable
# (a list of [upvotes, text] pairs, one per collected reply)
reddit_output

# Convert to data frame

In [None]:
# Turn the [upvotes, text] pairs into a two-column data frame
reddit_df = pd.DataFrame(reddit_output, columns = ["Upvotes", "Text"])
# Preview the first rows
reddit_df.head()

In [None]:
# Save original message as file name...
# Here the data frame is written to "blm reddit.csv"; the path is relative,
# so the file lands in the notebook's current working directory.
reddit_df.to_csv("blm reddit.csv")

In [None]:
# Show the current working directory, i.e. where the relative CSV path above points
%pwd

# Twitter

Twitter works similarly, but you have to fill out more information to get permission to use their API. 

Here is a nice Tweepy walkthrough: 

https://realpython.com/twitter-bot-python-tweepy/

[The Tweepy docs are also very useful!](http://docs.tweepy.org/en/latest/)

1. Visit the Twitter Developer site: https://developer.twitter.com/en and create an account or log in
2. Navigate to the Developer Portal
3. Under Projects & Apps, create a new project (note down the API codes).
4. Create a new app within the project and generate access keys (note down the API codes).

# Authenticate!

In [None]:
# Similar to reddit!
# Replace the four placeholder strings with the consumer key/secret and the
# access token/secret generated for your Twitter developer app.
auth = tweepy.OAuthHandler("consumer_key", "consumer_secret")
auth.set_access_token("access_token", "access_token_secret")

# What do these arguments do?
# - wait_on_rate_limit: wait automatically for the rate limit to reset
#   instead of failing when it is hit.
# - wait_on_rate_limit_notify: print a notice while waiting (Tweepy 3.x only).
try:
    api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
except TypeError:
    # Tweepy 4.0 removed `wait_on_rate_limit_notify`, so passing it raises
    # TypeError there; fall back to the arguments both versions accept.
    api = tweepy.API(auth, wait_on_rate_limit=True)

# Print the text of every tweet on the authenticated account's home timeline
public_tweets = api.home_timeline()
for tweet in public_tweets:
    print(tweet.text)

In [None]:
# Sanity check - did you validate correctly?
try:
    api.verify_credentials()
    print("Success!")
except Exception as err:
    # Catch Exception rather than using a bare `except:` so that interrupting
    # the kernel (KeyboardInterrupt) is not reported as an authentication
    # failure, and show the reason so the problem can be diagnosed.
    print("Invalid authentication")
    print("Reason: {}".format(err))

In [None]:
# tweepy methods!
# tweepy.

In [None]:
# Maybe the Internet can provide better explanations?
# tweepy.API?

In [None]:
# Find followers
# The handle is passed by keyword: Tweepy 4.x no longer accepts it as a
# positional argument, while `screen_name=` works on both 3.x and 4.x.
user = api.get_user(screen_name="billnye")

print("User details:")
print(user.name)
print(user.description)
print(user.location)

# One page of followers is requested; each follower's display name is printed
print("Last 20 Followers:")
for follower in user.followers():
    print(follower.name)

In [None]:
# Get the User object for a twitter handle...
# (passed by keyword: Tweepy 4.x rejects a positional handle, and
# `screen_name=` is accepted by both 3.x and 4.x)
user = api.get_user(screen_name='billnye')

In [None]:
# Define an empty list
# storage = []

def tweets(user_name, num_tweets=20):
    """Return the text of tweets from a user's timeline.

    Parameters
    ----------
    user_name : str
        Twitter handle (screen name) whose timeline is requested,
        e.g. ``"billnye"``.
    num_tweets : int, optional
        Number of tweets to request from the timeline. Defaults to 20,
        the amount this function was originally written to fetch.

    Returns
    -------
    list of str
        The ``text`` attribute of each returned tweet, in the order the
        API returned them.
    """
    # Authorize yourself
    # consumer_key, consumer_secret
    auth = tweepy.OAuthHandler("????????",
                               "????????")

    # Provide your tokens
    # access_key, access_secret
    auth.set_access_token("????????",
                          "????????")

    # Define an API instance
    api = tweepy.API(auth)

    # Request the timeline. The result gets its own name so it does not shadow
    # this function, and `num_tweets` is actually forwarded to the API.
    timeline = api.user_timeline(screen_name=user_name, count=num_tweets)

    # Only the tweet body is kept; add more attributes here if they are needed
    return [status.text for status in timeline]

In [None]:
# Collect the tweet texts for the handle into a list and show the raw list
twitter_output = tweets("billnye")
print(twitter_output)

In [None]:
# Convert to DataFrame
# (a single "Tweet" column holding one tweet text per row)
twitter_df = pd.DataFrame(twitter_output, columns = ["Tweet"])
# Preview the first rows
twitter_df.head()

In [None]:
# Write the tweets data frame to "billnye tweets.csv" (relative path, so it
# is created in the notebook's current working directory)
twitter_df.to_csv("billnye tweets.csv")