In [None]:
# Dependencies
import tweepy
import random
import time
import json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from pprint import pprint

# Twitter API Keys
from config import (consumer_key, 
                    consumer_secret, 
                    access_token, 
                    access_token_secret)
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Shared VADER sentiment analyzer instance (not referenced by the cells visible below).
analyzer = SentimentIntensityAnalyzer()

# Setup Tweepy API Authentication
# The four credentials come from the local config module imported above.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# JSONParser is requested so API calls hand back parsed JSON; the later cells
# rely on this by indexing results as dicts (e.g. result['statuses']).
api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

In [None]:
# Build a list of exactly 500 'normal' Twitter users.
#
# Repeatedly search recent English tweets containing 'today' and keep each
# unique screen name whose account passes the follower / status-count filters
# below. The results are saved to 'normal_user_names.csv'.

# How many users to collect before stopping.
TARGET_NORMAL_USERS = 500

# 'Normal user' filters (all bounds are exclusive).
NORMAL_MIN_FOLLOWERS = 15
NORMAL_MAX_FOLLOWERS = 1500
NORMAL_MIN_STATUSES = 100
NORMAL_MAX_STATUSES = 20000

# Pause between search requests, in seconds.
SEARCH_PAUSE_SECONDS = 30

normal_tweeters = []
# Set mirror of normal_tweeters so the duplicate check is O(1) instead of a
# linear scan of the list on every status.
seen_screen_names = set()

# Strict '<' (the original '<=' kept looping until the list held MORE than 500).
while len(normal_tweeters) < TARGET_NORMAL_USERS:
    normals = api.search('today', count=100, lang='en')
    normal_tweets = normals['statuses']
    for status in normal_tweets:
        user = status['user']
        screen_name = user['screen_name']
        if screen_name in seen_screen_names:
            continue
        if (NORMAL_MIN_FOLLOWERS < user['followers_count'] < NORMAL_MAX_FOLLOWERS
                and NORMAL_MIN_STATUSES < user['statuses_count'] < NORMAL_MAX_STATUSES):
            seen_screen_names.add(screen_name)
            normal_tweeters.append(screen_name)
            # Stop mid-batch so the list never overshoots the target.
            if len(normal_tweeters) == TARGET_NORMAL_USERS:
                break
    # Only wait if another request is actually going to be made.
    if len(normal_tweeters) < TARGET_NORMAL_USERS:
        time.sleep(SEARCH_PAUSE_SECONDS)

# convert to pandas DF, save to CSV
normal_users_df = pd.DataFrame({
    'Screen Name': normal_tweeters
})
normal_users_df.to_csv('normal_user_names.csv')

In [None]:
# Gather the raw columns for a basic summary table covering a random
# sample of 100 of the collected users.
test_users = random.sample(normal_tweeters, 100)

# One profile lookup per sampled user, in sample order.
profiles = [api.get_user(screen_name) for screen_name in test_users]

# Pull the three counts of interest out of every profile; each list is
# aligned position-by-position with test_users.
num_followers = [entry['followers_count'] for entry in profiles]
num_following = [entry['friends_count'] for entry in profiles]
statuses_count = [entry['statuses_count'] for entry in profiles]

In [None]:
# Assemble the per-user lists into a single pandas DataFrame
# (one row per sampled user), then write it out as CSV.
summary_columns = {
    'Screen Name': test_users,
    'Number of Followers': num_followers,
    'Number Following': num_following,
    'Number of Statuses': statuses_count,
}
normal_summ = pd.DataFrame(summary_columns)
normal_summ.to_csv('normal_user_summary.csv')

In [None]:
# ------ Get data for our 'fitness users' ------
# For every target hashtag, page backwards through recent search results and
# count how many matching tweets each 'real person' account has posted.

# Target Fitness Hashtags
# (every entry needs its own trailing comma -- adjacent string literals are
# silently concatenated by Python, which previously produced "#fit#bodybuilding")
target_tags = ["#nopainnogain", "#cardio", "#cycling", "#fitspo", "#exercise", "#gym",
               "#fitfam", "#fitlife", "#fitness", "#fitnessaddict", "#gymlife", "#gymrat",
               "#gymtime", "#sweat", "#workout", "#marathon", "#runners", "#fit",
               "#bodybuilding", "#beachbody", "#motivation", "#justdoit", "#TrainHard", "#GetFit"]

# Maps screen name -> number of matching tweets seen from that account
fitness_user_accounts = {}

# "Real Person" Filters
min_tweets = 100
max_tweets = 20000
max_followers = 1500
min_following = 50
lang = "en"

# Number of result pages (up to 100 tweets each) requested per hashtag
pages_per_tag = 7

# Loop through the hashtags
for tag in target_tags:

    # Upper bound (inclusive) on tweet id for the next request.
    # None on the first request means "start from the newest tweets".
    oldest_tweet = None

    # Page backwards through the results for this hashtag
    for page in range(pages_per_tag):

        public_tweets = api.search(tag, count=100, result_type="recent", max_id=oldest_tweet)
        statuses = public_tweets["statuses"]

        # Nothing (more) to read for this hashtag
        if not statuses:
            break

        # Loop through all tweets on this page
        for tweet in statuses:

            profile = tweet["user"]

            # Find the screen name
            user = profile["screen_name"]

            # The description may be null in the API payload; treat that as
            # empty so the substring check below cannot raise TypeError.
            description = profile["description"] or ""

            # Define whether user is a 'real' person or not
            if (profile["lang"] == lang and
                    "gym" not in description and
                    profile["followers_count"] < max_followers and
                    profile["statuses_count"] > min_tweets and
                    profile["statuses_count"] < max_tweets and
                    profile["friends_count"] > min_following):

                # First sighting starts at 1, later sightings add 1
                fitness_user_accounts[user] = fitness_user_accounts.get(user, 0) + 1

        # Move the cursor just below the lowest id on this page so the next
        # request returns older tweets instead of the same page again (without
        # this every tweet was counted once per iteration).
        oldest_tweet = min(tweet["id"] for tweet in statuses) - 1

print(fitness_user_accounts)

In [None]:
# Convert user_accounts object into a series (index = screen name, value = tweet count)
fitness_user_accounts_pd = pd.Series(fitness_user_accounts)

# sort_values returns a NEW Series rather than sorting in place, so the result
# must be assigned back; otherwise the export below is written unsorted.
fitness_user_accounts_pd = fitness_user_accounts_pd.sort_values(ascending=False)

# Export the list, most active accounts first
fitness_user_accounts_pd.to_csv("fitness_users2.csv", encoding='utf-8')