# Imports, API Object, Retrieving Tweets:

In [88]:
import os
import requests
import json 
from dotenv import load_dotenv
import time
load_dotenv()
import pandas as pd 
import csv
import tweepy
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
import re
import numpy as np

In [89]:
twitter_api_key = os.getenv("TWITTER_API_KEY")
twitter_secret_key = os.getenv("TWITTER_SECRET_KEY")
twitter_access_token = os.getenv("TWITTER_ACCESS_TOKEN")
twitter_secret_token = os.getenv("TWITTER_SECRET_TOKEN")

In [90]:
def oAuth():
    try:
        auth = tweepy.OAuthHandler(twitter_api_key,twitter_secret_key)
        auth.set_access_token(twitter_access_token,twitter_secret_token)
        return auth
    except Exception as e:
        return None

oauth= oAuth()

tweepy_api = tweepy.API(oauth)

In [91]:
def get_new_tweets(names):
    print("Retrieving tweets")
    corpus = []                                                                                        
    for name in names:
        tweets = tweepy_api.user_timeline(screen_name = name, include_rts=False, count=30, tweet_mode="extended", exclude_replies = True)          
        time.sleep(4)
        corpus.extend(tweets)                                                                          
    data = [[tweet.id_str, tweet.user.screen_name, tweet.full_text, tweet.created_at] for tweet in corpus]
    tweets = pd.DataFrame(data, columns=['tweet_id', 'screen_name', 'text', 'timestamp'])                

    return tweets

In [92]:
# Change this to whoever you want
screen_names = ['CryptoKaleo']

In [93]:
user_tweets = get_new_tweets(screen_names)

Retrieving tweets


In [94]:
user_tweets.head()

Unnamed: 0,tweet_id,screen_name,text,timestamp
0,1650579436536750096,CryptoKaleo,There's so much pressure on CT to nail every d...,2023-04-24 19:16:18+00:00
1,1650550715230695434,CryptoKaleo,Can’t even post a Pepe meme anymore without so...,2023-04-24 17:22:10+00:00
2,1650548623002808329,CryptoKaleo,GOOD AFTERNOON CRYPTO TWITTER https://t.co/KHT...,2023-04-24 17:13:52+00:00
3,1650303290675003392,CryptoKaleo,Soon https://t.co/S9hhffZiog,2023-04-24 00:59:00+00:00
4,1650280651399020548,CryptoKaleo,It actually does feel like engagement on priva...,2023-04-23 23:29:02+00:00


___

# Data Exploration, Retrieving Replies, Creating a DataFrame

**you can copy paste the screen_name and tweet_id from the dataframe above to search the replies of any tweet pulled**

In [95]:
name = 'CryptoKaleo'
tweet_id = '1650550715230695434'

**Result_type can be 'recent', 'popular', 'mixed':**
* Mixed seems to be the best option to get the highest amount of replies from the query
* Not guaranteed to get 'hidden' replies which are often the bot replies

In [96]:
replies=[]
for tweet in tweepy.Cursor(tweepy_api.search_tweets,q='to:'+name, result_type = 'recent', tweet_mode = 'extended').items(100):
    if hasattr(tweet, 'in_reply_to_status_id_str'):
        if (tweet.in_reply_to_status_id_str==tweet_id):
            replies.append(tweet)

In [97]:
len(replies)

48

In [98]:
# First 5 replies
replies[:5]

[Status(_api=<tweepy.api.API object at 0x0000026D72252B88>, _json={'created_at': 'Mon Apr 24 19:00:51 +0000 2023', 'id': 1650575548035260422, 'id_str': '1650575548035260422', 'full_text': '@CryptoKaleo @ExplainThisBob', 'truncated': False, 'display_text_range': [13, 28], 'entities': {'hashtags': [], 'symbols': [], 'user_mentions': [{'screen_name': 'CryptoKaleo', 'name': 'K A L E O', 'id': 906234475604037637, 'id_str': '906234475604037637', 'indices': [0, 12]}, {'screen_name': 'ExplainThisBob', 'name': 'Bob Is Here To Explain', 'id': 1525739203341918208, 'id_str': '1525739203341918208', 'indices': [13, 28]}], 'urls': []}, 'metadata': {'iso_language_code': 'und', 'result_type': 'recent'}, 'source': '<a href="http://twitter.com/download/iphone" rel="nofollow">Twitter for iPhone</a>', 'in_reply_to_status_id': 1650550715230695434, 'in_reply_to_status_id_str': '1650550715230695434', 'in_reply_to_user_id': 906234475604037637, 'in_reply_to_user_id_str': '906234475604037637', 'in_reply_to_scree

**Text From Replies:**

In [99]:
for i in range(0,5):
    print(f"Tweet {i}: {replies[i].full_text}")

Tweet 0: @CryptoKaleo @ExplainThisBob
Tweet 1: @CryptoKaleo Fresh new character https://t.co/EeE5zwg9AD
Tweet 2: @CryptoKaleo And when you do, I will create a meme coin for the new character
Tweet 3: @CryptoKaleo They seized the memes ser.
Tweet 4: @CryptoKaleo The wonky stonker


**Can get different pieces of info by doing something like the following:**

In [100]:
print(f"Tweet Text: {replies[0].full_text}")
print(f"Users @: {replies[0].entities['user_mentions']}")
print(f"Account Created At: {replies[0].user.created_at}")
print(f"Followers: {replies[0].user.followers_count}")


Tweet Text: @CryptoKaleo @ExplainThisBob
Users @: [{'screen_name': 'CryptoKaleo', 'name': 'K A L E O', 'id': 906234475604037637, 'id_str': '906234475604037637', 'indices': [0, 12]}, {'screen_name': 'ExplainThisBob', 'name': 'Bob Is Here To Explain', 'id': 1525739203341918208, 'id_str': '1525739203341918208', 'indices': [13, 28]}]
Account Created At: 2020-12-12 02:23:53+00:00
Followers: 984


In [101]:
text = []
screen_name = []
followers = []
following= [] #friends-count
account_age = []
verified = []
tweet_count = [] #statuses count
default_profile_image = []
user_mentions = []
linked_urls = []
reply_time = []
has_hashtag =[]

**This Determines if the reply text has a '#' in it:
* Will become a feature later

In [102]:
for i in range(len(replies)):
    if '#' in replies[i].full_text:
        has_hashtag.append('yes')
    else:
        has_hashtag.append('no') 

In [103]:
for i in range(len(replies)):
    text.append(replies[i].full_text)
    screen_name.append(replies[i].user.screen_name)
    followers.append(replies[i].user.followers_count)
    following.append(replies[i].user.friends_count)
    account_age.append(replies[i].user.created_at)
    verified.append(replies[i].user.verified)
    tweet_count.append(replies[i].user.statuses_count)
    default_profile_image.append(replies[i].user.default_profile_image)
    user_mentions.append(len(replies[i].entities['user_mentions'])) # Determines # of user mentions
    linked_urls.append(len(replies[i].entities['urls'])) # Determines if they linked a URL
    reply_time.append(replies[i].created_at) 

In [104]:
df = pd.DataFrame(
    {'text': text,
     'screen_name': screen_name,
     'followers':followers,
     'following':following,
     'account_age': account_age,
     'verified': verified,
     'tweet_count':tweet_count,
     'default_prof_img':default_profile_image,
     'user_mentions': user_mentions,
     'linked_urls' : linked_urls,
     'reply_time': reply_time,
     'has_hashtag' : has_hashtag
    })

**Our starting DataFrame:**

In [105]:
df.head()

Unnamed: 0,text,screen_name,followers,following,account_age,verified,tweet_count,default_prof_img,user_mentions,linked_urls,reply_time,has_hashtag
0,@CryptoKaleo @ExplainThisBob,dreamchaserwow,984,1199,2020-12-12 02:23:53+00:00,False,18653,False,2,0,2023-04-24 19:00:51+00:00,no
1,@CryptoKaleo Fresh new character https://t.co/...,ARFunnySeal,2165,1686,2021-04-20 23:40:01+00:00,False,6588,False,1,0,2023-04-24 19:00:17+00:00,no
2,"@CryptoKaleo And when you do, I will create a ...",TheCryptoCIA,33,316,2021-10-14 09:58:41+00:00,False,1072,False,1,0,2023-04-24 18:59:37+00:00,no
3,@CryptoKaleo They seized the memes ser.,DustinRizzo,658,1655,2012-03-10 00:49:41+00:00,False,15200,False,1,0,2023-04-24 18:57:20+00:00,no
4,@CryptoKaleo The wonky stonker,Carolina4Crypto,6338,6231,2020-04-18 18:13:54+00:00,False,52638,False,1,0,2023-04-24 18:56:28+00:00,no


---

# Data Cleanup, Adding Relevant Columns, Text Cleanup:

In [106]:
# Set time variable for when the original tweet was tweeted
tweet_time = user_tweets['timestamp'][0]

In [107]:
# Refers to original tweet creation time from earlier variable
df['tweet_time'] = tweet_time

In [108]:
# Calculates how long after the tweet was sent, the user replied to the tweet
df['time_to_respond_minutes'] = (user_tweets['timestamp'][0] - df['reply_time']).astype('timedelta64[m]')*-1

In [109]:
df.head()

Unnamed: 0,text,screen_name,followers,following,account_age,verified,tweet_count,default_prof_img,user_mentions,linked_urls,reply_time,has_hashtag,tweet_time,time_to_respond_minutes
0,@CryptoKaleo @ExplainThisBob,dreamchaserwow,984,1199,2020-12-12 02:23:53+00:00,False,18653,False,2,0,2023-04-24 19:00:51+00:00,no,2023-04-24 19:16:18+00:00,-15.0
1,@CryptoKaleo Fresh new character https://t.co/...,ARFunnySeal,2165,1686,2021-04-20 23:40:01+00:00,False,6588,False,1,0,2023-04-24 19:00:17+00:00,no,2023-04-24 19:16:18+00:00,-16.0
2,"@CryptoKaleo And when you do, I will create a ...",TheCryptoCIA,33,316,2021-10-14 09:58:41+00:00,False,1072,False,1,0,2023-04-24 18:59:37+00:00,no,2023-04-24 19:16:18+00:00,-16.0
3,@CryptoKaleo They seized the memes ser.,DustinRizzo,658,1655,2012-03-10 00:49:41+00:00,False,15200,False,1,0,2023-04-24 18:57:20+00:00,no,2023-04-24 19:16:18+00:00,-18.0
4,@CryptoKaleo The wonky stonker,Carolina4Crypto,6338,6231,2020-04-18 18:13:54+00:00,False,52638,False,1,0,2023-04-24 18:56:28+00:00,no,2023-04-24 19:16:18+00:00,-19.0


In [110]:
df = df.drop(['tweet_time','reply_time'],axis= 1)

In [111]:
# Create a variable for todays date to calculate how old the account is
today = pd.Timestamp.now()
today = today.date()

In [112]:
#calculates the accounts age in days

df['account_age_days'] = today - df['account_age'][i].date()
for i in range(len(replies)):    
    df['account_age_days'][i] = today - df['account_age'][i].date()
    df['account_age_days'][i] = df['account_age_days'][i].days

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [113]:
df['account_age_days'] = df['account_age_days'].astype('float64')

In [114]:
df = df.drop(['account_age'],axis= 1)

**Adding a column for follower to following ratio --> May be helpful**

In [115]:
df['follwers_to_following_ratio'] = (df['followers']/df['following']).round(2)

**Adding a column for average tweets per day**

In [116]:
df['avg_tweets_per_day'] = (df['tweet_count']/df['account_age_days']).round(2)

**Removes special characters, numbers, links, etc. from tweet text**

In [117]:
def clean_tweets(text):
    text = text.lower()
    text = re.sub("@[A-Za-z0-9_]+","", text)
    text = re.sub("#[A-Za-z0-9_]+","", text)
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"www.\S+", "", text)
    text = re.sub('[()!?]', ' ', text)
    text = re.sub('\[.*?\]',' ', text)
    text = re.sub("[^a-z0-9]"," ", text)
    return text

df['clean_text'] = df['text'].apply(clean_tweets)
df = df.drop('text',axis=1)

**Important Note:**
* IF 'user_mentions' > 1, then that means they mentioned someone else other than the original tweet poster in their reply. This is indicative of spam
    * The default number is 1 here, because it's counting the reply '@original_tweeter'(reply text)
* IF 'linked_urls' > 0, that means they have linked something in their reply. Also indicative of a spam reply
* Expect lower time to respond to be indicative of spam, as many of these accounts are automated using the twitter API
* Strangely, it seems that people are buying older twitter accounts to spam reply with, as it makes it harder to detect as spam if the account 
has been around for a while

In [118]:
df.head(5)

Unnamed: 0,screen_name,followers,following,verified,tweet_count,default_prof_img,user_mentions,linked_urls,has_hashtag,time_to_respond_minutes,account_age_days,follwers_to_following_ratio,avg_tweets_per_day,clean_text
0,dreamchaserwow,984,1199,False,18653,False,2,0,no,-15.0,863.0,0.82,21.61,
1,ARFunnySeal,2165,1686,False,6588,False,1,0,no,-16.0,734.0,1.28,8.98,fresh new character
2,TheCryptoCIA,33,316,False,1072,False,1,0,no,-16.0,557.0,0.1,1.92,and when you do i will create a meme coin fo...
3,DustinRizzo,658,1655,False,15200,False,1,0,no,-18.0,4062.0,0.4,3.74,they seized the memes ser
4,Carolina4Crypto,6338,6231,False,52638,False,1,0,no,-19.0,1101.0,1.02,47.81,the wonky stonker


---

# Writing to an Excel file, so we can gather as many entries as possible, before making a ML model:

In [120]:
import pandas as pd

with pd.ExcelWriter('Twitter_data.xlsx', mode= 'a', engine='openpyxl', if_sheet_exists='new') as writer:
    df.to_excel(writer)

In [121]:
for i in range(len(replies)):
    print(f"Tweet {i}: {replies[i].full_text}")

Tweet 0: @CryptoKaleo @ExplainThisBob
Tweet 1: @CryptoKaleo Fresh new character https://t.co/EeE5zwg9AD
Tweet 2: @CryptoKaleo And when you do, I will create a meme coin for the new character
Tweet 3: @CryptoKaleo They seized the memes ser.
Tweet 4: @CryptoKaleo The wonky stonker
Tweet 5: @CryptoKaleo This guy!! https://t.co/rXqTKJfIEk
Tweet 6: @CryptoKaleo Make it a duck
Tweet 7: @CryptoKaleo The folks over @ @garbagefriends have memes for every occasion. Whether your bags are Pumping or Dumping, GF has you covered!! 🗑️🚮 https://t.co/TeXSMXzpt6
Tweet 8: @CryptoKaleo https://t.co/BoP7egRIS3
Tweet 9: @CryptoKaleo Lol right ?!
Tweet 10: @CryptoKaleo I see Pepe as a symbol by Matt Furie
Tweet 11: @CryptoKaleo https://t.co/lHHMEqRNVS
Tweet 12: @CryptoKaleo when kaleo s meme presale?? asking for a friend ✍️
Tweet 13: @CryptoKaleo facts
Tweet 14: @CryptoKaleo facts
Tweet 15: @CryptoKaleo Can we do a duck please 

No bias in me saying that 

I just like ducks 

Has nothin to do with my NFT 

R