In [43]:
from statsmodels.tsa.arima_model import ARIMA
import pandas as pd
from urllib.request import Request, urlopen
import json
import time
import requests
from datetime import datetime as dt
import os
import sys
from apis import get_TrackICOAPI
import tweepy
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import re
from collections import Counter

consumer_key = os.getenv("TWITTER_PUBLIC_API")
consumer_secret = os.getenv("TWITTER_SECRET_KEY")
nltk.download('vader_lexicon')

addl_stopwords = [',','`', '', 'rt', 'http', 'https', 'RT', 'BTC', 'bitcoin', 'ETH', 'LTC', 'XRP', 'co', 'crypto', 'blockchain', 'cryptocurrency', 'cripto', 'litecoin']

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\cscat\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [23]:
def forecast_price(df):
    '''
    Requires df with ticker index
    '''
    fut_df = pd.DataFrame()
    forecast = {}
    df = df.fillna(0)
    for i in range(len(df)):
        try:
            ticker = df.index[i]
            ohlcv = get_crypto_daily_price(ticker)
            fut_df[ticker] = ohlcv['close']
            for i in progressbar(range(10), f"Query Success for {ticker}!, Preparing data for next call: ", 40):
                    time.sleep(0.1)

        except:
            print(f'Unsuccessfull Query for {ticker} :( Setting value to 0')
            fut_df[ticker] = 0

    for i in df.columns:
        try:
            model = ARIMA(df[i], order=(6,1,2))
            results = model.fit()
            forecast[i] = results.forecast(steps=20)[0]
            fut_df = pd.DataFrame.from_dict(forecast)
        except:
            print(f'Passing on {i}')
        
    return fut_df

In [24]:
def get_crypto_daily_price(ticker):
    ticker = ticker.upper()
    api_key = os.getenv("CC_API")
    crypto_df = pd.DataFrame()
    url = f"https://min-api.cryptocompare.com/data/v2/histoday?fsym={ticker}&tsym=USD&allData=true&api_key={api_key}"
    raw_data = read_json(url)
    df = pd.DataFrame(raw_data['Data']['Data'])
    df['time'] = pd.to_datetime(df['time'],unit='s')
    df.set_index(df['time'], inplace=True)
    df['close'] = df['close'].astype(float)
    df['var'] = df['close'].pct_change()
    return df.dropna()

In [25]:
def read_json(url):
    request = Request(url)
    response = urlopen(request)
    data = response.read()
    url2 = json.loads(data)
    return url2

In [26]:
def progressbar(it, prefix="", size=60, file=sys.stdout):
    count = len(it)
    def show(j):
        x = int(size*j/count)
        file.write("%s[%s%s] %i/%i\r" % (prefix, "#"*x, "."*(size-x), j, count))
        file.flush()        
    show(0)
    for i, item in enumerate(it):
        yield item
        show(i+1)
    file.write("\n")
    file.flush()

In [27]:
track_df = get_TrackICOAPI(2)
print(track_df.head())

                     country    platform         pre_ico_end  \
Ticker                                                         
EZ365   2019-11-01T01:59:00Z    Ethereum 2019-09-22 15:59:00   
FUNTO                 Turkey    Ethereum 2019-11-01 01:59:00   
TYC                  Germany    Ethereum 2019-10-31 00:00:00   
Hawk                   China  Blockchain 2019-08-30 00:00:00   
ORX           United Kingdom    Ethereum 2019-11-01 01:59:00   

             pre_ico_start rating   status          Name type  \
Ticker                                                          
EZ365  2019-09-05 16:00:00    4.6   Closed         EZ365  IEO   
FUNTO  2019-11-01 01:59:00    4.1  Ongoing    FunnyToken  ICO   
TYC    2019-07-22 00:00:00    5.0  Ongoing        Tycoon  ICO   
Hawk   2019-08-15 00:00:00    3.5  Ongoing  Hawk Network  ICO   
ORX    2019-11-01 01:59:00    4.8   Closed       Orionix  ICO   

                       End               Start          Duration  \
Ticker                     

In [28]:
x = forecast_price(track_df)

  import sys


Unsuccessfull Query for EZ365 :( Setting value to 0
Unsuccessfull Query for FUNTO :( Setting value to 0
Unsuccessfull Query for TYC :( Setting value to 0
Unsuccessfull Query for Hawk :( Setting value to 0
Unsuccessfull Query for ORX :( Setting value to 0
Unsuccessfull Query for DNP :( Setting value to 0
Query Success for GT!, Preparing data for next call: [########################################] 10/10
Unsuccessfull Query for QI :( Setting value to 0
Query Success for MDC!, Preparing data for next call: [########################################] 10/10
Unsuccessfull Query for MEQ :( Setting value to 0
Unsuccessfull Query for  :( Setting value to 0
Unsuccessfull Query for ASR :( Setting value to 0
Unsuccessfull Query for SPW :( Setting value to 0
Unsuccessfull Query for KTS :( Setting value to 0
Unsuccessfull Query for BRIK :( Setting value to 0
Unsuccessfull Query for  :( Setting value to 0
Unsuccessfull Query for PXP :( Setting value to 0
Unsuccessfull Query for MNT :( Setting value t



Passing on pre_ico_end




Passing on pre_ico_start
Passing on rating
Passing on status
Passing on Name
Passing on type




Passing on End




Passing on Start




Passing on Duration


  newparams = ((1-np.exp(-params))/(1+np.exp(-params))).copy()
  tmp = ((1-np.exp(-params))/(1+np.exp(-params))).copy()


Passing on pre_Duration




In [57]:
def twitter_df_score(df, N): 
    '''
    Scores an entire Dataframe of coins based on the last 'N' tweets.  Returns a dataframe of scores with a 
    '''
    scores = []
    df2=pd.DataFrame()
    num = 0
    for name in df.Name:
        num += 1
        search_term = str(name)
        print(f"Searching and Scoring {search_term}, Tweet #{num} of {len(df)}")
        tweet_df = get_twitter_scores(search_term, N)
        score = {name :{
                'Compound' : tweet_df.Compound.mean(),
                'Positive' : tweet_df.Positive.mean(),
                'Negative' : tweet_df.Negative.mean(),
                'Neutral' : tweet_df.Neutral.mean(),
        }}
        scores.append(score)
        print(f"{name} scored")
        for i in progressbar(range(10), "Waiting for Twitter Rate Limit: ", 40):
            time.sleep(1) # any calculation you need
    print(f"Scoring of {len(scores)} tweet concluded, creating dataframe")
    
    for item in scores:
        df1=pd.DataFrame.from_dict(item).T
        df2 = pd.concat([df1,df2], sort = True)
#    ndf = pd.concat([df, df2])

    return df2

In [58]:
def tokenizer(text):
    """Tokenizes text."""
    text = word_tokenize(text)
    text = [word.lower() for word in text]
    regex = re.compile("[^a-zA-Z ]")
    text = [regex.sub('', word) for word in text]
    sw = set(stopwords.words('english') + addl_stopwords)
    lemmatizer = WordNetLemmatizer()
    text = [lemmatizer.lemmatize(word) for word in text]
    clean_text = [word for word in text if word not in sw]
    return clean_text

def token_count(tokens, N=10):
    """Returns the top N tokens from the frequency count"""
    return Counter(tokens).most_common(N)


# Functions for Twitter


def get_tweets_list(topic_of_tweet, num_of_tweets):
    '''
    Returns a dataframe of the most recent 'N' tweets from Twitter tokenized and counted.
    
    Arguements: `topic_of_tweet` : str; what hashtag is being searched 
                'num_of_tweets' : int; how many tweet do you want returned
    '''
    text,time, word_list, word_count=[],[],[],[]
    auth = tweepy.AppAuthHandler(consumer_key, consumer_secret)
    api = tweepy.API(auth)
    for tweet in tweepy.Cursor(api.search, q=topic_of_tweet, tweet_mode='extended').items(num_of_tweets):
        text.append(tweet.full_text),
        time.append(tweet.created_at)
    tweets_df = pd.DataFrame({'Tweet':text}, index=time)
    [word_list.append(tokenizer(text)) for text in tweets_df.Tweet]
    tweets_df['Tokens'] = word_list
    [word_count.append(token_count(token)) for token in tweets_df.Tokens]
    tweets_df['Word_Count'] = word_count
    
    return tweets_df

def twitter_sent_analysis(tweet_df):    
    tweet_sentiments, comp, pos, neg, neu = [],[],[],[],[]
    analyzer = SentimentIntensityAnalyzer()
    for tweet in tweet_df.Tweet:
        sentiment = analyzer.polarity_scores(tweet),
        comp.append(sentiment[0]["compound"]),
        pos.append(sentiment[0]["pos"]),
        neg.append(sentiment[0]["neg"]),
        neu.append(sentiment[0]["neu"]),
  
    tweet_df['Compound'] = comp
    tweet_df['Positive'] = pos
    tweet_df['Negative'] = neg
    tweet_df['Neutral'] = neu

    return tweet_df

def count(df):
    '''
    Takes a DataFrame with a "compund" column and returns a basic count of positive, neutral, and negative sentiment in a dict format
    '''
    positive_count, negative_count, neutral_count = 0,0,0
    for i in df['Compound']:
        if i >= 0.05:
            positive_count += 1
        elif i <= -0.05:
            negative_count += 1
        else:
            neutral_count += 1
    count={
        'Positive Tweets': positive_count,
        'Neutral Tweets': neutral_count,
       'Negavtive Tweets': negative_count
    }
    return count

def get_twitter_scores(topic_of_tweet, num_of_tweets):
    df = get_tweets_list(topic_of_tweet, num_of_tweets)
    df = twitter_sent_analysis(df)
    
    return df

In [59]:
df = pd.read_csv('../../data/cleandata/track_cmc_merged_df.csv')

In [61]:
twitter_df = twitter_df_score(df, 100)

Searching and Scoring  Ethereum, Tweet #1 of 1010
 Ethereum scored
Waiting for Twitter Rate Limit: [########################################] 10/10
Searching and Scoring  XRP, Tweet #2 of 1010
 XRP scored
Waiting for Twitter Rate Limit: [########################################] 10/10
Searching and Scoring  Tether, Tweet #3 of 1010
 Tether scored
Waiting for Twitter Rate Limit: [########################################] 10/10
Searching and Scoring  Binance Coin, Tweet #4 of 1010
 Binance Coin scored
Waiting for Twitter Rate Limit: [########################################] 10/10
Searching and Scoring  EOS, Tweet #5 of 1010
 EOS scored
Waiting for Twitter Rate Limit: [########################################] 10/10
Searching and Scoring  Stellar, Tweet #6 of 1010
 Stellar scored
Waiting for Twitter Rate Limit: [########################################] 10/10
Searching and Scoring  Monero, Tweet #7 of 1010
 Monero scored
Waiting for Twitter Rate Limit: [##################################

TweepError: Twitter error response: status code = 429

In [48]:
twitter_df.head()

Unnamed: 0,Compound,Negative,Neutral,Positive
Tether,0.31845,0.0,0.87615,0.12385
XRP,0.125855,0.03695,0.90655,0.0565
Ethereum,0.199015,0.0167,0.91795,0.06535
