# Libraries

In [8]:
import requests
import time
import pandas as pd
from multiprocessing import Process
import os
from pytrends.request import TrendReq
import re
import time
import datetime

# Environment variables

In [2]:
# Directory, relative to the working directory, that the Binance, Google Trends
# and Twitter collectors below all build their output file paths from.
datapath = "../data"

In [6]:
def get_timestamp():
    """Return the current local date and time as a 'YYYY-MM-DD HH:MM:SS' string."""
    current_moment = datetime.datetime.now()
    return current_moment.strftime('%Y-%m-%d %H:%M:%S')

# Binance

In [182]:
def base_df(path):
    """Create a header-only klines CSV file at ``path``.

    The file gets one header row: "time" as the index column followed by the
    kline field names.  It holds no data rows; get_from_binance later reads
    the file and appends to it.

    Parameters
    ----------
    path : str
        Destination of the CSV file; an existing file is overwritten.
    """
    kline_fields = [
        "time", "open", "high", "low", "close", "volume",
        "closetime", "quote-volume", "ntrades", "taker-base-vol",
        "taker-quote-vol",
    ]
    # Promote "time" to the index so it is written as the first CSV column
    empty_klines = pd.DataFrame(columns=kline_fields).set_index("time")
    empty_klines.to_csv(path)

def get_from_binance(fsym, tsym, interval, limit):
    """Download klines from Binance and append them to the interval's CSV file.

    Requests the klines endpoint for the symbol ``fsym + tsym``, labels the
    twelve fields of every returned row, indexes the rows by their "time"
    field (converted from milliseconds to datetimes) and appends them to
    ``<datapath>/btcdata-<interval>.csv``.  The file name depends only on
    ``interval``, and the file must already exist (get_data creates it with
    base_df before calling this function).

    Parameters
    ----------
    fsym, tsym : str
        Concatenated to build the "symbol" query parameter.
    interval : str
        Sent as the "interval" query parameter and used in the file name.
    limit : int
        Sent as the "limit" query parameter.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems, timeouts or an HTTP error status.
    """
    base_url = "https://api.binance.com/api/v1/klines"
    params = {"symbol": fsym + tsym, "interval": interval, "limit": limit}
    filename = "btcdata-{}.csv".format(interval)
    fullpath = datapath + "/" + filename

    # A timeout keeps a stalled connection from blocking the polling process forever
    response = requests.get(base_url, params=params, timeout=30)
    # Fail loudly on an HTTP error instead of parsing the error payload as klines
    response.raise_for_status()

    # Keep the fields of interest, labelled by position
    df = pd.DataFrame(response.json())
    df.columns = ["time", "open", "high", "low", "close", "volume",
                  "closetime", "quote-volume", "ntrades", "taker-base-vol",
                  "taker-quote-vol", "nothing"]

    df.index = pd.to_datetime(df["time"], unit="ms")
    df = df.drop(columns="time")

    dffull = pd.read_csv(fullpath, index_col=0)
    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    # A header-only file contributes no rows, so the new rows are used as-is.
    dffull = df if dffull.empty else pd.concat([dffull, df])
    dffull.to_csv(fullpath)
        

def get_data(fsym, tsym, interval, limit, step_time):
    """Collect Binance klines for one interval into a CSV file, forever.

    Creates ``<datapath>/btcdata-<interval>.csv`` with base_df when it does
    not exist, performs a first download of 500 klines, and then loops
    without end: log the update, sleep ``limit * step_time`` seconds, and
    download ``limit`` more klines.  The function never returns.

    Parameters
    ----------
    fsym, tsym : str
        Passed through to get_from_binance.
    interval : str
        Kline interval; also selects the output file.
    limit : int
        Number of klines requested on every refresh after the first one.
    step_time : int or float
        Multiplied by ``limit`` to obtain the pause between refreshes, in
        seconds (presumably the length of one interval -- confirm with caller).
    """
    csv_path = datapath + "/" + "btcdata-{}.csv".format(interval)

    # First run for this interval: start from a header-only file
    if os.path.exists(csv_path) is False:
        base_df(csv_path)

    # Initial load (500 records at most)
    get_from_binance(fsym, tsym, interval, 500)

    # Then keep fetching one batch of `limit` klines per pause
    pause_seconds = limit * step_time
    while True:
        print("Update of file {} at {}".format(csv_path, get_timestamp()))
        time.sleep(pause_seconds)
        get_from_binance(fsym, tsym, interval, limit)
        

# Google Trends

In [162]:
def get_gtrends_index(terms, timeframe, timesleep):
    """Poll Google Trends for ``terms`` and accumulate the results in a CSV file.

    The output file ``<datapath>/bitcoin-gtrends.csv`` is recreated with only
    a header row every time this function starts.  The function then loops
    without end: request the interest-over-time table for ``terms``, append
    it to the file, log the update and sleep.  It never returns.

    Parameters
    ----------
    terms : sequence of str
        Keywords sent to Google Trends; they are also the CSV column names.
    timeframe : str
        Passed unchanged to ``TrendReq.build_payload``.
    timesleep : int or float
        Seconds to sleep between two polls.
    """
    # Accept any sequence (list, tuple, ...) of keywords
    terms = list(terms)

    pytrend = TrendReq()
    filename = "bitcoin-gtrends.csv"
    fullpath = datapath + "/" + filename

    # Start from a header-only file: "date" index plus one column per term
    header = pd.DataFrame(columns=["date"] + terms + ["isPartial"]).set_index("date")
    header.to_csv(fullpath)

    while True:
        # Query the keywords given by the caller, not a module-level list
        pytrend.build_payload(kw_list=terms, timeframe=timeframe)
        interest_over_time_df = pytrend.interest_over_time()

        dffull = pd.read_csv(fullpath, index_col=0)
        # DataFrame.append was removed in pandas 2.0; pd.concat is its
        # replacement.  Empty frames are left out; when nothing has rows the
        # file on disk is already up to date.
        frames = [frame for frame in (dffull, interest_over_time_df)
                  if not frame.empty]
        if frames:
            pd.concat(frames).to_csv(fullpath)
        print("Update of file {} at {}".format(fullpath, get_timestamp()))
        time.sleep(timesleep)
        
# Default Google Trends query used by the main cell: the keywords to look up
# and the timeframe string handed to get_gtrends_index.
related_terms = ["Bitcoin", "Cryptocurrency", "Blockchain", "BTC", "BTCUSD"]
timeframe = "now 1-H"

# Twitter

In [6]:
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import json
import textblob as textblob
from textblob import TextBlob

# https://apps.twitter.com/
# Consumer key, consumer secret, access token and access secret are read from
# environment variables so that no credential is stored in the notebook.
# NOTE: the values that used to be hard-coded here were exposed in the source
# and should be regenerated in the Twitter developer console.
_twitter_env = {
    var_name: os.environ.get(var_name, "")
    for var_name in ("TWITTER_CONSUMER_KEY", "TWITTER_CONSUMER_SECRET",
                     "TWITTER_ACCESS_TOKEN", "TWITTER_ACCESS_SECRET")
}
ckey = _twitter_env["TWITTER_CONSUMER_KEY"]
csecret = _twitter_env["TWITTER_CONSUMER_SECRET"]
atoken = _twitter_env["TWITTER_ACCESS_TOKEN"]
asecret = _twitter_env["TWITTER_ACCESS_SECRET"]

# Make a missing variable visible right away instead of at streaming time
_missing_twitter_vars = sorted(name for name, value in _twitter_env.items() if not value)
if _missing_twitter_vars:
    print("WARNING: Twitter credentials not set: {}".format(", ".join(_missing_twitter_vars)))

# Output file for the processed tweets; the stream listener appends one JSON
# object per line to it.
filename = "tweets.json"
fullpath = "/".join([datapath, filename])
# Not referenced by the code visible in this notebook section
tweet_struct = {"tweets": []}
# Value a normalized tweet is compared against to detect "nothing left"
empty = ""

# Getting 3 tweets per second
# Override tweepy.StreamListener to add logic to on_data
class StdOutlistener(StreamListener):
    """Stream listener that stores sentiment-scored tweets as JSON lines.

    Every message that carries a "text" field is normalized with
    normalize_tweet and analysed with TextBlob.  Tweets whose normalized text
    is empty or whose polarity is 0 are dropped; the others are appended to
    the file at ``fullpath``.
    """

    def on_data(self, data):
        """Process one raw stream message (a JSON string); always returns True."""
        message = json.loads(data)

        # Messages without a text body are ignored
        if "text" not in message:
            return True

        # Nothing left once mentions, hashtags and URLs are stripped
        cleaned_text = normalize_tweet(message["text"])
        if cleaned_text == empty:
            return True

        # Polarity and subjectivity of the cleaned text; neutral tweets are skipped
        sentiment = TextBlob(cleaned_text).sentiment
        if sentiment.polarity == 0:
            return True

        record = {
            "created_at": message["created_at"],
            "text": cleaned_text,
            "polarity": sentiment.polarity,
            "subjectivity": sentiment.subjectivity,
        }

        # Append the record as a single JSON line
        with open(fullpath, 'a') as tweets_file:
            tweets_file.write(json.dumps(record) + '\n')

        return True

    def on_error(self, status):
        """Print the status reported by the stream."""
        print(status)
    
# OAuth handler built from the consumer key/secret, with the access token pair
# attached; stream_tweets passes it to tweepy's Stream.
auth = OAuthHandler(ckey, csecret)
auth.set_access_token(atoken, asecret)

In [4]:
def normalize_tweet(tweet_text):
    """Strip Twitter-specific markup from a tweet and return the plain text.

    Removes, in this order: user mentions, hashtags, URLs, line breaks
    (each run becomes a single space), non-ASCII characters and the "RT : "
    prefix that remains after the mention of a retweet has been removed.

    Parameters
    ----------
    tweet_text : str
        Raw tweet text.

    Returns
    -------
    str
        The cleaned text without leading or trailing whitespace; may be empty.
    """
    # Remove user mentions (@name)
    tweet_text = re.sub(r"@[\w]*", "", tweet_text)
    # Remove hashtags (#tag)
    tweet_text = re.sub(r"#[\w]*", "", tweet_text)
    # Remove URLs
    tweet_text = re.sub(r"http\S+", "", tweet_text)
    # Turn line breaks into a space: deleting them would glue the last word of
    # one line to the first word of the next ("good\nnews" -> "goodnews")
    tweet_text = re.sub(r"[\r\n]+", " ", tweet_text)
    # Drop every character outside ASCII
    tweet_text = tweet_text.encode('ascii', 'ignore').decode("utf-8")
    # Remove the retweet tag; "RT @user: " has become "RT : " at this point
    tweet_text = re.sub(r"RT : ", "", tweet_text)

    return tweet_text.strip()

In [5]:
def stream_tweets():
    """Start a filtered Twitter stream for Bitcoin-related keywords.

    Builds a tweepy Stream from the module-level ``auth`` handler and a new
    StdOutlistener, then calls its ``filter`` method with the tracked keywords.
    """
    tracked_keywords = ['Bitcoin', 'BTC', 'BTCUSD', 'BTCUSDT']
    listener = StdOutlistener()
    Stream(auth, listener).filter(track=tracked_keywords)

In [None]:
# Run the Twitter stream directly from this cell.
# NOTE(review): the main cell also starts stream_tweets in its own process; if
# both run, both listeners append to the same tweets file -- confirm which one
# is intended.
stream_tweets()

In [155]:
#a = open("../data/tweets.json", "r")
#lines = [json.loads(line) for line in a.readlines()]

# NewsApi

In [None]:
from newsapi import NewsApiClient
# The News API key is read from the environment so that it is not stored in
# the notebook; fail early with a clear message when it is missing.
_newsapi_key = os.environ.get("NEWSAPI_KEY")
if not _newsapi_key:
    raise RuntimeError("Set the NEWSAPI_KEY environment variable to your News API key")
newsapi = NewsApiClient(api_key=_newsapi_key)

# First page of English articles about bitcoin published since 2018-04-01,
# restricted to the listed sources and domains and sorted by relevancy.
# NOTE(review): newer newsapi-python releases appear to name the date keyword
# `from_param` -- confirm against the installed version.
all_articles = newsapi.get_everything(q='bitcoin',
                                      sources='bbc-news,the-verge',
                                      domains='bbc.co.uk,techcrunch.com',
                                      from_parameter='2018-04-01',
                                      language='en',
                                      sort_by='relevancy',
                                      page=1)

# Main program

In [186]:
if __name__ == '__main__':
    # The collectors write into datapath, so create it on first use
    if os.path.exists(datapath) is False:
        os.mkdir(datapath)

    # One background process per data source.  The get_data arguments are
    # (fsym, tsym, interval, limit, step_time).
    p = Process(target=get_data, args=("BTC", "USDT", "1m", 60, 60,))
    p2 = Process(target=get_data, args=("BTC", "USDT", "15m", 4, 900))
    p3 = Process(target=get_data, args=("BTC", "USDT", "1h", 1, 3600))
    p4 = Process(target=get_gtrends_index, args=(related_terms, timeframe, 3600))
    p5 = Process(target=stream_tweets)

    # Launch the parallel processes in declaration order
    for worker in (p, p2, p3, p4, p5):
        worker.start()

Process Process-24:
Process Process-22:
Process Process-23:
Process Process-25:
Process Process-21:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/Users/justoherreroamoros/anaconda/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/justoherreroamoros/anaconda/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/justoherreroamoros/anaconda/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/justoherreroamoros/anaconda/lib/python3.6/multiprocessing/process.py", line 249, in _bootstrap
    self.run()
  File "/Users/justoherreroamoros/anaconda/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/justoherreroamoros/anaconda/lib/python3.6/site-packages/reques