# Setup

In [4]:
from tqdm import tqdm
import pandas as pd
import numpy as np
from nltk.tokenize import RegexpTokenizer
import glob
import datetime

# bitcointalk Data

In [2]:
path =r'./cleaned_data/' # use your path
allFiles = glob.glob(path + "/*.csv")
df_btalk = pd.DataFrame()

list_ = []
for file_ in allFiles:
    df_btalk = pd.read_csv(file_,index_col=None)
    list_.append(df_btalk)

df_btalk = pd.concat(list_)
df_btalk.reset_index(inplace=True, drop=True)

In [11]:
df_btalk.head(2)

Unnamed: 0_level_0,category_id,topic_id,topic_title,message_number,message_author,message_text,quoteheader
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-01-12 22:20:00,57,2739481,"BITCOIN passing $20,000 and when the next bull...",1,Goosebump,"Hey everyone, I&#039;m not sure how many of yo...",
2018-01-12 23:32:00,57,2739481,"BITCOIN passing $20,000 and when the next bull...",2,among,"i'v watched, its good keep it up, i like that ...","['<div class=""quoteheader""><a href=""https://bi..."


In [6]:
date_time = []
for i in df_btalk.timestamp.values:
    utc_time = datetime.datetime.utcfromtimestamp(i)
    utc_time.strftime("%Y-%m-%d %H:%M:%S (UTC)")
    date_time.append(utc_time.strftime("%Y-%m-%d %H:%M:%S"))
date_time = pd.to_datetime(date_time)
date_time = date_time.map(lambda x: x.replace(second=0))

In [7]:
df_btalk['timestamp'] = pd.to_datetime(date_time)
df_btalk = df_btalk.set_index('timestamp')

In [10]:
df_btalk.head(2)

Unnamed: 0_level_0,category_id,topic_id,topic_title,message_number,message_author,message_text,quoteheader
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-01-12 22:20:00,57,2739481,"BITCOIN passing $20,000 and when the next bull...",1,Goosebump,"Hey everyone, I&#039;m not sure how many of yo...",
2018-01-12 23:32:00,57,2739481,"BITCOIN passing $20,000 and when the next bull...",2,among,"i'v watched, its good keep it up, i like that ...","['<div class=""quoteheader""><a href=""https://bi..."


In [28]:
df_btalk.index = df_btalk.index.map(lambda x: x.replace(second=0))

# bitcoin price local min/max Data

In [23]:
df_min_btc = pd.read_csv('min_btc.csv', parse_dates=['timestamp']).set_index('timestamp')
df_max_btc = pd.read_csv('max_btc.csv', parse_dates=['timestamp']).set_index('timestamp')

In [27]:
print(df_min_btc.head(2))
print(df_max_btc.head(2))

                          data
timestamp                     
2017-07-16 13:00:00  1778.8975
2017-09-15 11:25:00  2989.1900
                          data
timestamp                     
2017-06-12 01:35:00  2999.9775
2017-09-02 02:20:00  4979.9875


-------------------

take a window for min/max date

In [33]:
num_minutes = 60*6
res = [[((i - datetime.timedelta(minutes=x)), 'min') for x in range(0, num_minutes)] for i in df_min_btc.index]
res.extend([[((i - datetime.timedelta(minutes=x)), 'max') for x in range(0, num_minutes)] for i in df_max_btc.index])

In [34]:
num_minutes = 60*6
res.extend([[((i + datetime.timedelta(minutes=x)), 'min') for x in range(0, num_minutes)] for i in df_min_btc.index])
res.extend([[((i + datetime.timedelta(minutes=x)), 'max') for x in range(0, num_minutes)] for i in df_max_btc.index])

In [40]:
spike_time_extended = [item for sublist in res for item in sublist]
time_spike_time_extended = [i[0] for i in spike_time_extended]
value_spike_time_extended = [i[1] for i in spike_time_extended]

In [43]:
spike_time_extended_df = pd.DataFrame(value_spike_time_extended, index=(time_spike_time_extended), columns =['min_max'])

In [45]:
spike_time_extended_df.index.names =['timestamp']

In [46]:
spike_btc = df_btalk.loc[spike_time_extended_df.index].dropna()

Passing list-likes to .loc or [] with any missing label will raise
KeyError in the future, you can use .reindex() as an alternative.

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike
  """Entry point for launching an IPython kernel.


In [49]:
df_final_spike_change = pd.merge(spike_time_extended_df,spike_btc,left_index=True, right_index=True)

In [50]:
df_final_spike_change

Unnamed: 0_level_0,min_max,category_id,topic_id,topic_title,message_number,message_author,message_text,quoteheader
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2017-06-11 19:42:00,max,57.0,1956589.0,If you have 10 bitcoin !!,76.0,lite,I will hold.. i won't sell unless price is $10...,"['<div class=""quoteheader""><a href=""https://bi..."
2017-06-11 19:58:00,max,57.0,1962322.0,Is right now a good time to buy Bitcoin / Ethe...,1.0,sabriel,"Hi, I know you guys get asked this a...",[]
2017-06-11 20:01:00,max,57.0,1887509.0,Bitcoin can reach 3000 USD in a year ?,170.0,Supercrypt,Well I am hoping for more than $3000 till the ...,"['<div class=""quoteheader""><a href=""https://bi..."
2017-06-11 20:04:00,max,57.0,786795.0,Bitcoin is dead,82.0,ktabb,lol bold predictions from 2014 here,"['<div class=""quoteheader""><a href=""https://bi..."
2017-06-11 20:05:00,max,57.0,1962239.0,Bitcoin $3000,20.0,FasTroy,"No, until now bitcoin doesn't touch the 3000$,...","['<div class=""quoteheader""><a href=""https://bi..."
2017-06-11 20:08:00,max,57.0,1962239.0,Bitcoin $3000,21.0,Despacito,"Guys, finally. Here we are. Remember the the M...",[]
2017-06-11 20:26:00,max,57.0,1953173.0,segwit effect on bitcoin price,13.0,1Referee,You got it. Important to note is that thi...,"['<div class=""quoteheader""><a href=""https://bi..."
2017-06-11 20:30:00,max,57.0,1961702.0,The price of bitcoin on halving day 2020,34.0,Slark,As always it is very hard to predict future bi...,[]
2017-06-11 20:44:00,max,57.0,1935021.0,Can bitcoin hit 3500$ in 2018? ???,181.0,Eternu,"Yes you are right, and i agree with you. For n...","['<div class=""quoteheader""><a href=""https://bi..."
2017-06-11 20:52:00,max,57.0,1936065.0,Bitcoin goes to the moon,62.0,lumeire,We've seen cycles rise and falls and we k...,"['<div class=""quoteheader""><a href=""https://bi..."


# Poitive Negative

In [66]:
pos_neg_df  = pd.read_csv('pos_neg.csv')

In [67]:
pos_neg_df.head(3)

Unnamed: 0.1,Unnamed: 0,timestamp,category_id,topic_id,topic_title,message_number,message_author,message_text,quoteheader,pos,neg,neu,compound
0,0,1514379921,57,2653937,16.5K will be the major resistance for BTC ?,1,AICoin_Official,"As you can see, price started a correction imm...",,0.082,0.041,0.876,0.9201
1,1,1514380918,57,2653937,16.5K will be the major resistance for BTC ?,2,nokati,And&nbsp; who is this Kuang Ren with such a wi...,[],0.112,0.033,0.854,0.7476
2,2,1514381025,57,2653937,16.5K will be the major resistance for BTC ?,3,fabiorem,"Yes, theres a lot of resistance at this level,...",[],0.11,0.0,0.89,0.2144


In [68]:
date_time = []
for i in pos_neg_df.timestamp.values:
    utc_time = datetime.datetime.utcfromtimestamp(i)
    utc_time.strftime("%Y-%m-%d %H:%M:%S (UTC)")
    date_time.append(utc_time.strftime("%Y-%m-%d %H:%M:%S"))
    
date_time = pd.to_datetime(date_time)
date_time = date_time.map(lambda x: x.replace(second=0))

In [69]:
pos_neg_df['timestamp'] = pd.to_datetime(date_time)
pos_neg_df = pos_neg_df.set_index('timestamp')

In [70]:
pos_neg_df = pos_neg_df.drop(['Unnamed: 0'], axis = 1)

In [71]:
pos_neg_df.index = pos_neg_df.index.map(lambda x: x.replace(second=0))

In [72]:
merged = pd.merge(spike_time_extended_df, spike_btc, how='inner',left_index=True, right_index=True)

In [73]:
merged_pos_neg

Unnamed: 0_level_0,min_max,category_id_x,topic_id_x,topic_title_x,message_number_x,message_author_x,message_text_x,quoteheader_x,category_id_y,topic_id_y,topic_title_y,message_number_y,message_author_y,message_text_y,quoteheader_y,pos,neg,neu,compound
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2017-06-11 23:50:00,max,57.0,1961913.0,3000$? when?,24.0,mundang,"The wall is starting to break, the bombs are a...","['<div class=""quoteheader""><a href=""https://bi...",57,1961913,3000$? when?,24,mundang,"The wall is starting to break, the bombs are a...","['<div class=""quoteheader""><a href=""https://bi...",,,,
2017-06-11 23:51:00,max,57.0,1961913.0,3000$? when?,25.0,helpmywallet,When is will we see $4000? And $5000?,[],57,1961913,3000$? when?,25,helpmywallet,When is will we see $4000? And $5000?,[],,,,
2017-06-11 23:54:00,max,57.0,1961913.0,3000$? when?,26.0,pealr12,10$ dollars away before reaching the 3000$!!! ...,[],57,1961913,3000$? when?,26,pealr12,10$ dollars away before reaching the 3000$!!! ...,[],,,,
2017-06-11 23:56:00,max,57.0,1961913.0,3000$? when?,27.0,lionheart78,It seems Bitcoin is going to break $3000 barri...,[],57,1961913,3000$? when?,27,lionheart78,It seems Bitcoin is going to break $3000 barri...,[],,,,
2017-06-11 23:57:00,max,57.0,1961913.0,3000$? when?,28.0,eddie13,http://www.coindesk.com/bitcoin-price-crosses-...,[],57,1961913,3000$? when?,28,eddie13,http://www.coindesk.com/bitcoin-price-crosses-...,[],,,,
2017-06-12 00:02:00,max,57.0,1961913.0,3000$? when?,29.0,topper26,Coinbase is stuck on 2998.98! Come on already!!!!,[],57,1961913,3000$? when?,29,topper26,Coinbase is stuck on 2998.98! Come on already!!!!,[],,,,
2017-06-12 00:04:00,max,57.0,1949549.0,IS BITCOIN GOING BACK DOWN?,103.0,freebutcaged,"Going up instead mate, not something everyone ...",[],57,1961913,3000$? when?,30,topper26,Kripes mad dump right now we were so close! Re...,[],,,,
2017-06-12 00:04:00,max,57.0,1961913.0,3000$? when?,30.0,topper26,Kripes mad dump right now we were so close! Re...,[],57,1961913,3000$? when?,30,topper26,Kripes mad dump right now we were so close! Re...,[],,,,
2017-06-12 00:22:00,max,57.0,1961913.0,3000$? when?,31.0,JimboToronto,? ? ? ??? All I see is rising prices incl...,"['<div class=""quoteheader""><a href=""https://bi...",57,1961913,3000$? when?,31,JimboToronto,? ? ? ??? All I see is rising prices incl...,"['<div class=""quoteheader""><a href=""https://bi...",,,,
2017-06-12 00:23:00,max,57.0,1961913.0,3000$? when?,32.0,topper26,Coinbase went down to 2975 I mean not a dump d...,[],57,1961913,3000$? when?,32,topper26,Coinbase went down to 2975 I mean not a dump d...,[],,,,
