In [1]:
import os, sys, re
import pandas as pd, numpy as np, scipy as sp
import requests
from dateutil import parser
from joblib import Memory
from bson import json_util
    
%load_ext cythonmagic

In [None]:
# Input files are read from a `data` folder relative to the current working directory.
data_directory = os.path.join('data')
# Derived artefacts (currently the joblib cache) live under ~/work/stockmeme/analysis.
output_directory = os.path.join(os.path.expanduser("~"), 'work', 'stockmeme', 'analysis')

# Create the output folder on first run.
if not os.path.exists(output_directory):
    os.makedirs(output_directory)

# On-disk memoisation store used by the @joblib_cache.cache loaders in this notebook.
# NOTE(review): `cachedir` is the keyword this notebook was written against; confirm it
# is still accepted by the installed joblib version.
joblib_cache = Memory(cachedir=os.path.join(output_directory, 'joblib'), verbose=0)

# pandas display settings for the notebook session.
# NOTE(review): these are short (un-prefixed) option names; confirm the installed pandas
# still resolves them.
pd.set_option('notebook_repr_html', True)
pd.set_option('precision', 4)
pd.set_option('max_columns', 500)
pd.set_option('max_rows', 50)
pd.set_option('max_colwidth', 500)
pd.set_option('column_space', 100)
# presumably makes +/-inf count as missing values in null checks; verify against pandas docs
pd.set_option('use_inf_as_null', True)

# numpy printing: wide lines, 15 digits of precision, no scientific notation for small values.
np.set_printoptions(linewidth=200, precision=15, suppress=True)

# Shell escape: show the working directory, which matters because `data_directory` is relative.
!pwd

In [3]:
@joblib_cache.cache
def load_stocktwits(year='2013'):
    """Load the pickled StockTwits message frame for one year.

    Parameters
    ----------
    year : str or int
        Selects the file ``stocktwits_<year>.pandas`` inside ``data_directory``.
        Integers are accepted and converted to text.

    Returns
    -------
    The object stored in the pickle file. Later cells use it as a DataFrame
    with ``tickers``, ``body`` and ``username`` columns.
    """
    # Build the path from `data_directory` so this loader agrees with the
    # lexicon and rankings loaders instead of hard-coding 'data/'.
    path = os.path.join(data_directory, 'stocktwits_' + str(year) + '.pandas')
    # SECURITY: this unpickles the file, which can execute arbitrary code.
    # Only load files from a trusted source.
    # `pd.load` was superseded by `pd.read_pickle`; prefer the newer name when
    # the installed pandas provides it and fall back otherwise.
    reader = getattr(pd, 'read_pickle', None) or pd.load
    return reader(path)

In [4]:
def clean_word(word):
    """Normalise one lexicon line.

    Everything from the first '#' onwards is discarded, surrounding
    whitespace is removed and the remainder is lower-cased.
    """
    before_hash, _, _ = word.partition('#')
    return before_hash.strip().lower()

In [5]:
def _load_word_set(filename):
    """Read a lexicon file from `data_directory` into a set of cleaned words.

    Each line is passed through `clean_word`. Lines that clean to an empty
    string (blank lines, or lines holding only a '#' comment) are dropped so
    that '' never ends up in the lexicon, where it could match empty tokens.
    """
    with open(os.path.join(data_directory, filename)) as f:
        cleaned = (clean_word(line) for line in f)
        return set(word for word in cleaned if word)


# Sentiment lexicons used by calculate_score().
positive_words = _load_word_set('positive_words.txt')
negative_words = _load_word_set('negative_words.txt')

In [6]:
@joblib_cache.cache
def load_rankings():
    """Load the user rankings CSV into a dict.

    Reads ``stocktwits_rankings_full.csv`` from ``data_directory``. The first
    comma-separated field of each line is the username and the second is the
    ranking.

    Returns
    -------
    dict
        Maps the lower-cased, whitespace-stripped username to its ranking as
        a float. If a username appears more than once, the last line wins.

    Raises
    ------
    IndexError
        If a non-blank line has fewer than two fields.
    ValueError
        If the second field cannot be parsed as a float.
    """
    rankings = {}
    with open(os.path.join(data_directory, 'stocktwits_rankings_full.csv')) as f:
        for line in f:
            line = line.strip()
            # Skip blank lines instead of crashing on the missing second field.
            if not line:
                continue
            fields = line.split(',')
            rankings[fields[0].strip().lower()] = float(fields[1])
    return rankings

In [7]:
# Username -> ranking lookup table (served from the joblib cache when available).
rankings = load_rankings()
# StockTwits messages for the loader's default year.
df = load_stocktwits()

In [8]:
def calculate_score(text, positive=None, negative=None):
    """Find which lexicon words occur in a message.

    Parameters
    ----------
    text : str
        Message body. It is split on any run of whitespace, tokens containing
        '$' (ticker symbols) are dropped, and the rest are lower-cased.
    positive, negative : iterable of str, optional
        Lexicons to match against. Default to the module-level
        ``positive_words`` / ``negative_words`` sets.

    Returns
    -------
    tuple of (set, set)
        ``(positive_tokens, negative_tokens)``: the distinct tokens found in
        each lexicon. A token present in both lexicons appears in both sets.
    """
    if positive is None:
        positive = positive_words
    if negative is None:
        negative = negative_words
    # split() with no argument splits on tabs and newlines as well as spaces,
    # and never yields empty tokens for repeated separators.
    tokens = set(token.lower() for token in text.split() if '$' not in token)
    return (tokens.intersection(positive), tokens.intersection(negative))

In [9]:
# Cap on how many single-ticker messages are scored in this exploratory pass.
max_messages = 1000


def _lookup_ranking(username):
    """Return the ranking for `username`, or -1 when the user has none."""
    # load_rankings() lower-cases its keys, so normalise the lookup key the
    # same way. Non-string values (e.g. NaN) are looked up unchanged.
    key = username.lower() if hasattr(username, 'lower') else username
    return rankings.get(key, -1)


# Keep only messages that mention exactly one ticker. The .copy() makes the
# column assignments below write to an independent frame, not a slice.
df = df[df.tickers.map(len) == 1][:max_messages].copy()
# Tuple of (positive words found, negative words found) for each message body.
df['score'] = df['body'].map(calculate_score)
# Number of distinct positive / negative lexicon words in each message.
df['positive_score'] = df['score'].map(lambda found: len(found[0]))
df['negative_score'] = df['score'].map(lambda found: len(found[1]))
# `rankings` is a dict whose keys are usernames and whose values are rankings.
df['User Ranking'] = df['username'].map(_lookup_ranking)
# Drop messages from users that have no ranking.
df = df[df['User Ranking'] > -1]

df.head(50)

Unnamed: 0_level_0,id,username,body,tickers,score,positive_score,negative_score,User Ranking
created_at,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2013-01-01 00:00:55+00:00,11226191,afernandez321,$F Happy New Year everyone. Lets make it a great trading year!!,[F],"(set([great, make, happy]), set([make]))",3,1,4636
2013-01-01 00:01:26+00:00,11226194,tillman,$VXX wholy shorts batman lets jump ship....no problem Robin only problem is its a bit late now......,[VXX],"(set([]), set([bit, problem]))",0,2,1622
2013-01-01 00:02:02+00:00,11226197,afernandez321,$F ford is early overnight. I am expecting a pull back soon.,[F],"(set([back]), set([]))",1,0,4636
2013-01-01 00:02:53+00:00,11226201,drivethelane2001,@dawgpac as u know VITRASE initially granted 3 years. FDA later extended from 3-5 years . FDA decided that nce appropriate on moiety .$AMRN,[AMRN],"(set([appropriate, know]), set([]))",2,0,3579
2013-01-01 00:03:04+00:00,11226204,afernandez321,$F overbought. Sorry. This damn autocorrect on my iPhone.,[F],"(set([]), set([damn]))",0,1,4636
2013-01-01 00:05:25+00:00,11226212,bigbuddha,$CBOU Pretty lame that Caribou charges more for a small coffee than Starbucks. And the WIFI here is slow.,[CBOU],"(set([pretty]), set([lame]))",1,1,6358
2013-01-01 00:05:48+00:00,11226213,toddstrade,$GC_F weekly 50% retrace $1668 holding...Possible bullish seed wave if momentum follows-through http://stks.co/eHCo,[GC_F],"(set([]), set([]))",0,0,741
2013-01-01 00:06:50+00:00,11226219,tillman,$TZA oh deary me all those shorts assuming fiscal death.....Market decides when to short not opinions,[TZA],"(set([]), set([short]))",0,1,1622
2013-01-01 00:06:57+00:00,11226220,drivethelane2001,"@dawgpac many many weak stocks up today . I&#39;ll take 5% + gain . I&#39;m interested in VITRASE from nce angle . ISTA was purchased 4 , 500m $AMRN",[AMRN],"(set([interested, gain]), set([]))",2,0,3579
2013-01-01 00:08:54+00:00,11226229,reuvenohr,"$AAPL andrew p-fork w/ speedlines, creates current resistance. ty @fibline for horizontal speedlines http://stks.co/lHhC",[AAPL],"(set([]), set([]))",0,0,167


In [16]:
# Minimum number of matched lexicon words for a message to be listed below.
score_cutoff = 2
# Only users whose ranking value is strictly below this are listed below.
user_ranking_cutoff = 10000

In [17]:
# Messages with at least `score_cutoff` negative words, from users ranked below the cutoff.
df[
    (df['User Ranking'] < user_ranking_cutoff)
    & (df['negative_score'] >= score_cutoff)
].head(100)

Unnamed: 0_level_0,id,username,body,tickers,score,positive_score,negative_score,User Ranking
created_at,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2013-01-01 00:01:26+00:00,11226194,tillman,$VXX wholy shorts batman lets jump ship....no problem Robin only problem is its a bit late now......,[VXX],"(set([]), set([bit, problem]))",0,2,1622
2013-01-01 02:33:40+00:00,11226653,frankcapra03,"$AAPL News Wire just reported Congress reached a deal and will avoid fiscal cliff, nothing like down to the wire 2.5 hrs to spare! Cheers!",[AAPL],"(set([deal, just, like]), set([avoid, deal]))",3,2,463
2013-01-01 02:53:08+00:00,11226700,reuvenohr,&quot;@ripper11: $AAPL with deal reached perhaps pre earnings run will begin&quot;&lt;-and january effect,[AAPL],"(set([deal]), set([run, deal]))",1,2,167
2013-01-01 08:12:03+00:00,11227177,reuvenohr,"&quot;If u want to be successful, you need to think of the long run and ignore the outcomes of individual trades.&quot; - curtis faith (turtle) $study",[STUDY],"(set([faith]), set([need, run]))",1,2,167
2013-01-01 14:00:41+00:00,11227485,jdells06,@tman2k @tg33316 agree on Kass. Also bear in mind his durations are so short you would never know - best ignored. Happy new year!! $aapl,[AAPL],"(set([mind, know, best, happy]), set([short, mind]))",4,2,4161
2013-01-01 14:50:58+00:00,11227673,mainetrader,&quot;@WolfpackInvest: $VRNG They can raise all the taxes they want &amp; does not dent the fiscal issues we have ... downsize govt period.&quot;/AGREE,[VRNG],"(set([]), set([raise, dent]))",0,2,7961
2013-01-01 15:46:45+00:00,11227881,dawgpac,$AMRN PS gotta like those projected Vascepa sales on p18. Although Ram at Aegis sees $6-$10B at peak. Like that even better. AMRN&#39;s dilemma,[AMRN],"(set([even, like]), set([even, dilemma]))",2,2,2774
2013-01-01 16:01:07+00:00,11227929,afernandez321,$F seems like 13.05 was the resistance but if numbers are solid on the 3rd. It should blow right through.. We could get another 1-2..,[F],"(set([right, like]), set([blow, get]))",2,2,4636
2013-01-01 16:19:00+00:00,11228010,mkarias1,@ftatrader I know what you mean as I held lots of $VIX calls but sold too early. I could have triple my money if I held it to last Fri.,[VIX],"(set([know]), set([too, mean]))",1,2,6501
2013-01-01 16:45:17+00:00,11228125,nyweatherguy,"$VIX After a 1-2 day rally, assuming we get a deal, brace for more market volatility as 1/2 FISCAL CLIFF &amp; debt ceiling still loom.",[VIX],"(set([]), set([get, volatility]))",0,2,6192


In [18]:
# Messages with at least `score_cutoff` positive words, from users ranked below the cutoff.
df[
    (df['User Ranking'] < user_ranking_cutoff)
    & (df['positive_score'] >= score_cutoff)
].head(100)

Unnamed: 0_level_0,id,username,body,tickers,score,positive_score,negative_score,User Ranking
created_at,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2013-01-01 00:00:55+00:00,11226191,afernandez321,$F Happy New Year everyone. Lets make it a great trading year!!,[F],"(set([great, make, happy]), set([make]))",3,1,4636
2013-01-01 00:02:53+00:00,11226201,drivethelane2001,@dawgpac as u know VITRASE initially granted 3 years. FDA later extended from 3-5 years . FDA decided that nce appropriate on moiety .$AMRN,[AMRN],"(set([appropriate, know]), set([]))",2,0,3579
2013-01-01 00:06:57+00:00,11226220,drivethelane2001,"@dawgpac many many weak stocks up today . I&#39;ll take 5% + gain . I&#39;m interested in VITRASE from nce angle . ISTA was purchased 4 , 500m $AMRN",[AMRN],"(set([interested, gain]), set([]))",2,0,3579
2013-01-01 00:16:29+00:00,11226260,tohio,"&quot;@TheCondor: $AAPL soon the op system for a car will be an option just like phones. You want iOS, Android, Windows?&quot; no windows please",[AAPL],"(set([please, like, just]), set([]))",3,0,1924
2013-01-01 00:30:56+00:00,11226306,valuewalk,BREAKING @WSJbusiness Two Best Buy Directors Resign http://stks.co/jI6H $BBY,[BBY],"(set([buy, best]), set([]))",2,0,583
2013-01-01 02:33:40+00:00,11226653,frankcapra03,"$AAPL News Wire just reported Congress reached a deal and will avoid fiscal cliff, nothing like down to the wire 2.5 hrs to spare! Cheers!",[AAPL],"(set([deal, just, like]), set([avoid, deal]))",3,2,463
2013-01-01 03:16:53+00:00,11226755,crazyfasteddy,Ok hold it... Not that it means anything.. DOW futs Closed at 13104; AH 13027 up 250 fair value AH 13032...$SPX 1426 AH 1420 up 36 WTF???,[SPX],"(set([value, fair]), set([]))",2,0,968
2013-01-01 04:19:52+00:00,11226901,nycspeeder,@SirIsaacNewton can you send me the link to the animation you did of $aapl s decent since $700+ i was telling colleague,[AAPL],"(set([colleague, decent]), set([]))",2,0,1293
2013-01-01 05:02:14+00:00,11226978,humble747,&quot;@DoctorRX: @SirIsaacNewton Great to see you here on StockTwits -- a great addition for $AAPL followers in awhile. Happy New Year!&quot;,[AAPL],"(set([great, happy]), set([]))",2,0,2171
2013-01-01 05:08:03+00:00,11226990,humble747,&quot;@SirIsaacNewton: $AAPL MOVIE. 55% return for the FEAR/GREED indicator just 15 trades for the year. AWESOME. http://stks.co/gIAY&quot;&gt;THANKS,[AAPL],"(set([return, just]), set([]))",2,0,2171
