In [45]:
# Packages
import pickle
from textblob import TextBlob
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from collections import Counter
import nltk, spacy, re, json
from nltk.corpus import stopwords
from nltk.util import ngrams
from nltk.stem import PorterStemmer
nltk.download("punkt")
nltk.download('stopwords')

# Set up
% matplotlib inline
import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_colwidth', -1)

[nltk_data] Downloading package punkt to /jet/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /jet/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


## Combine Features

### Import sentiment features

In [46]:
### Import features: sentiment scores of Elon's Tesla-related tweets
sentiment_path = '../../data/features/2019_05_07_nlp_sentiments_jing.pickle'
with open(sentiment_path, "rb") as fh:
    elon_features = pickle.load(fh)

In [47]:
elon_features.shape

(1077, 5)

In [48]:
elon_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1077 entries, 0 to 1076
Data columns (total 5 columns):
id              1077 non-null object
Datetime        1077 non-null object
text            1077 non-null object
sentiment       1077 non-null float64
subjectivity    1077 non-null float64
dtypes: float64(2), object(3)
memory usage: 42.1+ KB


In [49]:
len(elon_features['Datetime'].unique())

1075

In [50]:
# tweets are indexed by id rather than datetime 
elon_features[elon_features['Datetime'].duplicated()]

Unnamed: 0,id,Datetime,text,sentiment,subjectivity
503,980566103356194816,2018-04-01 22:02:00+00:00,"there are many chapters of bankruptcy and, as critics so rightly pointed out, tesla has them *all*, including chapter 14 and a half (the worst one).",-0.095238,0.550595
504,980566101124722688,2018-04-01 22:02:00+00:00,"tesla goes bankrupt palo alto, california, april 1, 2018 -- despite intense efforts to raise money, including a last-ditch mass sale of easter eggs, we are sad to report that tesla has gone completely and totally bankrupt. so bankrupt, you can't believe it.",-0.025,0.39375


In [51]:
elon_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1077 entries, 0 to 1076
Data columns (total 5 columns):
id              1077 non-null object
Datetime        1077 non-null object
text            1077 non-null object
sentiment       1077 non-null float64
subjectivity    1077 non-null float64
dtypes: float64(2), object(3)
memory usage: 42.1+ KB


### Add datetime features

In [52]:
### Import features: datetime related (day, month, week, DayofWeek, BinaryTrading)
datetime_path = '../../data/features/2019_05_07_Datetime_Jeff.pickle'
with open(datetime_path, "rb") as fh:
    datetime_features = pickle.load(fh)

In [53]:
datetime_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1077 entries, 0 to 1076
Data columns (total 19 columns):
Unnamed: 0       1077 non-null int64
Datetime         1077 non-null datetime64[ns, UTC]
retweets         1077 non-null int64
favorites        1077 non-null int64
text             1077 non-null object
id               1077 non-null object
emoji            23 non-null object
isreply          1072 non-null object
replyto          529 non-null object
origin           1072 non-null float64
year             1077 non-null int64
month            1077 non-null int64
day              1077 non-null int64
date             1077 non-null object
DayofWeek        1077 non-null int64
Weekend          1077 non-null bool
Time             1077 non-null object
BinaryTrading    1077 non-null int64
DayDistance      1077 non-null int64
dtypes: bool(1), datetime64[ns, UTC](1), float64(1), int64(9), object(7)
memory usage: 152.6+ KB


In [54]:
#elon_features['Datetime'] = pd.to_datetime(elon_features['Datetime'], utc=True)

In [55]:
elon_features.shape, datetime_features.shape

((1077, 5), (1077, 19))

In [56]:
elon_features[elon_features['id'].duplicated()]

Unnamed: 0,id,Datetime,text,sentiment,subjectivity


In [57]:
datetime_features[datetime_features['id'].duplicated()]

Unnamed: 0.1,Unnamed: 0,Datetime,retweets,favorites,text,id,emoji,isreply,replyto,origin,year,month,day,date,DayofWeek,Weekend,Time,BinaryTrading,DayDistance


In [58]:
# features = elon_features.set_index('Datetime').drop(['text'], axis = 1).merge(
#     datetime_features.set_index('Datetime'), left_index=True, right_index=True, how='inner')

In [59]:
### Merge sentiment and datetime features
# Join on the tweet id. 'text' and 'Datetime' are dropped from the right-hand
# frame because elon_features already carries both columns.
# validate='one_to_one' makes pandas raise MergeError if either frame has a
# repeated id, instead of silently multiplying rows.
features = elon_features.merge(
    datetime_features.drop(['text', 'Datetime'], axis=1),
    on='id', how='inner', validate='one_to_one')

In [60]:
features.shape

(1077, 21)

In [61]:
#features[features['Datetime'].duplicated()]

### Add Google Trend Variables

In [62]:
# Load the Google Trend feature table
gtrend_path = '../../data/features/2019_05_07_ElonGoogleTrend_Jeff.pickle'
with open(gtrend_path, "rb") as fh:
    googletrend_features = pickle.load(fh)

In [63]:
googletrend_features.shape

(1077, 16)

In [64]:
googletrend_features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1077 entries, 0 to 1076
Data columns (total 16 columns):
Unnamed: 0    1077 non-null int64
Datetime      1077 non-null datetime64[ns, UTC]
retweets      1077 non-null int64
favorites     1077 non-null int64
text          1077 non-null object
id            1077 non-null object
emoji         23 non-null object
isreply       1072 non-null object
replyto       529 non-null object
origin        1072 non-null float64
year          1077 non-null int64
month         1077 non-null int64
day           1077 non-null int64
date          1077 non-null object
DayofWeek     1077 non-null int64
gtrend        1077 non-null int64
dtypes: datetime64[ns, UTC](1), float64(1), int64(8), object(6)
memory usage: 134.7+ KB


In [65]:
# Attach the 'gtrend' column to each tweet by id. Only 'id' and 'gtrend' are
# taken from the Google Trend table, so no other columns get duplicated.
# validate='one_to_one' makes pandas raise MergeError if either side has a
# repeated id, instead of silently multiplying rows.
features = features.merge(
    googletrend_features[['id', 'gtrend']],
    on='id', how='inner', validate='one_to_one')

In [66]:
features

Unnamed: 0.1,id,Datetime,text,sentiment,subjectivity,Unnamed: 0,retweets,favorites,emoji,isreply,...,year,month,day,date,DayofWeek,Weekend,Time,BinaryTrading,DayDistance,gtrend
0,1117561885040283648,2019-04-14 22:54:00+00:00,"that tweet did take immense effort fair point tho. powerwall production is now ramping fast. tesla was cell-starved last year, so we had to switch all lines to make packs for cars, which meant powerwall production was living off scraps.",0.183333,0.538889,5,131,5201,🤣 🤣,True,...,2019,4,14,2019-04-14,6,True,22:54:00,0,1,28
1,1117519699930013696,2019-04-14 20:07:00+00:00,i’ve always thought that the fundamental good of tesla should be measured by the number of years by which it accelerates the transition to sustainable transport & energy,0.700000,0.600000,17,202,1931,,True,...,2019,4,14,2019-04-14,6,True,20:07:00,0,1,28
2,1117518012926402560,2019-04-14 20:00:00+00:00,"i’m sorry, fred, but wsj has relentlessly attacked tesla with bogus articles for over a decade. they can’t *all* be chalked up to poor reporting. where are the wsj exposé pieces on oil, coal & gas? that industry is 1000 times bigger than tesla. perhaps they’re all angels …",-0.180000,0.420000,19,269,2337,,True,...,2019,4,14,2019-04-14,6,True,20:00:00,0,1,28
3,1117509874831609856,2019-04-14 19:28:00+00:00,"very much so. there are 2.5b cars & trucks on earth. even replacing 1% of that fleet would require making 25m vehicles per year. tesla will make over 500k cars in next 12 months, but that’s a mere 2% of 25m or 0.02% of global vehicle fleet. car industry slow -> demand >> supply.",-0.041333,0.165333,24,125,955,,True,...,2019,4,14,2019-04-14,6,True,19:28:00,0,1,28
4,1117507646804807681,2019-04-14 19:19:00+00:00,"sentry mode fundamentally empowers the individual car owner over entrenched interests, in this case a corrupt traffic judge. even tesla doesn’t have access to the video unless sent to us by owner.",-0.125000,0.350000,25,170,2470,,True,...,2019,4,14,2019-04-14,6,True,19:19:00,0,1,28
5,1117347290585243648,2019-04-14 08:42:00+00:00,"there is 35 gwh/yr “theoretical capacity”, but actual max output is ~2/3. it was physically impossible to make more model 3’s in q1 due to cell constraints.",-0.048611,0.362500,31,95,1185,,True,...,2019,4,14,2019-04-14,6,True,08:42:00,0,1,28
6,1117155252795346944,2019-04-13 19:59:00+00:00,"yes, all cars with autopilot 2.0 or higher, so ~400,000 of tesla cars built to date",0.250000,0.500000,33,17,380,,True,...,2019,4,13,2019-04-13,5,True,19:59:00,0,2,28
7,1117144865299501056,2019-04-13 19:17:00+00:00,incorrect. pana cell lines at giga are only at ~24gwh/yr & have been a constraint on model 3 output since july. no choice but to use other suppliers for powerwall/powerpack cells. tesla won’t spend money on more capacity until existing lines get closer to 35gwh theoretical.,0.031250,0.418750,39,424,6190,,True,...,2019,4,13,2019-04-13,5,True,19:17:00,0,2,28
8,1117118581865476096,2019-04-13 17:33:00+00:00,tesla will start fsd computer upgrade in a few months,-0.200000,0.100000,40,63,916,,True,...,2019,4,13,2019-04-13,5,True,17:33:00,0,2,28
9,1117116982778679297,2019-04-13 17:26:00+00:00,please note that the price of the tesla full self-driving option will increase substantially over time,0.350000,0.550000,43,2129,42658,,False,...,2019,4,13,2019-04-13,5,True,17:26:00,0,2,28


In [67]:
features.shape

(1077, 22)

### Use only observations dated on or after 2015-05-23, when Yahoo Finance data become accessible

In [68]:
import datetime
# Keep the rows whose 'date' is on or after the cutoff (the comparison is inclusive)
cutoff_date = datetime.date(2015, 5, 23)
on_or_after_cutoff = features['date'] >= cutoff_date
features = features[on_or_after_cutoff]

In [69]:
features.shape

(855, 22)

In [70]:
features.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 855 entries, 0 to 854
Data columns (total 22 columns):
id               855 non-null object
Datetime         855 non-null object
text             855 non-null object
sentiment        855 non-null float64
subjectivity     855 non-null float64
Unnamed: 0       855 non-null int64
retweets         855 non-null int64
favorites        855 non-null int64
emoji            23 non-null object
isreply          854 non-null object
replyto          491 non-null object
origin           854 non-null float64
year             855 non-null int64
month            855 non-null int64
day              855 non-null int64
date             855 non-null object
DayofWeek        855 non-null int64
Weekend          855 non-null bool
Time             855 non-null object
BinaryTrading    855 non-null int64
DayDistance      855 non-null int64
gtrend           855 non-null int64
dtypes: bool(1), float64(3), int64(10), object(8)
memory usage: 147.8+ KB


In [71]:
features.shape

(855, 22)

### Add sentiment features from comments to elon's tweets

In [72]:
# NOTE(review): h5py is not referenced in any visible cell and pandas is
# already imported in the first cell -- both imports look redundant; confirm
# before removing them.
import h5py
import pandas as pd

# Load the comment-based feature table
comments_path = '../../data/features/2019_05_15_Eloncomments1hour.pickle'
with open(comments_path, "rb") as fh:
    elon_comments = pickle.load(fh)

In [73]:
elon_comments.shape

(6245, 15)

In [74]:
elon_comments.columns

Index(['retweets', 'favorites', 'CommentSentimental', 'CommentSubjectivity',
       'date', 'id', 'origin', 'countComment', 'posCommentSum',
       'negCommentSum', 'CommentSD', 'posCommentSD', 'negCommentSD',
       'negCommentPercent', 'posCommentPercent'],
      dtype='object')

In [75]:
elon_comments

Unnamed: 0,retweets,favorites,CommentSentimental,CommentSubjectivity,date,id,origin,countComment,posCommentSum,negCommentSum,CommentSD,posCommentSD,negCommentSD,negCommentPercent,posCommentPercent
0,10513,120734,63.134057,229.326642,2019-04-14 23:31:00,1117571159195668480,1117563679099240449,1379,93.863996,-30.729939,0.205439,0.245963,0.208518,0.088470,0.207397
1,10513,120734,63.134057,229.326642,2019-04-14 23:10:00,1117565769829818368,1117563679099240449,1379,93.863996,-30.729939,0.205439,0.245963,0.208518,0.088470,0.207397
2,10513,120734,63.134057,229.326642,2019-04-14 23:06:00,1117564774190075904,1117563679099240449,1379,93.863996,-30.729939,0.205439,0.245963,0.208518,0.088470,0.207397
3,10513,120734,63.134057,229.326642,2019-04-14 23:02:00,1117563769159286784,1117563679099240449,1379,93.863996,-30.729939,0.205439,0.245963,0.208518,0.088470,0.207397
4,373,19413,30.295510,123.411165,2019-04-14 22:54:00,1117561885040283648,1117553530615648256,727,50.753336,-20.457826,0.211838,0.228215,0.216880,0.101788,0.210454
5,10513,120734,63.134057,229.326642,2019-04-14 23:01:00,1117563679099240449,1117563679099240449,1379,93.863996,-30.729939,0.205439,0.245963,0.208518,0.088470,0.207397
6,373,19407,30.470510,121.227832,2019-04-14 22:46:00,1117559770955812865,1117553530615648256,715,50.240836,-19.770326,0.211555,0.228942,0.212865,0.102098,0.209790
7,372,19383,27.652166,109.879184,2019-04-14 22:21:00,1117553530615648256,1117553530615648256,681,45.409991,-17.757826,0.205035,0.226954,0.205873,0.096916,0.198238
8,133,4825,21.880338,74.959134,2019-04-14 22:18:00,1117552682141532160,1117552423373918208,402,30.832074,-8.951736,0.204717,0.214156,0.161391,0.097015,0.213930
9,133,4823,21.680338,74.625800,2019-04-14 22:17:00,1117552423373918208,1117552423373918208,400,30.632074,-8.951736,0.205082,0.214723,0.161391,0.097500,0.212500


In [76]:
#elon_comments[elon_comments['id'] != elon_comments['origin']]

In [77]:
# Remove the columns not wanted from the comment table before the join,
# leaving 'id' plus the comment-derived columns.
comment_cols_to_drop = ['retweets', 'favorites', 'date', 'origin']
elon_comments = elon_comments.drop(columns=comment_cols_to_drop)

In [78]:
features.shape

(855, 22)

In [79]:
# Inner join on tweet id: only tweets present in elon_comments survive.
# In the recorded run this took the table from 855 to 854 rows.
features = features.merge(elon_comments, on='id', how='inner')

In [80]:
features.shape

(854, 32)

In [81]:
features

Unnamed: 0.1,id,Datetime,text,sentiment,subjectivity,Unnamed: 0,retweets,favorites,emoji,isreply,...,CommentSentimental,CommentSubjectivity,countComment,posCommentSum,negCommentSum,CommentSD,posCommentSD,negCommentSD,negCommentPercent,posCommentPercent
0,1117561885040283648,2019-04-14 22:54:00+00:00,"that tweet did take immense effort fair point tho. powerwall production is now ramping fast. tesla was cell-starved last year, so we had to switch all lines to make packs for cars, which meant powerwall production was living off scraps.",0.183333,0.538889,5,131,5201,🤣 🤣,True,...,30.295510,123.411165,727,50.753336,-20.457826,0.211838,0.228215,0.216880,0.101788,0.210454
1,1117519699930013696,2019-04-14 20:07:00+00:00,i’ve always thought that the fundamental good of tesla should be measured by the number of years by which it accelerates the transition to sustainable transport & energy,0.700000,0.600000,17,202,1931,,True,...,13.988479,38.828556,152,18.228486,-4.240006,0.237073,0.234062,0.131819,0.197368,0.414474
2,1117518012926402560,2019-04-14 20:00:00+00:00,"i’m sorry, fred, but wsj has relentlessly attacked tesla with bogus articles for over a decade. they can’t *all* be chalked up to poor reporting. where are the wsj exposé pieces on oil, coal & gas? that industry is 1000 times bigger than tesla. perhaps they’re all angels …",-0.180000,0.420000,19,269,2337,,True,...,17.575514,112.023914,419,35.958364,-18.382850,0.228340,0.209668,0.194167,0.212411,0.326969
3,1117509874831609856,2019-04-14 19:28:00+00:00,"very much so. there are 2.5b cars & trucks on earth. even replacing 1% of that fleet would require making 25m vehicles per year. tesla will make over 500k cars in next 12 months, but that’s a mere 2% of 25m or 0.02% of global vehicle fleet. car industry slow -> demand >> supply.",-0.041333,0.165333,24,125,955,,True,...,6.670488,22.575234,103,9.681724,-3.011236,0.212194,0.216935,0.120033,0.213592,0.330097
4,1117507646804807681,2019-04-14 19:19:00+00:00,"sentry mode fundamentally empowers the individual car owner over entrenched interests, in this case a corrupt traffic judge. even tesla doesn’t have access to the video unless sent to us by owner.",-0.125000,0.350000,25,170,2470,,True,...,14.872470,44.732349,171,20.153503,-5.281033,0.252651,0.241471,0.196370,0.163743,0.362573
5,1117347290585243648,2019-04-14 08:42:00+00:00,"there is 35 gwh/yr “theoretical capacity”, but actual max output is ~2/3. it was physically impossible to make more model 3’s in q1 due to cell constraints.",-0.048611,0.362500,31,95,1185,,True,...,17.645462,63.787965,266,24.291165,-6.645703,0.196506,0.187777,0.163050,0.169173,0.402256
6,1117155252795346944,2019-04-13 19:59:00+00:00,"yes, all cars with autopilot 2.0 or higher, so ~400,000 of tesla cars built to date",0.250000,0.500000,33,17,380,,True,...,117.019106,446.692991,1864,172.524221,-55.505115,0.207723,0.181679,0.192950,0.150215,0.362124
7,1117144865299501056,2019-04-13 19:17:00+00:00,incorrect. pana cell lines at giga are only at ~24gwh/yr & have been a constraint on model 3 output since july. no choice but to use other suppliers for powerwall/powerpack cells. tesla won’t spend money on more capacity until existing lines get closer to 35gwh theoretical.,0.031250,0.418750,39,424,6190,,True,...,8.536448,23.141468,96,10.807975,-2.271528,0.223476,0.192110,0.249556,0.145833,0.427083
8,1117118581865476096,2019-04-13 17:33:00+00:00,tesla will start fsd computer upgrade in a few months,-0.200000,0.100000,40,63,916,,True,...,70.712773,272.836290,1192,106.249765,-35.536992,0.204729,0.183976,0.180289,0.156040,0.345638
9,1117116982778679297,2019-04-13 17:26:00+00:00,please note that the price of the tesla full self-driving option will increase substantially over time,0.350000,0.550000,43,2129,42658,,False,...,68.523676,262.793582,1153,102.892242,-34.368566,0.205622,0.184157,0.184057,0.150911,0.343452


In [82]:
#features[features['emoji'].isna() == False]

In [83]:
features.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 854 entries, 0 to 853
Data columns (total 32 columns):
id                     854 non-null object
Datetime               854 non-null object
text                   854 non-null object
sentiment              854 non-null float64
subjectivity           854 non-null float64
Unnamed: 0             854 non-null int64
retweets               854 non-null int64
favorites              854 non-null int64
emoji                  23 non-null object
isreply                854 non-null object
replyto                491 non-null object
origin                 854 non-null float64
year                   854 non-null int64
month                  854 non-null int64
day                    854 non-null int64
date                   854 non-null object
DayofWeek              854 non-null int64
Weekend                854 non-null bool
Time                   854 non-null object
BinaryTrading          854 non-null int64
DayDistance            854 non-null int64
g

In [84]:
# Convert the 'isreply' column to an integer column
reply_flag = features['isreply'].astype(int)
features['isreply'] = reply_flag

In [85]:
# Drop the columns that are not kept in the final feature table
unused_cols = ['Unnamed: 0', 'emoji', 'replyto', 'origin', 'retweets', 'favorites']
features = features.drop(columns=unused_cols)

In [86]:
features.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 854 entries, 0 to 853
Data columns (total 26 columns):
id                     854 non-null object
Datetime               854 non-null object
text                   854 non-null object
sentiment              854 non-null float64
subjectivity           854 non-null float64
isreply                854 non-null int64
year                   854 non-null int64
month                  854 non-null int64
day                    854 non-null int64
date                   854 non-null object
DayofWeek              854 non-null int64
Weekend                854 non-null bool
Time                   854 non-null object
BinaryTrading          854 non-null int64
DayDistance            854 non-null int64
gtrend                 854 non-null int64
CommentSentimental     854 non-null float64
CommentSubjectivity    854 non-null float64
countComment           854 non-null int64
posCommentSum          854 non-null float64
negCommentSum          854 non-null float

In [87]:
features

Unnamed: 0,id,Datetime,text,sentiment,subjectivity,isreply,year,month,day,date,...,CommentSentimental,CommentSubjectivity,countComment,posCommentSum,negCommentSum,CommentSD,posCommentSD,negCommentSD,negCommentPercent,posCommentPercent
0,1117561885040283648,2019-04-14 22:54:00+00:00,"that tweet did take immense effort fair point tho. powerwall production is now ramping fast. tesla was cell-starved last year, so we had to switch all lines to make packs for cars, which meant powerwall production was living off scraps.",0.183333,0.538889,1,2019,4,14,2019-04-14,...,30.295510,123.411165,727,50.753336,-20.457826,0.211838,0.228215,0.216880,0.101788,0.210454
1,1117519699930013696,2019-04-14 20:07:00+00:00,i’ve always thought that the fundamental good of tesla should be measured by the number of years by which it accelerates the transition to sustainable transport & energy,0.700000,0.600000,1,2019,4,14,2019-04-14,...,13.988479,38.828556,152,18.228486,-4.240006,0.237073,0.234062,0.131819,0.197368,0.414474
2,1117518012926402560,2019-04-14 20:00:00+00:00,"i’m sorry, fred, but wsj has relentlessly attacked tesla with bogus articles for over a decade. they can’t *all* be chalked up to poor reporting. where are the wsj exposé pieces on oil, coal & gas? that industry is 1000 times bigger than tesla. perhaps they’re all angels …",-0.180000,0.420000,1,2019,4,14,2019-04-14,...,17.575514,112.023914,419,35.958364,-18.382850,0.228340,0.209668,0.194167,0.212411,0.326969
3,1117509874831609856,2019-04-14 19:28:00+00:00,"very much so. there are 2.5b cars & trucks on earth. even replacing 1% of that fleet would require making 25m vehicles per year. tesla will make over 500k cars in next 12 months, but that’s a mere 2% of 25m or 0.02% of global vehicle fleet. car industry slow -> demand >> supply.",-0.041333,0.165333,1,2019,4,14,2019-04-14,...,6.670488,22.575234,103,9.681724,-3.011236,0.212194,0.216935,0.120033,0.213592,0.330097
4,1117507646804807681,2019-04-14 19:19:00+00:00,"sentry mode fundamentally empowers the individual car owner over entrenched interests, in this case a corrupt traffic judge. even tesla doesn’t have access to the video unless sent to us by owner.",-0.125000,0.350000,1,2019,4,14,2019-04-14,...,14.872470,44.732349,171,20.153503,-5.281033,0.252651,0.241471,0.196370,0.163743,0.362573
5,1117347290585243648,2019-04-14 08:42:00+00:00,"there is 35 gwh/yr “theoretical capacity”, but actual max output is ~2/3. it was physically impossible to make more model 3’s in q1 due to cell constraints.",-0.048611,0.362500,1,2019,4,14,2019-04-14,...,17.645462,63.787965,266,24.291165,-6.645703,0.196506,0.187777,0.163050,0.169173,0.402256
6,1117155252795346944,2019-04-13 19:59:00+00:00,"yes, all cars with autopilot 2.0 or higher, so ~400,000 of tesla cars built to date",0.250000,0.500000,1,2019,4,13,2019-04-13,...,117.019106,446.692991,1864,172.524221,-55.505115,0.207723,0.181679,0.192950,0.150215,0.362124
7,1117144865299501056,2019-04-13 19:17:00+00:00,incorrect. pana cell lines at giga are only at ~24gwh/yr & have been a constraint on model 3 output since july. no choice but to use other suppliers for powerwall/powerpack cells. tesla won’t spend money on more capacity until existing lines get closer to 35gwh theoretical.,0.031250,0.418750,1,2019,4,13,2019-04-13,...,8.536448,23.141468,96,10.807975,-2.271528,0.223476,0.192110,0.249556,0.145833,0.427083
8,1117118581865476096,2019-04-13 17:33:00+00:00,tesla will start fsd computer upgrade in a few months,-0.200000,0.100000,1,2019,4,13,2019-04-13,...,70.712773,272.836290,1192,106.249765,-35.536992,0.204729,0.183976,0.180289,0.156040,0.345638
9,1117116982778679297,2019-04-13 17:26:00+00:00,please note that the price of the tesla full self-driving option will increase substantially over time,0.350000,0.550000,0,2019,4,13,2019-04-13,...,68.523676,262.793582,1153,102.892242,-34.368566,0.205622,0.184157,0.184057,0.150911,0.343452


### Output the table containing all features

In [88]:
### Save the cleaned features table to a pickle file
output_path = '../../data/features/2019_05_16_all_features.pickle'
with open(output_path, 'wb') as out_file:
    pickle.dump(features, out_file, protocol=pickle.HIGHEST_PROTOCOL)