In [1]:
import pandas as pd
import numpy as np
import json
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import datetime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import precision_score, recall_score, accuracy_score, mean_squared_error

# For each ticker (company), store all news into a dataframe

In [2]:
# Return one season's financial news dataframe
def season_dataframe(ticker, season):
    with open('./financial_news/'+ticker+str(season), 'r') as ticker_news:
        content  = ticker_news.read()
    records = json.loads(content)
    df = pd.DataFrame(records)
    return df

In [3]:
# Create dataframe for each ticker, storing its news
ALL_TICKERS = ["$TSLA", "$INTC", "$PFE", "$SPGI", "$ADSK", "$VRTX", "$TWTR", "$EBAY", "$GRMN", "$AAL", "$ANET"]
df = []
for ticker in ALL_TICKERS:
  # Concatenate 8 seasons into one dataframe
  df_temp = pd.DataFrame()
  for i in range(1, 9):
      new_df = season_dataframe(ticker, i)
      df_temp = pd.concat([df_temp, new_df], ignore_index=True)
  df.append(df_temp)

Usage: df[0] will return $TSLA news from 2019-01-01 to 2020-12-31

# Calculate Polarity of each news, using nltk.sentiment.vader package

In [4]:
# Calculate vader
vader = SentimentIntensityAnalyzer()

# Helper function, which calculates the sentiment and returns compund score
def cal_compound(t):
    return vader.polarity_scores(t)["compound"]

In [5]:
for each_df in df:
  each_df['title_compound'] = each_df['title'].apply(cal_compound)
  each_df['text_compound'] = each_df['text'].apply(cal_compound)

Convert publishedDate to YYYY-MM-DD

In [6]:
# Helper function: for the date conversion
def remove_time(publish_date):
  return publish_date[0:10]

In [7]:
for each_df in df:
  each_df['publishedDate'] = each_df['publishedDate'].apply(remove_time)

Convert weekend to next Monday

In [8]:
# Helper function: Convert weekend to next Monday
def moveWeekend(publish_date):
  d = datetime.datetime(int(publish_date[0:4]), int(publish_date[5:7]), int(publish_date[8:10]))
  if (d.weekday() == 5):
    return str(d+datetime.timedelta(days=2))[0:10]
  elif (d.weekday() == 6):
    return str(d+datetime.timedelta(days=1))[0:10]
  else:
    return publish_date

In [9]:
for each_df in df:
  each_df['publishedDate'] = each_df['publishedDate'].apply(moveWeekend)

In [10]:
df[0]

Unnamed: 0,symbol,publishedDate,title,image,site,text,url,title_compound,text_compound
0,TSLA,2020-12-31,Tesla to deliver China-made Model Y SUVs this ...,https://cdn.snapi.dev/images/v1/5/m/m02d202101...,Reuters,Tesla Inc said on Friday it has started sellin...,https://www.reuters.com/article/us-tesla-china...,0.0000,0.1027
1,TSLA,2020-12-31,2020: Several Chinese Stocks Outperformed Thei...,https://cdn.snapi.dev/images/v1/f/j/catalog-ma...,Seeking Alpha,2020: Several Chinese Stocks Outperformed Thei...,https://seekingalpha.com/article/4396892-2020-...,0.0000,0.0000
2,TSLA,2020-12-31,EV Company News For The Month Of December 2020,https://cdn.snapi.dev/images/v1/l/r/sssik22-c5...,Seeking Alpha,Global electric car sales records for November...,https://seekingalpha.com/article/4396884-ev-co...,0.0000,0.8402
3,TSLA,2020-12-31,"Tesla, Volkswagen, Renault See Strong Share In...",https://cdn.snapi.dev/images/v1/v/x/s3xy-14.jpg,Benzinga,The European market continues to see strong ad...,https://www.benzinga.com/news/20/12/18973120/t...,0.6705,0.5106
4,TSLA,2020-12-31,Tech's top seven companies added $3.4 trillion...,https://cdn.snapi.dev/images/v1/s/t/stocks23-1...,CNBC,"Big Tech got much bigger in 2020, and Tesla jo...",https://www.cnbc.com/2020/12/31/techs-top-seve...,0.4939,0.0000
...,...,...,...,...,...,...,...,...,...
5817,TSLA,2019-01-18,Behind Elon Musk's Hiring and Firing Spree,https://cdn.snapi.dev/images/v1/v/i/viwtqd6wkp...,Bloomberg Technology,Elon Musk is cutting Tesla Inc.'s workforce by...,https://www.youtube.com/watch?v=ViwTqD6WKpA,-0.3400,-0.4404
5818,TSLA,2019-01-17,Tesla to cut full time workforce by roughly 7%...,https://cdn.snapi.dev/images/v1/t/8/t8cnbozbfx...,CNBC Television,The Wall Street Journal is reporting that Tesl...,https://www.youtube.com/watch?v=t8CNBOZBFXc,-0.2732,0.0258
5819,TSLA,2019-01-07,Cramer: New Tesla factory in Shanghai will wor...,https://cdn.snapi.dev/images/v1/w/g/wgegqwogy8...,CNBC Television,CNBC's Jim Cramer discusses his take on the la...,https://www.youtube.com/watch?v=WGEgQWogY8E,0.0000,0.0000
5820,TSLA,2019-01-03,Tesla stock drops over missed delivery estimat...,https://cdn.snapi.dev/images/v1/b/e/bebskx74-2...,Fox Business,“Bulls & Bears” panel discusses how Tesla shar...,https://www.youtube.com/watch?v=Bebskx74-2Y,-0.5267,-0.1280


# Calculate title and text's daily polarity mean respectively

In [11]:
polarity = []
# Calcuate mean on daily basis
for each_df in df:
  each_title_mean = each_df.groupby('publishedDate', as_index=False)['title_compound'].mean()
  each_text_mean = each_df.groupby('publishedDate', as_index=False)['text_compound'].mean()
  each_polarity = pd.merge(each_title_mean, each_text_mean, on='publishedDate')
  each_polarity['symbol'] = each_df['symbol']
  each_polarity['title'] = each_df['title']
  each_polarity['text'] = each_df['text']
  polarity.append(each_polarity)

In [12]:
polarity[0]

Unnamed: 0,publishedDate,title_compound,text_compound,symbol,title,text
0,2019-01-01,-0.226300,-0.296000,TSLA,Tesla to deliver China-made Model Y SUVs this ...,Tesla Inc said on Friday it has started sellin...
1,2019-01-03,-0.526700,-0.128000,TSLA,2020: Several Chinese Stocks Outperformed Thei...,2020: Several Chinese Stocks Outperformed Thei...
2,2019-01-07,0.000000,0.000000,TSLA,EV Company News For The Month Of December 2020,Global electric car sales records for November...
3,2019-01-17,-0.273200,0.025800,TSLA,"Tesla, Volkswagen, Renault See Strong Share In...",The European market continues to see strong ad...
4,2019-01-18,-0.340000,-0.440400,TSLA,Tech's top seven companies added $3.4 trillion...,"Big Tech got much bigger in 2020, and Tesla jo..."
...,...,...,...,...,...,...
429,2020-12-25,0.148000,0.509500,TSLA,Tesla to raise another $5bn by selling shares,Electric carmaker to join blue-chip S&P 500 in...
430,2020-12-28,0.081800,0.378638,TSLA,"LIVE | Apple, Pfizer, Tesla: Jim Cramer's Stoc...","In Tuesday's market breakdown, Jim Cramer talk..."
431,2020-12-29,0.209004,0.202939,TSLA,"Tesla Stock Price Receives $2,500 3-Year Targe...",Shares of Tesla Inc (NASDAQ: TSLA) have receiv...
432,2020-12-30,0.209600,0.323291,TSLA,Tesla files to sell $5B in stock while its sha...,Tesla is striking while its share price — and ...


# Merge everyday's closing price with polarity dataframe, but show everyday's closing price

In [13]:
# Merge everyday's closing price with polarity dataframe
index = 0
for ticker in ALL_TICKERS:
  data = pd.read_csv("./stock_price/compare_previous_day/"+ticker[1:]+".csv")
  data = data[['Date', 'Close']]
  data['publishedDate'] = data['Date']
  data = data[['publishedDate', 'Close']]
  polarity[index] = pd.merge(data, polarity[index], on = 'publishedDate', how = 'left').fillna(0)
  index+=1


In [14]:
# Fill symbol column for each company
for index, each_polarity in enumerate(polarity):
  each_polarity['symbol'] = ALL_TICKERS[index][1:]

In [15]:
polarity[0]

Unnamed: 0,publishedDate,Close,title_compound,text_compound,symbol,title,text
0,2018-12-31,66.559998,0.000000,0.000000,TSLA,0,0
1,2019-01-02,62.023998,0.000000,0.000000,TSLA,0,0
2,2019-01-03,60.071999,-0.526700,-0.128000,TSLA,2020: Several Chinese Stocks Outperformed Thei...,2020: Several Chinese Stocks Outperformed Thei...
3,2019-01-04,63.537998,0.000000,0.000000,TSLA,0,0
4,2019-01-07,66.991997,0.000000,0.000000,TSLA,EV Company News For The Month Of December 2020,Global electric car sales records for November...
...,...,...,...,...,...,...,...
501,2020-12-24,661.770020,0.158990,0.276630,TSLA,Cramer on Tesla's plans to sell $5 billion in ...,Tesla stock was lower Tuesday morning followin...
502,2020-12-28,663.690002,0.081800,0.378638,TSLA,"LIVE | Apple, Pfizer, Tesla: Jim Cramer's Stoc...","In Tuesday's market breakdown, Jim Cramer talk..."
503,2020-12-29,665.989990,0.209004,0.202939,TSLA,"Tesla Stock Price Receives $2,500 3-Year Targe...",Shares of Tesla Inc (NASDAQ: TSLA) have receiv...
504,2020-12-30,694.780029,0.209600,0.323291,TSLA,Tesla files to sell $5B in stock while its sha...,Tesla is striking while its share price — and ...


# Apply MinMax scaler to Close price

In [16]:
from sklearn.preprocessing import MinMaxScaler

In [17]:
for each_polarity in polarity:
  close = each_polarity['Close']
  scaler = MinMaxScaler(feature_range=(0,1))
  close = scaler.fit_transform(np.array(close).reshape(-1,1))
  each_polarity['Close_scaler'] = close

In [18]:
polarity[0]

Unnamed: 0,publishedDate,Close,title_compound,text_compound,symbol,title,text,Close_scaler
0,2018-12-31,66.559998,0.000000,0.000000,TSLA,0,0,0.045928
1,2019-01-02,62.023998,0.000000,0.000000,TSLA,0,0,0.039157
2,2019-01-03,60.071999,-0.526700,-0.128000,TSLA,2020: Several Chinese Stocks Outperformed Thei...,2020: Several Chinese Stocks Outperformed Thei...,0.036243
3,2019-01-04,63.537998,0.000000,0.000000,TSLA,0,0,0.041417
4,2019-01-07,66.991997,0.000000,0.000000,TSLA,EV Company News For The Month Of December 2020,Global electric car sales records for November...,0.046573
...,...,...,...,...,...,...,...,...
501,2020-12-24,661.770020,0.158990,0.276630,TSLA,Cramer on Tesla's plans to sell $5 billion in ...,Tesla stock was lower Tuesday morning followin...,0.934466
502,2020-12-28,663.690002,0.081800,0.378638,TSLA,"LIVE | Apple, Pfizer, Tesla: Jim Cramer's Stoc...","In Tuesday's market breakdown, Jim Cramer talk...",0.937332
503,2020-12-29,665.989990,0.209004,0.202939,TSLA,"Tesla Stock Price Receives $2,500 3-Year Targe...",Shares of Tesla Inc (NASDAQ: TSLA) have receiv...,0.940765
504,2020-12-30,694.780029,0.209600,0.323291,TSLA,Tesla files to sell $5B in stock while its sha...,Tesla is striking while its share price — and ...,0.983743


# Merge everyday's indicator with poliarity dataframe

In [19]:
for each_polarity in polarity:
  data = pd.read_csv("./stock_price/compare_previous_day/"+each_polarity.symbol.loc[0]+".csv")
  each_polarity['indicator'] = data['indicator']

In [20]:
polarity[0]

Unnamed: 0,publishedDate,Close,title_compound,text_compound,symbol,title,text,Close_scaler,indicator
0,2018-12-31,66.559998,0.000000,0.000000,TSLA,0,0,0.045928,0.0
1,2019-01-02,62.023998,0.000000,0.000000,TSLA,0,0,0.039157,0.0
2,2019-01-03,60.071999,-0.526700,-0.128000,TSLA,2020: Several Chinese Stocks Outperformed Thei...,2020: Several Chinese Stocks Outperformed Thei...,0.036243,0.0
3,2019-01-04,63.537998,0.000000,0.000000,TSLA,0,0,0.041417,1.0
4,2019-01-07,66.991997,0.000000,0.000000,TSLA,EV Company News For The Month Of December 2020,Global electric car sales records for November...,0.046573,1.0
...,...,...,...,...,...,...,...,...,...
501,2020-12-24,661.770020,0.158990,0.276630,TSLA,Cramer on Tesla's plans to sell $5 billion in ...,Tesla stock was lower Tuesday morning followin...,0.934466,1.0
502,2020-12-28,663.690002,0.081800,0.378638,TSLA,"LIVE | Apple, Pfizer, Tesla: Jim Cramer's Stoc...","In Tuesday's market breakdown, Jim Cramer talk...",0.937332,1.0
503,2020-12-29,665.989990,0.209004,0.202939,TSLA,"Tesla Stock Price Receives $2,500 3-Year Targe...",Shares of Tesla Inc (NASDAQ: TSLA) have receiv...,0.940765,1.0
504,2020-12-30,694.780029,0.209600,0.323291,TSLA,Tesla files to sell $5B in stock while its sha...,Tesla is striking while its share price — and ...,0.983743,1.0


# Add previous 7 day's scaler to each row

In [21]:
for each_polarity in polarity:
  # Add scaler to first row
  for i in range(0, 6):
    name="scaler-"+str(i+1)
    each_polarity.loc[0, name] = each_polarity.loc[0, 'Close_scaler']
  # Add scaler to the remaining rows
  for i in range(1, len(each_polarity)):
    for s in range(0, 6):
      name="scaler-"+str(s+1)
      if (i-(s+1) >= 0):
        each_polarity.loc[i, name] = each_polarity.loc[i-(s+1), 'Close_scaler']
      else:
        each_polarity.loc[i, name] = each_polarity.loc[i-1, 'Close_scaler']

In [22]:
polarity[0]

Unnamed: 0,publishedDate,Close,title_compound,text_compound,symbol,title,text,Close_scaler,indicator,scaler-1,scaler-2,scaler-3,scaler-4,scaler-5,scaler-6
0,2018-12-31,66.559998,0.000000,0.000000,TSLA,0,0,0.045928,0.0,0.045928,0.045928,0.045928,0.045928,0.045928,0.045928
1,2019-01-02,62.023998,0.000000,0.000000,TSLA,0,0,0.039157,0.0,0.045928,0.045928,0.045928,0.045928,0.045928,0.045928
2,2019-01-03,60.071999,-0.526700,-0.128000,TSLA,2020: Several Chinese Stocks Outperformed Thei...,2020: Several Chinese Stocks Outperformed Thei...,0.036243,0.0,0.039157,0.045928,0.039157,0.039157,0.039157,0.039157
3,2019-01-04,63.537998,0.000000,0.000000,TSLA,0,0,0.041417,1.0,0.036243,0.039157,0.045928,0.036243,0.036243,0.036243
4,2019-01-07,66.991997,0.000000,0.000000,TSLA,EV Company News For The Month Of December 2020,Global electric car sales records for November...,0.046573,1.0,0.041417,0.036243,0.039157,0.045928,0.041417,0.041417
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,2020-12-24,661.770020,0.158990,0.276630,TSLA,Cramer on Tesla's plans to sell $5 billion in ...,Tesla stock was lower Tuesday morning followin...,0.934466,1.0,0.910894,0.902475,0.916686,0.984072,0.925703,0.876246
502,2020-12-28,663.690002,0.081800,0.378638,TSLA,"LIVE | Apple, Pfizer, Tesla: Jim Cramer's Stoc...","In Tuesday's market breakdown, Jim Cramer talk...",0.937332,1.0,0.934466,0.910894,0.902475,0.916686,0.984072,0.925703
503,2020-12-29,665.989990,0.209004,0.202939,TSLA,"Tesla Stock Price Receives $2,500 3-Year Targe...",Shares of Tesla Inc (NASDAQ: TSLA) have receiv...,0.940765,1.0,0.937332,0.934466,0.910894,0.902475,0.916686,0.984072
504,2020-12-30,694.780029,0.209600,0.323291,TSLA,Tesla files to sell $5B in stock while its sha...,Tesla is striking while its share price — and ...,0.983743,1.0,0.940765,0.937332,0.934466,0.910894,0.902475,0.916686


# Add S&P 500 Index

In [23]:
sp_df = pd.read_csv("./stock_price/original/S&P500.csv")
def reorder_date(date):
  return date[6:10]+'-'+date[0:5]
sp_df['publishedDate'] = sp_df['Date'].apply(reorder_date)
sp_df = sp_df[['publishedDate', 'Close']]
sp_df = sp_df.iloc[::-1]
sp_df = sp_df.rename(columns= {'Close': "s&p500_close_price"}, inplace=False)
# Convert str to float
sp_df['s&p500_close_price'] = pd.to_numeric(sp_df['s&p500_close_price'], downcast="float")
sp_df


Unnamed: 0,publishedDate,s&p500_close_price
755,2018-01-02,2695.810059
754,2018-01-03,2713.060059
753,2018-01-04,2723.989990
752,2018-01-05,2743.149902
751,2018-01-08,2747.709961
...,...,...
4,2020-12-24,3703.060059
3,2020-12-28,3735.360107
2,2020-12-29,3727.040039
1,2020-12-30,3732.040039


In [24]:
# Merge with polarity dataframe
for index, ep in enumerate(polarity):
  polarity[index] = pd.merge(ep, sp_df, on="publishedDate")

In [25]:
# a = polarity[1]
# a.drop(columns=['scaler-7', 'scaler-6']).tail(50)

In [26]:
polarity[1].tail(50)

Unnamed: 0,publishedDate,Close,title_compound,text_compound,symbol,title,text,Close_scaler,indicator,scaler-1,scaler-2,scaler-3,scaler-4,scaler-5,scaler-6,s&p500_close_price
456,2020-10-21,53.5,0.179068,0.232605,INTC,Intel nears deal to sell NAND unit to South Ko...,,0.401439,1.0,0.398641,0.444622,0.427829,0.415434,0.403439,0.414634,3435.560059
457,2020-10-22,53.900002,0.026352,0.14519,INTC,Intel Sells Its NAND Flash Business To SK Hynix,Intel is selling its NAND flash business to SK...,0.417433,1.0,0.401439,0.398641,0.444622,0.427829,0.415434,0.403439,3453.48999
458,2020-10-23,48.200001,-0.015557,-0.015129,INTC,Intel near deal to sell storage unit to SK Hyn...,Intel Corp. is nearing a deal to sell its Nand...,0.189524,0.0,0.417433,0.401439,0.398641,0.444622,0.427829,0.415434,3465.389893
459,2020-10-26,46.720001,-0.16588,0.25834,INTC,Intel Reported to be In Talks to Sell a Chip B...,RBC Capital Markets analyst Mitch Steves wrote...,0.130348,0.0,0.189524,0.417433,0.401439,0.398641,0.444622,0.427829,3400.969971
460,2020-10-27,45.639999,-0.1865,0.204133,INTC,SK hynix could buy Intel NAND business – Block...,"The WSJ is reporting Korean DRAM, NAND and SSD...",0.087165,0.0,0.130348,0.189524,0.417433,0.401439,0.398641,0.444622,3390.679932
461,2020-10-28,44.25,-0.01015,0.145175,INTC,Intel and partners announce high-performance S...,Intel has been a leader in Ethernet networking...,0.031587,0.0,0.087165,0.130348,0.189524,0.417433,0.401439,0.398641,3271.030029
462,2020-10-29,44.110001,0.210612,0.325688,INTC,Intel Nears Deal to Sell NAND Memory Unit to S...,,0.02599,0.0,0.031587,0.087165,0.130348,0.189524,0.417433,0.401439,3310.110107
463,2020-10-30,44.279999,0.4215,0.0,INTC,Intel : Nears Deal to Sell NAND Memory Unit to...,,0.032787,1.0,0.02599,0.031587,0.087165,0.130348,0.189524,0.417433,3269.959961
464,2020-11-02,44.459999,0.02254,0.12384,INTC,Intel's stock spikes into the green after WSJ ...,Shares of Intel Corp. undefined swung higher i...,0.039984,1.0,0.032787,0.02599,0.031587,0.087165,0.130348,0.189524,3310.23999
465,2020-11-03,44.849998,0.0,0.0,INTC,WSJ News Exclusive | Intel Nears Deal to Sell ...,The U.S. semiconductor giant is nearing a deal...,0.055578,1.0,0.039984,0.032787,0.02599,0.031587,0.087165,0.130348,3369.159912


# Predict the trend using Passive Aggressive Classifier model

In [27]:
from sklearn.neural_network import MLPClassifier
from sklearn.utils.testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier




In [28]:
@ignore_warnings(category=ConvergenceWarning)
def predict(polarity):
  avg_accuracy = 0
  for each_polarity in polarity:
    y = each_polarity['indicator']
    X = each_polarity.drop(columns=['indicator', 'publishedDate', 'symbol', 'title', 'text'])
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0, shuffle=False)
    scaler = StandardScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    # apply passive aggressive classifier
    out_date = each_polarity.publishedDate.values
    nn = PassiveAggressiveClassifier()

    nn.fit(X_train, y_train)
    #rf.fit(X_train, y_train)
    y_pred = nn.predict(X_test)
    print(accuracy_score(y_test, y_pred))
    avg_accuracy+=accuracy_score(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    f = open(each_polarity.symbol.iloc[0]+".summaryRandomForest.csv", "w")
    f.write("accuracy\n")
    f.write("{:.2f}\n".format(accuracy))
    f.close()

    f = open(each_polarity.symbol.iloc[0]+".outputRandomForest.csv", "w")
    f.write("date,predicted_indicator\n")
    dd = 338
    for i in range(y_pred.shape[0]):
      f.write("{},{}\n".format(out_date[dd], y_pred[i]))
      dd+=1
    f.close()
  print("AVERAGE ACCURACY: "+str(avg_accuracy/11))

In [29]:
# # split train and test data in consecutive period
# @ignore_warnings(category=ConvergenceWarning)
# def predict_test(polarity):
#   avg_accuracy = 0
#   for each_polarity in polarity:
#     y = each_polarity['indicator']
#     X = each_polarity.drop(columns=['indicator', 'symbol', 'title', 'text'])
#     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=0, shuffle=False)
#     # print(len(X_test))
#     # print(len(X_train))
#     print(X_test)
#     # scaler = StandardScaler()
#     # scaler.fit(X_train)
#     # X_train = scaler.transform(X_train)
#     # X_test = scaler.transform(X_test)

#     # # apply Random Forest Classifier
#     # """ out_date = each_polarity.publishedDate.values
#     # nn = RandomForestClassifier(n_estimators=200,criterion='entropy') """

#     # # apply passive aggressive classifier
#     # out_date = each_polarity.publishedDate.values
#     # nn = PassiveAggressiveClassifier()

#     # nn.fit(X_train, y_train)
#     # #rf.fit(X_train, y_train)
#     # y_pred = nn.predict(X_test)
#     # print(accuracy_score(y_test, y_pred))
#     # avg_accuracy+=accuracy_score(y_test, y_pred)
#     # accuracy = accuracy_score(y_test, y_pred)
#     # f = open(each_polarity.symbol.iloc[0]+".summaryRandomForest.csv", "w")
#     # f.write("accuracy\n")
#     # f.write("{:.2f}\n".format(accuracy))
#     # f.close()

#     # f = open(each_polarity.symbol.iloc[0]+".outputRandomForest.csv", "w")
#     # f.write("date,predicted_indicator\n")
#     # for i in range(y_pred.shape[0]):
#     #   f.write("{},{}\n".format(out_date[i], y_pred[i]))
#     # f.close()
#     break
#   print("AVERAGE ACCURACY: "+str(avg_accuracy/11))

In [30]:
# len(polarity)

In [31]:
# """ i = 0
# for each_polarity in polarity:
#     each_polarity = each_polarity.drop('title', axis=1)
#     each_polarity = each_polarity.drop('text', axis=1)
#     polarity[i] = each_polarity
#     i+=1 """

# i = 0
# for each_polarity in polarity:
#     each_polarity.fillna(' ')
#     polarity[i] = each_polarity
#     i+=1

In [32]:
# i=0
# for each_polarity in polarity:
#     each_polarity = each_polarity.drop('scaler-7', axis=1)
#     polarity[i] = each_polarity
#     i+=1

In [33]:
# polarity[0]

In [34]:
# polarity[1].indicator.to_csv('testesttest.txt', index=False)


In [35]:
# predict_test(polarity)

In [36]:
predict(polarity)

0.6766467065868264
0.8802395209580839
0.8802395209580839
0.7365269461077845
0.7904191616766467
0.844311377245509
0.9101796407185628
0.8203592814371258
0.8502994011976048
0.5449101796407185
0.9461077844311377
AVERAGE ACCURACY: 0.8072945019052805
