In [3]:
import pandas as pd
import numpy as np
import json
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import datetime
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import precision_score, recall_score, accuracy_score, mean_squared_error

# Convert price to 0/1: 0 represents a decrease (or no change), 1 represents an increase
Only needs to be run once (the results are written to CSV files).

In [15]:
# Add indicator column to each stock price file
def add_indicator_column(ticker):
  """Add a previous-row up/down ``indicator`` column to one ticker's price CSV.

  Reads ``./stock_price/<name>.csv`` and writes the same rows plus an
  ``indicator`` column to ``./stock_price/compare_previous_day/<name>.csv``,
  where ``<name>`` is ``ticker`` without its first character
  (e.g. ``"$TSLA"`` -> ``TSLA``).

  ``indicator`` is 1.0 when a row's ``Close`` is strictly greater than the
  ``Close`` of the row before it, and 0.0 otherwise (lower or equal). The
  first row has nothing to compare against and is always 0.0.

  Args:
    ticker: Ticker string with a one-character prefix, e.g. ``"$TSLA"``.

  Returns:
    None. The result is written to disk; the output directory must exist.
  """
  lag = 1  # number of rows to look back
  data = pd.read_csv("./stock_price/"+ticker[1:]+".csv")
  close = data['Close']
  # Written as not(previous >= current) rather than previous < current so that
  # rows with a missing Close on either side are still flagged 1.
  rose = ~(close.shift(lag) >= close)
  # Rows without a predecessor `lag` rows back are defined as 0.
  rose.iloc[:lag] = False
  # Stored as float so the CSV contains 0.0 / 1.0.
  data['indicator'] = rose.astype(float)
  data.to_csv("./stock_price/compare_previous_day/"+ticker[1:]+".csv", index=False)

In [16]:
# Tickers whose price files get the previous-day indicator column.
ALL_TICKERS = [
  "$TSLA", "$NVDA", "$INTC", "$PFE",
  "$SPGI", "$LRCX", "$TMUS", "$ADSK",
  "$VRTX", "$TWTR", "$EBAY", "$CARR",
  "$VRSN", "$GRMN", "$ANET", "$AAL",
]
# One CSV is read and one CSV is written per ticker.
for ticker in ALL_TICKERS:
  add_indicator_column(ticker)

# Calculate 7-day MA
Only needs to be run once (the results are written to CSV files).

In [17]:
def add_7day_ma(ticker):
  """Add a ``7dayMA`` column (smoothed average of ``Close``) to a ticker's CSV.

  Reads ``./stock_price/compare_previous_day/<name>.csv`` and writes the same
  rows plus a ``7dayMA`` column to ``./stock_price/with_7day_ma/<name>.csv``,
  where ``<name>`` is ``ticker`` without its first character.

  The column is computed as follows:
    * rows 0-5: the mean of all ``Close`` values from row 0 up to that row;
    * row i >= 6: ``(7dayMA[i-1] * 6 + Close[i]) / 7``.

  Note that from row 6 onwards this is a recursive smoothing of the previous
  average, not the plain mean of the last seven rows.

  Files with fewer than six rows are supported: only the running-mean part is
  applied to the rows that exist.

  Args:
    ticker: Ticker string with a one-character prefix, e.g. ``"$TSLA"``.

  Returns:
    None. The result is written to disk; the output directory must exist.
  """
  data = pd.read_csv("./stock_price/compare_previous_day/"+ticker[1:]+".csv")
  close = data['Close'].to_numpy(dtype=float)
  n_rows = len(close)
  # The first (up to) six rows use a running mean of everything seen so far.
  warmup = min(6, n_rows)
  ma = np.empty(n_rows, dtype=float)
  ma[:warmup] = np.cumsum(close[:warmup]) / np.arange(1, warmup + 1)
  # Each later row depends on the previous result, so this stays a loop, but
  # it runs over plain arrays instead of per-cell DataFrame lookups.
  for i in range(warmup, n_rows):
    ma[i] = (ma[i-1] * 6 + close[i]) / 7
  data['7dayMA'] = ma
  data.to_csv("./stock_price/with_7day_ma/"+ticker[1:]+".csv", index=False)

In [18]:
# Tickers whose indicator-augmented files get the 7dayMA column.
ALL_TICKERS = [
  "$TSLA", "$NVDA", "$INTC", "$PFE",
  "$SPGI", "$LRCX", "$TMUS", "$ADSK",
  "$VRTX", "$TWTR", "$EBAY", "$CARR",
  "$VRSN", "$GRMN", "$ANET", "$AAL",
]
# One CSV is read and one CSV is written per ticker.
for ticker in ALL_TICKERS:
  add_7day_ma(ticker)

# For Long Period (3 years)

# Convert price to 0/1: 0 represents a decrease (or no change), 1 represents an increase
Only needs to be run once (the results are written to CSV files).

In [10]:
# Add indicator column to each stock price file
def add_indicator_column_long_period(ticker):
  """Add a previous-row up/down ``indicator`` column to a long-period price CSV.

  Reads ``./stock_price/with_7day_ma_and7days_prediction/original/<name>.csv``
  and writes the same rows plus an ``indicator`` column to
  ``./stock_price/with_7day_ma_and7days_prediction/compare_previous_day/<name>.csv``,
  where ``<name>`` is ``ticker`` without its first character.

  ``indicator`` is 1.0 when a row's ``Close`` is strictly greater than the
  ``Close`` of the row before it, and 0.0 otherwise (lower or equal). The
  first row has nothing to compare against and is always 0.0.

  Args:
    ticker: Ticker string with a one-character prefix, e.g. ``"$TSLA"``.

  Returns:
    None. The result is written to disk; the output directory must exist.
  """
  lag = 1  # number of rows to look back
  data = pd.read_csv("./stock_price/with_7day_ma_and7days_prediction/original/"+ticker[1:]+".csv")
  close = data['Close']
  # Written as not(previous >= current) rather than previous < current so that
  # rows with a missing Close on either side are still flagged 1.
  rose = ~(close.shift(lag) >= close)
  # Rows without a predecessor `lag` rows back are defined as 0.
  rose.iloc[:lag] = False
  # Stored as float so the CSV contains 0.0 / 1.0.
  data['indicator'] = rose.astype(float)
  data.to_csv("./stock_price/with_7day_ma_and7days_prediction/compare_previous_day/"+ticker[1:]+".csv", index=False)

In [11]:
# Tickers used for the long-period data set.
ALL_TICKERS = [
  "$TSLA", "$INTC", "$PFE", "$SPGI",
  "$ADSK", "$VRTX", "$TWTR", "$EBAY",
  "$GRMN", "$ANET", "$AAL",
]
# Write the previous-day indicator file for every ticker.
for ticker in ALL_TICKERS:
  add_indicator_column_long_period(ticker)

$TSLA
$INTC
$PFE
$SPGI
$ADSK
$VRTX
$TWTR
$EBAY
$GRMN
$ANET
$AAL


# Calculate 7-day MA
Only needs to be run once (the results are written to CSV files).

In [12]:
def add_7day_ma_long_period(ticker):
  """Add a ``7dayMA`` column (smoothed average of ``Close``) to a long-period CSV.

  Reads
  ``./stock_price/with_7day_ma_and7days_prediction/compare_previous_day/<name>.csv``
  and writes the same rows plus a ``7dayMA`` column to
  ``./stock_price/with_7day_ma_and7days_prediction/with_7day_ma/<name>.csv``,
  where ``<name>`` is ``ticker`` without its first character.

  The column is computed as follows:
    * rows 0-5: the mean of all ``Close`` values from row 0 up to that row;
    * row i >= 6: ``(7dayMA[i-1] * 6 + Close[i]) / 7``.

  Note that from row 6 onwards this is a recursive smoothing of the previous
  average, not the plain mean of the last seven rows.

  Files with fewer than six rows are supported: only the running-mean part is
  applied to the rows that exist.

  Args:
    ticker: Ticker string with a one-character prefix, e.g. ``"$TSLA"``.

  Returns:
    None. The result is written to disk; the output directory must exist.
  """
  data = pd.read_csv("./stock_price/with_7day_ma_and7days_prediction/compare_previous_day/"+ticker[1:]+".csv")
  close = data['Close'].to_numpy(dtype=float)
  n_rows = len(close)
  # The first (up to) six rows use a running mean of everything seen so far.
  warmup = min(6, n_rows)
  ma = np.empty(n_rows, dtype=float)
  ma[:warmup] = np.cumsum(close[:warmup]) / np.arange(1, warmup + 1)
  # Each later row depends on the previous result, so this stays a loop, but
  # it runs over plain arrays instead of per-cell DataFrame lookups.
  for i in range(warmup, n_rows):
    ma[i] = (ma[i-1] * 6 + close[i]) / 7
  data['7dayMA'] = ma
  data.to_csv("./stock_price/with_7day_ma_and7days_prediction/with_7day_ma/"+ticker[1:]+".csv", index=False)

In [13]:
# Tickers used for the long-period data set.
ALL_TICKERS = [
  "$TSLA", "$INTC", "$PFE", "$SPGI",
  "$ADSK", "$VRTX", "$TWTR", "$EBAY",
  "$GRMN", "$ANET", "$AAL",
]
# Write the 7dayMA file for every ticker.
for ticker in ALL_TICKERS:
  add_7day_ma_long_period(ticker)

# 2DayAfter

In [8]:
# Add indicator column to each stock price file
def add_indicator2day_column_long_period(ticker):
  """Add an ``indicator`` column comparing each ``Close`` with the one 2 rows earlier.

  Reads ``./stock_price/with_7day_ma_and7days_prediction/original/<name>.csv``
  and writes the same rows plus an ``indicator`` column to
  ``./stock_price/with_7day_ma_and7days_prediction/compare_previous_day_2/<name>.csv``,
  where ``<name>`` is ``ticker`` without its first character.

  ``indicator`` is 1.0 when a row's ``Close`` is strictly greater than the
  ``Close`` two rows before it, and 0.0 otherwise (lower or equal). The first
  two rows have nothing to compare against and are always 0.0. A file with
  fewer than two rows keeps its row count; no padding rows are added.

  Args:
    ticker: Ticker string with a one-character prefix, e.g. ``"$TSLA"``.

  Returns:
    None. The result is written to disk; the output directory must exist.
  """
  lag = 2  # number of rows to look back
  data = pd.read_csv("./stock_price/with_7day_ma_and7days_prediction/original/"+ticker[1:]+".csv")
  close = data['Close']
  # Written as not(earlier >= current) rather than earlier < current so that
  # rows with a missing Close on either side are still flagged 1.
  rose = ~(close.shift(lag) >= close)
  # Rows without a predecessor `lag` rows back are defined as 0.
  rose.iloc[:lag] = False
  # Stored as float so the CSV contains 0.0 / 1.0.
  data['indicator'] = rose.astype(float)
  data.to_csv("./stock_price/with_7day_ma_and7days_prediction/compare_previous_day_2/"+ticker[1:]+".csv", index=False)

In [9]:
# Tickers used for the long-period data set.
ALL_TICKERS = [
  "$TSLA", "$INTC", "$PFE", "$SPGI",
  "$ADSK", "$VRTX", "$TWTR", "$EBAY",
  "$GRMN", "$ANET", "$AAL",
]
# Write the 2-row look-back indicator file for every ticker.
for ticker in ALL_TICKERS:
  add_indicator2day_column_long_period(ticker)

# 3DayAfter

In [10]:
# Add indicator column to each stock price file
def add_indicator3day_column_long_period(ticker):
  """Add an ``indicator`` column comparing each ``Close`` with the one 3 rows earlier.

  Reads ``./stock_price/with_7day_ma_and7days_prediction/original/<name>.csv``
  and writes the same rows plus an ``indicator`` column to
  ``./stock_price/with_7day_ma_and7days_prediction/compare_previous_day_3/<name>.csv``,
  where ``<name>`` is ``ticker`` without its first character.

  ``indicator`` is 1.0 when a row's ``Close`` is strictly greater than the
  ``Close`` three rows before it, and 0.0 otherwise (lower or equal). The first
  three rows have nothing to compare against and are always 0.0. A file with
  fewer than three rows keeps its row count; no padding rows are added.

  Args:
    ticker: Ticker string with a one-character prefix, e.g. ``"$TSLA"``.

  Returns:
    None. The result is written to disk; the output directory must exist.
  """
  lag = 3  # number of rows to look back
  data = pd.read_csv("./stock_price/with_7day_ma_and7days_prediction/original/"+ticker[1:]+".csv")
  close = data['Close']
  # Written as not(earlier >= current) rather than earlier < current so that
  # rows with a missing Close on either side are still flagged 1.
  rose = ~(close.shift(lag) >= close)
  # Rows without a predecessor `lag` rows back are defined as 0.
  rose.iloc[:lag] = False
  # Stored as float so the CSV contains 0.0 / 1.0.
  data['indicator'] = rose.astype(float)
  data.to_csv("./stock_price/with_7day_ma_and7days_prediction/compare_previous_day_3/"+ticker[1:]+".csv", index=False)

In [11]:
# Tickers used for the long-period data set.
ALL_TICKERS = [
  "$TSLA", "$INTC", "$PFE", "$SPGI",
  "$ADSK", "$VRTX", "$TWTR", "$EBAY",
  "$GRMN", "$ANET", "$AAL",
]
# Write the 3-row look-back indicator file for every ticker.
for ticker in ALL_TICKERS:
  add_indicator3day_column_long_period(ticker)

# 4DayAfter

In [12]:
# Add indicator column to each stock price file
def add_indicator4day_column_long_period(ticker):
  """Add an ``indicator`` column comparing each ``Close`` with the one 4 rows earlier.

  Reads ``./stock_price/with_7day_ma_and7days_prediction/original/<name>.csv``
  and writes the same rows plus an ``indicator`` column to
  ``./stock_price/with_7day_ma_and7days_prediction/compare_previous_day_4/<name>.csv``,
  where ``<name>`` is ``ticker`` without its first character.

  ``indicator`` is 1.0 when a row's ``Close`` is strictly greater than the
  ``Close`` four rows before it, and 0.0 otherwise (lower or equal). The first
  four rows have nothing to compare against and are always 0.0. A file with
  fewer than four rows keeps its row count; no padding rows are added.

  Args:
    ticker: Ticker string with a one-character prefix, e.g. ``"$TSLA"``.

  Returns:
    None. The result is written to disk; the output directory must exist.
  """
  lag = 4  # number of rows to look back
  data = pd.read_csv("./stock_price/with_7day_ma_and7days_prediction/original/"+ticker[1:]+".csv")
  close = data['Close']
  # Written as not(earlier >= current) rather than earlier < current so that
  # rows with a missing Close on either side are still flagged 1.
  rose = ~(close.shift(lag) >= close)
  # Rows without a predecessor `lag` rows back are defined as 0.
  rose.iloc[:lag] = False
  # Stored as float so the CSV contains 0.0 / 1.0.
  data['indicator'] = rose.astype(float)
  data.to_csv("./stock_price/with_7day_ma_and7days_prediction/compare_previous_day_4/"+ticker[1:]+".csv", index=False)

In [13]:
# Tickers used for the long-period data set.
ALL_TICKERS = [
  "$TSLA", "$INTC", "$PFE", "$SPGI",
  "$ADSK", "$VRTX", "$TWTR", "$EBAY",
  "$GRMN", "$ANET", "$AAL",
]
# Write the 4-row look-back indicator file for every ticker.
for ticker in ALL_TICKERS:
  add_indicator4day_column_long_period(ticker)

# 5DayAfter

In [14]:
# Add indicator column to each stock price file
def add_indicator5day_column_long_period(ticker):
  """Add an ``indicator`` column comparing each ``Close`` with the one 5 rows earlier.

  Reads ``./stock_price/with_7day_ma_and7days_prediction/original/<name>.csv``
  and writes the same rows plus an ``indicator`` column to
  ``./stock_price/with_7day_ma_and7days_prediction/compare_previous_day_5/<name>.csv``,
  where ``<name>`` is ``ticker`` without its first character.

  ``indicator`` is 1.0 when a row's ``Close`` is strictly greater than the
  ``Close`` five rows before it, and 0.0 otherwise (lower or equal). The first
  five rows have nothing to compare against and are always 0.0. A file with
  fewer than five rows keeps its row count; no padding rows are added.

  Args:
    ticker: Ticker string with a one-character prefix, e.g. ``"$TSLA"``.

  Returns:
    None. The result is written to disk; the output directory must exist.
  """
  lag = 5  # number of rows to look back
  data = pd.read_csv("./stock_price/with_7day_ma_and7days_prediction/original/"+ticker[1:]+".csv")
  close = data['Close']
  # Written as not(earlier >= current) rather than earlier < current so that
  # rows with a missing Close on either side are still flagged 1.
  rose = ~(close.shift(lag) >= close)
  # Rows without a predecessor `lag` rows back are defined as 0.
  rose.iloc[:lag] = False
  # Stored as float so the CSV contains 0.0 / 1.0.
  data['indicator'] = rose.astype(float)
  data.to_csv("./stock_price/with_7day_ma_and7days_prediction/compare_previous_day_5/"+ticker[1:]+".csv", index=False)

In [15]:
# Tickers used for the long-period data set.
ALL_TICKERS = [
  "$TSLA", "$INTC", "$PFE", "$SPGI",
  "$ADSK", "$VRTX", "$TWTR", "$EBAY",
  "$GRMN", "$ANET", "$AAL",
]
# Write the 5-row look-back indicator file for every ticker.
for ticker in ALL_TICKERS:
  add_indicator5day_column_long_period(ticker)

# 6DayAfter

In [16]:
# Add indicator column to each stock price file
def add_indicator6day_column_long_period(ticker):
  """Add an ``indicator`` column comparing each ``Close`` with the one 6 rows earlier.

  Reads ``./stock_price/with_7day_ma_and7days_prediction/original/<name>.csv``
  and writes the same rows plus an ``indicator`` column to
  ``./stock_price/with_7day_ma_and7days_prediction/compare_previous_day_6/<name>.csv``,
  where ``<name>`` is ``ticker`` without its first character.

  ``indicator`` is 1.0 when a row's ``Close`` is strictly greater than the
  ``Close`` six rows before it, and 0.0 otherwise (lower or equal). The first
  six rows have nothing to compare against and are always 0.0. A file with
  fewer than six rows keeps its row count; no padding rows are added.

  Args:
    ticker: Ticker string with a one-character prefix, e.g. ``"$TSLA"``.

  Returns:
    None. The result is written to disk; the output directory must exist.
  """
  lag = 6  # number of rows to look back
  data = pd.read_csv("./stock_price/with_7day_ma_and7days_prediction/original/"+ticker[1:]+".csv")
  close = data['Close']
  # Written as not(earlier >= current) rather than earlier < current so that
  # rows with a missing Close on either side are still flagged 1.
  rose = ~(close.shift(lag) >= close)
  # Rows without a predecessor `lag` rows back are defined as 0.
  rose.iloc[:lag] = False
  # Stored as float so the CSV contains 0.0 / 1.0.
  data['indicator'] = rose.astype(float)
  data.to_csv("./stock_price/with_7day_ma_and7days_prediction/compare_previous_day_6/"+ticker[1:]+".csv", index=False)

In [17]:
# Tickers used for the long-period data set.
ALL_TICKERS = [
  "$TSLA", "$INTC", "$PFE", "$SPGI",
  "$ADSK", "$VRTX", "$TWTR", "$EBAY",
  "$GRMN", "$ANET", "$AAL",
]
# Write the 6-row look-back indicator file for every ticker.
for ticker in ALL_TICKERS:
  add_indicator6day_column_long_period(ticker)

# 7DayAfter

In [18]:
# Add indicator column to each stock price file
def add_indicator7day_column_long_period(ticker):
  """Add an ``indicator`` column comparing each ``Close`` with the one 7 rows earlier.

  Reads ``./stock_price/with_7day_ma_and7days_prediction/original/<name>.csv``
  and writes the same rows plus an ``indicator`` column to
  ``./stock_price/with_7day_ma_and7days_prediction/compare_previous_day_7/<name>.csv``,
  where ``<name>`` is ``ticker`` without its first character.

  ``indicator`` is 1.0 when a row's ``Close`` is strictly greater than the
  ``Close`` seven rows before it, and 0.0 otherwise (lower or equal). The first
  seven rows have nothing to compare against and are always 0.0. A file with
  fewer than seven rows keeps its row count; no padding rows are added.

  Args:
    ticker: Ticker string with a one-character prefix, e.g. ``"$TSLA"``.

  Returns:
    None. The result is written to disk; the output directory must exist.
  """
  lag = 7  # number of rows to look back
  data = pd.read_csv("./stock_price/with_7day_ma_and7days_prediction/original/"+ticker[1:]+".csv")
  close = data['Close']
  # Written as not(earlier >= current) rather than earlier < current so that
  # rows with a missing Close on either side are still flagged 1.
  rose = ~(close.shift(lag) >= close)
  # Rows without a predecessor `lag` rows back are defined as 0.
  rose.iloc[:lag] = False
  # Stored as float so the CSV contains 0.0 / 1.0.
  data['indicator'] = rose.astype(float)
  data.to_csv("./stock_price/with_7day_ma_and7days_prediction/compare_previous_day_7/"+ticker[1:]+".csv", index=False)

In [19]:
ALL_TICKERS = ["$TSLA", "$INTC", "$PFE", "$SPGI", "$ADSK", "$VRTX", "$TWTR", "$EBAY", "$GRMN", "$ANET", "$AAL"]
for ticker in ALL_TICKERS:
  add_indicator7day_column_long_period(ticker)