# Exploratory Data Analysis

In [70]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [66]:
data = pd.read_csv("../data/sp500_prices_2018-2021.csv")

### Ebay Stock

In [67]:
ebay = data[data['Ticker'] == 'EBAY']

In [68]:
ebay.head()

Unnamed: 0,Date,Ticker,Adj Close,Close,High,Low,Open,Volume
145,2018-01-02,EBAY,36.845531,38.060001,38.360001,37.919998,38.169998,6997300.0
641,2018-01-03,EBAY,37.968513,39.220001,39.279999,37.900002,37.990002,9134400.0
1137,2018-01-04,EBAY,37.339252,38.57,39.77,38.470001,39.419998,8958600.0
1633,2018-01-05,EBAY,38.423519,39.689999,39.84,38.810001,38.849998,7290400.0
2129,2018-01-08,EBAY,38.530006,39.799999,40.080002,39.439999,39.549999,9714200.0


#### Line Chart

In [69]:
fig = px.line(ebay, x = "Date", y = "Adj Close", title = "Historical Stock Price")
fig.show()

#### Candlestick Chart

In [71]:
fig = go.Figure(data = [go.Candlestick(x = ebay['Date'], 
                                       open = ebay['Open'],
                                      high = ebay['High'],
                                      low = ebay['Low'],
                                      close = ebay['Close'])])
fig.show()

#### Histogram

In [32]:
fig = px.histogram(ebay['Adj Close'], x = "Adj Close", title = "Histogram")
fig.show()

#### PDF

In [48]:
hist, bin_edges = np.histogram(ebay['Adj Close'], density = True, bins = 20)
bin_mid = (bin_edges[1:] + bin_edges[:-1]) / 2
px.bar(x = bin_mid, y = hist)

In [59]:
ebay['SMA_50'] = ebay['Adj Close'].rolling(window=50).mean()
ebay['SMA_200'] = ebay['Adj Close'].rolling(window=200).mean()
# fig = px.line(ebay, x = "Date", y = "Adj Close", title = "Historical Stock Price")
# fig.add_scatter(x = ebay["Date"], y = ebay["SMA_50"], mode = 'lines')
fig = px.line(ebay, x = "Date", y = ["Adj Close", "SMA_50", "SMA_200"])
fig.show()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [61]:
import math

class Welford():

    def __init__(self,a_list=None):
        self.n = 0
        self.M = 0
        self.S = 0

    def update(self,x):
        self.n += 1

        newM = self.M + (x - self.M) / self.n
        newS = self.S + (x - self.M) * (x - newM)

        self.M = newM
        self.S = newS

    @property
    def mean(self):
        return self.M

    @property
    def std(self):
        if self.n == 1:
            return 0
        return math.sqrt(self.S / (self.n - 1))



In [63]:
a_list = ebay["Adj Close"]
w = Welford()

running_mean = []
running_std = []

for x in a_list:
    w.update(x)
    running_mean.append(w.mean)
    running_std.append(w.std)

ebay["Welfords STD"] = running_std



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [64]:
fig = px.line(ebay, x = "Date", y = "Welfords STD")
fig.show()