# Stock analysis

One of the easiest ways to see how a stock has performed in the past is to compare its growth with that of the market index.

Rather than only looking at the pandas DataFrame, or at how much the stock rose over the past year, we should plot the performance of both the stock and the index in one line chart and compare them.

We can reuse the code from the Data Collection notebook to fetch the data for both the stock and the index ([vnquant](https://github.com/phamdinhkhanh/vnquant) and [investpy](https://investpy.readthedocs.io)).

#### Data collection

In [30]:
#import needed libraries and packages
import pandas as pd
import vnquant.DataLoader as dl
import datetime
import investpy
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

In [31]:
#stock data
def load_stock_data(symbol, start='2019-01-01', end=None):
    """Download daily prices for one ticker and return them as an OHLCV frame.

    Parameters
    ----------
    symbol : str
        Ticker handed to the vnquant ``DataLoader`` (e.g. ``'TCB'``).
    start : str, optional
        First date to request, formatted ``YYYY-MM-DD``. Defaults to
        ``'2019-01-01'``, the value that used to be hard-coded.
    end : str, optional
        Last date to request, formatted ``YYYY-MM-DD``. Defaults to today's
        date at call time.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date, Open, High, Low, Close, Volume`` with the row labels
        produced by resetting the downloaded frame's index.
    """
    if end is None:
        end = datetime.datetime.now().strftime("%Y-%m-%d")

    loader = dl.DataLoader(symbol, start, end, data_source='VND', minimal=True)

    # Turn the downloaded frame's index into a regular column on a fresh object,
    # leaving whatever the loader returned untouched.
    stock = loader.download().reset_index()

    # Copy column by column into the capitalised names used by the plots.
    # NOTE(review): kept as per-column assignment (not a rename/select) because
    # the exact column layout returned by the loader is not visible here.
    column_sources = {
        'Date': 'date',
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Close': 'close',
        'Volume': 'volume',
    }
    dailyInfo = pd.DataFrame(columns=list(column_sources))
    for target, source in column_sources.items():
        dailyInfo[target] = stock[source]

    #to store data remove the '#' on the following line
    #dailyInfo.to_csv(f'Data/ClosePrice/{symbol} historical since {start}', index=True)

    return dailyInfo

In [32]:
#index data
def load_index_data(x):
    """Fetch the daily history of index ``x`` through investpy.

    The request covers country ``'vietnam'`` from 01/01/2019 up to today's
    date. The result is returned as a DataFrame without the ``Currency``
    column and with the former index moved into a regular column.
    """
    today = datetime.datetime.now().strftime('%d/%m/%Y')
    history = investpy.get_index_historical_data(
        x,
        country='vietnam',
        from_date='01/01/2019',
        to_date=today,
    )
    return (
        pd.DataFrame(history)
        .drop(columns=['Currency'])
        .reset_index()
    )

In [33]:
# Fetch the price history of the 'TCB' stock and of the 'VN' index it is compared with.
stock = load_stock_data(symbol='TCB')
index = load_index_data(x='VN')

2022-02-23 21:58:53,932 : INFO : data TCB from 2019-01-01 to 2022-02-23 have already cloned!


In [34]:
# Add daily simple returns, daily log returns and the compounded growth of one
# unit, then drop rows containing NaN (the first row has no previous close).
stock['pct_change'] = stock['Close'].pct_change().round(4)
stock['log_ret'] = np.log(stock['Close']).diff().round(4)
stock['cum_returns'] = stock['pct_change'].add(1).cumprod()
stock.dropna(inplace=True)
stock

Unnamed: 0,Date,Open,High,Low,Close,Volume,pct_change,log_ret,cum_returns
1,2019-01-03,25.70,25.75,25.00,25.00,1867580.0,-0.0234,-0.0237,0.976600
2,2019-01-04,25.00,25.00,23.25,24.95,3047830.0,-0.0020,-0.0020,0.974647
3,2019-01-07,25.15,25.50,25.10,25.10,1603430.0,0.0060,0.0060,0.980495
4,2019-01-08,25.15,25.20,25.05,25.10,843170.0,0.0000,0.0000,0.980495
5,2019-01-09,25.20,26.00,25.10,25.80,3499530.0,0.0279,0.0275,1.007850
...,...,...,...,...,...,...,...,...,...
779,2022-02-17,51.60,52.50,51.60,52.20,7990000.0,0.0116,0.0116,2.038820
780,2022-02-18,51.70,52.00,51.50,51.60,6622600.0,-0.0115,-0.0116,2.015374
781,2022-02-21,51.60,51.70,50.80,51.50,12203200.0,-0.0019,-0.0019,2.011544
782,2022-02-22,51.00,51.70,50.60,51.40,10832900.0,-0.0019,-0.0019,2.007722


In [35]:
# Same derived columns for the index: simple returns, log returns and the
# compounded growth of one unit; rows containing NaN are dropped afterwards.
index['pct_change'] = index['Close'].pct_change().round(4)
index['log_ret'] = np.log(index['Close']).diff().round(4)
index['cum_returns'] = index['pct_change'].add(1).cumprod()

index.dropna(inplace=True)
index

Unnamed: 0,Date,Open,High,Low,Close,Volume,pct_change,log_ret,cum_returns
1,2019-01-03,889.78,892.21,872.12,878.22,156575,-0.0152,-0.0153,0.984800
2,2019-01-04,872.69,881.48,861.85,880.90,136567,0.0031,0.0030,0.987853
3,2019-01-07,890.47,896.63,885.30,889.64,127549,0.0099,0.0099,0.997633
4,2019-01-08,891.37,891.87,883.67,887.44,112829,-0.0025,-0.0025,0.995139
5,2019-01-09,890.26,897.25,888.61,896.99,160508,0.0108,0.0107,1.005886
...,...,...,...,...,...,...,...,...,...
779,2022-02-17,1492.10,1507.99,1492.10,1507.99,615794,0.0106,0.0106,1.691858
780,2022-02-18,1507.99,1507.99,1492.38,1504.84,697546,-0.0021,-0.0021,1.688305
781,2022-02-21,1504.84,1516.63,1500.97,1510.84,791919,0.0040,0.0040,1.695058
782,2022-02-22,1510.84,1510.84,1484.81,1503.47,908054,-0.0049,-0.0049,1.686753


In [36]:
# Line chart of the stock's daily percentage change, with a centred title.
fig = px.line(stock, x='Date', y="pct_change")
fig.update_layout(title={'text': 'Volatility', 'x': 0.5})

fig.show()


In [37]:
# Pair the two cumulative-return series by trading date instead of by row
# label, so a day that is missing from one source cannot shift the other
# series against the wrong dates. Only dates present in both are kept.
df = (
    stock[['Date', 'cum_returns']]
    .rename(columns={'cum_returns': 'Stock'})
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .merge(
        index[['Date', 'cum_returns']]
        .rename(columns={'cum_returns': 'Index'})
        .assign(Date=lambda frame: pd.to_datetime(frame['Date'])),
        on='Date',
        how='inner',
    )
)
# Displayed with Date as the index; df itself keeps Date as a column for plotting.
df.set_index('Date')

Unnamed: 0_level_0,Stock,Index
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-01-03,0.976600,0.984800
2019-01-04,0.974647,0.987853
2019-01-07,0.980495,0.997633
2019-01-08,0.980495,0.995139
2019-01-09,1.007850,1.005886
...,...,...
2022-02-17,2.038820,1.691858
2022-02-18,2.015374,1.688305
2022-02-21,2.011544,1.695058
2022-02-22,2.007722,1.686753


In [38]:
# Compare the stock with the index through their cumulative returns.
# The hover label shows the full date; the x axis has one tick per month.
fig = px.line(
    df,
    x="Date",
    y=df.columns,
    hover_data={"Date": "|%B %d, %Y"},
)
fig.update_layout(title={'text': 'Growth of $1 investment', 'x': 0.5})
fig.update_xaxes(dtick="M1", tickformat="%b\n%Y", ticklabelmode="period")
fig.show()

In [26]:
## Full code
### Wrap the steps above in reusable functions

#import needed libraries and packages
import pandas as pd
import vnquant.DataLoader as dl
import datetime
import investpy
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

def load_stock_data(symbol, start='2019-01-01', end=None):
    """Download daily prices for one ticker and return them as an OHLCV frame.

    Parameters
    ----------
    symbol : str
        Ticker handed to the vnquant ``DataLoader`` (e.g. ``'TCB'``).
    start : str, optional
        First date to request, formatted ``YYYY-MM-DD``. Defaults to
        ``'2019-01-01'``, the value that used to be hard-coded.
    end : str, optional
        Last date to request, formatted ``YYYY-MM-DD``. Defaults to today's
        date at call time.

    Returns
    -------
    pandas.DataFrame
        Columns ``Date, Open, High, Low, Close, Volume`` with the row labels
        produced by resetting the downloaded frame's index.
    """
    if end is None:
        end = datetime.datetime.now().strftime("%Y-%m-%d")

    loader = dl.DataLoader(symbol, start, end, data_source='VND', minimal=True)

    # Turn the downloaded frame's index into a regular column on a fresh object,
    # leaving whatever the loader returned untouched.
    stock = loader.download().reset_index()

    # Copy column by column into the capitalised names used by the plots.
    # NOTE(review): kept as per-column assignment (not a rename/select) because
    # the exact column layout returned by the loader is not visible here.
    column_sources = {
        'Date': 'date',
        'Open': 'open',
        'High': 'high',
        'Low': 'low',
        'Close': 'close',
        'Volume': 'volume',
    }
    dailyInfo = pd.DataFrame(columns=list(column_sources))
    for target, source in column_sources.items():
        dailyInfo[target] = stock[source]

    #to store data remove the '#' on the following line
    #dailyInfo.to_csv(f'Data/ClosePrice/{symbol} historical since {start}', index=True)

    return dailyInfo
#index data
def load_index_data(x):
    """Fetch the daily history of index ``x`` through investpy.

    The request covers country ``'vietnam'`` from 01/01/2019 up to today's
    date. The result is returned as a DataFrame without the ``Currency``
    column and with the former index moved into a regular column.
    """
    today = datetime.datetime.now().strftime('%d/%m/%Y')
    history = investpy.get_index_historical_data(
        x,
        country='vietnam',
        from_date='01/01/2019',
        to_date=today,
    )
    return (
        pd.DataFrame(history)
        .drop(columns=['Currency'])
        .reset_index()
    )

def _aligned_growth(first, second, first_label, second_label):
    """Return the growth of one unit for two price frames over their common dates.

    Both inputs need ``Date`` and ``Close`` columns. The closes are joined on
    ``Date`` (inner join), so a trading day missing from either source is
    dropped from both instead of shifting one series against the other.
    Growth is compounded from unrounded daily returns; the first common date
    has no previous close and is therefore not part of the result.

    Raises
    ------
    ValueError
        If the two frames share fewer than two dates.
    """
    def _closes(prices, label):
        # Keep only what is needed, under the label that will appear in the legend.
        return (
            prices[['Date', 'Close']]
            .rename(columns={'Close': label})
            .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
            .dropna()
        )

    prices = (
        _closes(first, first_label)
        .merge(_closes(second, second_label), on='Date', how='inner')
        .sort_values('Date')  # compounding only makes sense in chronological order
    )
    if len(prices) < 2:
        raise ValueError(
            'Need at least two common dates to compare %s and %s'
            % (first_label, second_label)
        )

    growth = prices.set_index('Date').pct_change().add(1).cumprod().dropna()
    return growth.reset_index()


def _show_growth_chart(growth, title):
    """Draw every non-Date column of ``growth`` as a line over ``Date`` and show it."""
    series = [column for column in growth.columns if column != 'Date']
    fig = px.line(growth, x="Date", y=series,
                  hover_data={"Date": "|%B %d, %Y"}
                  )
    fig.update_layout(title=title, title_x=0.5)
    fig.update_xaxes(
        dtick="M1",
        tickformat="%b\n%Y",
        ticklabelmode="period"
        )
    fig.show()


def performance_vs_index(symbol, required_index):
    """Plot the growth of one unit invested in ``symbol`` against ``required_index``.

    Prices come from ``load_stock_data`` and ``load_index_data``; the two
    series are compared over the dates they have in common. The chart is
    shown directly and nothing is returned.
    """
    growth = _aligned_growth(
        load_stock_data(symbol),
        load_index_data(required_index),
        symbol,
        required_index,
    )
    _show_growth_chart(growth, '%s | Growth of $1 investment' % symbol)


def performance_vs_other(symbol_1, symbol_2):
    """Plot the growth of one unit invested in ``symbol_1`` against ``symbol_2``.

    Both price histories come from ``load_stock_data`` and are compared over
    the dates they have in common. The chart is shown directly and nothing is
    returned.
    """
    growth = _aligned_growth(
        load_stock_data(symbol_1),
        load_stock_data(symbol_2),
        symbol_1,
        symbol_2,
    )
    _show_growth_chart(
        growth, '%s vs %s | Growth of $1 investment' % (symbol_1, symbol_2)
    )
    

In [27]:
# Example: growth of the 'HAH' stock compared with the 'VN' index.
performance_vs_index(symbol='HAH', required_index='VN')

2022-02-23 21:58:11,188 : INFO : data HAH from 2019-01-01 to 2022-02-23 have already cloned!


In [43]:
# Example: growth of the 'MSN' stock compared with the 'VHC' stock.
performance_vs_other(symbol_1='MSN', symbol_2='VHC')

2022-02-23 22:00:50,840 : INFO : data MSN from 2019-01-01 to 2022-02-23 have already cloned!
2022-02-23 22:00:51,286 : INFO : data VHC from 2019-01-01 to 2022-02-23 have already cloned!
