In [9]:
# Standard-library modules
import importlib
import os
import sys

# Third-party analysis libraries
import pandas as pd
import talib as ta

# Put the sibling ../scripts folder at the front of the import path, load the
# project's finance_analysis module, and reload it so that edits to the module
# are picked up without restarting the kernel.
sys.path.insert(0, os.path.abspath("../scripts"))
import finance_analysis as fa
importlib.reload(fa)

<module 'finance_analysis' from 'c:\\Users\\HP\\Desktop\\TenX\\financial-news-dataset\\scripts\\finance_analysis.py'>

In [10]:
# Read the AAPL historical data CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer='../data/yfinance_data/AAPL_historical_data.csv')

# Preview the first five rows
df.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits
0,1980-12-12,0.128348,0.128906,0.128348,0.128348,0.098943,469033600,0.0,0.0
1,1980-12-15,0.12221,0.12221,0.121652,0.121652,0.093781,175884800,0.0,0.0
2,1980-12-16,0.113281,0.113281,0.112723,0.112723,0.086898,105728000,0.0,0.0
3,1980-12-17,0.115513,0.116071,0.115513,0.115513,0.089049,86441600,0.0,0.0
4,1980-12-18,0.118862,0.11942,0.118862,0.118862,0.09163,73449600,0.0,0.0


Apply Technical Indicators with TA-Lib

In [15]:
# Pass the price data through the project's indicator helper and rebind `df`
# to its result, then preview the first 25 rows.
df = fa.ta_indicators(df)
df.head(n=25)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Dividends,Stock Splits,SMA_20,SMA_50,RSI,MACD,MACD_signal,MACD_hist
0,1980-12-12,0.128348,0.128906,0.128348,0.128348,0.098943,469033600,0.0,0.0,,,,,,
1,1980-12-15,0.12221,0.12221,0.121652,0.121652,0.093781,175884800,0.0,0.0,,,,,,
2,1980-12-16,0.113281,0.113281,0.112723,0.112723,0.086898,105728000,0.0,0.0,,,,,,
3,1980-12-17,0.115513,0.116071,0.115513,0.115513,0.089049,86441600,0.0,0.0,,,,,,
4,1980-12-18,0.118862,0.11942,0.118862,0.118862,0.09163,73449600,0.0,0.0,,,,,,
5,1980-12-19,0.126116,0.126674,0.126116,0.126116,0.097223,48630400,0.0,0.0,,,,,,
6,1980-12-22,0.132254,0.132813,0.132254,0.132254,0.101954,37363200,0.0,0.0,,,,,,
7,1980-12-23,0.137835,0.138393,0.137835,0.137835,0.106257,46950400,0.0,0.0,,,,,,
8,1980-12-24,0.145089,0.145647,0.145089,0.145089,0.111849,48003200,0.0,0.0,,,,,,
9,1980-12-26,0.158482,0.15904,0.158482,0.158482,0.122173,55574400,0.0,0.0,,,,,,


In [14]:
# Compare the closing price with the 20-period SMA over the first 25 rows
print(df.loc[:, ['Close', 'SMA_20']].head(n=25))


       Close    SMA_20
0   0.128348       NaN
1   0.121652       NaN
2   0.112723       NaN
3   0.115513       NaN
4   0.118862       NaN
5   0.126116       NaN
6   0.132254       NaN
7   0.137835       NaN
8   0.145089       NaN
9   0.158482       NaN
10  0.160714       NaN
11  0.156808       NaN
12  0.152344       NaN
13  0.154018       NaN
14  0.150670       NaN
15  0.143973       NaN
16  0.137835       NaN
17  0.135045       NaN
18  0.142299       NaN
19  0.141183  0.138588
20  0.136161  0.138979
21  0.136719  0.139732
22  0.139509  0.141071
23  0.138393  0.142215
24  0.146763  0.143611


Technical indicators such as the Simple Moving Average (SMA) need a minimum number of data points before they can produce a valid value. For example, a 20-period SMA needs 20 data points to calculate its first value; before that, the result is NaN. This is why rows 0–18 in the output above show NaN and the first SMA_20 value appears at row 19.

In [17]:
# Ensure proper column names and data types: lowercase/strip every column
# name, then coerce the price and volume columns to numeric (values that
# cannot be parsed become NaN because of errors='coerce').
df.columns = df.columns.str.lower().str.strip()
for col in ['open', 'high', 'low', 'close', 'volume']:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# NOTE(review): the rename above also lowercases any indicator columns that
# already exist on `df` (e.g. SMA_20 -> sma_20), so they stay in the frame
# next to the recalculated upper-case columns added below, and their NaNs
# take part in the dropna() — confirm this is intended.

# Handle NaNs: forward-fill gaps, then drop every row that still has a NaN
# (forward-fill cannot fill leading rows that have no earlier value).
# DataFrame.ffill() replaces fillna(method='ffill'), whose `method` argument
# is deprecated since pandas 2.1 and triggers a FutureWarning.
df.ffill(inplace=True)
df.dropna(inplace=True)

# Recalculate technical indicators from the cleaned lowercase 'close' column
df['SMA_20'] = ta.SMA(df['close'], timeperiod=20)
df['RSI'] = ta.RSI(df['close'], timeperiod=14)
df['MACD'], df['MACD_signal'], df['MACD_hist'] = ta.MACD(df['close'], fastperiod=12, slowperiod=26, signalperiod=9)

# Verify results on the most recent 25 rows
print(df[['close', 'SMA_20', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist']].tail(25))


  df.fillna(method='ffill', inplace=True)


            close      SMA_20        RSI      MACD  MACD_signal  MACD_hist
10973  209.070007  202.242500  63.268212  6.584845     6.826872  -0.242027
10974  213.250000  203.405499  67.358232  6.578164     6.777130  -0.198966
10975  214.100006  204.596000  68.135240  6.565772     6.734859  -0.169087
10976  210.619995  205.562500  61.663474  6.203632     6.628613  -0.424981
10977  216.750000  206.787500  67.516480  6.338210     6.570533  -0.232322
10978  220.270004  208.099500  70.318783  6.652217     6.586870   0.065348
10979  221.550003  209.459500  71.288745  6.924533     6.654402   0.270131
10980  226.339996  210.983000  74.629962  7.441082     6.811738   0.629344
10981  227.820007  212.650001  75.575738  7.879050     7.025201   0.853850
10982  228.679993  214.239500  76.132520  8.201001     7.260361   0.940640
10983  232.979996  216.232500  78.741969  8.702803     7.548849   1.153954
10984  227.570007  217.253501  68.582565  8.565209     7.752121   0.813088
10985  230.539993  218.12