#### Import libraries

In [2]:
import pandas as pd
import numpy as np
import datetime

In [3]:
# Stamp the run with the current local date, formatted as YYYY-MM-DD.
today = f"{datetime.datetime.today():%Y-%m-%d}"

print(f"Today is {today}")

Today is 2025-02-03


#### Import data
Source: https://www.nasdaq.com/market-activity/stocks

In [5]:
data = pd.read_csv("slv.csv", parse_dates=["Date"])
data.head()

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low
0,2025-01-31,28.51,26121560,28.77,28.845,28.39
1,2025-01-30,28.72,38787660,28.65,28.9,28.53
2,2025-01-29,28.0,28857810,28.06,28.21,27.85
3,2025-01-28,27.63,15636500,27.53,27.76,27.385
4,2025-01-27,27.46,22691540,27.5,27.74,27.05


#### Transform Data

In [7]:
# Price columns that are converted to float below
cols = ['Close/Last', 'Open', 'High', 'Low']


def dollar_to_float(x):
    """Return ``x`` as a float after removing every "$" from its string form."""
    text = str(x)
    return float(text.replace("$", ""))

In [8]:
# Run every value of each price column through dollar_to_float
for price_col in cols:
    data[price_col] = data[price_col].map(dollar_to_float)

In [9]:
# Keep the Low column at two decimal places
data["Low"] = data["Low"].round(decimals=2)

In [10]:
# Use the Date column as the row index
data = data.set_index("Date")

In [11]:
# Shorten the "Close/Last" column name to "Close"
data = data.rename(columns={"Close/Last": "Close"})

In [12]:
# Print a summary of the frame: index range, columns, non-null counts and dtypes
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1257 entries, 2025-01-31 to 2020-02-03
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Close   1257 non-null   float64
 1   Volume  1257 non-null   int64  
 2   Open    1257 non-null   float64
 3   High    1257 non-null   float64
 4   Low     1257 non-null   float64
dtypes: float64(4), int64(1)
memory usage: 58.9 KB


#### Data Cleaning

In [14]:
# Build a gap-free daily calendar from the earliest to the latest date in the index
first_day, last_day = min(data.index), max(data.index)
date_range = pd.date_range(start=first_day, end=last_day, freq="D")

# Align the data to that calendar; dates missing from `data` become all-NaN rows
data_filled = data.reindex(index=date_range)

In [15]:
# Fill calendar days that have no quote of their own (the NaN rows left by the
# reindex) with the values of the most recent earlier day that does have data.
cols = ["Close", "Volume", "Open", "High", "Low"]

# .ffill() replaces fillna(method="ffill"), which is deprecated in recent pandas
# and emits a FutureWarning.
data_filled[cols] = data_filled[cols].ffill()

data_filled.head(10)

  data_filled[c] = data_filled[c].fillna(method="ffill")


Unnamed: 0,Close,Volume,Open,High,Low
2020-02-03,16.5,17770560.0,16.61,16.6161,16.43
2020-02-04,16.45,13522260.0,16.52,16.5399,16.36
2020-02-05,16.46,8713851.0,16.45,16.4811,16.39
2020-02-06,16.63,10583480.0,16.65,16.66,16.55
2020-02-07,16.54,11066420.0,16.61,16.64,16.47
2020-02-08,16.54,11066420.0,16.61,16.64,16.47
2020-02-09,16.54,11066420.0,16.61,16.64,16.47
2020-02-10,16.61,7382747.0,16.59,16.66,16.54
2020-02-11,16.48,10832680.0,16.54,16.55,16.4
2020-02-12,16.33,10358680.0,16.35,16.4,16.3


#### Export Finalized Data

In [17]:
# Write the gap-filled daily frame to disk.
# to_csv returns None when given a file path, so its result is not kept in a variable.
data_filled.to_csv("processed_data.csv")