#### Import libraries

In [2]:
import pandas as pd
import numpy as np
import datetime

In [3]:
# Capture the run date as a YYYY-MM-DD string and echo it for the record
today = datetime.datetime.today().strftime('%Y-%m-%d')

print(f"Today is {today}")

Today is 2024-12-18


#### Import data
Source: https://www.nasdaq.com/market-activity/stocks

In [5]:
# Load the downloaded price history; the "Date" column is parsed to datetimes
data = pd.read_csv(filepath_or_buffer="qtum.csv", parse_dates=["Date"])
data.head(5)

Unnamed: 0,Date,Close/Last,Volume,Open,High,Low
0,2024-12-17,86.48,1513482,84.7,86.59,82.27
1,2024-12-16,82.59,1550022,77.92,84.59,76.84
2,2024-12-13,77.64,694437,75.59,77.665,74.9
3,2024-12-12,74.49,656844,76.5,77.54,74.37
4,2024-12-11,76.45,492946,78.33,78.39,74.064


#### Transform Data

In [13]:
# Price columns that must be converted to float (Volume is left untouched)
cols = ['Close/Last', 'Open', 'High', 'Low']


def dollar_to_float(x):
    """Convert a price such as ``"$1,234.50"`` or ``86.48`` to a float.

    The value is stringified, then dollar signs and commas are removed
    before parsing. Commas are treated as thousands separators.

    Args:
        x: A price as a string (optionally prefixed with "$") or a number.

    Returns:
        The price as a float.

    Raises:
        ValueError: If the cleaned text is not a valid float literal.
    """
    return float(str(x).replace("$", "").replace(",", ""))

In [15]:
# Run every price column through dollar_to_float and store the result back
for price_col in cols:
    data[price_col] = data[price_col].map(dollar_to_float)

In [17]:
# Round the daily low to two decimal places.
# NOTE(review): only "Low" is rounded; Close/Open/High keep their original
# precision — confirm this asymmetry is intentional.
data["Low"] = data["Low"].round(decimals=2)

In [19]:
# Index the frame by the "Date" column. Reassigning instead of using
# inplace=True, and guarding on the column's presence, keeps this cell safe
# to re-run on the same kernel (a second set_index would raise KeyError).
if "Date" in data.columns:
    data = data.set_index("Date")

In [21]:
# Shorten "Close/Last" to "Close"; reassignment is preferred over inplace=True
data = data.rename(columns={"Close/Last": "Close"})

In [23]:
# Sanity check: inspect the index range, dtypes and non-null counts
# after the transformations above
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1258 entries, 2024-12-17 to 2019-12-18
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Close   1258 non-null   float64
 1   Volume  1258 non-null   int64  
 2   Open    1258 non-null   float64
 3   High    1258 non-null   float64
 4   Low     1258 non-null   float64
dtypes: float64(4), int64(1)
memory usage: 59.0 KB


#### Data Cleaning

In [26]:
# Build a continuous daily calendar from the earliest to the latest date in
# the data, then reindex so that days absent from the source become NaN rows.
# Index.min()/max() are vectorized and skip NaT, unlike the builtin min/max.
date_range = pd.date_range(start=data.index.min(), end=data.index.max(), freq="D")
data_filled = data.reindex(date_range)  # missing rows default to NaN

In [28]:
# Fill the calendar days missing from the source by carrying forward the
# values of the most recent earlier day that has data.
# NOTE(review): Volume is carried forward as well, so filled days repeat the
# previous day's volume — confirm this is intended.
cols = ["Close", "Volume", "Open", "High", "Low"]

# .ffill() replaces fillna(method="ffill"), which is deprecated and emits a
# FutureWarning on recent pandas versions.
data_filled[cols] = data_filled[cols].ffill()

data_filled.head(10)

  data_filled[c] = data_filled[c].fillna(method="ffill")


Unnamed: 0,Close,Volume,Open,High,Low
2019-12-18,28.9787,27351.0,29.09,29.09,28.94
2019-12-19,29.16,18240.0,28.97,29.17,28.97
2019-12-20,29.42,38043.0,29.37,29.44,29.26
2019-12-21,29.42,38043.0,29.37,29.44,29.26
2019-12-22,29.42,38043.0,29.37,29.44,29.26
2019-12-23,29.51,62534.0,29.59,29.6553,29.42
2019-12-24,29.4322,8045.0,29.52,29.52,29.38
2019-12-25,29.4322,8045.0,29.52,29.52,29.38
2019-12-26,29.5264,27100.0,29.57,29.66,29.44
2019-12-27,29.4761,43437.0,29.72,29.72,29.43


#### Export Finalized Data

In [31]:
# Write the gap-filled daily series to disk. DataFrame.to_csv returns None
# when given a path, so the result is not bound to a variable.
# NOTE(review): the reindexed index appears to have no name, so the date
# column is written with an empty header — consider index_label="Date".
data_filled.to_csv("processed_data.csv")