## <span style='color:#ff5f27'> 💽 Loading Historical Data</span>

In [None]:
!python -m pip install yfinance

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
!pip show yfinance

Name: yfinance
Version: 0.2.3
Summary: Download market data from Yahoo! Finance API
Home-page: https://github.com/ranaroussi/yfinance
Author: Ran Aroussi
Author-email: ran@aroussi.com
License: Apache
Location: /usr/local/lib/python3.8/dist-packages
Requires: appdirs, beautifulsoup4, cryptography, frozendict, html5lib, lxml, multitasking, numpy, pandas, pytz, requests
Required-by: 


In [None]:
import yfinance as yf

import pandas as pd
import datetime
from datetime import date, timedelta, datetime

#from functions import *


## <span style='color:#ff5f27'> 💽 Fetching Stock History</span>

### Get Meta stock history

In [None]:
# Download the daily META price history from Yahoo! Finance and turn the
# date index into an ordinary column.
meta_stock = yf.download(
    "META",
    start=datetime(2010, 1, 1),
    end=datetime(2023, 1, 7),
).reset_index(level=0)

# Normalise the column labels: lower-case, and 'adj close' -> 'adj_close'.
meta_stock.columns = meta_stock.columns.str.lower()
meta_stock = meta_stock.rename(columns={'adj close': 'adj_close'})

# Tag every row with the stock name as the first column.
meta_stock.insert(0, 'name', 'META')
meta_stock.head(10)

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,name,date,open,high,low,close,adj_close,volume
0,META,2012-05-18,42.049999,45.0,38.0,38.23,38.23,573576400
1,META,2012-05-21,36.529999,36.66,33.0,34.029999,34.029999,168192700
2,META,2012-05-22,32.610001,33.59,30.940001,31.0,31.0,101786600
3,META,2012-05-23,31.370001,32.5,31.360001,32.0,32.0,73600000
4,META,2012-05-24,32.950001,33.209999,31.77,33.029999,33.029999,50237200
5,META,2012-05-25,32.900002,32.950001,31.110001,31.91,31.91,37149800
6,META,2012-05-29,31.48,31.690001,28.65,28.84,28.84,78063400
7,META,2012-05-30,28.700001,29.549999,27.860001,28.190001,28.190001,57267900
8,META,2012-05-31,28.549999,29.67,26.83,29.6,29.6,111639200
9,META,2012-06-01,28.889999,29.15,27.389999,27.719999,27.719999,41855500


In [None]:
# Work on a date-ordered copy so that "next row" means "next trading day".
meta_performance_df = meta_stock.sort_values(by='date')

# Next row's adjusted close, aligned onto the current row.
meta_performance_df['adj_close_nextday'] = meta_performance_df['adj_close'].shift(-1)

# Label: 1 when the next adjusted close is >= the current one, otherwise 0.
# Vectorised comparison instead of a per-row apply; a NaN operand compares
# False, so the last row (no next day) is labelled 0 before being dropped below.
meta_performance_df['price_move'] = (
    meta_performance_df['adj_close_nextday'] >= meta_performance_df['adj_close']
).astype('int64')

# Remove rows containing NaN (the last row has no next-day close).
meta_performance_df = meta_performance_df.dropna()

meta_performance_df[['date', 'adj_close', 'adj_close_nextday', 'price_move']].tail(5)

Unnamed: 0,date,adj_close,adj_close_nextday,price_move
2671,2022-12-29,120.260002,120.339996,1
2672,2022-12-30,120.339996,124.739998,1
2673,2023-01-03,124.739998,127.370003,1
2674,2023-01-04,127.370003,126.940002,0
2675,2023-01-05,126.940002,130.020004,1


In [None]:
# Attach the label by index alignment, then drop rows left with NaN.
# The row that was dropped from meta_performance_df gets no label here, which
# is why the column ends up as float (1.0 / 0.0) in the output.
meta_stock = meta_stock.assign(price_move=meta_performance_df['price_move']).dropna()
meta_stock.tail(5)

Unnamed: 0,name,date,open,high,low,close,adj_close,volume,price_move
2671,META,2022-12-29,116.400002,121.029999,115.769997,120.260002,120.260002,22366200,1.0
2672,META,2022-12-30,118.160004,120.419998,117.739998,120.339996,120.339996,19492100,1.0
2673,META,2023-01-03,122.82,126.370003,122.279999,124.739998,124.739998,35528500,1.0
2674,META,2023-01-04,127.379997,129.050003,125.849998,127.370003,127.370003,32397100,0.0
2675,META,2023-01-05,126.129997,128.520004,124.540001,126.940002,126.940002,25447100,1.0


### Get Apple stock history

In [None]:
# Download the daily AAPL price history from Yahoo! Finance and turn the
# date index into an ordinary column.
apple_stock = yf.download(
    "AAPL",
    start=datetime(2010, 1, 1),
    end=datetime(2023, 1, 7),
).reset_index(level=0)

# Normalise the column labels: lower-case, and 'adj close' -> 'adj_close'.
apple_stock.columns = apple_stock.columns.str.lower()
apple_stock = apple_stock.rename(columns={'adj close': 'adj_close'})

# Tag every row with the stock name as the first column.
apple_stock.insert(0, 'name', 'APPLE')
apple_stock.head(10)

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,name,date,open,high,low,close,adj_close,volume
0,APPLE,2010-01-04,7.6225,7.660714,7.585,7.643214,6.515212,493729600
1,APPLE,2010-01-05,7.664286,7.699643,7.616071,7.656429,6.526476,601904800
2,APPLE,2010-01-06,7.656429,7.686786,7.526786,7.534643,6.422665,552160000
3,APPLE,2010-01-07,7.5625,7.571429,7.466071,7.520714,6.410789,477131200
4,APPLE,2010-01-08,7.510714,7.571429,7.466429,7.570714,6.453412,447610800
5,APPLE,2010-01-11,7.6,7.607143,7.444643,7.503929,6.396482,462229600
6,APPLE,2010-01-12,7.471071,7.491786,7.372143,7.418571,6.323723,594459600
7,APPLE,2010-01-13,7.423929,7.533214,7.289286,7.523214,6.412921,605892000
8,APPLE,2010-01-14,7.503929,7.516429,7.465,7.479643,6.375782,432894000
9,APPLE,2010-01-15,7.533214,7.557143,7.3525,7.354643,6.269229,594067600


In [None]:
# Work on a date-ordered copy so that "next row" means "next trading day".
apple_performance_df = apple_stock.sort_values(by='date')

# Next row's adjusted close, aligned onto the current row.
apple_performance_df['adj_close_nextday'] = apple_performance_df['adj_close'].shift(-1)

# Label: 1 when the next adjusted close is >= the current one, otherwise 0.
# Vectorised comparison instead of a per-row apply; a NaN operand compares
# False, so the last row (no next day) is labelled 0 before being dropped below.
apple_performance_df['price_move'] = (
    apple_performance_df['adj_close_nextday'] >= apple_performance_df['adj_close']
).astype('int64')

# Remove rows containing NaN (the last row has no next-day close).
apple_performance_df = apple_performance_df.dropna()

apple_performance_df[['date', 'adj_close', 'adj_close_nextday', 'price_move']].tail(5)

Unnamed: 0,date,adj_close,adj_close_nextday,price_move
3270,2022-12-29,129.610001,129.929993,1
3271,2022-12-30,129.929993,125.07,0
3272,2023-01-03,125.07,126.360001,1
3273,2023-01-04,126.360001,125.019997,0
3274,2023-01-05,125.019997,129.619995,1


In [None]:
# Attach the label by index alignment, then drop rows left with NaN.
# The row that was dropped from apple_performance_df gets no label here, which
# is why the column ends up as float (1.0 / 0.0) in the output.
apple_stock = apple_stock.assign(price_move=apple_performance_df['price_move']).dropna()
apple_stock.tail(5)

Unnamed: 0,name,date,open,high,low,close,adj_close,volume,price_move
3270,APPLE,2022-12-29,127.989998,130.479996,127.730003,129.610001,129.610001,75703700,1.0
3271,APPLE,2022-12-30,128.410004,129.949997,127.43,129.929993,129.929993,76960600,0.0
3272,APPLE,2023-01-03,130.279999,130.899994,124.169998,125.07,125.07,112117500,1.0
3273,APPLE,2023-01-04,126.889999,128.660004,125.080002,126.360001,126.360001,89113600,0.0
3274,APPLE,2023-01-05,127.129997,127.769997,124.760002,125.019997,125.019997,80962700,1.0


### Get Amazon stock history

In [None]:
# Download the daily AMZN price history from Yahoo! Finance and turn the
# date index into an ordinary column.
amazon_stock = yf.download(
    "AMZN",
    start=datetime(2010, 1, 1),
    end=datetime(2023, 1, 7),
).reset_index(level=0)

# Normalise the column labels: lower-case, and 'adj close' -> 'adj_close'.
amazon_stock.columns = amazon_stock.columns.str.lower()
amazon_stock = amazon_stock.rename(columns={'adj close': 'adj_close'})

# Tag every row with the stock name as the first column.
amazon_stock.insert(0, 'name', 'AMAZON')
amazon_stock.head(10)

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,name,date,open,high,low,close,adj_close,volume
0,AMAZON,2010-01-04,6.8125,6.8305,6.657,6.695,6.695,151998000
1,AMAZON,2010-01-05,6.6715,6.774,6.5905,6.7345,6.7345,177038000
2,AMAZON,2010-01-06,6.73,6.7365,6.5825,6.6125,6.6125,143576000
3,AMAZON,2010-01-07,6.6005,6.616,6.44,6.5,6.5,220604000
4,AMAZON,2010-01-08,6.528,6.684,6.4515,6.676,6.676,196610000
5,AMAZON,2010-01-11,6.631,6.64,6.4605,6.5155,6.5155,175588000
6,AMAZON,2010-01-12,6.4495,6.491,6.3275,6.3675,6.3675,181926000
7,AMAZON,2010-01-13,6.395,6.4855,6.2875,6.4555,6.4555,214464000
8,AMAZON,2010-01-14,6.457,6.519,6.32,6.3675,6.3675,195498000
9,AMAZON,2010-01-15,6.459,6.4825,6.353,6.357,6.357,307530000


In [None]:
# Work on a date-ordered copy so that "next row" means "next trading day".
amazon_performance_df = amazon_stock.sort_values(by='date')

# Next row's adjusted close, aligned onto the current row.
amazon_performance_df['adj_close_nextday'] = amazon_performance_df['adj_close'].shift(-1)

# Label: 1 when the next adjusted close is >= the current one, otherwise 0.
# Vectorised comparison instead of a per-row apply; a NaN operand compares
# False, so the last row (no next day) is labelled 0 before being dropped below.
amazon_performance_df['price_move'] = (
    amazon_performance_df['adj_close_nextday'] >= amazon_performance_df['adj_close']
).astype('int64')

# Remove rows containing NaN (the last row has no next-day close).
amazon_performance_df = amazon_performance_df.dropna()

amazon_performance_df[['date', 'adj_close', 'adj_close_nextday', 'price_move']].tail(5)

Unnamed: 0,date,adj_close,adj_close_nextday,price_move
3270,2022-12-29,84.18,84.0,0
3271,2022-12-30,84.0,85.82,1
3272,2023-01-03,85.82,85.139999,0
3273,2023-01-04,85.139999,83.120003,0
3274,2023-01-05,83.120003,86.080002,1


In [None]:
# Attach the label by index alignment, then drop rows left with NaN.
# The row that was dropped from amazon_performance_df gets no label here, which
# is why the column ends up as float (1.0 / 0.0) in the output.
amazon_stock = amazon_stock.assign(price_move=amazon_performance_df['price_move']).dropna()
amazon_stock.tail(5)

Unnamed: 0,name,date,open,high,low,close,adj_close,volume,price_move
3270,AMAZON,2022-12-29,82.870003,84.550003,82.550003,84.18,84.18,54995900,0.0
3271,AMAZON,2022-12-30,83.120003,84.050003,82.470001,84.0,84.0,62330000,1.0
3272,AMAZON,2023-01-03,85.459999,86.959999,84.209999,85.82,85.82,76706000,0.0
3273,AMAZON,2023-01-04,86.550003,86.980003,83.360001,85.139999,85.139999,68885100,0.0
3274,AMAZON,2023-01-05,85.330002,85.419998,83.07,83.120003,83.120003,67930800,1.0


In [None]:
def timestamp_2_time(x):
    """Convert a 'YYYY-MM-DD' date into Unix epoch milliseconds (UTC midnight).

    Args:
        x: A value whose ``str()`` form is a date formatted as ``%Y-%m-%d``.

    Returns:
        int: Milliseconds since 1970-01-01T00:00:00Z for midnight UTC of
        that date.

    Raises:
        ValueError: If ``str(x)`` does not match the ``%Y-%m-%d`` format.
    """
    from datetime import timezone  # local import: not part of the notebook's import cell

    # Pin the parsed date to UTC. A naive datetime's .timestamp() is
    # interpreted in the machine's local timezone, so the same date would
    # yield different epoch values on differently configured hosts.
    dt_obj = datetime.strptime(str(x), '%Y-%m-%d').replace(tzinfo=timezone.utc)
    return int(dt_obj.timestamp() * 1000)

### Merge data

In [None]:
# Stack the three per-stock frames. Each keeps its own row index, so index
# values repeat across stocks in the combined frame.
frames = [meta_stock, apple_stock, amazon_stock]
stocks_df = pd.concat(frames)

# Replace each date with its epoch-millisecond value: format the date as
# 'YYYY-MM-DD' text and pass that text through timestamp_2_time.
stocks_df['date'] = stocks_df['date'].map(
    lambda day: timestamp_2_time(day.strftime('%Y-%m-%d'))
)
stocks_df

Unnamed: 0,name,date,open,high,low,close,adj_close,volume,price_move
0,META,1337299200000,42.049999,45.000000,38.000000,38.230000,38.230000,573576400,0.0
1,META,1337558400000,36.529999,36.660000,33.000000,34.029999,34.029999,168192700,0.0
2,META,1337644800000,32.610001,33.590000,30.940001,31.000000,31.000000,101786600,1.0
3,META,1337731200000,31.370001,32.500000,31.360001,32.000000,32.000000,73600000,1.0
4,META,1337817600000,32.950001,33.209999,31.770000,33.029999,33.029999,50237200,0.0
...,...,...,...,...,...,...,...,...,...
3270,AMAZON,1672272000000,82.870003,84.550003,82.550003,84.180000,84.180000,54995900,0.0
3271,AMAZON,1672358400000,83.120003,84.050003,82.470001,84.000000,84.000000,62330000,1.0
3272,AMAZON,1672704000000,85.459999,86.959999,84.209999,85.820000,85.820000,76706000,0.0
3273,AMAZON,1672790400000,86.550003,86.980003,83.360001,85.139999,85.139999,68885100,0.0


## <span style="color:#ff5f27;"> 🔮 Connecting to Hopsworks Feature Store </span>

In [None]:
!pip install hopsworks

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import hopsworks

# Log in to Hopsworks. In the recorded run this prompted for an API key
# interactively, so no credential is stored in the notebook.
project = hopsworks.login()

# Handle to the project's feature store, used below to create the feature group.
fs = project.get_feature_store() 

Copy your Api Key (first register/login): https://c.app.hopsworks.ai/account/api/generated

Paste it here: ··········
Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/5287




Connected. Call `.close()` to terminate connection gracefully.


## <span style="color:#ff5f27;">🪄 Creating Feature Groups</span>

In [None]:
# Fetch the 'stocks_fg' feature group (version 1), creating it if it does not
# exist yet. Rows are keyed by (name, date); 'date' (epoch milliseconds in
# stocks_df) is also declared as the event-time column.
stocks_fg = fs.get_or_create_feature_group(
        name = 'stocks_fg',
        description = 'Meta, Apple & Amazon stock statistics each day',
        version = 1,
        primary_key = ['name', 'date'],
        online_enabled = True,
        event_time = 'date'
    )    

# Upload the combined stock frame into the feature group. As the cell's last
# expression, the value returned by insert() is displayed as the output.
stocks_fg.insert(stocks_df)

Feature Group created successfully, explore it at 
https://c.app.hopsworks.ai:443/p/5287/fs/5207/fg/15748


Uploading Dataframe: 0.00% |          | Rows 0/9226 | Elapsed Time: 00:00 | Remaining Time: ?

Launching offline feature group backfill job...
Backfill Job started successfully, you can follow the progress at 
https://c.app.hopsworks.ai/p/5287/jobs/named/stocks_fg_1_offline_fg_backfill/executions


(<hsfs.core.job.Job at 0x7f5d7a43d3a0>, None)