In [1]:
# imports
import pandas as pd
import numpy as np
import yfinance as yf
import tsfel
import logging

In [2]:
# Ticker universes, one list per sector. Each list is passed as-is to
# yf.download (prices) and iterated ticker by ticker for fundamentals.
# NOTE(review): the lists are not disjoint - see the notes on retail,
# healthcare and consumer discretionary below.

# Technology Stocks (20 tickers)
technology_stocks = [
    "AAPL", "MSFT", "GOOGL", "AMZN", "META",
    "NVDA", "TSLA", "CRM", "ADBE", "IBM",
    "INTC", "CSCO", "ORCL", "QCOM", "PYPL",
    "NFLX", "AVGO", "AMD", "ZM", "SHOP"
]

# Energy Stocks (20 tickers)
energy_stocks = [
    "XOM", "CVX", "BP", "TTE", "PXD",
    "COP", "OXY", "SLB", "EOG", "KMI",
    "PSX", "VLO", "EPD", "HAL", "E",
    "CNQ", "DVN", "APA", "BKR", "WMB"
]

# Retail Stocks (18 tickers - two fewer than every other sector)
# "CVS" is also listed under healthcare_stocks.
retail_stocks = [
    "WMT", "HD", "COST", "BABA",
    "TGT", "TJX", "NKE", "MCD",
    "SBUX", "KR", "CVS", "WBA", "DG",
    "DLTR", "ROST", "BBY", "GPS", "YUM"
]

# Healthcare Stocks (20 tickers)
# "CVS" is also listed under retail_stocks.
healthcare_stocks = [
    "JNJ", "PFE", "MRK", "ABBV", "UNH",
    "TMO", "MDT", "ABT", "BMY", "LLY",
    "AMGN", "GILD", "REGN", "VRTX", "ZTS",
    "BIIB", "ISRG", "SYK", "DHR", "CVS"
]

# Financial Stocks (20 tickers)
financial_stocks = [
    "JPM", "BAC", "C", "WFC", "GS",
    "MS", "AXP", "COF", "USB", "PNC",
    "SCHW", "BK", "BLK", "TROW", "STT",
    "AMTD", "ICE", "NDAQ", "MMC", "CB"
]

# Industrial Stocks (20 tickers)
industrial_stocks = [
    "HON", "GE", "MMM", "CAT", "DE",
    "UPS", "LMT", "BA", "NOC", "RTX",
    "GD", "EMR", "ETN", "ITW", "ROP",
    "PH", "DOV", "FTV", "IR", "TXT"
]

# Consumer Discretionary Stocks (20 tickers)
# The first eleven tickers plus "GPS" and "BBY" placement aside, eleven of
# these (NKE, MCD, SBUX, TJX, TGT, DLTR, DG, ROST, YUM, BBY, GPS) also
# appear in retail_stocks, so the two sectors share data.
consumer_discretionary_stocks = [
    "NKE", "MCD", "SBUX", "TJX", "LOW",
    "TGT", "DLTR", "DG", "ROST", "YUM",
    "BBY", "GPS", "KMX", "LVS", "MAR",
    "HLT", "WYNN", "MGM", "CMG", "RCL"
]

Stock Prices Data

In [3]:
def download_close_prices(tickers, period='2y', interval='1h'):
    """Download closing prices for ``tickers`` and keep only complete rows.

    Parameters
    ----------
    tickers : list of str
        Symbols forwarded to ``yf.download``.
    period : str, optional
        Look-back window forwarded to ``yf.download`` (default ``'2y'``).
    interval : str, optional
        Bar size forwarded to ``yf.download`` (default ``'1h'``).

    Returns
    -------
    pandas.DataFrame
        The ``'Close'`` table of the download, one column per ticker that
        has at least one value, restricted to the rows in which every
        remaining ticker has a value.
    """
    close = yf.download(tickers, period=period, interval=interval)['Close']

    # A column that is NaN everywhere would make the row-wise dropna below
    # discard every row, silently emptying the whole sector. Remove such
    # columns first. NOTE(review): presumably this is how a ticker that
    # Yahoo cannot serve shows up in a multi-ticker download - confirm.
    close = close.dropna(axis=1, how='all')

    # Drop nan: keep only timestamps where all remaining tickers have a price
    return close.dropna()


retail_prices = download_close_prices(retail_stocks)
technology_prices = download_close_prices(technology_stocks)
energy_prices = download_close_prices(energy_stocks)
healthcare_prices = download_close_prices(healthcare_stocks)
financial_prices = download_close_prices(financial_stocks)
industrial_prices = download_close_prices(industrial_stocks)
consumer_discretionary_prices = download_close_prices(consumer_discretionary_stocks)

[*********************100%%**********************]  18 of 18 completed
[*********************100%%**********************]  20 of 20 completed
[*********************100%%**********************]  20 of 20 completed
[*********************100%%**********************]  20 of 20 completed
[*********************100%%**********************]  20 of 20 completed
[*********************100%%**********************]  20 of 20 completed
[*********************100%%**********************]  20 of 20 completed


In [4]:
# Persist every sector's cleaned closing-price table as its own CSV file.
# The mapping keeps the destination path next to the frame written to it.
price_tables = {
    './Data/retail_prices.csv': retail_prices,
    './Data/technology_prices.csv': technology_prices,
    './Data/energy_prices.csv': energy_prices,
    './Data/healthcare_prices.csv': healthcare_prices,
    './Data/financial_prices.csv': financial_prices,
    './Data/industrial_prices.csv': industrial_prices,
    './Data/consumer_discretionary_prices.csv': consumer_discretionary_prices,
}
for csv_path, sector_prices in price_tables.items():
    sector_prices.to_csv(csv_path)

Time Series Analysis Data

In [5]:
# No logging: logging.disable(level) suppresses every record at `level` and
# below for all loggers in the process, so passing CRITICAL silences all of
# the standard levels. This stays in effect for the rest of the kernel
# session unless it is re-enabled with logging.disable(logging.NOTSET).
logging.disable(logging.CRITICAL)

# # Transpose the DataFrame so that stocks become rows and time points become columns
# retail_transposed = retail_prices.T

def tech_features_extraction(dataframe, domain='statistical'):
    """Extract one row of TSFEL features per column (stock) of a price table.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        One column per stock, one row per observation. Each column is handed
        to TSFEL as a single window covering the whole series.
    domain : str, optional
        Feature domain forwarded to ``tsfel.get_features_by_domain``.
        Defaults to ``'statistical'``, the domain this notebook uses.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``'stock'`` (the column labels of ``dataframe``) with one
        column per extracted feature. When ``dataframe`` has no columns the
        result is an empty frame that still carries the ``'stock'`` index.
    """
    cfg_file = tsfel.get_features_by_domain(domain)

    # Collect the per-stock results and concatenate once at the end instead
    # of re-copying a growing frame (seeded with an empty DataFrame) on
    # every iteration.
    per_stock = []

    # Iterating the columns directly is equivalent to iterating the rows of
    # the transposed frame, without materialising the transpose.
    for stock_name, stock_data in dataframe.items():
        signal_windows = [stock_data.values]

        # Extract the configured features using TSFEL
        features = tsfel.time_series_features_extractor(cfg_file, signal_windows)

        # Label every returned row with the stock it was computed from
        features.index = pd.Index([stock_name] * len(features), name='stock')
        per_stock.append(features)

    if not per_stock:
        # pd.concat rejects an empty list; return a well-formed empty table
        return pd.DataFrame(index=pd.Index([], name='stock'))

    return pd.concat(per_stock)


# # time series analysis -> return some features
# cfg_file = tsfel.get_features_by_domain('statistical')

# # Extract features
# retail_features = tsfel.time_series_features_extractor(cfg_file, retail_prices)
# technology_features = tsfel.time_series_features_extractor(cfg_file, technology_prices)
# energy_features = tsfel.time_series_features_extractor(cfg_file, energy_prices)
# Compute one TSFEL feature table per sector. The price tables are processed
# left to right, so the extraction order matches the assignment order.
(
    retail_features,
    technology_features,
    energy_features,
    healthcare_features,
    financial_features,
    industrial_features,
    consumer_discretionary_features,
) = [
    tech_features_extraction(sector_prices)
    for sector_prices in (
        retail_prices,
        technology_prices,
        energy_prices,
        healthcare_prices,
        financial_prices,
        industrial_prices,
        consumer_discretionary_prices,
    )
]

*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***
*** Feature extraction started ***


  features = tsfel.time_series_features_extractor(cfg_file, signal_windows)



*** Feature extraction finished ***


In [6]:
# Persist every sector's TSFEL feature table as its own CSV file.
# The mapping keeps the destination path next to the frame written to it.
feature_tables = {
    './Data/retail_TSFEL_features.csv': retail_features,
    './Data/technology_TSFEL_features.csv': technology_features,
    './Data/energy_TSFEL_features.csv': energy_features,
    './Data/healthcare_TSFEL_features.csv': healthcare_features,
    './Data/financial_TSFEL_features.csv': financial_features,
    './Data/industrial_TSFEL_features.csv': industrial_features,
    './Data/consumer_discretionary_TSFEL_features.csv': consumer_discretionary_features,
}
for csv_path, sector_features in feature_tables.items():
    sector_features.to_csv(csv_path)

Stock Fundamental Data

In [7]:
def fundamental_data(ticker, drop_list):
    """
    Get fundamental data from Yahoo Finance as a one-row DataFrame.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yf.Ticker``.
    drop_list : iterable of str
        Column names to remove from the result. Names that are not present
        are ignored.

    Returns
    -------
    pandas.DataFrame
        A single row indexed by the ``'symbol'`` entry of the ticker's
        ``info`` mapping, with one object-dtype column per remaining entry.

    Raises
    ------
    KeyError
        If the ``info`` mapping has no ``'symbol'`` entry.
    """
    info = yf.Ticker(ticker).info

    # Building a DataFrame straight from the mapping only works when it holds
    # list-valued entries that all share one non-zero length; the first row
    # of that frame then carries the first element of each list. Reproduce
    # that outcome explicitly so a mapping with no lists, an empty list, or
    # lists of different lengths no longer raises.
    record = {
        key: ((value[0] if value else None)
              if isinstance(value, (list, tuple)) else value)
        for key, value in info.items()
    }

    # dtype=object keeps every value as-is instead of coercing mixed types
    df = pd.Series(record, dtype=object).to_frame().T
    df = df.set_index('symbol')

    # drop columns if they exist
    return df.drop(columns=list(drop_list), errors='ignore')

Remove columns:

* address1
* address2
* fax
* industrySymbol
* city
* state
* country
* phone
* website
* industryKey
* industryDisp
* sectorKey
* sectorDisp
* longBusinessSummary
* irWebsite
* underlyingSymbol
* shortName
* longName
* timeZoneFullName
* timeZoneShortName
* uuid
* messageBoardId
* companyOfficers

In [8]:
# Descriptive / contact fields that are removed from every ticker's
# fundamentals before the sector tables are assembled.
columns2drop = [
    'address1', 'address2', 'fax', 'industrySymbol', 'city', 'state',
    'country', 'phone', 'website', 'industryKey', 'industryDisp',
    'sectorKey', 'sectorDisp', 'longBusinessSummary', 'irWebsite',
    'underlyingSymbol', 'shortName', 'longName', 'timeZoneFullName',
    'timeZoneShortName', 'uuid', 'messageBoardId', 'companyOfficers',
]


def collect_fundamentals(tickers, drop_list):
    """Stack the one-row fundamentals of ``tickers`` into a single table.

    Parameters
    ----------
    tickers : iterable of str
        Symbols to query, one ``fundamental_data`` call each, in order.
    drop_list : iterable of str
        Column names forwarded to ``fundamental_data`` for removal.

    Returns
    -------
    pandas.DataFrame
        One row per ticker in input order; an empty DataFrame when
        ``tickers`` is empty.
    """
    frames = [fundamental_data(ticker, drop_list) for ticker in tickers]
    # A single concat replaces re-copying a growing frame once per ticker.
    # pd.concat rejects an empty list, so an empty sector yields an empty frame.
    return pd.concat(frames) if frames else pd.DataFrame()


# Each sector is fetched and written out immediately, so a failure in a later
# sector does not lose the files already produced.
tech_data = collect_fundamentals(technology_stocks, columns2drop)
tech_data.to_csv('./Data/tech_data.csv')

energy_data = collect_fundamentals(energy_stocks, columns2drop)
energy_data.to_csv('./Data/energy_data.csv')

retail_data = collect_fundamentals(retail_stocks, columns2drop)
retail_data.to_csv('./Data/retail_data.csv')

healthcare_data = collect_fundamentals(healthcare_stocks, columns2drop)
healthcare_data.to_csv('./Data/healthcare_data.csv')

financial_data = collect_fundamentals(financial_stocks, columns2drop)
financial_data.to_csv('./Data/financial_data.csv')

industrial_data = collect_fundamentals(industrial_stocks, columns2drop)
industrial_data.to_csv('./Data/industrial_data.csv')

consumer_discretionary_data = collect_fundamentals(consumer_discretionary_stocks, columns2drop)
consumer_discretionary_data.to_csv('./Data/consumer_discretionary_data.csv')

In [9]:
# Report the (rows, columns) shape of every sector's fundamentals table,
# one line per sector.
shape_report = (
    ("Retail data shape:", retail_data),
    ("Technology data shape:", tech_data),
    ("Energy data shape:", energy_data),
    ("Healthcare data shape:", healthcare_data),
    ("Financial data shape:", financial_data),
    ("Industrial data shape:", industrial_data),
    ("Consumer Discretionary data shape:", consumer_discretionary_data),
)
for label, sector_table in shape_report:
    print(label, sector_table.shape)

Retail data shape: (18, 111)
Technology data shape: (20, 111)
Energy data shape: (20, 111)
Healthcare data shape: (20, 111)
Financial data shape: (20, 111)
Industrial data shape: (20, 111)
Consumer Discretionary data shape: (20, 111)
