In [1]:
# Import the yfinance library to download financial data from Yahoo Finance
import yfinance as yf 

# Import the pandas library for data manipulation and analysis
import pandas as pd 

# Import the apyori library for association rule mining
from apyori import apriori

In [2]:
# Define a list of 50 ticker symbols for various companies
tickers = ["AAPL", "MSFT", "AMZN", "GOOG", "NVAX", "TSLA", "JPM", "JNJ", "V", "UNH", "NVDA", "PYPL", 
           "BAC", "PG", "MA", "DIS", "HD", "CMCSA", "NFLX", "PEP", "KO", "VZ", "INTC", "CSCO", "CVX", 
           "MRK", "WMT", "PFE", "ABBV", "ABT", "CRM", "ACN", "ORCL", "TMO", "T", "XOM", "MCD", "NKE", 
           "COST", "TMUS", "UNP", "BA", "LMT", "MMM", "UPS", "CAT", "HON", "CVS", "WFC"]

In [3]:
# Create an empty Pandas DataFrame called prices to store the downloaded financial data
prices = pd.DataFrame()

# Iterate over each ticker symbol in the tickers list
for ticker in tickers:
    # Use the yf.download() function to download financial data for the specified time period and interval
    data = yf.download(ticker, start="2022-01-01", end="2023-03-31", interval="1d", progress=False)
    
    # Add a column to the data with the ticker symbol
    data["Ticker"] = ticker
    
    # Append the data to the prices DataFrame
    prices = pd.concat([prices, data])

In [4]:
prices = prices.reset_index()
prices = prices[["Date", "Ticker", "Close"]]
prices = prices.dropna()

In [5]:
# Display the first five rows of the cleaned prices DataFrame
prices.head()

Unnamed: 0,Date,Ticker,Close
0,2022-01-03,AAPL,182.009995
1,2022-01-04,AAPL,179.699997
2,2022-01-05,AAPL,174.919998
3,2022-01-06,AAPL,172.0
4,2022-01-07,AAPL,172.169998


In [6]:
# Create a pivot table from the cleaned prices DataFrame
df = pd.pivot_table(prices, values="Close", index="Date", columns="Ticker", fill_value=0)

In [7]:
# Display the first five rows of the pivot table
df.head()

Ticker,AAPL,ABBV,ABT,ACN,AMZN,BA,BAC,CAT,CMCSA,COST,...,TMUS,TSLA,UNH,UNP,UPS,V,VZ,WFC,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-01-03,182.009995,135.419998,139.039993,407.209991,170.404495,207.860001,46.18,207.0,50.740002,566.710022,...,114.43,399.926666,502.279999,247.770004,213.5,221.429993,52.439999,50.73,144.649994,63.540001
2022-01-04,179.699997,135.160004,135.770004,404.299988,167.522003,213.630005,47.990002,218.080002,50.189999,564.22998,...,114.059998,383.196655,490.899994,252.029999,217.289993,222.460007,53.470001,52.75,142.0,65.93
2022-01-05,174.919998,135.869995,135.160004,397.179993,164.356995,213.070007,47.18,219.75,50.220001,549.919983,...,115.360001,362.706665,489.690002,252.789993,214.559998,220.0,54.02,52.290001,143.919998,66.75
2022-01-06,172.0,135.229996,135.139999,378.0,163.253998,211.339996,48.130001,221.990005,50.509998,549.799988,...,115.57,354.899994,469.649994,253.490005,216.639999,219.75,53.759998,53.630001,143.520004,68.32
2022-01-07,172.169998,134.880005,135.559998,370.75,162.554001,215.5,49.18,224.190002,50.049999,536.179993,...,109.739998,342.320007,458.600006,254.779999,218.160004,216.960007,54.240002,54.77,144.889999,68.879997


In [8]:
# Create a new DataFrame with the percentage change between rows of the pivot table
new_df = df.pct_change()

# Fill any missing values with 0
new_df = new_df.fillna(0)

# Create a new DataFrame with the same structure as new_df, but with values of 'positive' or 'negative'
pos_neg_df = pd.DataFrame(columns=new_df.columns)
for col in new_df.columns:
    pos_neg_df[col] = new_df[col].apply(lambda x: 'positive' if x > 0 else 'negative')

# Display the first five rows of the pos_neg_df DataFrame
pos_neg_df.head()

Ticker,AAPL,ABBV,ABT,ACN,AMZN,BA,BAC,CAT,CMCSA,COST,...,TMUS,TSLA,UNH,UNP,UPS,V,VZ,WFC,WMT,XOM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-01-03,negative,negative,negative,negative,negative,negative,negative,negative,negative,negative,...,negative,negative,negative,negative,negative,negative,negative,negative,negative,negative
2022-01-04,negative,negative,negative,negative,negative,positive,positive,positive,negative,negative,...,negative,negative,negative,positive,positive,positive,positive,positive,negative,positive
2022-01-05,negative,positive,negative,negative,negative,negative,negative,positive,positive,negative,...,positive,negative,negative,positive,negative,negative,positive,negative,positive,positive
2022-01-06,negative,negative,negative,negative,negative,negative,positive,positive,positive,negative,...,positive,negative,negative,positive,positive,negative,negative,positive,negative,positive
2022-01-07,positive,negative,positive,negative,negative,positive,positive,positive,negative,negative,...,negative,negative,negative,positive,positive,negative,positive,positive,positive,positive


In [9]:
# Create an empty list to store lists of column names with positive percentage changes
pos_cols = []

# Iterate through each row of the pos_neg_df DataFrame
for index, row in pos_neg_df.iterrows():
    # Create a new list to store the column names with positive percentage changes for this row
    pos_row = []
    for col in pos_neg_df.columns:
        # If the value in this column for this row is 'positive', add the column name to the pos_row list
        if row[col] == 'positive':
            pos_row.append(col)
    # Add the pos_row list to the pos_cols list, but only if it's not empty
    pos_cols.append(pos_row)
pos_cols = [lst for lst in pos_cols if lst]

In [10]:
# Set the minimum support threshold for frequent itemset mining
min_support = 0.31

# Set the minimum confidence threshold for association rule mining
min_confidence = 0.95

# Set the minimum lift threshold for association rule mining
min_lift = 1.0

In [11]:
# Apply the Apriori algorithm to the list of positive itemsets
final = list(apriori(pos_cols, 
                     min_support=min_support, 
                     min_confidence=min_confidence, 
                     min_lift=min_lift))

In [12]:
# Print out the association rules and their support, confidence, and lift
for r in final:
    # Print out the items in the antecedent and consequent of the association rule
    print(f"Itemset: {', '.join(r.items)}")
    
    # Print out the support of the frequent itemset
    print(f"Support: {r.support:.3f}")
    
    # Print out the confidence of the association rule
    print(f"Confidence: {r.ordered_statistics[0].confidence:.3f}")
    
    # Print out the lift of the association rule
    print(f"Lift: {r.ordered_statistics[0].lift:.3f}")
    
    # Print a blank line to separate the output for each rule
    print()

Itemset: WFC, JPM, BAC
Support: 0.376
Confidence: 0.950
Lift: 1.913

Itemset: ACN, NVDA, AAPL, CSCO
Support: 0.310
Confidence: 0.960
Lift: 1.894

Itemset: ACN, NVDA, AAPL, MSFT
Support: 0.320
Confidence: 0.951
Lift: 1.820

Itemset: MSFT, NVDA, AMZN, AAPL
Support: 0.310
Confidence: 0.950
Lift: 1.817

Itemset: MSFT, COST, AAPL, GOOG
Support: 0.317
Confidence: 0.951
Lift: 1.980

Itemset: NVDA, COST, AAPL, GOOG
Support: 0.314
Confidence: 0.950
Lift: 1.876

Itemset: MSFT, NVDA, COST, AAPL
Support: 0.320
Confidence: 0.951
Lift: 1.981

Itemset: MSFT, CRM, AAPL, GOOG
Support: 0.327
Confidence: 0.962
Lift: 2.002

Itemset: NVDA, CRM, AAPL, GOOG
Support: 0.324
Confidence: 0.952
Lift: 1.821

Itemset: MSFT, NVDA, CRM, AAPL
Support: 0.327
Confidence: 0.952
Lift: 1.821

Itemset: GOOG, NVDA, AAPL, CSCO
Support: 0.310
Confidence: 0.960
Lift: 1.894

Itemset: MSFT, NVDA, AAPL, CSCO
Support: 0.314
Confidence: 0.950
Lift: 1.818

Itemset: MSFT, NVDA, AAPL, GOOG
Support: 0.343
Confidence: 0.963
Lift: 1.842



In [13]:
for r in final:
    # Extract the antecedent and consequent items from the association rule
    antecedent = ', '.join(r.ordered_statistics[0].items_base)
    consequent = ', '.join(r.ordered_statistics[0].items_add)
    # Extract the support, confidence, and lift values from the association rule
    support = r.support
    confidence = r.ordered_statistics[0].confidence
    lift = r.ordered_statistics[0].lift
    # Print the association rule with its support, confidence, and lift values
    print(f"{antecedent} -> {consequent}: Support={support:.3f}, Confidence={confidence:.3f}, Lift={lift:.3f}")

WFC, BAC -> JPM: Support=0.376, Confidence=0.950, Lift=1.913
ACN, NVDA, CSCO -> AAPL: Support=0.310, Confidence=0.960, Lift=1.894
ACN, AAPL, MSFT -> NVDA: Support=0.320, Confidence=0.951, Lift=1.820
MSFT, AMZN, AAPL -> NVDA: Support=0.310, Confidence=0.950, Lift=1.817
COST, AAPL, GOOG -> MSFT: Support=0.317, Confidence=0.951, Lift=1.980
NVDA, COST, GOOG -> AAPL: Support=0.314, Confidence=0.950, Lift=1.876
NVDA, COST, AAPL -> MSFT: Support=0.320, Confidence=0.951, Lift=1.981
CRM, AAPL, GOOG -> MSFT: Support=0.327, Confidence=0.962, Lift=2.002
CRM, AAPL, GOOG -> NVDA: Support=0.324, Confidence=0.952, Lift=1.821
MSFT, CRM, AAPL -> NVDA: Support=0.327, Confidence=0.952, Lift=1.821
NVDA, GOOG, CSCO -> AAPL: Support=0.310, Confidence=0.960, Lift=1.894
MSFT, AAPL, CSCO -> NVDA: Support=0.314, Confidence=0.950, Lift=1.818
MSFT, AAPL, GOOG -> NVDA: Support=0.343, Confidence=0.963, Lift=1.842
MSFT, AAPL, INTC -> NVDA: Support=0.314, Confidence=0.960, Lift=1.836
NVDA, MA, AAPL -> MSFT: Support=0.