In [None]:
import yfinance as yf
import csv
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from requests.exceptions import HTTPError
import matplotlib.pyplot as plt
from datetime import datetime
from numpy.linalg import eig
from scipy.linalg import svd
from numpy import mean
from numpy import cov

In [None]:
# Lift pandas' display truncation for both rows and columns, so frames shown
# later in the notebook are rendered in full.
for display_option in ('display.max_rows', 'display.max_columns'):
    pd.set_option(display_option, None)

In [None]:
import numpy as np
from numpy import mean, std
from scipy.linalg import svd

def pca(data):
    """Principal component analysis on column-standardized data.

    Each column is centred on its mean and divided by its sample standard
    deviation (``ddof=1``), the covariance matrix of the standardized data is
    decomposed with SVD, and the standardized data is projected onto the
    resulting axes.

    Parameters
    ----------
    data : array-like of shape (n_samples, n_features)
        Numeric observations, one row per sample.

    Returns
    -------
    projected : numpy.ndarray of shape (n_samples, n_features)
        Standardized data expressed in the principal-component basis,
        components ordered by decreasing variance.
    explained_variance_ratio : numpy.ndarray of shape (n_features,)
        Fraction of the total variance carried by each component.
    """
    samples = np.asarray(data, dtype=float)

    # Per-column mean and sample standard deviation
    column_mean = samples.mean(axis=0)
    column_std = samples.std(axis=0, ddof=1)

    # A constant column has zero spread; dividing by it would fill the matrix
    # with NaN/inf and make the SVD fail. Use 1 instead so that such a column
    # simply becomes all zeros after centring.
    safe_std = np.where(column_std == 0, 1.0, column_std)
    standardized = (samples - column_mean) / safe_std

    # Covariance matrix of the standardized data
    covariance = standardized.T.dot(standardized) * (1 / (standardized.shape[0] - 1))

    # The covariance matrix is symmetric positive semi-definite, so its SVD
    # coincides with its eigendecomposition: the left singular vectors are the
    # principal axes and the singular values are the eigenvalues.
    axes, singular_values, _ = svd(covariance, full_matrices=False)

    # Project the standardized data onto the principal axes
    projected = standardized.dot(axes)

    # The singular values of the covariance matrix already ARE the variances
    # along each axis. (Squaring them and dividing by n - 1 would only be
    # right if the SVD had been taken of the data matrix itself.)
    eigenvalues = singular_values
    total_variance = np.sum(eigenvalues)

    if total_variance > 0:
        explained_variance_ratio = eigenvalues / total_variance
    else:
        # Every column was constant: there is no variance to attribute.
        explained_variance_ratio = np.zeros_like(eigenvalues)

    return projected, explained_variance_ratio

In [None]:
# Set the desired minimum and maximum values for the scaled data
min_val = 0
max_val = 1


def normalization(column):
    """Min-max scale ``column`` into the ``[min_val, max_val]`` range.

    Parameters
    ----------
    column : pandas.Series or numpy.ndarray
        Numeric values to rescale; must provide ``.min()`` and ``.max()``.

    Returns
    -------
    Same kind of object as ``column``, with its smallest value mapped to
    ``min_val`` and its largest to ``max_val``. A column whose values are all
    equal is mapped to ``min_val`` everywhere.
    """
    lowest = column.min()
    value_range = column.max() - lowest

    # A constant column has a zero range; dividing by it would turn every
    # entry into NaN. Dividing by 1 instead leaves the (all-zero) shifted
    # values untouched, so the column ends up at min_val.
    safe_range = np.where(value_range == 0, 1, value_range)

    col_std = (column - lowest) / safe_range
    col_scaled = col_std * (max_val - min_val) + min_val
    return col_scaled

In [None]:
# Extract today's data
# Define a list of stocks in technology sector
stocks = ['AAPL', 'ABCL', 'ABNB', 'ADBE','AMD', 
          'APPS',  'ASML',  'AVGO','AZPN', 'BIDU', 
          'BR', 'CARR', 'CDNS', 'CHGG', 'CRM', 'CSCO', 
           'DLO', 'DOX', 'DXCM', 'ET', 'EXEL', 'EXPI', 'FLGT',
           'FUTU', 'GBDC', 'GGG', 'GLOB',  'GNRC', 'GOOGL', 'GRMN', 
           'HAE', 'HLNE', 'IDXX',  'INTC', 'INTU', 'KIDS', 
          'LOGI', 'LPRO', 'LRCX', 'MCHP','MDRX', 'MDT', 'MEDP', 'MELI', 
            'MKTX', 'MRNA', 'MSFT', 'MU', 'NOW', 'NTES', 'NVDA', 
           'NXPI', 'OLED', 'OLLI', 'ON',  'PAYC', 'PCRX', 
           'PYPL', 'QCOM', 'SEDG', 'TSLA', 'TTD', 'TXN', 'ZM']


# Parallel lists: entry i of each list belongs to stocks[i]
pe_ratios = []
pes_ratios = []
de_ratios = []
pb_ratios = []
pr_ratios = []

# Loop through each stock in the list and retrieve its ratios from Yahoo Finance
for stock in stocks:
    ticker = yf.Ticker(stock)
    # Read `.info` once per ticker and reuse it for every field instead of
    # repeating the property access (and whatever lookup it performs).
    info = ticker.info

    try:
        pe_ratio = info['trailingPE']
        pes_ratio = info['trailingEps']
        de_ratio = info['debtToEquity']
        pb_ratio = info['priceToBook']
        # Width of the 52-week range (high minus low); note this is a
        # difference, not a ratio of the price to that range.
        pr_ratio = info['fiftyTwoWeekHigh'] - info['fiftyTwoWeekLow']
    except KeyError as missing:
        # Same exception type as before, but say which ticker was incomplete.
        raise KeyError(
            "{}: Yahoo Finance info has no {} field".format(stock, missing)
        ) from missing

    pe_ratios.append(pe_ratio)
    pes_ratios.append(pes_ratio)
    de_ratios.append(de_ratio)
    pb_ratios.append(pb_ratio)
    pr_ratios.append(pr_ratio)


# Write ratios data to a CSV file.
# newline='' is what the csv module expects for files it writes; without it
# each row is followed by an extra blank line on platforms that translate
# '\n' to '\r\n'.
with open('technology_stock_5ratios.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Stock', 'Price to Earnings', "Earnings Per Share", 
                     "Dept to Equity", 'Price to Book', 'Price to 52W Range'])
    writer.writerows(zip(stocks, pe_ratios, pes_ratios, de_ratios, pb_ratios, pr_ratios))

In [None]:
# Read the ratios CSV back into a DataFrame
df = pd.read_csv('technology_stock_5ratios.csv')
# set_index returns a new frame, which is only shown as this cell's output;
# `df` is not reassigned, so "Stock" remains an ordinary column in it.
df.set_index("Stock")

In [None]:
# Apply normalization to all columns except the first one (column 0 is "Stock").
# The rescaled values are written back into `df` itself.
df.iloc[:, 1:] = df.iloc[:, 1:].apply(normalization)
# `normalized_data` is a second name for the same frame until the concat below rebinds it
normalized_data = df

# Add reference point: a synthetic row holding 1 in the second ratio column and 0 in the others
new_row = pd.DataFrame([['REFERENCE POINT', 0, 1, 0, 0, 0]], columns=normalized_data.columns)

# Add the new row using pandas.concat; concat builds a new frame, so `df` does not gain the row
normalized_data = pd.concat([normalized_data, new_row], ignore_index=True)


modified_data = normalized_data.copy()

# Printed before the inversion below, so this output still shows the un-inverted values
print(modified_data)
# Replace positional columns 1, 3, 4 and 5 with (1 - normalized value); column 2 is left as is.
# With the column order of the CSV header written earlier in this notebook, the inverted
# columns are P/E, debt-to-equity, price-to-book and 52-week range; column 2 is EPS.
# After this step the reference row holds 1 in every ratio column.
modified_data.iloc[:, [1, 3, 4, 5]] =1 - normalized_data.iloc[:, [1, 3, 4, 5]]

# Last expression of the cell: display the inverted frame
modified_data

In [None]:
# stock name (first column of the prepared frame)
stocks = modified_data.iloc[: , 0]
# stock data (every column after the name)
data = modified_data.iloc[: , 1 :]
# pca
reducedData, explained_variance_ratio = pca(data.values)

Comp1 = reducedData[:, 0]
Comp2 = reducedData[:, 1]

# Name the component columns from the actual width of the projection, so the
# frame still builds if the number of ratio columns changes.
component_names = ['Comp{}'.format(k) for k in range(1, reducedData.shape[1] + 1)]
df = pd.DataFrame(reducedData, columns=component_names)
df.insert(0, 'Stock', stocks)
# `frame` is another name for `df`. The former `frame.set_index("Stock")` call
# was dropped: its result was discarded mid-cell, so it changed nothing.
frame = df

fig, ax = plt.subplots()
ax.scatter(Comp1, Comp2)
current_date = datetime.now().date()
ax.set_title('PCA of tech stocks ' + str(current_date))
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')

# Label every point with its stock name; iterating the values directly avoids
# relying on the Series index being 0..n-1.
for stock_label, pc1, pc2 in zip(stocks, Comp1, Comp2):
    ax.annotate(stock_label, (pc1, pc2))

# Fixed viewing window: points whose scores fall outside [-1, 1] are not shown.
# NOTE(review): pca() standardizes its input, so scores can exceed this range;
# confirm the zoom is intended.
ax.set_xlim(-1, 1)
ax.set_ylim(-1, 1)
plt.show()