# Import Libraries

In [3]:
import yfinance as yf
import pandas as pd
import numpy as np
from typing import Dict, Optional
from tqdm import tqdm

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

# Load Data from CSV

In [8]:
# NOTE: this is an absolute, machine-specific path; adjust it to wherever the
# project's data directory lives before running the notebook elsewhere.
# The CSV's first (unnamed) column is the row index that was saved with the
# file, so it is read back as the index instead of an 'Unnamed: 0' data column.
df_enriched_stock_data = pd.read_csv(
    '/Users/ani/Projects/6_stock_portfolio_recommendation/data//enriched_stock_data.csv',
    index_col=0,
)
df_enriched_stock_data

Unnamed: 0,Ticker,Closing_Price,All_Time_High,Percent_From_All_Time_High,Percent_Difference_200_Day_Moving_Average,24_Hour_Percent_Change,7_Day_Percent_Change,30_Day_Percent_Change,Annualized_Return,YTD_Return,...,Sector,Industry,Country,Business_Summary,Dividend_Yield,Trailing_PE,Forward_PE,Average_Volume,Average_Volume_10days,52_Week_Change
0,AAPL,196.58,258.4,-23.92,-12.05,0.48,-2.42,-0.84,15.0,-19.19,...,Technology,Consumer Electronics,United States,"Apple Inc. designs, manufactures, and markets ...",0.53,30.572319,23.655836,61130764,51288240,-0.052581
1,AMZN,212.52,242.06,-12.2,4.29,-1.07,-2.06,14.87,8.02,-3.5,...,Consumer Cyclical,Internet Retail,United States,"Amazon.com, Inc. engages in the retail sale of...",,34.61238,34.5561,48685895,36713730,0.123969
2,GOOG,173.98,207.22,-16.04,0.57,-1.83,-2.05,5.44,16.07,-8.52,...,Communication Services,Internet Content & Information,United States,Alphabet Inc. offers various products and plat...,0.48,19.41741,19.439106,26395583,22489300,-0.034839
3,GOOGL,173.32,205.89,-15.82,1.13,-1.49,-1.57,6.31,16.09,-8.29,...,Communication Services,Internet Content & Information,United States,Alphabet Inc. offers various products and plat...,0.48,19.34375,19.34375,39809440,32413440,-0.035128
4,ABBV,185.49,214.68,-13.6,-0.46,0.01,-1.94,-0.89,15.34,5.31,...,Healthcare,Drug Manufacturers - General,United States,"AbbVie Inc., a research-based biopharmaceutica...",3.54,79.26923,15.291839,7701938,4394080,0.08862
5,ABT,132.41,139.57,-5.13,8.03,0.1,-0.74,-0.49,8.1,17.88,...,Healthcare,Medical Devices,United States,"Abbott Laboratories, together with its subsidi...",1.78,17.173801,25.660854,6692629,4782250,0.252459
6,AXP,296.42,324.79,-8.74,4.63,1.23,-1.7,7.64,20.98,-0.11,...,Financial Services,Credit Services,United States,"American Express Company, together with its su...",1.11,20.699722,19.604498,3185351,2358770,0.286657
7,AMD,126.79,211.38,-40.02,2.12,-0.24,4.16,28.56,15.21,5.11,...,Technology,Semiconductors,United States,"Advanced Micro Devices, Inc. operates as a sem...",,92.54745,24.860785,43686245,51216520,-0.213608
8,T,27.66,28.42,-2.67,14.3,0.04,-0.72,-1.78,9.63,24.0,...,Communication Services,Telecom Services,United States,AT&T Inc. provides telecommunications and tech...,4.01,16.969326,12.348214,37241861,28881250,0.503261
9,ACN,306.38,396.28,-22.69,-9.65,-1.81,-3.23,0.85,8.57,-11.36,...,Technology,Information Technology Services,Ireland,Accenture plc provides strategy and consulting...,1.93,25.258038,21.77541,3365711,2795730,-0.008415


# Content-Based Recommendation System

In [None]:
# calculate cosine similarity and recommend similar stocks
def recommend_similar_stocks(df, min_return, ticker, founded_years, top_n=5):
    """Recommend the stocks most similar to ``ticker`` by cosine similarity.

    Rows with a missing value in any similarity feature are dropped, the
    remaining features are min-max scaled, and every remaining stock is scored
    by its cosine similarity to ``ticker``. Candidates are then filtered by
    ``min_return`` and ``founded_years`` and the ``top_n`` highest-scoring ones
    are returned underneath the row(s) of the selected stock.

    Args:
        df (pd.DataFrame): DataFrame containing stock data. Must provide the
            'Ticker', 'Security' and 'Years_Since_Founded' columns plus every
            column listed in ``features`` below.
        min_return (float): Minimum 'Annualized_Return' (inclusive) a stock
            needs in order to be recommended.
        ticker (str): The ticker symbol of the stock to find recommendations for.
        founded_years (float): Minimum 'Years_Since_Founded' (inclusive) a
            stock needs in order to be recommended.
        top_n (int): Maximum number of similar stocks to recommend.

    Returns:
        pd.DataFrame | None: The selected stock's row(s), with 'Scores' set to
        NaN, followed by up to ``top_n`` recommended stocks sorted by
        descending 'Scores'. ``None`` (after printing a message) when
        ``ticker`` is absent from ``df`` or has a missing similarity feature.
    """
    # Columns used for the similarity calculation
    features = ['Annualized_Return', 'YTD_Pct_Return', 'Pct_Diff_200_MA',
                'Annualized_Volatility', 'Sharpe_Ratio', 'Beta', 'Market_Cap']
    output_columns = ['Ticker', 'Security', 'Scores', *features, 'Years_Since_Founded']

    # The selected stock is reported first; it carries no score of its own.
    df_selected_stock = df[df['Ticker'] == ticker].assign(Scores=np.nan)[output_columns]

    # Drop rows with missing features, then renumber the rows so that index
    # labels coincide with row positions in the scaled feature array. Looking
    # the ticker up *before* renumbering would pick the wrong row (or run past
    # the end) whenever a row ahead of it had been dropped.
    df_filtered = df.dropna(subset=features).reset_index(drop=True)

    ticker_positions = np.flatnonzero((df_filtered['Ticker'] == ticker).to_numpy())
    if ticker_positions.size == 0:
        if df_selected_stock.empty:
            print(f"Ticker {ticker} not found in the DataFrame.")
        else:
            print(f"Ticker {ticker} is missing one or more similarity features.")
        return None
    ticker_position = int(ticker_positions[0])

    # Scale the features, then score every stock against the selected one.
    # Only that single row of the similarity matrix is ever needed.
    scaled_features = MinMaxScaler().fit_transform(df_filtered[features])
    similarity_scores = cosine_similarity(
        scaled_features[[ticker_position]], scaled_features
    )[0]

    candidates = df_filtered.assign(
        Ticker=df_filtered['Ticker'].astype(str),
        Scores=similarity_scores.astype(float),
    )[output_columns]

    # Keep stocks other than the selected one that meet both thresholds.
    is_eligible = (
        (candidates['Annualized_Return'] >= min_return)
        & (candidates['Ticker'] != ticker)
        & (candidates['Years_Since_Founded'] >= founded_years)
    )
    # A stable sort keeps the ordering of tied scores deterministic.
    df_recommended_stocks = (
        candidates[is_eligible]
        .sort_values(by='Scores', ascending=False, kind='stable')
        .head(top_n)
    )

    return pd.concat([df_selected_stock, df_recommended_stocks], ignore_index=True)

In [None]:
# Example: up to five stocks most similar to 'NFLX', restricted to candidates
# with Annualized_Return >= 50 and Years_Since_Founded >= 1.
# NOTE(review): df_sp500 is not defined in any cell visible here — confirm it
# is loaded before this cell runs.
ticker_to_recommend = 'NFLX'
recommended_stocks = recommend_similar_stocks(
    df=df_sp500,
    min_return=50,
    ticker=ticker_to_recommend,
    founded_years=1,
    top_n=5,
)
recommended_stocks