In [1]:
import yfinance as yf
import pandas as pd
import numpy as np

class StockDataAnalyzer:
    """Download price history for NSE tickers and derive per-ticker statistics.

    The pipeline is: ``fetch_stock_data`` -> ``calculate_summary_statistics``
    -> ``merge_summary`` -> ``get_latest_data``; ``analyze`` runs all four.

    Attributes:
        stock_codes: Iterable of ticker codes without the ``.NS`` suffix.
        period: Look-back period string passed to ``Ticker.history``.
        rsi_period: Rolling window length (in rows) used for the RSI.
        stock_data_df: Long-format price rows for every fetched ticker.
        summary_df: One row per ticker with ``Close_*`` summary statistics.
        merged_df: ``stock_data_df`` joined with ``summary_df`` plus the
            z-score, z-score category and RSI columns.
    """

    CLOSE_STATISTICS = ['min', 'max', 'median', 'mean', 'var', 'std']

    def __init__(self, stock_codes, period='1y', rsi_period=14):
        self.stock_codes = stock_codes
        self.period = period
        self.rsi_period = rsi_period  # Rolling window length for the RSI
        self.stock_data_df = pd.DataFrame()
        self.summary_df = pd.DataFrame()
        self.merged_df = pd.DataFrame()

    @staticmethod
    def _lookup_industry(stock):
        """Return the ticker's 'industry' metadata, or 'N/A' if unavailable.

        The metadata lookup is kept separate from the price download so that
        a failure here does not throw away price history already fetched.
        """
        try:
            return stock.info.get('industry', 'N/A')
        except Exception:
            return 'N/A'

    def fetch_stock_data(self):
        """Fetch price history for every code and store it in ``stock_data_df``.

        Each code is looked up as ``<code>.NS``. Tickers that return no rows or
        raise are reported on stdout and skipped. If nothing at all is fetched,
        the stored frames are reset so results from an earlier run cannot be
        mistaken for fresh data.
        """
        all_stock_data = []

        for stock_code in self.stock_codes:
            try:
                yahoo_ticker = f"{stock_code}.NS"
                stock = yf.Ticker(yahoo_ticker)
                data = stock.history(period=self.period)

                if data.empty:
                    print(f"No data available for {stock_code}")
                    continue

                row_count = len(data)
                stock_data = {
                    'Date': data.index.tolist(),
                    'Ticker': [stock_code] * row_count,
                    'Open': data['Open'].tolist(),
                    'High': data['High'].tolist(),
                    'Low': data['Low'].tolist(),
                    'Close': data['Close'].tolist(),
                    'Volume': data['Volume'].tolist(),
                    # NOTE: the column is called 'Sector' but holds the
                    # 'industry' field of the ticker metadata.
                    'Sector': [self._lookup_industry(stock)] * row_count,
                }
                all_stock_data.append(pd.DataFrame(stock_data))
            except Exception as e:
                print(f"Error fetching data for {stock_code}: {e}")

        if all_stock_data:
            self.stock_data_df = pd.concat(all_stock_data, ignore_index=True)
        else:
            self.stock_data_df = pd.DataFrame()
            self.summary_df = pd.DataFrame()
            self.merged_df = pd.DataFrame()
            print("No stock data fetched.")

    def calculate_rsi(self, data, column='Close'):
        """Compute a simple-moving-average RSI for one price series.

        Args:
            data: DataFrame holding a single instrument's rows in
                chronological order.
            column: Name of the price column to use.

        Returns:
            A Series indexed like ``data``. The first row is NaN (there is no
            prior price), as is any row whose window has neither gains nor
            losses. A window with gains but no losses yields 100. Because
            ``min_periods=1`` is used, early rows are averaged over fewer than
            ``rsi_period`` observations.
        """
        # Row-to-row price change
        delta = data[column].diff(1)

        # Split changes into non-negative gain and loss magnitudes
        gains = delta.where(delta > 0, 0)
        losses = (-delta).where(delta < 0, 0)

        # Simple rolling means over the configured window
        avg_gains = gains.rolling(window=self.rsi_period, min_periods=1).mean()
        avg_losses = losses.rolling(window=self.rsi_period, min_periods=1).mean()

        # Relative strength; division by zero gives inf (RSI 100) or NaN (0/0)
        rs = avg_gains / avg_losses

        return 100 - (100 / (1 + rs))

    def calculate_summary_statistics(self):
        """Populate ``summary_df`` with per-ticker statistics of ``Close``.

        Produces the columns ``Ticker`` and ``Close_<stat>`` for each entry of
        ``CLOSE_STATISTICS``. Prints a message and returns without changes when
        no stock data has been fetched.
        """
        if self.stock_data_df.empty:
            print("Stock data is empty. Please fetch data first.")
            return

        self.summary_df = (
            self.stock_data_df
            .groupby('Ticker')['Close']
            .agg(self.CLOSE_STATISTICS)
            .add_prefix('Close_')
            .reset_index()
        )

    def merge_summary(self):
        """Build ``merged_df``: price rows + summary stats + z-score + RSI.

        The result is sorted by ticker (ascending) and date (descending).
        Prints a message and returns without changes when either input frame
        is empty.
        """
        if self.stock_data_df.empty or self.summary_df.empty:
            print("Stock data or summary is empty. Please fetch data and calculate summary first.")
            return

        merged_df = pd.merge(self.stock_data_df, self.summary_df, on='Ticker', how='left')
        merged_df['Close_z_score'] = (merged_df['Close'] - merged_df['Close_mean']) / merged_df['Close_std']

        # Round-trip through a date string to drop the time-of-day and any
        # timezone, leaving plain calendar dates.
        merged_df['Date'] = merged_df['Date'].dt.strftime('%Y-%m-%d')
        merged_df['Date'] = pd.to_datetime(merged_df['Date'])

        # Bucket by |z|; the first matching condition wins. NaN z-scores match
        # none of the conditions and fall through to 'Other'.
        abs_z = merged_df['Close_z_score'].abs()
        conditions = [abs_z <= 1, abs_z <= 2, abs_z <= 3, abs_z > 3]
        values = ['Within_1', 'Within_2', 'Within_3', 'More_3']
        merged_df['ZScore_Category'] = np.select(conditions, values, default='Other')

        # RSI must be computed separately for each ticker: running it over the
        # concatenated frame would diff one ticker's first close against the
        # previous ticker's last close and let rolling windows span tickers.
        # Each per-ticker result keeps its original index labels, so the
        # assignment below realigns the values to the right rows.
        rsi_by_ticker = [
            self.calculate_rsi(ticker_rows.sort_values('Date', kind='stable'))
            for _, ticker_rows in merged_df.groupby('Ticker', sort=False)
        ]
        merged_df['RSI'] = pd.concat(rsi_by_ticker)

        self.merged_df = (
            merged_df
            .sort_values(by=['Ticker', 'Date'], ascending=[True, False])
            .reset_index(drop=True)
        )

    def get_latest_data(self):
        """Return the most recent row of ``merged_df`` for each ticker.

        Returns:
            A DataFrame with one row per ticker, or ``None`` (after printing a
            message) when ``merged_df`` is empty.
        """
        if self.merged_df.empty:
            print("Merged DataFrame does not exist. Please run the merge_summary method first.")
            return

        max_date_indices = self.merged_df.groupby('Ticker')['Date'].idxmax()
        return self.merged_df.loc[max_date_indices].reset_index(drop=True)

    def analyze(self):
        """Run the full pipeline and return the latest row per ticker.

        Returns ``None`` when no data could be fetched.
        """
        self.fetch_stock_data()
        self.calculate_summary_statistics()
        self.merge_summary()
        return self.get_latest_data()

In [2]:
# Example usage
if __name__ == "__main__":
    # User-defined parameters
    stock_codes_list = ['EDELWEISS', 'JIOFIN', 'SYNCOMF', 'GREENPOWER', 'SALASAR', 'COMFINTE', 'MISHTANN']
    period = '1y'  # Change the duration as needed
    rsi_period = 14  # You can change this value as needed

    # Create an instance of StockDataAnalyzer
    analyzer = StockDataAnalyzer(stock_codes_list, period=period, rsi_period=rsi_period)

    # Perform analysis
    latest_stock_data = analyzer.analyze()

    # A bare expression inside an `if` block is never echoed by the notebook,
    # so the shape has to be printed explicitly. analyze() returns None when
    # no ticker produced any data, so guard against that as well.
    if latest_stock_data is None:
        print("No data returned by analyze(); nothing to display.")
    else:
        print(latest_stock_data.shape)

$COMFINTE.NS: possibly delisted; no price data found  (period=1y) (Yahoo error = "No data found, symbol may be delisted")


No data available for COMFINTE


$MISHTANN.NS: possibly delisted; no price data found  (period=1y) (Yahoo error = "No data found, symbol may be delisted")


No data available for MISHTANN


In [3]:
# Show the frame returned by analyzer.analyze(): the most recent row per ticker.
latest_stock_data

Unnamed: 0,Date,Ticker,Open,High,Low,Close,Volume,Sector,Close_min,Close_max,Close_median,Close_mean,Close_var,Close_std,Close_z_score,ZScore_Category,RSI
0,2024-10-04,EDELWEISS,132.100006,132.880005,120.25,121.349998,10028123,Capital Markets,59.160854,142.050003,72.431274,77.214231,344.962212,18.573158,2.37632,Within_3,48.050597
1,2024-10-04,GREENPOWER,20.35,21.0,19.809999,20.18,12257369,Utilities - Independent Power Producers,12.65,32.849998,21.035,20.927485,10.171549,3.189287,-0.234374,Within_1,49.931599
2,2024-10-04,JIOFIN,343.5,345.299988,336.5,338.799988,18945510,Asset Management,206.600006,387.950012,329.475006,305.10164,3187.821235,56.460794,0.596845,Within_1,37.892773
3,2024-10-04,SALASAR,19.040001,19.549999,18.6,19.23,11652353,Steel,9.02,33.950001,19.11,18.000366,28.952712,5.380772,0.228524,Within_1,40.252291
4,2024-10-04,SYNCOMF,21.09,21.49,20.34,20.860001,5852375,Drug Manufacturers - Specialty & Generic,8.0,26.77,13.45,14.24874,14.377223,3.791731,1.7436,Within_2,20.595519


In [5]:
# List the column labels available in latest_stock_data.
latest_stock_data.columns

Index(['Date', 'Ticker', 'Open', 'High', 'Low', 'Close', 'Volume', 'Sector',
       'Close_min', 'Close_max', 'Close_median', 'Close_mean', 'Close_var',
       'Close_std', 'Close_z_score', 'ZScore_Category', 'RSI'],
      dtype='object')

In [None]:
# Selecting several columns requires a list of labels (double brackets);
# a bare comma-separated sequence is a tuple key and raises KeyError.
latest_stock_data[['Date', 'Ticker', 'Open', 'Close', 'Volume', 'Close_min', 'Close_max', 'Close_median', 'Close_mean', 'Close_var', 'Close_std', 'Close_z_score', 'ZScore_Category', 'RSI']]