In [2]:
!pip install yfinance pandas pyyaml nsepy yahoofinancials matplotlib seaborn scipy



In [3]:
import yfinance as yf
import yaml
from datetime import datetime, timedelta
import calendar
from concurrent.futures import ThreadPoolExecutor, as_completed
import numpy as np
import os

# Create directories if they don't exist
os.makedirs('nifty50_yaml_files', exist_ok=True)

# List of Nifty 50 symbols (strings with .NS suffix)
NIFTY_50_SYMBOLS = [
        "ADANIENT.NS","ADANIPORTS.NS","APOLLOHOSP.NS","ASIANPAINT.NS","AXISBANK.NS",
        "BAJAJ-AUTO.NS","BAJFINANCE.NS","BAJAJFINSV.NS","BEL.NS","BHARTIARTL.NS",
        "BRITANNIA.NS","CIPLA.NS","COALINDIA.NS","DIVISLAB.NS","DRREDDY.NS",
        "EICHERMOT.NS","GRASIM.NS","HCLTECH.NS","HDFCBANK.NS","HDFCLIFE.NS",
        "HEROMOTOCO.NS","HINDALCO.NS","ICICIBANK.NS","INFY.NS","INDUSINDBK.NS",
        "ITC.NS","JSWSTEEL.NS","KOTAKBANK.NS","LT.NS","LTIM.NS","M&M.NS",
        "MARUTI.NS","NESTLEIND.NS","NTPC.NS","ONGC.NS","POWERGRID.NS",
        "RELIANCE.NS","SBIN.NS","SBILIFE.NS","SUNPHARMA.NS","TATACONSUM.NS",
        "TATAMOTORS.NS","TATASTEEL.NS","TCS.NS","TECHM.NS","TITAN.NS",
        "ULTRACEMCO.NS","UPL.NS","WIPRO.NS","ZEEL.NS"
]

# Custom YAML representer for numpy types
def numpy_representer(dumper, data):
    if isinstance(data, np.integer):
        return dumper.represent_int(int(data))
    elif isinstance(data, np.floating):
        return dumper.represent_float(float(data))
    elif isinstance(data, np.ndarray):
        return dumper.represent_list(data.tolist())
    return None

yaml.add_representer(np.integer, numpy_representer)
yaml.add_representer(np.floating, numpy_representer)
yaml.add_representer(np.ndarray, numpy_representer)

# Dictionary to cache the sector for each symbol
sector_cache = {}

def get_sector(symbol):
    """Fetch the sector for a given stock symbol and cache it."""
    if symbol in sector_cache:
        return sector_cache[symbol]
    
    try:
        stock = yf.Ticker(symbol)
        sector = stock.info.get('sector', 'N/A')
        sector_cache[symbol] = sector
        return sector
    except Exception as e:
        print(f"Error fetching sector for {symbol}: {str(e)}")
        return 'N/A'

def get_stock_data(symbol, start_date, end_date):
    """Fetch stock data for given symbol and date range."""
    try:
        stock = yf.Ticker(symbol)
        hist = stock.history(start=start_date, end=end_date)
        sector = get_sector(symbol)
        return symbol, hist, sector
    except Exception as e:
        print(f"Error fetching {symbol}: {str(e)}")
        return symbol, None, 'N/A'

def process_month(year, month):
    """Process data for a single month."""
    start_date = datetime(year, month, 1)
    end_date = datetime(year, month, calendar.monthrange(year, month)[1])
    
    print(f"Fetching data for {start_date.strftime('%B %Y')}...")
    
    results = {}
    with ThreadPoolExecutor(max_workers=5) as executor:
        future_to_symbol = {
            executor.submit(
                get_stock_data, 
                symbol, 
                start_date, 
                end_date + timedelta(days=1)
            ): symbol for symbol in NIFTY_50_SYMBOLS
        }
        
        for future in as_completed(future_to_symbol):
            symbol, data, sector = future.result()
            if data is not None and not data.empty:
                # Convert DataFrame to list of dictionaries
                records = []
                for date, row in data.iterrows():
                    record = {
                        'Date': date.strftime('%Y-%m-%d'),
                        'Open': float(round(row['Open'], 2)),
                        'High': float(round(row['High'], 2)),
                        'Low': float(round(row['Low'], 2)),
                        'Close': float(round(row['Close'], 2)),
                        'Volume': int(row['Volume']),
                        'Sector': sector  # Add the sector here
                    }
                    records.append(record)
                results[symbol] = records
    
    # Save to YAML file with the requested naming format
    month_str = str(month).zfill(2)
    filename = f"nifty50_data_{month_str}_{year}.yaml"
    filepath = os.path.join('nifty50_yaml_files', filename)
    
    with open(filepath, 'w') as f:
        yaml.dump(results, f, sort_keys=False, default_flow_style=False)
    
    print(f"Saved {filepath}")

def main():
    year = 2024
    for month in range(1, 13):
        process_month(year, month)

if __name__ == "__main__":
    main()

Fetching data for January 2024...
Saved nifty50_yaml_files\nifty50_data_01_2024.yaml
Fetching data for February 2024...
Saved nifty50_yaml_files\nifty50_data_02_2024.yaml
Fetching data for March 2024...
Saved nifty50_yaml_files\nifty50_data_03_2024.yaml
Fetching data for April 2024...
Saved nifty50_yaml_files\nifty50_data_04_2024.yaml
Fetching data for May 2024...
Saved nifty50_yaml_files\nifty50_data_05_2024.yaml
Fetching data for June 2024...
Saved nifty50_yaml_files\nifty50_data_06_2024.yaml
Fetching data for July 2024...
Saved nifty50_yaml_files\nifty50_data_07_2024.yaml
Fetching data for August 2024...
Saved nifty50_yaml_files\nifty50_data_08_2024.yaml
Fetching data for September 2024...
Saved nifty50_yaml_files\nifty50_data_09_2024.yaml
Fetching data for October 2024...
Saved nifty50_yaml_files\nifty50_data_10_2024.yaml
Fetching data for November 2024...
Saved nifty50_yaml_files\nifty50_data_11_2024.yaml
Fetching data for December 2024...
Saved nifty50_yaml_files\nifty50_data_12_

In [4]:
import yaml
import pandas as pd
import os
import numpy as np
from datetime import datetime

# Create directories if they don't exist
os.makedirs('nifty50_csv_files', exist_ok=True)

# Custom YAML loader to handle numpy types
def numpy_constructor(loader, node):
    # Convert numpy types to native Python types
    value = loader.construct_scalar(node)
    try:
        # Try to convert to int first, then float
        return int(value)
    except ValueError:
        try:
            return float(value)
        except ValueError:
            return value

# Add the constructor to handle the specific tag
yaml.add_constructor('tag:yaml.org,2002:python/object/apply:numpy._core.multiarray.scalar', 
                     lambda loader, node: numpy_constructor(loader, node))

def yaml_to_csv(nifty50_yaml_file, nifty50_csv_file):
    """Convert YAML stock data to CSV format, including the 'Sector' column."""
    try:
        # Read YAML file with custom loader
        with open(nifty50_yaml_file, 'r') as file:
            data = yaml.load(file, Loader=yaml.FullLoader)
        
        # Create a list to hold all rows
        all_rows = []
        
        # Process each stock
        for stock, days_data in data.items():
            for day_data in days_data:
                # Create a row for each day, including the Sector
                row = {
                    'Stock': stock,
                    'Date': day_data['Date'],
                    'Open': day_data['Open'],
                    'High': day_data['High'],
                    'Low': day_data['Low'],
                    'Close': day_data['Close'],
                    'Volume': day_data['Volume'],
                    'Sector': day_data.get('Sector', 'N/A')  # Add the Sector column here 
                }
                all_rows.append(row)
        
        # Convert to DataFrame and save as CSV
        df = pd.DataFrame(all_rows)
        df.to_csv(nifty50_csv_file, index=False)
        print(f"Converted {nifty50_yaml_file} to {nifty50_csv_file}")
        
    except Exception as e:
        print(f"Error converting {nifty50_yaml_file}: {str(e)}")

def convert_all_yaml_files(year=2024):
    """Convert all YAML files for a given year to CSV."""
    yaml_dir = 'nifty50_yaml_files'
    csv_dir = 'nifty50_csv_files'
    
    for month in range(1, 13):
        month_str = str(month).zfill(2)  # Format month as two digits
        yaml_filename = f"nifty50_data_{month_str}_{year}.yaml"
        csv_filename = f"nifty50_data_{month_str}_{year}.csv"
        
        nifty50_yaml_file = os.path.join(yaml_dir, yaml_filename)
        nifty50_csv_file = os.path.join(csv_dir, csv_filename)
        
        if os.path.exists(nifty50_yaml_file):
            yaml_to_csv(nifty50_yaml_file, nifty50_csv_file)
        else:
            print(f"File {nifty50_yaml_file} not found")

def create_combined_csv(year=2024):
    """Create a combined CSV with all months' data."""
    csv_dir = 'nifty50_csv_files'
    all_data = []
    
    for month in range(1, 13):
        month_str = str(month).zfill(2)  # Format month as two digits
        nifty50_csv_file = os.path.join(csv_dir, f"nifty50_data_{month_str}_{year}.csv")
        
        if os.path.exists(nifty50_csv_file):
            df = pd.read_csv(nifty50_csv_file)
            all_data.append(df)
            print(f"Added data from {nifty50_csv_file}")
    
    if all_data:
        combined_df = pd.concat(all_data, ignore_index=True)
        nifty50_combined_csv = os.path.join(csv_dir, f"nifty50_full_year_{year}.csv")
        combined_df.to_csv(nifty50_combined_csv, index=False)
        print(f"Created combined CSV: {nifty50_combined_csv}")
        
        # Display some statistics
        print(f"Total records: {len(combined_df)}")
        print(f"Date range: {combined_df['Date'].min()} to {combined_df['Date'].max()}")
        print(f"Number of unique stocks: {combined_df['Stock'].nunique()}")
    else:
        print("No CSV files found to combine.")

if __name__ == "__main__":
    # Convert all YAML files to CSV
    convert_all_yaml_files(2024)
    
    # Create a combined CSV with all data
    create_combined_csv(2024)
    
    print("Conversion completed!")

Converted nifty50_yaml_files\nifty50_data_01_2024.yaml to nifty50_csv_files\nifty50_data_01_2024.csv
Converted nifty50_yaml_files\nifty50_data_02_2024.yaml to nifty50_csv_files\nifty50_data_02_2024.csv
Converted nifty50_yaml_files\nifty50_data_03_2024.yaml to nifty50_csv_files\nifty50_data_03_2024.csv
Converted nifty50_yaml_files\nifty50_data_04_2024.yaml to nifty50_csv_files\nifty50_data_04_2024.csv
Converted nifty50_yaml_files\nifty50_data_05_2024.yaml to nifty50_csv_files\nifty50_data_05_2024.csv
Converted nifty50_yaml_files\nifty50_data_06_2024.yaml to nifty50_csv_files\nifty50_data_06_2024.csv
Converted nifty50_yaml_files\nifty50_data_07_2024.yaml to nifty50_csv_files\nifty50_data_07_2024.csv
Converted nifty50_yaml_files\nifty50_data_08_2024.yaml to nifty50_csv_files\nifty50_data_08_2024.csv
Converted nifty50_yaml_files\nifty50_data_09_2024.yaml to nifty50_csv_files\nifty50_data_09_2024.csv
Converted nifty50_yaml_files\nifty50_data_10_2024.yaml to nifty50_csv_files\nifty50_data_10

In [None]:
#  Import necessary libraries
import pandas as pd
import numpy as np
import os
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
#  Load the combined CSV file
# Assuming you've run the previous scripts and have the combined CSV file
combined_csv_path = r"D:\python_programs\NIFTY_50 STOCKS_ANALYSIS\nifty50_csv_files\nifty50_full_year_2024.csv"

# Load the data
df = pd.read_csv(combined_csv_path)

# Display basic information about the dataset
print("Dataset Shape:", df.shape)
print("\nFirst 5 rows:")
df.head()

Dataset Shape: (12300, 8)

First 5 rows:


Unnamed: 0,Stock,Date,Open,High,Low,Close,Volume,Sector
0,ADANIPORTS.NS,2024-01-01,1017.22,1048.33,1013.7,1038.32,3989711,Industrials
1,ADANIPORTS.NS,2024-01-02,1038.32,1073.05,1021.72,1068.59,6344621,Industrials
2,ADANIPORTS.NS,2024-01-03,1094.95,1133.6,1053.09,1084.3,33060778,Industrials
3,ADANIPORTS.NS,2024-01-04,1106.84,1120.72,1093.52,1112.99,9771995,Industrials
4,ADANIPORTS.NS,2024-01-05,1118.83,1149.45,1115.17,1143.75,10622789,Industrials


In [None]:
# Check data types and convert if necessary
print("Data types before conversion:")
print(df.dtypes)

# Convert Date column to datetime
df['Date'] = pd.to_datetime(df['Date'])

# Ensure numeric columns are properly formatted
numeric_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
for col in numeric_columns:
    df[col] = pd.to_numeric(df[col], errors='coerce')

print("\nData types after conversion:")
print(df.dtypes)

Data types before conversion:
Stock      object
Date       object
Open      float64
High      float64
Low       float64
Close     float64
Volume      int64
Sector     object
dtype: object

Data types after conversion:
Stock             object
Date      datetime64[ns]
Open             float64
High             float64
Low              float64
Close            float64
Volume             int64
Sector            object
dtype: object


In [None]:
#  Check for missing values
print("Missing values in each column:")
missing_values = df.isnull().sum()
print(missing_values)

# Calculate percentage of missing values
missing_percentage = (df.isnull().sum() / len(df)) * 100
print("\nMissing values percentage:")
print(missing_percentage)

Missing values in each column:
Stock     0
Date      0
Open      0
High      0
Low       0
Close     0
Volume    0
Sector    0
dtype: int64

Missing values percentage:
Stock     0.0
Date      0.0
Open      0.0
High      0.0
Low       0.0
Close     0.0
Volume    0.0
Sector    0.0
dtype: float64


In [None]:
#  Handle missing values
# For OHLC data, we can forward fill then backward fill
df_cleaned = df.copy()

# Group by stock and fill missing values within each stock
df_cleaned = df_cleaned.groupby('Stock').apply(
    lambda group: group.ffill().bfill()
).reset_index(drop=True)

# Check if any missing values remain
print("Missing values after cleaning:")
print(df_cleaned.isnull().sum())

Missing values after cleaning:
Stock     0
Date      0
Open      0
High      0
Low       0
Close     0
Volume    0
Sector    0
dtype: int64


  df_cleaned = df_cleaned.groupby('Stock').apply(


In [None]:
#  Check for duplicates
print("Number of duplicate rows:", df_cleaned.duplicated().sum())

# Remove duplicates if any
df_cleaned = df_cleaned.drop_duplicates()

print("Number of duplicate rows after cleaning:", df_cleaned.duplicated().sum())

Number of duplicate rows: 0
Number of duplicate rows after cleaning: 0


In [None]:
#  Validate data consistency
# Check for negative prices or volumes
print("Rows with negative Open prices:", (df_cleaned['Open'] <= 0).sum())
print("Rows with negative High prices:", (df_cleaned['High'] <= 0).sum())
print("Rows with negative Low prices:", (df_cleaned['Low'] <= 0).sum())
print("Rows with negative Close prices:", (df_cleaned['Close'] <= 0).sum())
print("Rows with negative Volume:", (df_cleaned['Volume'] < 0).sum())

# Check if High is greater than or equal to Low
inconsistent_high_low = (df_cleaned['High'] < df_cleaned['Low']).sum()
print("Rows where High < Low:", inconsistent_high_low)

# Check if Open and Close are between High and Low
inconsistent_ohlc = ((df_cleaned['Open'] > df_cleaned['High']) | 
                     (df_cleaned['Open'] < df_cleaned['Low']) |
                     (df_cleaned['Close'] > df_cleaned['High']) | 
                     (df_cleaned['Close'] < df_cleaned['Low'])).sum()
print("Rows with inconsistent OHLC values:", inconsistent_ohlc)

Rows with negative Open prices: 0
Rows with negative High prices: 0
Rows with negative Low prices: 0
Rows with negative Close prices: 0
Rows with negative Volume: 0
Rows where High < Low: 0
Rows with inconsistent OHLC values: 0


In [None]:
#  Handle inconsistent data
# Fix rows where High < Low by swapping them
mask = df_cleaned['High'] < df_cleaned['Low']
df_cleaned.loc[mask, ['High', 'Low']] = df_cleaned.loc[mask, ['Low', 'High']].values

# Ensure Open and Close are within High and Low range
df_cleaned['Open'] = np.where(df_cleaned['Open'] > df_cleaned['High'], df_cleaned['High'], df_cleaned['Open'])
df_cleaned['Open'] = np.where(df_cleaned['Open'] < df_cleaned['Low'], df_cleaned['Low'], df_cleaned['Open'])
df_cleaned['Close'] = np.where(df_cleaned['Close'] > df_cleaned['High'], df_cleaned['High'], df_cleaned['Close'])
df_cleaned['Close'] = np.where(df_cleaned['Close'] < df_cleaned['Low'], df_cleaned['Low'], df_cleaned['Close'])

# Verify the fixes
inconsistent_high_low = (df_cleaned['High'] < df_cleaned['Low']).sum()
inconsistent_ohlc = ((df_cleaned['Open'] > df_cleaned['High']) | 
                     (df_cleaned['Open'] < df_cleaned['Low']) |
                     (df_cleaned['Close'] > df_cleaned['High']) | 
                     (df_cleaned['Close'] < df_cleaned['Low'])).sum()
print("Rows where High < Low after cleaning:", inconsistent_high_low)
print("Rows with inconsistent OHLC values after cleaning:", inconsistent_ohlc)

Rows where High < Low after cleaning: 0
Rows with inconsistent OHLC values after cleaning: 0


In [None]:
#  Save the cleaned data
# Create a new directory for cleaned data
os.makedirs('cleaned_data', exist_ok=True)

# Save the cleaned dataframe
cleaned_csv_path = "cleaned_data/nifty50_cleaned_2024.csv"
df_cleaned.to_csv(cleaned_csv_path, index=False)

print(f"Cleaned data saved to: {cleaned_csv_path}")
print(f"Final dataset shape: {df_cleaned.shape}")

Cleaned data saved to: cleaned_data/nifty50_cleaned_2024.csv
Final dataset shape: (12300, 8)


In [None]:
#  Load the cleaned data
cleaned_data_path = "cleaned_data/nifty50_cleaned_2024.csv"
df = pd.read_csv(cleaned_data_path)

# Display basic information
print("Dataset shape:", df.shape)
print("Columns:", df.columns.tolist())
print("\nFirst few rows:")
df.head()

Dataset shape: (12300, 8)
Columns: ['Stock', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Sector']

First few rows:


Unnamed: 0,Stock,Date,Open,High,Low,Close,Volume,Sector
0,ADANIENT.NS,2024-01-01,2849.69,2944.31,2839.45,2914.53,2898619,Energy
1,ADANIENT.NS,2024-01-02,2919.33,2945.51,2838.4,2929.72,2671368,Energy
2,ADANIENT.NS,2024-01-03,3044.22,3196.08,2952.9,3000.26,19725411,Energy
3,ADANIENT.NS,2024-01-04,3036.62,3041.67,2987.27,2995.56,2975620,Energy
4,ADANIENT.NS,2024-01-05,3007.2,3064.1,2978.38,3003.85,3219949,Energy


In [None]:
#  Get unique stock names
unique_stocks = df['Stock'].unique()
print(f"Number of unique stocks: {len(unique_stocks)}")
print("Stock names:")
for i, stock in enumerate(unique_stocks):
    print(f"{i+1}. {stock}")

Number of unique stocks: 50
Stock names:
1. ADANIENT.NS
2. ADANIPORTS.NS
3. APOLLOHOSP.NS
4. ASIANPAINT.NS
5. AXISBANK.NS
6. BAJAJ-AUTO.NS
7. BAJAJFINSV.NS
8. BAJFINANCE.NS
9. BEL.NS
10. BHARTIARTL.NS
11. BRITANNIA.NS
12. CIPLA.NS
13. COALINDIA.NS
14. DIVISLAB.NS
15. DRREDDY.NS
16. EICHERMOT.NS
17. GRASIM.NS
18. HCLTECH.NS
19. HDFCBANK.NS
20. HDFCLIFE.NS
21. HEROMOTOCO.NS
22. HINDALCO.NS
23. ICICIBANK.NS
24. INDUSINDBK.NS
25. INFY.NS
26. ITC.NS
27. JSWSTEEL.NS
28. KOTAKBANK.NS
29. LT.NS
30. LTIM.NS
31. M&M.NS
32. MARUTI.NS
33. NESTLEIND.NS
34. NTPC.NS
35. ONGC.NS
36. POWERGRID.NS
37. RELIANCE.NS
38. SBILIFE.NS
39. SBIN.NS
40. SUNPHARMA.NS
41. TATACONSUM.NS
42. TATAMOTORS.NS
43. TATASTEEL.NS
44. TCS.NS
45. TECHM.NS
46. TITAN.NS
47. ULTRACEMCO.NS
48. UPL.NS
49. WIPRO.NS
50. ZEEL.NS


In [20]:
import os

# Define OHLCV columns (adjust if needed)
ohlcv_columns = ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']

# Folder where CSVs will be stored
stocks_dir = "stocks_data_2024"
os.makedirs(stocks_dir, exist_ok=True)

# Step 6: Create separate CSV files for each stock
for stock in unique_stocks:
    # Filter data for the current stock
    stock_data = df[df['Stock'] == stock]
    
    # Select only OHLCV columns
    stock_data_ohlcv = stock_data[ohlcv_columns]
    
    # Create filename (replace special characters if needed)
    safe_stock_name = stock.replace('.NS', '').replace('-', '_').replace(' ', '_')
    filename = f"{safe_stock_name}_2024_OHLCV.csv"
    filepath = os.path.join(stocks_dir, filename)
    
    # Save to CSV
    stock_data_ohlcv.to_csv(filepath, index=False)
    
    print(f"Saved {stock} data to {filename}")

print(f"\nCreated {len(unique_stocks)} CSV files in the '{stocks_dir}' directory")


Saved ADANIENT.NS data to ADANIENT_2024_OHLCV.csv
Saved ADANIPORTS.NS data to ADANIPORTS_2024_OHLCV.csv
Saved APOLLOHOSP.NS data to APOLLOHOSP_2024_OHLCV.csv
Saved ASIANPAINT.NS data to ASIANPAINT_2024_OHLCV.csv
Saved AXISBANK.NS data to AXISBANK_2024_OHLCV.csv
Saved BAJAJ-AUTO.NS data to BAJAJ_AUTO_2024_OHLCV.csv
Saved BAJAJFINSV.NS data to BAJAJFINSV_2024_OHLCV.csv
Saved BAJFINANCE.NS data to BAJFINANCE_2024_OHLCV.csv
Saved BEL.NS data to BEL_2024_OHLCV.csv
Saved BHARTIARTL.NS data to BHARTIARTL_2024_OHLCV.csv
Saved BRITANNIA.NS data to BRITANNIA_2024_OHLCV.csv
Saved CIPLA.NS data to CIPLA_2024_OHLCV.csv
Saved COALINDIA.NS data to COALINDIA_2024_OHLCV.csv
Saved DIVISLAB.NS data to DIVISLAB_2024_OHLCV.csv
Saved DRREDDY.NS data to DRREDDY_2024_OHLCV.csv
Saved EICHERMOT.NS data to EICHERMOT_2024_OHLCV.csv
Saved GRASIM.NS data to GRASIM_2024_OHLCV.csv
Saved HCLTECH.NS data to HCLTECH_2024_OHLCV.csv
Saved HDFCBANK.NS data to HDFCBANK_2024_OHLCV.csv
Saved HDFCLIFE.NS data to HDFCLIFE_2024_

In [None]:
#  Verify the created files
import glob

# List all CSV files in the stocks directory
csv_files = glob.glob(os.path.join(stocks_dir, "*.csv"))
print(f"Found {len(csv_files)} CSV files:")

for file in sorted(csv_files)[:5]:  # Show first 5 files
    print(f"  - {os.path.basename(file)}")

if len(csv_files) > 5:
    print(f"  - ... and {len(csv_files) - 5} more")

Found 50 CSV files:
  - ADANIENT_2024_OHLCV.csv
  - ADANIPORTS_2024_OHLCV.csv
  - APOLLOHOSP_2024_OHLCV.csv
  - ASIANPAINT_2024_OHLCV.csv
  - AXISBANK_2024_OHLCV.csv
  - ... and 45 more


In [None]:
#  Sample one of the created files to verify content
if csv_files:
    sample_file = csv_files[0]
    sample_data = pd.read_csv(sample_file)
    
    print(f"Sample file: {os.path.basename(sample_file)}")
    print(f"Shape: {sample_data.shape}")
    print("\nFirst few rows:")
    print(sample_data.head())
    
    print("\nDate range:")
    print(f"From: {sample_data['Date'].min()}")
    print(f"To: {sample_data['Date'].max()}")

Sample file: ADANIENT_2024_OHLCV.csv
Shape: (246, 6)

First few rows:
         Date     Open     High      Low    Close    Volume
0  2024-01-01  2849.69  2944.31  2839.45  2914.53   2898619
1  2024-01-02  2919.33  2945.51  2838.40  2929.72   2671368
2  2024-01-03  3044.22  3196.08  2952.90  3000.26  19725411
3  2024-01-04  3036.62  3041.67  2987.27  2995.56   2975620
4  2024-01-05  3007.20  3064.10  2978.38  3003.85   3219949

Date range:
From: 2024-01-01
To: 2024-12-31


In [None]:
#  Create a function to get data for a specific stock
def get_stock_data(stock_name, data_directory=stocks_dir):
    """
    Retrieve OHLCV data for a specific stock
    
    Parameters:
    stock_name (str): Name of the stock (with or without .NS extension)
    data_directory (str): Directory where stock CSV files are stored
    
    Returns:
    pandas.DataFrame: OHLCV data for the requested stock
    """
    # Standardize stock name format
    if not stock_name.endswith('.NS'):
        stock_name += '.NS'
    
    # Create filename
    safe_stock_name = stock_name.replace('.NS', '').replace('-', '_').replace(' ', '_')
    filename = f"{safe_stock_name}_2024_OHLCV.csv"
    filepath = os.path.join(data_directory, filename)
    
    # Check if file exists
    if not os.path.exists(filepath):
        print(f"Data not found for {stock_name}")
        print("Available stocks:")
        for stock in unique_stocks:
            print(f"  - {stock}")
        return None
    
    # Load and return data
    return pd.read_csv(filepath)

# Example usage
if csv_files:
    sample_stock = unique_stocks[0]
    print(f"Getting data for: {sample_stock}")
    stock_data = get_stock_data(sample_stock)
    if stock_data is not None:
        print(f"Retrieved {len(stock_data)} records")
        print(stock_data.head())

Getting data for: ADANIENT.NS
Retrieved 246 records
         Date     Open     High      Low    Close    Volume
0  2024-01-01  2849.69  2944.31  2839.45  2914.53   2898619
1  2024-01-02  2919.33  2945.51  2838.40  2929.72   2671368
2  2024-01-03  3044.22  3196.08  2952.90  3000.26  19725411
3  2024-01-04  3036.62  3041.67  2987.27  2995.56   2975620
4  2024-01-05  3007.20  3064.10  2978.38  3003.85   3219949


In [None]:
#  Create a function to get multiple stocks at once
def get_multiple_stocks(stock_list, data_directory=stocks_dir):
    """
    Retrieve OHLCV data for multiple stocks
    
    Parameters:
    stock_list (list): List of stock names
    data_directory (str): Directory where stock CSV files are stored
    
    Returns:
    dict: Dictionary with stock names as keys and DataFrames as values
    """
    result = {}
    
    for stock in stock_list:
        data = get_stock_data(stock, data_directory)
        if data is not None:
            result[stock] = data
    
    return result

# Example usage
if len(unique_stocks) >= 3:
    sample_stocks = [unique_stocks[0], unique_stocks[1], unique_stocks[2]]
    print(f"Getting data for: {sample_stocks}")
    multiple_data = get_multiple_stocks(sample_stocks)
    
    print(f"Retrieved data for {len(multiple_data)} stocks")
    for stock, data in multiple_data.items():
        print(f"{stock}: {len(data)} records")

Getting data for: ['ADANIENT.NS', 'ADANIPORTS.NS', 'APOLLOHOSP.NS']
Retrieved data for 3 stocks
ADANIENT.NS: 246 records
ADANIPORTS.NS: 246 records
APOLLOHOSP.NS: 246 records
