In [67]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import time
import pprint
import json
from contextlib import contextmanager

In [68]:

# Custom decorator for timing
def timing_decorator(func):
    def wrapper(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        end_time = time.time()
        print(f"Execution time for {func.__name__}: {end_time - start_time:.4f} seconds")
        return result
    return wrapper


# Load data function
@timing_decorator
def load_data(filepath):
    with file_handler(filepath, 'r') as file:
        df = pd.read_csv(file)
    return df

In [69]:

# Preprocessing data
@timing_decorator
def preprocess_data(df):
    
    df.fillna(method='ffill', inplace=True)
    df.drop_duplicates(inplace=True)  # Remove duplicates
    return df


In [70]:
# Feature engineering
@timing_decorator
def compute_features(df):
    if {'Open', 'Close'}.issubset(df.columns):
        df['Daily Return'] = (df['Close'] - df['Open']) / df['Open'] * 100
    if 'Close' in df.columns:
        df['Moving Avg (7-day)'] = df['Close'].rolling(window=7).mean()
    return df


In [71]:


# Visualization functions
@timing_decorator
def plot_closing_price(df):
    plt.figure(figsize=(12, 6))
    sns.lineplot(x=df['Date'], y=df['Close'])
    plt.title('Closing Price Trend')
    plt.xlabel('Date')
    plt.ylabel('Close Price')
    plt.xticks(rotation=45)
    plt.show()

@timing_decorator
def plot_correlation_heatmap(df):
    plt.figure(figsize=(10, 6))
    sns.heatmap(df.corr(), annot=True, cmap='coolwarm', fmt='.2f')
    plt.title('Feature Correlation Heatmap')
    plt.show()

@timing_decorator
def plot_daily_return_distribution(df):
    plt.figure(figsize=(8, 5))
    sns.histplot(df['Daily Return'], bins=30, kde=True)
    plt.title('Distribution of Daily Returns')
    plt.xlabel('Daily Return (%)')
    plt.ylabel('Frequency')
    plt.show()

@timing_decorator
def plot_most_traded_days(df):
    if 'Shares Traded' in df.columns:
        most_traded_days = df.nlargest(10, 'Shares Traded')
        plt.figure(figsize=(12, 5))
        plt.bar(most_traded_days['Date'], most_traded_days['Shares Traded'], color='green')
        plt.xticks(rotation=45)
        plt.xlabel("Date")
        plt.ylabel("Shares Traded")
        plt.title("Top 10 Most Traded Days")
        plt.show()
        return most_traded_days[['Date', 'Shares Traded']].to_dict(orient='records')

@timing_decorator
def plot_least_performing_days(df):
    if 'Daily Return' in df.columns:
        least_performing_days = df.nsmallest(10, 'Daily Return')
        plt.figure(figsize=(12, 5))
        plt.bar(least_performing_days['Date'], least_performing_days['Daily Return'], color='red')
        plt.xticks(rotation=45)
        plt.xlabel("Date")
        plt.ylabel("Daily Return %")
        plt.title("Top 10 Least Performing Days")
        plt.show()
        return least_performing_days[['Date', 'Daily Return']].to_dict(orient='records')


In [72]:
def volatile_stocks(df):
    if 'Date' in df.columns:
        df['Date'] = pd.to_datetime(df['Date'])
        
    df.set_index('Date', inplace=True)
    monthly_volatility = df['Daily Return'].resample('M').std()
    print(monthly_volatility)
    threshold = 0.58  # Adjust the threshold as needed
    volatile_months = monthly_volatility > threshold
    print("\n--- Monthly Volatility Check ---")
    for month, is_volatile in volatile_months.items():
        print(f"Month: {month.strftime('%Y-%m')}, Volatile: {'Yes' if is_volatile else 'No'}")
    analysis_results = {"Monthly Volatility": volatile_months.to_dict(),"Monthly Volatility Values": monthly_volatility.tolist()}
    print("\n--- Analysis Results ---")
    for key, value in analysis_results.items():
        print(f"{key}: {value}")

In [None]:
# Main execution
if __name__ == "__main__":
    filepath = "nifty_fifty.csv"  # Update with actual file path

    # Load and process data
    df = load_data(filepath)
    df = preprocess_data(df)
    df = compute_features(df)

    # Store analysis results
    analysis_results = {
        "Closing Price Trend": list(zip(df['Date'].tolist(), df['Close'].tolist())),
        "Feature Correlation Heatmap": df.corr().to_dict(),
        "Daily Return Distribution": df['Daily Return'].tolist(),
        "Most Traded Days": plot_most_traded_days(df),
        "Least Performing Days": plot_least_performing_days(df)
        "Months the Stock was Volatile":volatile_stocks(df)
    }

    plot_closing_price(df)
    plot_correlation_heatmap(df)
    plot_daily_return_distribution(df)
print("\n--- Analysis Results (Showing only first 10 rows) ---")

limited_results = {
    key: value[:10] if isinstance(value, list) else value
    for key, value in analysis_results.items()
}

pprint.pprint(limited_results, width=150)

# Alternative JSON formatted output
def convert_timestamp(obj):
    """Convert Pandas Timestamp to string format."""
    if isinstance(obj, pd.Timestamp):
        return obj.strftime('%Y-%m-%d')  # Convert to string format
    raise TypeError(f"Type {type(obj)} not serializable")

print(json.dumps(limited_results, indent=4, default=convert_timestamp))



Execution time for load_data: 0.0018 seconds
Execution time for preprocess_data: 0.0011 seconds
Execution time for compute_features: 0.0005 seconds


  df.fillna(method='ffill', inplace=True)


ValueError: could not convert string to float: '30-Jul-24'