In [3]:
import pandas as pd
from datetime import datetime
import os

# Filename Validation 
def _validate_filename(filename):
    """Return True when *filename* looks like a usable CSV path.

    Leading/trailing whitespace is ignored for the checks. The checks run
    in order and stop at the first failure, printing one error message:

    1. the name must not contain spaces,
    2. the name must end in ``.csv`` (case-insensitive),
    3. the path must exist on disk.
    """
    candidate = filename.strip()

    # Pick the first failing rule; later rules are not evaluated.
    problem = None
    if " " in candidate:
        problem = "Error: Filename contains spaces. Please remove them and try again."
    elif not candidate.lower().endswith(".csv"):
        problem = "Error: File must have a '.csv' extension."
    elif not os.path.exists(candidate):
        problem = "Error: File not found. Please check the filename and try again."

    if problem is not None:
        print(problem)
        return False
    return True


def _validate_stock_data(d):
    """Return True when the DataFrame *d* has rows and all required columns.

    The required columns are 'stock', 'sector', 'Price start' and
    'end_price'. On failure an explanatory message is printed and False
    is returned.
    """
    required_cols = {'stock', 'sector', 'Price start', 'end_price'}

    if d.empty:
        print("The file exists but has no data (empty CSV).")
        return False

    missing = required_cols - set(d.columns)
    if missing:
        # Sets have no stable order; sort so the message is reproducible.
        print(f"Missing required columns: {', '.join(sorted(missing))}")
        return False

    return True

def is_positive_number(x):
    """Return True when *x* converts to a finite, non-negative float.

    Note that zero is accepted (the check is ``>= 0``) even though the
    name says "positive". Empty strings, missing values (None/NaN/NA),
    non-numeric text, negative numbers and infinities all yield False.
    """
    if x == '' or pd.isna(x):
        return False
    try:
        value = float(x)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a number"; anything else
        # (e.g. KeyboardInterrupt) should propagate.
        return False
    # The upper bound rejects +inf; NaN fails both comparisons.
    return 0 <= value < float("inf")

def calculate_return_percentage(d):
    """Return a new DataFrame with an 'rp' (return percentage) column.

    ``rp = (end_price - Price start) / Price start * 100``, rounded to two
    decimals. Rows whose 'Price start' is 0 are dropped to avoid division
    by zero. If either price column is missing, an error is printed and
    *d* is returned unchanged. The input frame is never modified.
    """
    if 'Price start' not in d.columns or 'end_price' not in d.columns:
        print("Error: Columns 'Price start' or 'end_price' not found in data.")
        return d

    # Copy the filtered rows so adding a column below writes to an
    # independent frame rather than a slice of the caller's data
    # (avoids pandas' SettingWithCopyWarning).
    d = d[d['Price start'] != 0].copy()

    start = d['Price start']
    d['rp'] = ((d['end_price'] - start) / start * 100).round(2)

    return d

def get_top_stocks_by_rp(d):
    """Display the five rows of *d* with the highest 'rp' value.

    Prints an error and returns without displaying anything when the
    'rp' column is missing. Relies on the notebook-provided ``display``.
    """
    if 'rp' not in d.columns:
        print("Error: 'rp' column not found. Please calculate it first.")
        # Stop here; sorting by a missing column would raise KeyError.
        return

    ranked = d.sort_values(by='rp', ascending=False)
    print("\n-------- Top 5 Stocks by Return Percentage --------\n")
    display(ranked.head(5))
    
def show_sector_summary(d):
    """Display the average return and stock count per sector.

    Groups *d* by 'sector', shows the mean 'rp' (rounded to two decimals)
    and the number of non-null 'stock' entries per group, then prints the
    sector with the highest average return. Prints a message and returns
    early when a needed column is missing or the frame is empty. Relies on
    the notebook-provided ``display``.
    """
    # 'stock' is needed for the Count aggregation below, so check it too.
    required = ('sector', 'stock', 'rp')
    if any(col not in d.columns for col in required):
        print("Error: DataFrame must contain 'sector', 'stock' and 'rp' columns.")
        return

    if d.empty:
        print("No data available for summary.")
        return

    summary = (
        d.groupby('sector')
        .agg(Avg_Return=('rp', 'mean'), Count=('stock', 'count'))
        .reset_index()
        .round({'Avg_Return': 2})
    )

    print("\n-------- Per-Sector Summary ----------")
    best_sector = summary.loc[summary['Avg_Return'].idxmax(), 'sector']
    best_return = summary['Avg_Return'].max()

    display(summary)

    print(f"\nBest Sector: {best_sector}  (Avg Return: {best_return:.2f}%)\n")
    
def save_stock_data(d, filename="stock_returns.csv"):
    """Write *d* to a timestamped CSV file derived from *filename*.

    The output name is ``<base>_<ddMMyyyyHHmmSS><ext>``; ``.csv`` is used
    when *filename* has no extension. Nothing is written for an empty
    frame. Any failure while saving is reported via print, not raised.
    """
    if d.empty:
        print(" No data available to save.")
        return

    try:
        stem, suffix = os.path.splitext(filename)
        # Fall back to .csv when the caller gave no extension.
        suffix = suffix or ".csv"

        # Day-month-year followed by 24h time, e.g. 07112025170046.
        stamp = datetime.now().strftime("%d%m%Y%H%M%S")
        filename = f"{stem}_{stamp}{suffix}"

        d.to_csv(filename, index=False)
        print(f"File '{filename}' saved successfully.")
    except Exception as e:
        print(f" Error saving file: {e}")



# Driver: prompt for a CSV, validate it, compute returns, report and save.
# Strip once up front so the name that gets validated is exactly the name
# that gets read (the validator strips internally before checking).
filename = input("Enter CSV filename: ").strip()

# Validating the Filename
if _validate_filename(filename):
    try:
        # Read file
        d = pd.read_csv(filename)
    except Exception as e:
        print(f"Error reading file: {e}")
    else:
        if _validate_stock_data(d):
            try:
                # Keep only rows where both prices are valid numbers; copy so
                # the column casts below write to an independent frame rather
                # than a slice of the frame that was read.
                valid = (
                    d['Price start'].apply(is_positive_number)
                    & d['end_price'].apply(is_positive_number)
                )
                d = d[valid].copy()
                d['Price start'] = d['Price start'].astype(float)
                d['end_price'] = d['end_price'].astype(float)
                d = d.reset_index(drop=True)

                d = calculate_return_percentage(d)
                print("---------- All Stock Details ----------")
                display(d)
                get_top_stocks_by_rp(d)
                show_sector_summary(d)
                save_stock_data(d)
            except Exception as e:
                # Reported separately from read errors so the message
                # points at the right stage.
                print(f"Error processing data: {e}")
        else:
            print("Data validation failed. Please fix the CSV file.")
else:
    print("Please fix the filename and run again.")

---------- All Stock Details ----------


Unnamed: 0,stock,sector,Price start,end_price,rp
0,AAPL,Technology,100.0,150.0,50.0
1,MSFT,Technology,120.0,250.0,108.33
2,JPM,Financials,100.0,25.5,-74.5
3,WMT,Consumer Staples,35.0,150.0,328.57



-------- Top 5 Stocks by Return Percentage --------



Unnamed: 0,stock,sector,Price start,end_price,rp
3,WMT,Consumer Staples,35.0,150.0,328.57
1,MSFT,Technology,120.0,250.0,108.33
0,AAPL,Technology,100.0,150.0,50.0
2,JPM,Financials,100.0,25.5,-74.5



-------- Per-Sector Summary ----------


Unnamed: 0,sector,Avg_Return,Count
0,Consumer Staples,328.57,1
1,Financials,-74.5,1
2,Technology,79.16,2



Best Sector: Consumer Staples  (Avg Return: 328.57%)

File 'stock_returns_07112025170046.csv' saved successfully.
