# Christina -- Pratas -- 12182674

In [23]:
import csv
import json
import math
import sys
from collections import defaultdict
from json import dumps as j_dumps
from typing import Any, Dict, Tuple

import numpy as np
FILE_PATH = 'stock_data_july_to_september.csv'

## 1. File I/O + Parsing

In [24]:
# Open and read the .csv file, and return a list of cleaned row dictionaries
def read_ohlcv(path: str) -> list[dict]:
   
    rows: list[dict] = []
    required_columns = ['Ticker', 'Date', 'Open', 'High', 'Low', 'Close', 'Volume']
    numeric_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    
    print(f"--- Starting Data Reading and Validation from {path} (Manual CSV Parsing) ---")
    
    try:
        with open(path, 'r', newline='') as file:
            reader = csv.DictReader(file)
            
            if not all(col in reader.fieldnames for col in required_columns):
                print("Error: CSV file header is missing one or more required columns.")
                return []

            for row in reader:
                processed_row = {}
                is_valid_row = True

                if not row.get('Ticker') or not row.get('Date'):
                    is_valid_row = False
                
                if is_valid_row:
                    processed_row['Symbol'] = row['Ticker']
                    processed_row['Date'] = row['Date']

                    for col in numeric_columns:
                        value_str = row.get(col, '')
                        try:
                            val = float(value_str)
                            if val < 0: raise ValueError("Negative value")

                            processed_row[col] = int(val) if col == 'Volume' else val
                        except (ValueError, KeyError):
                            is_valid_row = False
                            break
                        
                if is_valid_row:
                    rows.append(processed_row)
# print instructions to be able to open both the final analysis and .csv file are located in the same place
    except FileNotFoundError:
        print(f"Error: File not found at path: {path}")
        print(f"Please ensure both the final analysis report and .csv file are located within the same place")
    except Exception as e:
        print(f"An unexpected error occurred during file reading: {e}")
        
    print(f"--- Data Validation Complete: {len(rows)} clean rows loaded ---")
    return rows

def group_by_symbol(parsed_rows: list[dict]) -> dict[str, list[dict]]:
    """Bucket cleaned rows by their ``"Symbol"`` value.

    Symbols appear in the result in the order they are first seen, and the
    rows inside each bucket keep their input order.
    """
    buckets: dict[str, list[dict]] = {}
    for record in parsed_rows:
        # setdefault creates the bucket the first time a symbol is seen.
        buckets.setdefault(record["Symbol"], []).append(record)
    return buckets

## 2. Summary Function that Returns a Tuple

In [25]:
# Summarise one symbol's rows as a (symbol, highest high, lowest low) tuple
def stock_summary(rows_for_symbol: list[dict]) -> Tuple[str, float | None, float | None]:
# Calculating the highest high and lowest low for a single stock symbol. 
    if not rows_for_symbol:
        return ("", None, None)
    
    symbol = rows_for_symbol[0]["Symbol"]
    
    highs = [row["High"] for row in rows_for_symbol if isinstance(row["High"], (int, float))]
    lows = [row["Low"] for row in rows_for_symbol if isinstance(row["Low"], (int, float))]
    
    highest_high = max(highs) if highs else None
    lowest_low = min(lows) if lows else None
    
    return (symbol, highest_high, lowest_low)

## 3. Technical Analysis Function

In [26]:
def technical_analysis(closes: list[float]) -> dict:
    """Compute trend and volatility indicators from a series of closing prices.

    The list is treated as ordered oldest to newest: "last" values are taken
    from the end and the EMA is seeded from the start.

    Args:
        closes: Closing prices. Integers are accepted and handled as floats.

    Returns:
        A dict with four keys. Each value is a ``float``, or ``None`` when
        there are too few prices to compute it:

        * ``'sma_30'``: mean of the last 30 closes (needs at least 30).
        * ``'ema_30'``: 30-span exponential moving average with
          ``alpha = 2 / 31``, seeded with the mean of the first 30 closes
          (needs at least 30).
        * ``'three_month_return'``: despite the key name, this is the most
          recent one-day simple return, ``closes[-1] / closes[-2] - 1``
          (needs at least 2).
        * ``'average_vol'``: sample standard deviation (``ddof=1``) of the
          last 20 daily returns (needs at least 21 closes).
    """
    # Force a float array: with integer input the EMA arithmetic would
    # otherwise be stored in an integer buffer and silently truncated.
    closes_np = np.asarray(closes, dtype=float)
    N = len(closes_np)
    results = {'sma_30': None, 'ema_30': None, 'three_month_return': None, 'average_vol': None}

    # 1. SMA (30-day)
    sma_period = 30
    if N >= sma_period:
        results['sma_30'] = float(np.mean(closes_np[-sma_period:]))

    # 2. EMA (30-day): seed with the SMA of the first window, then roll forward.
    ema_span = 30
    if N >= ema_span:
        alpha = 2 / (ema_span + 1)
        ema = float(np.mean(closes_np[:ema_span]))
        for price in closes_np[ema_span:]:
            ema = (float(price) * alpha) + (ema * (1 - alpha))
        results['ema_30'] = ema

    # 3. Daily simple returns. A close of 0 makes the following return
    #    inf/nan, and NumPy emits a RuntimeWarning.
    if N >= 2:
        returns_np = (closes_np[1:] / closes_np[:-1]) - 1
        results['three_month_return'] = float(returns_np[-1])

        # 4. Volatility (20-period, sample standard deviation)
        vol_period = 20
        if len(returns_np) >= vol_period:
            results['average_vol'] = float(np.std(returns_np[-vol_period:], ddof=1))

    return results
    
        
        

## 4. Main Program

In [27]:
def main(path: str) -> Dict[str, Any] | None:
    """Run the full pipeline for the CSV at *path* and collect per-symbol results.

    Prints the report banner, loads and groups the rows, then builds one entry
    per symbol of the form
    ``{"highest_high": ..., "lowest_low": ..., "tech": {...}}``.
    Returns ``None`` when no rows could be loaded.

    NOTE(review): closes are passed to ``technical_analysis`` in file order;
    this presumably relies on the CSV being sorted by date -- confirm.
    """
    print("Christina Pratas - EMPLID : 12182674")
    print("\nCIS9650 Assignment 2: Stock Data Analysis Report")

    # Step 1: load and validate the raw rows.
    cleaned_rows = read_ohlcv(path)
    if not cleaned_rows:
        return None

    # Step 2: bucket the rows per ticker and warn if fewer than three loaded.
    by_symbol = group_by_symbol(cleaned_rows)
    symbol_count = len(by_symbol)
    if symbol_count < 3:
        print(f"\n!! WARNING: Only {symbol_count} stock symbols were loaded. !!")

    # Step 3: summarise every ticker with the two required helper calls.
    report: Dict[str, Any] = {}
    for ticker, ticker_rows in by_symbol.items():
        _, peak, trough = stock_summary(ticker_rows)
        indicators = technical_analysis([entry["Close"] for entry in ticker_rows])
        report[ticker] = {
            "highest_high": peak,
            "lowest_low": trough,
            "tech": indicators,
        }

    return report

## Run

In [28]:
def format_final_report(results: Dict[str, Any]) -> Tuple[str, str]:
    """Formats the raw dictionary and the tabular summary."""
    # 1. Raw Dictionary Output (using json.dumps for clean formatting)
    raw_dict_string = (
        "\n" + "="*50 + 
        "\nFINAL PER-STOCK DICTIONARY STRUCTURE (Required Output)" + 
        "\n" + "="*50 + 
        "\n" + j_dumps(results, indent=4) +
        "\n" + "="*50
    )
    
    # 2. Tabular Summary Output
    report_data = []
    for symbol, data in results.items():
        tech = data['tech']
        low = data['lowest_low']
        high = data['highest_high']
        
        sma = tech.get('sma_30') 
        ema = tech.get('ema_30')
        ret = tech['three_month_return']
        vol = tech['average_vol']
        
        # Format strings
        sma_str = f'${sma:.2f}' if sma is not None else 'N/A'
        ema_str = f'${ema:.2f}' if ema is not None else 'N/A'
        ret_str = f'{ret*100:.2f}%' if ret is not None else 'N/A'
        vol_str = f'{vol*100:.2f}%' if vol is not None else 'N/A'
        
        report_data.append([
            symbol,
            f'${low:.2f} - ${high:.2f}',
            sma_str,
            ema_str,
            ret_str,
            vol_str
        ])

    headers = ["Symbol", "Price Range (L-H)", "SMA(30)", "EMA(30)", "Last Daily Return", "20-Day Volatility"]
    
    tabular_output_string = "\nCONCISE SUMMARY REPORT\n"
    tabular_output_string += "---------------------------------------\n"
    
    # Attempt to use tabulate for professional formatting
    try:
        from tabulate import tabulate
        tabular_output_string += tabulate(report_data, headers=headers, tablefmt="fancy_grid", numalign="right")
    except ImportError:
        # Fallback if tabulate is not installed
        tabular_output_string += "NOTE: Install 'tabulate' for better report formatting.\n"
        tabular_output_string += "-" * 50 + "\n"
        for row in report_data:
            tabular_output_string += (
                f"Symbol: {row[0]}\n"
                f"  Price Range: {row[1]}\n"
                f"  SMA(30)/EMA(30): {row[2]} / {row[3]}\n"
                f"  Return/Vol: {row[4]} / {row[5]}\n"
                + "-" * 50 + "\n"
            )
            
    return raw_dict_string, tabular_output_string


if __name__ == "__main__":
    # Pick the CSV path: the first command-line argument wins unless it is
    # missing or starts with '-'; otherwise fall back to FILE_PATH.
    # NOTE(review): the '-' check presumably skips option flags injected by
    # the notebook kernel -- confirm.
    path = (
        sys.argv[1]
        if len(sys.argv) > 1 and not sys.argv[1].startswith('-')
        else FILE_PATH
    )

    # Run the analysis pipeline.
    final_results = main(path)

    # main() returns None when nothing could be loaded; print only real results.
    if final_results is not None:
        raw_dict_report, tabular_summary = format_final_report(final_results)
        print(raw_dict_report, tabular_summary, sep="\n")

Christina Pratas - EMPLID : 12182674

CIS9650 Assignment 2: Stock Data Analysis Report
--- Starting Data Reading and Validation from stock_data_july_to_september.csv (Manual CSV Parsing) ---
--- Data Validation Complete: 320 clean rows loaded ---

FINAL PER-STOCK DICTIONARY STRUCTURE (Required Output)
{
    "MSFT": {
        "highest_high": 554.5383762548963,
        "lowest_low": 487.8979653734086,
        "tech": {
            "sma_30": 507.38877970377604,
            "ema_30": 509.657069752765,
            "three_month_return": 0.006509982083227506,
            "average_vol": 0.010233923998133151
        }
    },
    "AAPL": {
        "highest_high": 257.6000061035156,
        "lowest_low": 201.27158167093614,
        "tech": {
            "sma_30": 238.4366658528646,
            "ema_30": 240.0114886611006,
            "three_month_return": 0.0007861188255666729,
            "average_vol": 0.01822649503356168
        }
    },
    "ORCL": {
        "highest_high": 345.7200012207031,

## References

In [2]:
# Astanin, Andrey. "astanin/python-tabulate: Pretty-print tabular data in Python." GitHub, 24 Nov. 2025, https://github.com/astanin/python-tabulate.

In [3]:
# "Built-in Functions." The Python Standard Library, Python Software Foundation, 2025, docs.python.org/3/library/functions.html. Accessed 24 Nov. 2025.

In [4]:
# "Collections – High-performance container datatypes." The Python Standard Library, Python Software Foundation, 2025, docs.python.org/3/library/collections.html. Accessed 23 Nov. 2025.

In [5]:
# Dangtrinhnt. "Excel file to list of dictionaries in Python." GitHub Gist, 18 Apr. 2019, https://gist.github.com/dangtrinhnt/d0c7d50967b6213adeaa. Accessed 23 Nov. 2025.

In [6]:
# Kazil, Paco, and Katharine Jarmul. Data Wrangling with Python: Tips and Tools to Make Your Life Easier. O'Reilly, 2016.

In [7]:
# "The stock_summary Function." final_analysis.py, 2025, Canvas Document.

In [8]:
# Stirtz, John. "What Does if __name__ == "__main__": Do in Python?" Real Python, 20 Apr. 2021, realpython.com/python-main-function/. Accessed 24 Nov. 2025.

In [9]:
# Uttley, Phil. "Array Calculations with Numpy." Programming for Astronomy and Astrophysics 2: Programming Skills, Arrays and Scientific Libraries, 2018–2021, https://philuttley.github.io/prog4aa_lesson2/07-numpyfuncs/index.html.

In [10]:
# VanderPlas, Jake. Python Data Science Handbook: Essential Tools for Working with Data. O'Reilly, 2016.