In [None]:
import os
import pandas as pd
import yfinance as yf
from datetime import datetime
import matplotlib.pyplot as plt
import squarify

In [None]:
# Notebook-wide locations: generated files land in OUTPUT_DIR, the portfolio
# definition is read from INPUT_FILE.
OUTPUT_DIR = 'output'
INPUT_FILE = os.path.join('input', 'stocks.csv')

# Make sure the output folder exists; exist_ok keeps re-running this cell harmless.
os.makedirs(name=OUTPUT_DIR, exist_ok=True)

In [None]:
# Peek at the metadata yfinance exposes for the sample ticker 'F'.
stock = yf.Ticker('F')
info = stock.info

# Only the first eight key/value pairs are printed; the rest of the mapping is skipped.
for key, value in list(info.items())[:8]:
    print(f'{key} - {value}')

In [None]:
def extract_quote_fields(info):
    """Pick the fields this notebook needs out of a quote-info mapping.

    Parameters
    ----------
    info : mapping or None
        Dictionary-like object such as the one returned by ``yf.Ticker(...).info``.

    Returns
    -------
    tuple
        ``(price, dividend, industry, short_name)``. A missing key, or a key
        whose value is ``None``/empty, falls back to ``0.0`` for the two
        numeric fields and to ``'Unknown'`` for the two text fields.
    """
    info = info or {}
    # `or` (rather than a .get default) also covers keys that are present but None.
    price = info.get('regularMarketPreviousClose') or 0.0
    dividend = info.get('dividendRate') or 0.0
    industry = info.get('industry') or 'Unknown'
    short_name = info.get('shortName') or 'Unknown'
    return price, dividend, industry, short_name


# Enrich the portfolio CSV with price, dividend, name and industry for every
# ticker, derive per-position totals, append a SUM row and write the result to
# a timestamped CSV in OUTPUT_DIR.
try:
    # Read the CSV file
    df = pd.read_csv(INPUT_FILE)

    # Fetch stock data; one value per input row is collected for each new column
    prices, dividends, names, industries = [], [], [], []
    for ticker in df['Ticker']:
        try:
            quote = extract_quote_fields(yf.Ticker(ticker).info)
        except Exception as e:
            print(f"Error fetching data for {ticker}: {e}")
            # Use the full set of fallbacks so a failed lookup never leaves a
            # field undefined or carrying over the previous ticker's value.
            quote = (0.0, 0.0, 'Unknown', 'Unknown')

        price, dividend, industry, short_name = quote
        prices.append(price)
        dividends.append(dividend)
        names.append(short_name)
        industries.append(industry)

    # Add the fetched columns to the DataFrame in one step each
    df['Price'] = prices
    df['Dividend'] = dividends
    df['Name'] = names
    df['Industry'] = industries

    # Calculate total investment and expected annual dividends
    df['Total Investment'] = (df['Shares'] * df['Price']).round(0)
    df['Expected Dividends'] = (df['Shares'] * df['Dividend']).round(2)

    # Add "SUM" row at the bottom of the DataFrame; non-aggregated cells stay blank
    sum_row = {
        'Ticker': 'SUM',
        'Shares': '',
        'Price': '',
        'Dividend': '',
        'Name': '',
        'Industry': '',
        'Total Investment': df['Total Investment'].sum().round(0),
        'Expected Dividends': df['Expected Dividends'].sum().round(0),
    }

    df = pd.concat([df, pd.DataFrame([sum_row])], ignore_index=True)

    # Generate file name with today's date and time
    output_name = datetime.now().strftime('%Y%m%d_%H%M%S')
    output_file = os.path.join(OUTPUT_DIR, f'{output_name}_updated_stocks.csv')

    # Save the updated DataFrame to a new CSV file
    df.to_csv(output_file, index=False)
    print(f'Data saved to {output_file}')

except FileNotFoundError:
    print(f'Error: The file {INPUT_FILE} was not found.')
except Exception as e:
    print(f'Error: An unexpected error has occurred {e}!')

In [None]:
# Show the DataFrame left behind by the previous cell; as the cell's last
# expression it is rendered with the notebook's rich table display.
df

In [None]:
# Pie chart: share of the total investment held in each industry.

# Date stamp used as the prefix of the chart file names.
today = datetime.now().strftime('%Y%m%d')

# Leave out the synthetic SUM row before aggregating per industry.
positions = df.loc[df['Ticker'].ne('SUM')]
industry_data = positions.groupby('Industry')['Total Investment'].sum()

fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(
    industry_data,
    labels=industry_data.index,
    autopct='%1.1f%%',
    startangle=140,
)
ax.set_title('Investment Distribution by Industry')

chart_name = f'{today}_industry_distribution.png'
fig.savefig(os.path.join(OUTPUT_DIR, chart_name))
print("Industry distribution chart saved.")
print(chart_name)

In [None]:
# Generate a treemap for investment sizes with a custom colormap
investment_data = df[df['Ticker'] != 'SUM']
# Keep only positions with a positive value: tickers whose lookup failed carry
# a Total Investment of 0 (or NaN); such tiles would be invisible and can break
# the treemap layout computation.
investment_data = investment_data[investment_data['Total Investment'] > 0]

if investment_data.empty:
    print("No positions with a positive Total Investment; treemap not generated.")
else:
    plt.figure(figsize=(12, 8))
    cmap = plt.colormaps['tab20']  # Accessing 'tab20' colormap
    # Wrap the index at the palette size: integer indices past the last entry
    # would otherwise all map to the same final colour.
    colors = [cmap(i % cmap.N) for i in range(len(investment_data))]

    squarify.plot(
        sizes=investment_data['Total Investment'].tolist(),
        label=investment_data['Ticker'].tolist(),
        alpha=0.8,
        color=colors  # Pass the custom colors here
    )
    plt.title('Investment Proportion by Ticker')
    plt.axis('off')  # Hide axes for treemap
    plt.savefig(os.path.join(OUTPUT_DIR, f'{today}_investment_treemap.png'))
    print(f"Investment treemap chart saved.\n{today}_investment_treemap.png")