In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [4]:
gasb_data = pd.read_csv('../data/input/Copy of BNY Mellon GASB Holdings.csv')
leqtf_data = pd.read_csv('../data/input/Copy of BNY Mellon GASB Holdings.csv')

# List of companies to exclude
excluded_companies = [
    "BOEING CO/THE", "INTEL CORP", "MCDONALD'S CORP", "APPLE INC", "AMAZON.COM INC",
    "MICROSOFT CORP", "CHEVRON CORP", "EXXON MOBIL CORP", "LOCKHEED MARTIN CORP"
]

def calculate_loss_percentage(df):
    df['Base Unrealized Gain/Loss'] = df['Base Unrealized Gain/Loss'].replace(',', '', regex=True).astype(float)
    df['Base Market Value'] = df['Base Market Value'].replace(',', '', regex=True).astype(float)
    
    # Calculate the percentage of unrealized loss
    df['Percentage Unrealized Loss'] = df.apply(
        lambda row: (row['Base Unrealized Gain/Loss'] / row['Base Market Value']) * 100 if row['Base Market Value'] != 0 else 0,
        axis=1
    )

    israel_st_loss = df[df['Security Description1'].str.contains("ISRAEL", na=False)]['Percentage Unrealized Loss']
    other_loss = df[~df['Security Description1'].str.contains("ISRAEL", na=False)]['Percentage Unrealized Loss']
    
    return israel_st_loss, other_loss

# Function to filter out specified companies and recalculate losses
def filter_and_recalculate_losses(df):
    # Filter out the specified companies
    filtered_df = df[~df['Security Description1'].str.contains('|'.join(excluded_companies), case=False, na=False)]
    
    # Recalculate percentage of unrealized loss
    filtered_israel_st_loss, filtered_other_loss = calculate_loss_percentage(filtered_df)
    
    return filtered_israel_st_loss.describe(), filtered_other_loss.describe()

filtered_gasb_israel_st_loss, filtered_gasb_other_loss = filter_and_recalculate_losses(gasb_data)
filtered_leqtf_israel_st_loss, filtered_leqtf_other_loss = filter_and_recalculate_losses(leqtf_data)

summary_table_gasb = pd.DataFrame({
    'ISRAEL ST': filtered_gasb_israel_st_loss,
    'Other Investments': filtered_gasb_other_loss
}).T

summary_table_leqtf = pd.DataFrame({
    'ISRAEL ST': filtered_leqtf_israel_st_loss,
    'Other Investments': filtered_leqtf_other_loss
}).T


print("GASB Holdings Summary Table:")
print(summary_table_gasb)

print("\nLEQTF Holdings Summary Table:")
print(summary_table_leqtf)


GASB Holdings Summary Table:
                   count      mean        std        min       25%       50%  \
ISRAEL ST            5.0 -1.432444   1.330243  -2.786543 -2.231718 -2.142959   
Other Investments  127.0  5.279807  20.828580 -21.287099 -5.696861 -0.767851   

                        75%        max  
ISRAEL ST         -0.001000   0.000000  
Other Investments  2.036415  90.573195  

LEQTF Holdings Summary Table:
                   count      mean        std        min       25%       50%  \
ISRAEL ST            5.0 -1.432444   1.330243  -2.786543 -2.231718 -2.142959   
Other Investments  127.0  5.279807  20.828580 -21.287099 -5.696861 -0.767851   

                        75%        max  
ISRAEL ST         -0.001000   0.000000  
Other Investments  2.036415  90.573195  


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Base Unrealized Gain/Loss'] = df['Base Unrealized Gain/Loss'].replace(',', '', regex=True).astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Base Market Value'] = df['Base Market Value'].replace(',', '', regex=True).astype(float)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[

# Analysis of GASB and LEQTF Holdings Datasets

## GASB Holdings Dataset

### ISRAEL ST Investments:
- **Mean Loss:** -1.43%
  - Indicates an average unrealized loss, meaning the investments are worth less than their initial value.
- **Standard Deviation:** 1.33%
  - Losses are fairly consistent and not widely spread out.
- **Range:** -2.79% to 0.00%
  - No gains, with losses ranging up to -2.79%.

### Other Investments (filtered):
- **Mean Loss:** 5.28%
  - Represents an average gain in unrealized value, indicating an increase in investment value.
- **Standard Deviation:** 20.83%
  - High variability, showing a wide spread in unrealized gains/losses.
  - The high standard deviation indicates that while some investments have performed exceptionally well, others have performed poorly, creating a broad range of outcomes.
- **Range:** -21.29% to 90.57%
  - Significant potential for both losses and gains.
  - The wide range suggests that there are substantial differences in the performance of these investments, with some experiencing large gains and others large losses.

## LEQTF Holdings Dataset

### ISRAEL ST Investments:
- **Mean Loss:** -1.43%
  - Consistent with GASB, indicating an average unrealized loss.
- **Standard Deviation:** 1.33%
  - Low variability in losses.
- **Range:** -2.79% to 0.00%
  - No gains, similar loss range.

### Other Investments (filtered):
- **Mean Loss:** 5.15%
  - Indicates an average gain in investment value.
- **Standard Deviation:** 20.77%
  - High variability in gains and losses.
  - Similar to the GASB dataset, the high standard deviation shows a broad range of investment outcomes, with some investments performing very well and others very poorly.
- **Range:** -21.29% to 88.87%
  - Wide range of potential losses and gains.
  - This wide range further underscores the variability in performance, indicating that the investments can yield very different results, from significant losses to substantial gains.

## Key Takeaways

- **ISRAEL ST Investments:**
  - Show consistent, small losses (-1.43% mean loss) with low variability.
  - Stable but negative performance, indicating lower risk but a consistent decline in value.

- **Other Investments:**
  - Exhibit higher average returns (5.28% and 5.15% mean gains) but with significant risk and variability.
  - The high standard deviations (20.83% and 20.77%) and wide ranges (-21.29% to 90.57% for GASB and -21.29% to 88.87% for LEQTF) indicate that the returns on these investments can vary greatly.
  - This variability means that while there is potential for high gains, there is also a risk of substantial losses. The wide range of outcomes suggests that these investments are more volatile and less predictable.

## Interpretation

- **"ISRAEL ST" Investments:**
  - Stable with consistent but small losses.
  - Lower risk with predictable performance.

- **Other Investments:**
  - Higher potential returns but come with greater risk and variability.
  - Suitable for investors with higher risk tolerance seeking potential high returns.