In [1]:
import pandas as pd
import yfinance as yf
# Load the S&P 500 ESG risk ratings and keep only rows without missing values.
data = pd.read_csv(
    "C:/Users/humza/OneDrive - Worcester Polytechnic Institute (wpi.edu)/mqp/archive/SP 500 ESG Risk Ratings.csv"
).dropna()
import yfinance as yf
import pandas as pd

# Questionnaire answers: one rating per ESG pillar.
user_preferences = {
    "Environmental": 1,  # Based on Question 1 and 3
    "Social": 1,  # Based on Question 2 and 4
    "Governance": 1,  # Based on Question 5
}

# Rescale the ratings into weights that sum to 1.
total_weight = sum(user_preferences.values())
weights = {
    pillar: user_preferences[pillar] / total_weight
    for pillar in user_preferences
}

# Function to calculate ESG weight multiplier based on user preference
def calculate_esg_weight_multiplier(user_preference):
    """Turn a questionnaire rating into an ESG score multiplier.

    A rating equal to 3 yields 0; any other rating yields the rating
    minus 2 (for example 1 -> -1, 2 -> 0, 4 -> 2, 5 -> 3).

    NOTE(review): ratings 2 and 3 both map to 0 -- confirm this is the
    intended scale.
    """
    return 0 if user_preference == 3 else user_preference - 2

# Multiplier for each ESG pillar, derived from the user's rating for that pillar.
esg_weight_multipliers = {
    pillar: calculate_esg_weight_multiplier(rating)
    for pillar, rating in user_preferences.items()
}

# Per-pillar contribution: risk score * normalized weight * multiplier.
environmental_term = (
    data['Environment Risk Score']
    * weights['Environmental']
    * esg_weight_multipliers['Environmental']
)
social_term = (
    data['Social Risk Score']
    * weights['Social']
    * esg_weight_multipliers['Social']
)
governance_term = (
    data['Governance Risk Score']
    * weights['Governance']
    * esg_weight_multipliers['Governance']
)

# Weighted ESG risk score for each company, based on the user's responses
# to the questionnaire.
data['Weighted ESG Risk Score'] = environmental_term + social_term + governance_term

# Every distinct ticker symbol present in the data.
unique_symbols = data['Symbol'].unique()

# Maps symbol -> 5-year growth estimate (None when no '+5y' estimate exists).
growth_estimates_data = {}

for symbol in unique_symbols:
    # Ask yfinance for this ticker's growth estimates.
    stock = yf.Ticker(symbol)
    growth_estimates = stock.growth_estimates

    # No estimates at all, or no '+5y' row: record the symbol as missing.
    if growth_estimates is None or '+5y' not in growth_estimates.index:
        growth_estimates_data[symbol] = None
        continue

    # Take the '+5y' value from the 'stock' column.
    growth_estimates_data[symbol] = growth_estimates.loc['+5y', 'stock']

# One-column DataFrame of growth estimates, indexed by symbol.
growth_estimates_df = pd.DataFrame.from_dict(
    growth_estimates_data,
    orient='index',
    columns=['Growth Estimate (+5 years)'],
)

# Attach each company's growth estimate by matching 'Symbol' to that index.
data = pd.merge(data, growth_estimates_df, left_on='Symbol', right_index=True)

# Final score = 10 x growth estimate + weighted ESG risk score.
growth_component = data['Growth Estimate (+5 years)'] * 10
data['Final Score'] = growth_component + data['Weighted ESG Risk Score']

# Rank companies from the highest final score to the lowest.
sorted_data = data.sort_values('Final Score', ascending=False)

# Print the ranked symbols with their final scores.
print(sorted_data.loc[:, ['Symbol', 'Final Score']])


    Symbol  Final Score
414    CCL    21.643333
107    STX    17.740333
434     BA    12.020000
76     ALL     9.596668
266   ILMN     1.790000
..     ...          ...
465    APA   -12.909667
41     VLO   -12.923333
65     MOS   -13.240000
324    XOM   -13.346667
166    OXY   -13.490000

[400 rows x 2 columns]
