# Find the correlation between ESG scores (and the environmental "E" score alone) and stock returns.

## Then redefine our ESG metric as ESGR, and maximize that variable

In [4]:
import json
import numpy as np
from scipy.stats import pearsonr
import qiskit

# Load the JSON data from a file
file_path = 'Datasets/small_dataset.json'  # Replace with your actual file path

# Decode explicitly as UTF-8 (the standard JSON encoding) instead of relying
# on the platform's locale default, which differs between systems.
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)["data"]  # Access the "data" field in the JSON

# Function to compute a company's average daily return and look up its ESG scores
def calculate_correlation(company_name, company_data):
    """Return the mean daily return and the ESG scores for one company.

    Despite its name, this function does not compute a correlation; it
    returns the per-company values a caller would need to compute one
    across companies.

    Args:
        company_name: Identifier of the company. Not used in the
            computation; kept so existing callers continue to work.
        company_data: Mapping for one company. May contain "History"
            (date key -> mapping with a "Close" price) and
            "Sustainability" -> "esgScores" -> "totalEsg" /
            "environmentScore".

    Returns:
        A 3-tuple ``(average_return, total_esg, env_score)``:

        * ``(None, None, None)`` when "History" is missing or holds fewer
          than two closing prices.
        * ``(average_return, None, None)`` when no "totalEsg" is available.
        * ``env_score`` is ``None`` when "environmentScore" is absent.
    """
    history = company_data.get("History")
    if history is None:
        # Always return three values so callers can unpack the result
        # uniformly, whichever piece of data is missing.
        return None, None, None

    # Keys are sorted as plain values; this is chronological only if the
    # date keys sort that way (e.g. ISO "YYYY-MM-DD") -- TODO confirm
    # against the dataset.
    closing_prices = [history[date]["Close"] for date in sorted(history)]

    if len(closing_prices) < 2:
        return None, None, None  # Not enough data points to calculate returns

    # Simple daily return: relative change between consecutive closes.
    returns = [
        (current - previous) / previous
        for previous, current in zip(closing_prices, closing_prices[1:])
    ]
    average_return = np.mean(returns)

    # Walk the nested ESG structure, tolerating a missing level.
    sustainability = company_data.get("Sustainability") or {}
    esg_scores = sustainability.get("esgScores") or {}
    if "totalEsg" not in esg_scores:
        return average_return, None, None  # ESG data is missing

    # "environmentScore" may be absent even when "totalEsg" is present;
    # report it as None rather than raising KeyError.
    return average_return, esg_scores["totalEsg"], esg_scores.get("environmentScore")

# Print the average return and the ESG figures for every company in the dataset.
# The unpacking relies on calculate_correlation returning exactly three values.
for ticker in data:
    avg_return, esg_total, environment = calculate_correlation(ticker, data[ticker])
    print(f"the AVG return is {avg_return}\t the total ESG score is: {esg_total}\t the env. score is {environment}")



the AVG return is 0.0017029850335665128	 the total ESG score is: 11.81	 the env. score is 0.01
the AVG return is -2.440172177436038e-06	 the total ESG score is: 33.68	 the env. score is 18.57
the AVG return is 0.0009873654702359268	 the total ESG score is: 26.15	 the env. score is 7.32
the AVG return is -0.0007295207899513821	 the total ESG score is: 24.55	 the env. score is 9.06
the AVG return is 0.0014701442727089295	 the total ESG score is: 21.86	 the env. score is 5.42
the AVG return is -0.0005527010213243738	 the total ESG score is: 17.96	 the env. score is 2.14
the AVG return is 0.002214095612553523	 the total ESG score is: 17.56	 the env. score is 7.78
the AVG return is -0.00017531218679703233	 the total ESG score is: 17.62	 the env. score is 1.44
the AVG return is 0.0009208921862476832	 the total ESG score is: 17.53	 the env. score is 5.73
the AVG return is 0.0009661857670689299	 the total ESG score is: 20.39	 the env. score is 10.24
the AVG return is 0.0007469472827713529	 the