# In [None]:
"""
Correlation Analyzer Class
Based on Chapter 3 of "Doing Math with Python"
"""


import numpy as np
import matplotlib.pyplot as plt
from typing import List, Tuple, Union, Optional


class CorrelationAnalyzer:
    """Analyze the linear (Pearson) correlation between two datasets.

    The coefficient is computed on demand by ``calculate_correlation`` and
    cached on ``self.correlation``; the interpretation and plotting methods
    compute it automatically if it has not been calculated yet.
    """

    def __init__(self, x: List[float], y: List[float]):
        """
        Initialize the CorrelationAnalyzer with two datasets

        Args:
            x: First dataset
            y: Second dataset

        Raises:
            ValueError: If datasets have different lengths
        """
        if len(x) != len(y):
            raise ValueError("Both datasets must have the same length")

        self.x = x
        self.y = y
        # Cached coefficient; stays None until calculate_correlation() runs.
        self.correlation: Optional[float] = None

    def _ensure_correlation(self) -> float:
        """Return the cached coefficient, computing it first if necessary."""
        if self.correlation is None:
            self.calculate_correlation()
        return self.correlation

    def calculate_correlation(self) -> float:
        """
        Calculate the Pearson correlation coefficient between the datasets:

            r = (n*Sxy - Sx*Sy) / sqrt((n*Sxx - Sx**2) * (n*Syy - Sy**2))

        The result is also stored on ``self.correlation``.

        Returns:
            float: The correlation coefficient. When the coefficient is
            undefined (empty data, or either dataset has zero variance)
            0.0 is returned and cached, so later calls to
            ``interpret_correlation`` / ``create_scatter_plot`` keep working.
        """
        n = len(self.x)

        sum_x = sum(self.x)
        sum_y = sum(self.y)
        sum_xy = sum(xi * yi for xi, yi in zip(self.x, self.y))
        sum_x_sq = sum(xi ** 2 for xi in self.x)
        sum_y_sq = sum(yi ** 2 for yi in self.y)

        numerator = n * sum_xy - sum_x * sum_y
        radicand = (n * sum_x_sq - sum_x ** 2) * (n * sum_y_sq - sum_y ** 2)

        # Mathematically the radicand is never negative, but float rounding
        # can leave it a hair below zero; "<= 0" covers that as well as the
        # genuine zero-variance case (and avoids a complex-valued root).
        if radicand <= 0:
            self.correlation = 0.0
            return self.correlation

        r = numerator / radicand ** 0.5
        # Rounding can push |r| marginally past 1; clamp with comparisons
        # (rather than min/max) so a NaN result is passed through untouched.
        if r > 1.0:
            r = 1.0
        elif r < -1.0:
            r = -1.0

        self.correlation = r
        return self.correlation

    def interpret_correlation(self) -> str:
        """
        Provide an interpretation of the correlation coefficient

        The coefficient is calculated first if it has not been already.

        Returns:
            str: Interpretation of the correlation strength and direction,
            including the coefficient rounded to four decimal places
        """
        r = self._ensure_correlation()
        magnitude = abs(r)

        # Anything this close to zero is reported without strength/direction.
        if magnitude < 0.1:
            return f"There is virtually no correlation (r={self.correlation:.4f})"

        if magnitude > 0.9:
            strength = "very strong"
        elif magnitude > 0.7:
            strength = "strong"
        elif magnitude > 0.5:
            strength = "moderate"
        elif magnitude > 0.3:
            strength = "weak"
        else:
            strength = "very weak or no"

        direction = "positive" if r > 0 else "negative"

        return f"There is a {strength} {direction} correlation (r={self.correlation:.4f})"

    def create_scatter_plot(self, title: Optional[str] = None,
                            xlabel: Optional[str] = None,
                            ylabel: Optional[str] = None,
                            add_trendline: bool = False) -> None:
        """
        Create and show a scatter plot of the datasets

        Args:
            title: Optional title for the plot; defaults to a title that
                shows the correlation coefficient
            xlabel: Optional label for x-axis (defaults to 'X')
            ylabel: Optional label for y-axis (defaults to 'Y')
            add_trendline: Whether to add a least-squares trend line
                (degree-1 polynomial fit) and its equation to the plot
        """
        # Compute the coefficient up front: the default title formats it, so
        # it must not still be None when no custom title is supplied.
        r = self._ensure_correlation()

        plt.figure(figsize=(10, 6))
        plt.scatter(self.x, self.y, color='blue', alpha=0.7)

        if title:
            plt.title(title)
        else:
            plt.title(f'Scatter Plot (r={r:.4f})')

        plt.xlabel(xlabel if xlabel else 'X')
        plt.ylabel(ylabel if ylabel else 'Y')
        plt.grid(True, alpha=0.3)

        if add_trendline:
            # Fit y = slope * x + intercept
            z = np.polyfit(self.x, self.y, 1)
            p = np.poly1d(z)

            # Draw the fitted line across the observed x-range
            x_line = np.linspace(min(self.x), max(self.x), 100)
            plt.plot(x_line, p(x_line), "r--", alpha=0.8)

            # Show the fitted equation in the top-left corner
            equation = f'y = {z[0]:.4f}x + {z[1]:.4f}'
            plt.annotate(equation, xy=(0.05, 0.95), xycoords='axes fraction',
                         bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="gray", alpha=0.8))

        # Show the correlation coefficient just below the equation slot
        corr_text = f'r = {r:.4f}'
        plt.annotate(corr_text, xy=(0.05, 0.90), xycoords='axes fraction',
                     bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="gray", alpha=0.8))

        plt.tight_layout()
        plt.show()
dataset_1 = [10, 20, 30, 40, 50]
dataset_2 = [12, 21, 35, 45, 55]

# Exercise: analyze the correlation between the two sample datasets.
# 1. Create a CorrelationAnalyzer instance
analyzer = CorrelationAnalyzer(dataset_1, dataset_2)

# 2. Calculate the correlation
correlation = analyzer.calculate_correlation()

# 3. Print the interpretation (the call was missing its `analyzer` receiver,
#    which made the statement a syntax error)
print(analyzer.interpret_correlation())