In [None]:
import pandas as pd

def save_feature_correlations(dataset, target_columns, output_file):
    """
    Save the correlations between every numerical feature and each target column to a CSV file.

    Non-numerical columns and the 'Draft Year' column are excluded before the
    correlations are computed. The resulting table has one row per remaining
    numerical column (the targets themselves included) and one column per
    target, with values rounded to three decimal places.

    Args:
        dataset (pd.DataFrame): The dataset containing the columns to correlate.
        target_columns (list of str): The names of the target columns to correlate against.
            Each one must be a numerical column of ``dataset`` other than 'Draft Year'.
        output_file (str): The path of the output CSV file.

    Raises:
        KeyError: If any target column is missing from the dataset, is not
            numerical, or is the excluded 'Draft Year' column. Nothing is
            written in that case.
    """
    # Select only numerical columns from the dataset
    numerical_dataset = dataset.select_dtypes(include=['number'])

    # Drop the 'Draft Year' column if it exists in the numerical dataset
    numerical_dataset = numerical_dataset.drop(columns=['Draft Year'], errors='ignore')

    # Validate the targets up front: a target that was filtered out above would
    # otherwise fail inside the loop with a bare KeyError that gives no reason.
    unavailable_targets = [
        column for column in target_columns
        if column not in numerical_dataset.columns
    ]
    if unavailable_targets:
        raise KeyError(
            f"Target column(s) {unavailable_targets} are not available: each target "
            "must be a numerical column of the dataset other than 'Draft Year'"
        )

    # Initialize a DataFrame to store feature correlations (one row per feature)
    correlations_df = pd.DataFrame(index=numerical_dataset.columns)

    # Calculate and store correlations for each target column
    for target_column in target_columns:
        correlations_df[target_column] = numerical_dataset.corrwith(
            numerical_dataset[target_column]
        )

    # Round the correlation values to three decimal places
    correlations_df = correlations_df.round(3)

    # Save the rounded correlations to a CSV file; the feature names are written
    # as the (unlabeled) first column.
    correlations_df.to_csv(output_file)

# Example usage:
# The paths live in variables so the confirmation message below always reports
# the file that was actually written.
dataset_path = '/content/Full Dataset (Imputed Values).csv'  # Adjust the path to your dataset
correlations_path = '/content/feature_correlations.csv'  # Adjust the output path as needed

dataset = pd.read_csv(dataset_path)
target_columns = ['WS/48', 'BPM', 'VORP/48']
save_feature_correlations(dataset, target_columns, correlations_path)

print(f'Correlations saved to {correlations_path}')


In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the correlation table (one row per feature, one column per statistic)
file_path = '/content/feature_correlations (1).csv'  # Update path for file location
statistic_columns = ['WS/48', 'BPM', 'VORP/48']
data = pd.read_csv(file_path)

# A CSV written with DataFrame.to_csv() and no index_label has an unnamed first
# column, which read_csv loads as 'Unnamed: 0' rather than 'Index'. In that case
# use the first column as the feature identifier so the melt below does not fail.
if 'Index' not in data.columns and data.columns[0] not in statistic_columns:
    data = data.rename(columns={data.columns[0]: 'Index'})

# Reshape to long form: one row per (feature, statistic) pair
data_melted = data.melt(id_vars=['Index'], value_vars=statistic_columns, var_name='Statistic', value_name='Correlation')

# Plotting
plt.figure(figsize=(14, 8))
sns.barplot(x='Index', y='Correlation', hue='Statistic', data=data_melted, palette='bright')

# Bar categories sit at integer x positions starting from 0, so 25.5 falls
# between the 26th and 27th features and 36.5 between the 37th and 38th.
line_positions_26_27 = [25.5]
line_positions_37_38 = [36.5]

# Draw dashed vertical black lines at the specified positions
for pos in line_positions_26_27 + line_positions_37_38:
    plt.axvline(x=pos, color='black', linestyle='--', linewidth=2)

plt.title('Feature Correlations with Advanced Statistics')
plt.xlabel('Feature Index')
plt.ylabel('Correlation')
# A bare plt.xticks() only reads the current ticks and changes nothing; rotate
# the labels so the many feature labels do not overlap.
plt.xticks(rotation=90)
plt.tight_layout()

# Specify the output path for the image file you want to save
# NOTE(review): the path has no file extension, so matplotlib chooses the image
# format from rcParams['savefig.format'] — consider an explicit name such as
# '/content/feature_correlations.png'.
output_image_path = '/content/sample_data'  # Update this path as needed
plt.savefig(output_image_path)

plt.show()