Skip to content

Commit

Permalink
Add new functions
Browse files Browse the repository at this point in the history
  • Loading branch information
cris1618 committed May 20, 2024
1 parent 443846f commit 6a888d0
Showing 1 changed file with 118 additions and 3 deletions.
121 changes: 118 additions & 3 deletions vizbeauty/common.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,122 @@
"""The common module contains common functions and classes used by the other modules.
"""

def hello_world():
"""Prints "Hello World!" to the console.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import pearsonr

def print_statistic(title, variable):
    """
    Print descriptive statistics for a given variable.

    Every statistic is computed with pandas Series methods, which skip
    missing values, so the report stays meaningful when the data has NaNs.

    Parameters:
    - title (str): Title or label for the variable.
    - variable (Series): Pandas Series containing the variable data.

    Returns:
    - None
    """
    # Series.quantile ignores missing values like mean/median/std below;
    # np.percentile would yield NaN as soon as a single value is missing.
    first_quartile = variable.quantile(0.25)
    third_quartile = variable.quantile(0.75)

    print(f"Statistics for {title}:")
    print("++++++++++++++++++++++++++++++++++++++++++++++++++++++")
    print(f"Mean: {round(variable.mean(), 2)}")
    print(f"Median: {round(variable.median(), 2)}")
    print(f"Standard Deviation: {round(variable.std(), 2)}")
    print(f"Minimum: {round(variable.min(), 2)}")
    print(f"Maximum: {round(variable.max(), 2)}")
    print(f"25th Percentile (Q1): {round(first_quartile, 2)}")
    print(f"75th Percentile (Q3): {round(third_quartile, 2)}")
    print(f"Skewness: {round(variable.skew(), 2)}")
    print(f"Kurtosis: {round(variable.kurtosis(), 2)}")
    print(f"Count of Missing Values: {variable.isnull().sum()}")
    print("++++++++++++++++++++++++++++++++++++++++++++++++++++++")
    print()

def beautybar(x, y, data, data_avg, color="skyblue", ax=None):
    """
    Create a bar plot with annotations for each bar and a horizontal line indicating the general average value
    of the variable plotted on the y-axis.

    Parameters:
    x (str): The name of the variable to be plotted on the x-axis.
    y (str): The name of the variable to be plotted on the y-axis.
    data (DataFrame): The dataframe containing the data to be plotted.
    data_avg (DataFrame): The dataframe containing the average values for the y variable.
    color (str, optional): The color of the bars. Defaults to "skyblue".
    ax (matplotlib.axes.Axes, optional): The axes to plot on. If not provided, the current axes
        (``plt.gca()``) are used.

    Returns:
    None
    """
    if ax is None:
        ax = plt.gca()

    sns.barplot(x=x, y=y, data=data, color=color, ax=ax)

    # One label per row of data[y], placed at the row's positional index.
    # NOTE(review): this assumes one row per x category, in bar order — confirm with callers.
    for index, value in enumerate(data[y]):
        ax.text(index, value + 0.2, str(round(value, 2)), ha='center', va='bottom')

    avg_variable = data_avg[y].mean()

    ax.axhline(y=avg_variable, color='r', linestyle='--')
    # Anchor the label to the right edge of the axes (x in axes fraction,
    # y in data units) instead of a fixed data x-position, so it stays next
    # to the plot regardless of how many bars are drawn.
    ax.text(
        1.0,
        avg_variable + 0.2,
        f'Average: {round(avg_variable, 2)}',
        color='red',
        transform=ax.get_yaxis_transform(),
    )

    ax.set_xlabel(x)
    ax.set_ylabel(y)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
    ax.grid(axis='y', linestyle='--')
    plt.tight_layout()

def pearson_correlation(x, y, alpha=0.05):
    """
    Compute the Pearson correlation coefficient for two variables and determine if there is a statistically significant correlation.

    Prints the coefficient, the p-value and a verdict comparing the p-value
    with ``alpha``. A NaN p-value is reported as "no statistically
    significant correlation" because it does not compare below ``alpha``.

    Parameters:
    - x (Series): First variable for correlation.
    - y (Series): Second variable for correlation.
    - alpha (float, optional): Significance level the p-value is compared against. Defaults to 0.05.

    Returns:
    - None
    """
    correlation_coefficient, p_value = pearsonr(x, y)

    print("Pearson Correlation Coefficient:", correlation_coefficient)
    print("P-value:", p_value)

    # Inputs without a usable ``name`` (lists, arrays, unnamed Series) fall
    # back to generic labels instead of raising or printing "None".
    x_name = getattr(x, "name", None)
    y_name = getattr(y, "name", None)
    x_label = "x" if x_name is None else x_name
    y_label = "y" if y_name is None else y_name

    if p_value < alpha:
        print(f"There is a statistically significant correlation between {x_label} and {y_label}.")
    else:
        print(f"There is no statistically significant correlation between {x_label} and {y_label}.")

def reg_scatter(x, y, data, hue=None, legend="auto", size=None, sizes=None, ax=None):
    """
    Draw a scatter plot of two columns and overlay a regression line.

    Parameters:
    x (str): The name of the variable to be plotted on the x-axis.
    y (str): The name of the variable to be plotted on the y-axis.
    data (DataFrame): The dataframe containing the data to be plotted.
    hue (str, optional): Variable in data to map plot aspects to different colors.
    legend ({"auto", "brief", "full"}, optional): How to draw the legend. Defaults to "auto".
    size (str, optional): Grouping variable that will produce points with different sizes.
    sizes (tuple, optional): Tuple of minimum and maximum size values to scale the size variable.
    ax (matplotlib axes, optional): Axes object to draw the plot onto.
    """
    # Arguments common to both seaborn calls, so both layers share data and axes.
    shared_args = {"x": x, "y": y, "data": data, "ax": ax}

    # Layer 1: the points, optionally styled by hue and size.
    sns.scatterplot(hue=hue, legend=legend, size=size, sizes=sizes, **shared_args)

    # Layer 2: the regression line only; regplot's own scatter is switched
    # off and no confidence interval is requested.
    line_style = {"color": "black"}
    sns.regplot(scatter=False, ci=None, line_kws=line_style, **shared_args)

def visualize_hyperparameter(param_name, param_values, scores):
    """
    Visualize the effect of a hyperparameter on model performance.

    Opens a new 10x6 figure, scatters the scores against the hyperparameter
    values and displays the figure with ``plt.show()``.

    Parameters:
    param_name (str): Name of the hyperparameter.
    param_values (list): List of values for the hyperparameter.
    scores (list): List of mean squared error scores corresponding to each hyperparameter value.

    Returns:
    None
    """
    plt.figure(figsize=(10, 6))
    plt.scatter(param_values, scores, marker='o')
    plt.title(f"Effect of {param_name} on Model Performance")
    plt.xlabel(param_name)
    plt.ylabel("Mean Squared Error")
    plt.grid(True)
    plt.show()

0 comments on commit 6a888d0

Please sign in to comment.