In [None]:
import pandas as pd

# Load the dataset directly from the raw GitHub URL below (served from the
# repository's `main` branch, so this cell needs network access).

url = "https://raw.githubusercontent.com/batloon/data-projects/main/coffee_is_happiness/data/coffee_happiness_correlation.csv"
df = pd.read_csv(url)

# Preview the first rows; as the last expression of the cell it is rendered as output
df.head()

In [None]:
# Print the column index so the exact column names used later are visible
print(df.columns)

# Summary statistics (count, mean, std, quartiles, ...) as computed by DataFrame.describe()
df.describe()


In [None]:
# Data cleaning: discard every row that contains at least one missing value.
# `axis=0` / `how="any"` are the pandas defaults, spelled out for clarity.
df = df.dropna(axis=0, how="any")

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Scatter plot of coffee consumption against happiness score, drawn on an
# explicitly created Axes instead of the implicit pyplot "current axes".
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=df,
    x="Coffee_Consumption_Per_Capita_KG",
    y="Happiness_Score",
    color="navy",
    s=60,
    edgecolor="white",
    ax=ax,
)
ax.set_title("Coffee Consumption vs Happiness Score")
ax.set_xlabel("Coffee Consumption per person (kg)")
ax.set_ylabel("Happiness Score")
ax.grid(True)
plt.show()



In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Same scatter as before, now with a fitted linear regression line on top.
fig, ax = plt.subplots(figsize=(8, 5))

# Styling for the two layers that regplot draws.
point_style = dict(color="navy", s=60, edgecolor="white")
trend_style = dict(color="red", linewidth=2)

sns.regplot(
    data=df,
    x="Coffee_Consumption_Per_Capita_KG",
    y="Happiness_Score",
    ci=None,  # no confidence band around the regression line
    scatter_kws=point_style,
    line_kws=trend_style,
    ax=ax,
)

ax.set_title("Coffee Consumption vs Happiness Score (with Regression Line)")
ax.set_xlabel("Coffee Consumption per person (kg)")
ax.set_ylabel("Happiness Score")
ax.grid(True)
plt.show()


In [None]:
# Pearson correlation coefficient via pandas' built-in Series.corr
# ("pearson" is the default method; stated explicitly here).
pearson_correlation = df["Coffee_Consumption_Per_Capita_KG"].corr(
    df["Happiness_Score"],
    method="pearson",
)
print(f"Pearson correlation coefficient: {pearson_correlation:.2f}")

In [None]:
# calculate Pearson correlation coefficient using the formula
def pearson_correlation_f(x, y):
    """Compute the Pearson correlation coefficient of two equal-length samples.

    Implements the textbook formula

        r = sum(dx * dy) / sqrt(sum(dx**2) * sum(dy**2))

    where ``dx`` and ``dy`` are the deviations of ``x`` and ``y`` from their
    respective means.

    Parameters
    ----------
    x, y : pandas.Series
        Paired numeric observations. Any object exposing ``.mean()`` and
        ``.dot()`` (e.g. a NumPy array) works as well.

    Returns
    -------
    float
        Correlation coefficient in [-1, 1]. The result is NaN when the
        denominator is zero, i.e. when either input has no variance.
    """
    dx = x - x.mean()
    dy = y - y.mean()
    # Normalise by the sums of squared deviations rather than by
    # ``std(x) * std(y) * n``: pandas' ``Series.std`` defaults to ddof=1
    # (an n-1 denominator), so dividing by n on top of it scaled the
    # result by (n-1)/n and under-reported the correlation.
    return dx.dot(dy) / (dx.dot(dx) * dy.dot(dy)) ** 0.5


# Apply the hand-written Pearson formula to the two columns of interest.
pearson_correlation_manual = pearson_correlation_f(
    x=df["Coffee_Consumption_Per_Capita_KG"],
    y=df["Happiness_Score"],
)
print(f"Pearson correlation coefficient (manual): {pearson_correlation_manual:.2f}")

In [None]:
# Spearman rank correlation coefficient via pandas' built-in Series.corr
spearman_correlation = df["Coffee_Consumption_Per_Capita_KG"].corr(
    other=df["Happiness_Score"],
    method="spearman",
)
print(f"Spearman correlation coefficient: {spearman_correlation:.2f}")

In [None]:
# Spearman correlation "by hand": the Pearson correlation of the ranked data
def spearman_correlation_f(x, y):
    """Return the Spearman rank correlation of ``x`` and ``y``.

    Each input is replaced by its ranks (``.rank()`` with default settings)
    and the ranks are passed to ``pearson_correlation_f``.
    """
    return pearson_correlation_f(x.rank(), y.rank())

# Apply the hand-written Spearman formula to the two columns of interest.
spearman_correlation_manual = spearman_correlation_f(
    x=df["Coffee_Consumption_Per_Capita_KG"],
    y=df["Happiness_Score"],
)
print(f"Spearman correlation coefficient (manual): {spearman_correlation_manual:.2f}")