### HYPOTHESIS TEST

In [1]:
# Goal: predict a country's CO2 emissions from its GDP.
from scipy import stats
import pandas as pd

data = pd.read_csv("dataset/cleaned_data.csv", sep=";")

# Predictor (x) and response (y) of the simple linear regression fitted below.
gdp = data['gdp']
co2 = data['co2']

# NOTE(review): not used anywhere in this cell; kept in case a later cell reads it.
mean = gdp.mean()

# The model fitted below is  co2 = intercept + slope * gdp  (GDP is the predictor,
# CO₂ the response), so the hypotheses are stated about that slope:
# H₀: slope = 0 — GDP has no linear association with the CO₂ emissions of a country.
# H₁: slope ≠ 0 — GDP has a linear association with the CO₂ emissions of a country.
# This is a test of association on observational data; it does not by itself
# establish that one variable causes the other.

# Fit the regression with scipy. The returned p-value is for the two-sided test
# of H₀ (slope = 0), and std_err is the standard error of the estimated slope.
slope, intercept, r_value, p_value, std_err = stats.linregress(gdp, co2)

# Print the results
print(f"Slope: {slope}")
print(f"Intercept: {intercept}")
print(f"R-squared: {r_value**2}")
# A printed p-value of exactly 0.0 means it is too small to represent as a float,
# not that it is literally zero.
print(f"p-value: {p_value}")
print(f"Standard error: {std_err}")

# Hypothesis testing: compare the p-value with the chosen significance level.
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: GDP has a statistically significant linear association with the CO2 emissions of a country.")
else:
    print("Fail to reject the null hypothesis: no statistically significant linear association between GDP and the CO2 emissions of a country was found.")

Slope: 2.458425810414948e-07
Intercept: 4396.44257293806
R-squared: 0.6436615224256959
p-value: 0.0
Standard error: 3.5777245513348517e-09
Reject the null hypothesis: CO2 emissions have a statistically significant effect on the GDP of a country.
