In [17]:
import pandas as pd
import numpy as np
from scipy.stats import t, pearsonr
import statsmodels.api as sm
from statsmodels.formula.api import ols
from scipy.stats import chisquare, poisson

In [18]:
df = pd.read_csv('./Datasets/Hurricane.csv')
df.head()

Unnamed: 0,Name,Season,Month,Max. sustained winds(mph),Minimum pressure(mbar)
0,Hurricane #3,1853,"August, September",150,924
1,"""1856 Last Island Hurricane""",1856,August,150,934
2,Hurricane #6,1866,"September, October",140,938
3,Hurricane #7,1878,"September, October",140,938
4,Hurricane #2,1880,August,150,931


In [19]:
df.shape

(101, 5)

a. With a 1% level of significance conduct t-test for correlation coefficient between “Max. sustained winds(mph)” and “Minimum pressure (mbar)”.

In [20]:
alpha = 0.01
n = df.shape[0]
r = np.corrcoef(df["Max. sustained winds(mph)"], df["Minimum pressure(mbar)"])[0, 1]
t_value = r / np.sqrt((1 - r**2) / (n - 2))
t_value

-5.497270157344816

In [21]:
degrees = n - 2
p_value_lower = t.cdf(-np.abs(t_value), degrees)
p_value_upper = 1 - t.cdf(t_value, degrees)
p_value = p_value_lower + p_value_upper
p_value

1.0

In [22]:
print(p_value_lower, p_value_upper)

1.5049593099751135e-07 0.999999849504069


In [23]:
p_value < alpha

False

In [24]:
test_result = pearsonr(df["Max. sustained winds(mph)"], df["Minimum pressure(mbar)"])
test_result

PearsonRResult(statistic=-0.4835954956356638, pvalue=3.009918619950333e-07)

In [25]:
test_result.pvalue

3.009918619950333e-07

In [26]:
test_result.confidence_interval()

ConfidenceInterval(low=-0.620398678938358, high=-0.31823362079686485)

In [27]:
# Extract relevant columns
winds = df['Max. sustained winds(mph)']
pressure = df['Minimum pressure(mbar)']

# Calculate Pearson correlation
correlation, p_value = pearsonr(winds, pressure)

# Display the result
print(f"Pearson correlation: {correlation}")
print(f"P-value: {p_value}")

# Test the significance at 1% level
if p_value < 0.01:
    print("Reject the null hypothesis: Significant correlation.")
else:
    print("Fail to reject the null hypothesis: No significant correlation.")


Pearson correlation: -0.4835954956356638
P-value: 3.009918619950333e-07
Reject the null hypothesis: Significant correlation.


In [28]:
df.drop(columns=['Name', 'Month']).corr()['Max. sustained winds(mph)'].sort_values(ascending=False)

Max. sustained winds(mph)    1.000000
Season                       0.023853
Minimum pressure(mbar)      -0.483595
Name: Max. sustained winds(mph), dtype: float64

b. With a 5% level of significance test if the “Max. sustained winds(mph)” of hurricane depends on the month of its occurrence.

In [29]:
model = ols('Q("Max. sustained winds(mph)") ~ C(Month)', data=df).fit()
anova_table = sm.stats.anova_lm(model, typ=2)

In [30]:
anova_table

Unnamed: 0,sum_sq,df,F,PR(>F)
C(Month),575.33551,14.0,0.634822,0.828846
Residual,5567.238748,86.0,,


In [40]:
p_value = anova_table['PR(>F)'][0]
if p_value < 0.05:
    print("Reject the null hypothesis: Winds depend on the month.")
else:
    print("Fail to reject the null hypothesis: Winds do not depend on the month.")

Fail to reject the null hypothesis: Winds do not depend on the month.


c. With a 10% level of significance conduct test if “Max. sustained winds(mph)” follows a Poisson distribution.

In [64]:
winds = df['Max. sustained winds(mph)']

mean_wind = winds.mean()

# Generate observed frequencies (assuming wind speeds are integers)
observed_freq = winds.value_counts().sort_index()

# Generate Poisson probabilities for the observed values
expected_freq = [poisson.pmf(k, mean_wind) for k in observed_freq.index]
total = sum(expected_freq)
expected_freq =  [poisson.pmf(k, mean_wind)*101/total for k in observed_freq.index]
# expected_freq
chi_square_stat, p_value = chisquare(f_obs=observed_freq, f_exp=expected_freq)
print(f"Chi-square statistic: {chi_square_stat}")
print(f"P-value: {p_value}")

# Check significance at 10% level
if p_value < 0.10:
    print("Reject the null hypothesis: Winds do not follow a Poisson distribution.")
else:
    print("Fail to reject the null hypothesis: Winds follow a Poisson distribution.")

Chi-square statistic: 5.649893997595145
P-value: 0.22686449271473874
Fail to reject the null hypothesis: Winds follow a Poisson distribution.
