In [1]:
import pandas as pd
import numpy as np
from scipy.stats import pearsonr, spearmanr, kendalltau, chi2_contingency
from sklearn.feature_selection import chi2
data = pd.read_csv('india_only.csv')
data = data[(data['Year'] >= 2000) & (data['Year'] <= 2014)]
print("Dataset has been read for year ranging from 2000 to 2014")
#Compare crop yield generation across different years and determine if there is a significant trend over time
years = data['Year']
yield_values = data['Value']

Dataset has been read for year ranging from 2000 to 2014


In [2]:
# Pearson Correlation
pearson_corr, pearson_p = pearsonr(years, yield_values)
pearson_significance = "significant" if pearson_p <= 0.05 else "not significant"

print("Pearson Correlation Analysis")
print(f"Correlation Coefficient: {pearson_corr:.4f}")
print(f"P-value: {pearson_p:.4f}")
print(f"Conclusion: The correlation is {pearson_significance} based on a 0.05 significance level.\n")

Pearson Correlation Analysis
Correlation Coefficient: 0.0745
P-value: 0.4188
Conclusion: The correlation is not significant based on a 0.05 significance level.



In [3]:
# Spearman Rank Correlation
spearman_corr, spearman_p = spearmanr(years, yield_values)
spearman_significance = "significant" if spearman_p <= 0.05 else "not significant"

print("Spearman Rank Correlation Analysis")
print(f"Correlation Coefficient: {spearman_corr:.4f}")
print(f"P-value: {spearman_p:.4f}")
print(f"Conclusion: The correlation is {spearman_significance} based on a 0.05 significance level.\n")

Spearman Rank Correlation Analysis
Correlation Coefficient: 0.1218
P-value: 0.1851
Conclusion: The correlation is not significant based on a 0.05 significance level.



In [4]:
# Kendall's Rank Correlation
kendall_corr, kendall_p = kendalltau(years, yield_values)
kendall_significance = "significant" if kendall_p <= 0.05 else "not significant"

print("Kendall's Rank Correlation Analysis")
print(f"Correlation Coefficient: {kendall_corr:.4f}")
print(f"P-value: {kendall_p:.4f}")
print(f"Conclusion: The correlation is {kendall_significance} based on a 0.05 significance level.\n")

Kendall's Rank Correlation Analysis
Correlation Coefficient: 0.0881
P-value: 0.1654
Conclusion: The correlation is not significant based on a 0.05 significance level.



In [5]:
# Chi-Square Test
bins = 5
data['Year_Bins'] = pd.qcut(data['Year'], q=bins, labels=[f"Q{i+1}" for i in range(bins)])
contingency_table = pd.crosstab(data["Value"], data["Year_Bins"])

chi2_stat, chi2_p, dof, expected = chi2_contingency(contingency_table)
chi2_significance = "significant" if chi2_p < 0.05 else "not significant"

print("Chi-Square Test Analysis")
print(f"Chi-Square Statistic: {chi2_stat:.4f}")
print(f"P-value: {chi2_p:.4f}")
print(f"Degrees of Freedom: {dof}")
print(f"Conclusion: The relationship between 'Year' and 'Value' is {chi2_significance} based on a 0.05 significance level.\n")

Chi-Square Test Analysis
Chi-Square Statistic: 475.0000
P-value: 0.4015
Degrees of Freedom: 468
Conclusion: The relationship between 'Year' and 'Value' is not significant based on a 0.05 significance level.

