In [1]:
import numpy as np # for Mathematical use
import pandas as pd # for working on dataframes
import matplotlib.pyplot as plt # for Graph plotting and visualisations
import seaborn as sns # same as matplotlib
import statsmodels.api as sm
from scipy import stats
from scipy.stats import chi2_contingency, f_oneway, ttest_1samp
from statsmodels.formula.api import ols

In [None]:
# Load the 'Table 1' sheet of the project workbook into `df` and preview
# its first five rows as the cell output.
df = pd.read_excel(
    'Project Data.xlsx',
    sheet_name='Table 1',
)
df.head(5)

In [3]:
# Use the 'Years/Months' column as the row index for better plotting.
# The guard plus non-mutating assignment makes this cell safe to re-run:
# once the column has been moved into the index it is no longer in
# `df.columns`, and an unconditional set_index would raise KeyError.
if 'Years/Months' in df.columns:
    df = df.set_index('Years/Months')


In [None]:
# Select only the first 6 rows (years) of the dataset.
# A later cell adds a 'Total Landings' column to `df`; drop it if present so
# that re-running this cell afterwards does not count the total as data.
df_years = df.head(6).drop(columns='Total Landings', errors='ignore')

# Bar graph of the total per year, without confidence intervals.
# After transposing, every column holds the values of one year. seaborn
# aggregates each column with the mean by default, which would contradict the
# "Total" title and y-label, so the sum is requested explicitly.
plt.figure(figsize=(12, 6))
sns.barplot(data=df_years.transpose(), estimator='sum', errorbar=None)
plt.title('Total Landings Year Wise')
plt.ylabel('Total Yields in Metric Tonnes')
plt.xlabel('Years')
plt.xticks(rotation=45)
plt.legend(title='Years', labels=df_years.index, bbox_to_anchor=(1.05, 1), loc='upper left')

# Show the plot
plt.tight_layout()
plt.show()

# One-Sample t-Test

In [None]:
# Yearly total = row-wise sum over the data columns.
# An existing 'Total Landings' column is excluded from the sum so that
# re-running this cell does not add the previous total back in (which would
# double the result on every re-run).
value_columns = df.columns.drop('Total Landings', errors='ignore')
df['Total Landings'] = df[value_columns].sum(axis=1)

# Work on a copy of the first 6 rows and expose the index as a regular
# 'Year' column so seaborn can reference it by name.
df_years = df.iloc[:6].copy()
df_years['Year'] = df_years.index

# Perform one-sample t-test of the yearly totals against a fixed benchmark.
# NOTE(review): the origin of the benchmark value 209285 is not documented in
# this notebook - confirm its source.
hypothetical_mean = 209285
alpha = 0.05  # significance level used for the decision below
t_stat, p_value = ttest_1samp(df_years['Total Landings'], hypothetical_mean)

# Display t-test results
print(f"T-Statistic: {t_stat:.3f}, P-Value: {p_value:.3f}")
if p_value < alpha:
    print("Reject the null hypothesis: The total landings differ significantly.")
else:
    print("Fail to reject the null hypothesis: No significant difference.")

# Plot bar chart of the yearly totals with the benchmark as a dashed line
plt.figure(figsize=(12, 6))
sns.barplot(x='Year', y='Total Landings', data=df_years, errorbar=None)
plt.axhline(hypothetical_mean, color='red', linestyle='--', label=f"Hypothetical Mean = {hypothetical_mean}")
plt.title('Total Landings Per Year with T-Test Benchmark')
plt.ylabel('Total Landings in Metric Tonnes')
plt.xlabel('Years')
plt.xticks(rotation=45)
plt.legend()
plt.tight_layout()
plt.show()

In [None]:
# Load the 'Table 2' sheet and index it by its 'Years' column for plotting.
# Note: this rebinds `df`, replacing the 'Table 1' frame used by the cells above.
df = pd.read_excel('Project Data.xlsx', sheet_name='Table 2').set_index('Years')
df.head(6)

# Chi-Square Test

In [None]:
# Contingency table for the Chi-Square test: the transpose of `df`, so the
# original columns become the rows and the original index becomes the columns.
observed = df.T

# Perform the Chi-Square test
chi2_stat, p_val, dof, expected = chi2_contingency(observed)

# Keep the row/column labels on the expected frequencies; the raw array
# returned by chi2_contingency carries no labels.
expected_table = pd.DataFrame(expected, index=observed.index, columns=observed.columns)

# Display the results.
# The p-value is printed with 4 significant digits: a fixed two-decimal format
# shows small p-values as "0.00" and rounds e.g. 0.049 up to "0.05".
print(f"Chi-Square Statistic: {chi2_stat:.2f}")
print(f"P-Value: {p_val:.4g}")
print(f"Degrees of Freedom: {dof}")
print("Expected Frequency Table:")
print(expected_table.round(2))

# Graph: Bar chart for visual representation of the data
df.plot(kind='bar', figsize=(10, 6))
plt.title("Trawlnet and Gillnet Counts Over the Years")
plt.xlabel("Years")
plt.ylabel("Counts")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# ANOVA

In [None]:
# Prepare the data for the ANOVA test.
# The 'Trawlnet' and 'Gillnet' columns of `df` are used as the two groups;
# the values within each column are that group's observations.
trawlnet_counts = df['Trawlnet']
gillnet_counts = df['Gillnet']

# Perform the one-way ANOVA test across the two groups
anova_stat, p_val_anova = f_oneway(trawlnet_counts, gillnet_counts)

# Display ANOVA results.
# The p-value is printed with 4 significant digits: a fixed two-decimal format
# shows small p-values as "0.00" and rounds e.g. 0.049 up to "0.05".
print(f"ANOVA Statistic: {anova_stat:.2f}")
print(f"P-Value for ANOVA: {p_val_anova:.4g}")

# Graph: Box plot for ANOVA visualization
plt.figure(figsize=(10, 6))
plt.boxplot([trawlnet_counts, gillnet_counts])
# Label the boxes through xticks rather than boxplot's `tick_labels=` keyword,
# which only exists in Matplotlib >= 3.9. Boxes are drawn at positions 1..N
# by default, so positions 1 and 2 match the two groups in order.
plt.xticks([1, 2], ['Trawlnet', 'Gillnet'])
plt.title("ANOVA Test: Trawlnet vs Gillnet Counts Across Years")
plt.ylabel("Counts")
plt.xlabel("Net Type")
plt.tight_layout()
plt.show()