In [1]:
from google.colab import drive

# Attach Google Drive to the Colab runtime; the dataset paths used later in
# this notebook are all rooted under this mount point.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Mounted at /content/drive


In [10]:
import pandas as pd # Import the Pandas library for data manipulation
from scipy.stats import shapiro # Import the Shapiro-Wilk test from SciPy
import os

In [11]:
# Base directory where the yearly "Pro-<year>.csv" files are stored
base_dir = "/content/drive/My Drive/Datasets/Dengue"

# The Shapiro-Wilk test is not defined for fewer than three observations
MIN_SHAPIRO_SAMPLES = 3


def shapiro_on_total(values):
    """Run the Shapiro-Wilk normality test on one column of observations.

    Missing values (None / NaN) are dropped before testing, because passing
    them through to the test yields a NaN statistic and p-value that would
    otherwise be stored as if they were real results.

    Parameters
    ----------
    values : array-like
        The observations to test (e.g. the 'Total' column of one yearly file).

    Returns
    -------
    tuple of (float, float) or None
        ``(statistic, p_value)`` computed on the non-missing observations, or
        ``None`` when fewer than ``MIN_SHAPIRO_SAMPLES`` observations remain.
    """
    observed = pd.Series(values).dropna()
    if len(observed) < MIN_SHAPIRO_SAMPLES:
        return None
    statistic, p = shapiro(observed)
    # Plain floats keep the stored results independent of SciPy's return type
    return float(statistic), float(p)


# Results keyed by year: {year: {'Statistic': ..., 'P-Value': ...}}
shapiro_results = {}

# Loop through each year from 2010 to 2022 (the range end is exclusive)
for year in range(2010, 2023):
    file_name = f"Pro-{year}.csv"
    file_path = os.path.join(base_dir, file_name)

    try:
        # Read the CSV file into a Pandas DataFrame
        df = pd.read_csv(file_path)

        # Nothing to test if the expected column is absent
        if 'Total' not in df.columns:
            print(f"'Total' column not found in {file_name}")
            continue

        total_data = df['Total']

        # Make dropped rows visible so a shrunken sample is never silent
        missing_count = int(total_data.isna().sum())
        if missing_count:
            print(f"Ignoring {missing_count} missing 'Total' value(s) in {file_name}")

        result = shapiro_on_total(total_data)
        if result is None:
            print(
                f"Fewer than {MIN_SHAPIRO_SAMPLES} non-missing 'Total' values "
                f"in {file_name}. Skipping."
            )
            continue

        stat, p_value = result
        shapiro_results[year] = {'Statistic': stat, 'P-Value': p_value}
    except FileNotFoundError:
        print(f"File {file_name} not found. Skipping.")
    except Exception as e:
        # Deliberately best-effort: one bad file must not abort the other years
        print(f"An error occurred while processing {file_name}: {e}")

# Convert the results dictionary to a DataFrame for better visualization.
# Passing `columns` keeps the frame's schema stable even when no year produced
# a result (otherwise the empty frame would have no columns at all).
results_df = pd.DataFrame.from_dict(
    shapiro_results, orient='index', columns=['Statistic', 'P-Value']
)

results_df

Unnamed: 0,Statistic,P-Value
2010,0.642398,0.000301
2011,0.515607,1e-05
2012,0.686332,0.000983
2013,0.569139,4.2e-05
2014,0.518145,1e-05
2015,0.532989,1.6e-05
2016,0.583707,6.2e-05
2017,0.640057,0.000282
2018,0.734274,0.00356
2019,0.669317,0.000621
